name-suggestion-index/scripts/collect_all.js
Bryan Housel 1c4fc70e3b WIP Modernize
- switch to type: module
- replace all CJS require/module.exports with ES6 import/expor
2021-06-22 00:04:52 -04:00

91 lines
2.7 KiB
JavaScript

#!/usr/bin/env node
// This script will process a planet file and collect frequently occurring tags that we care about.
// It produces one file per collected tag in `dist/collected/`, containing all the top values and tags:
// `names_all.json`, `brands_all.json`, `operators_all.json`, and `networks_all.json`.
//
// `names_all.json` contains a dictionary object in the format:
// "key/value|name": count
// "amenity/cafe|Starbucks": 159
//
// Please see README.md for more info
import colors from 'colors/safe.js';
import fs from 'node:fs';
import osmium from 'osmium';
import shell from 'shelljs';
import stringify from '@aitodotai/json-stringify-pretty-compact';
import { sortObject } from '../lib/sort_object.js';
// The planet file path is the single required argument (argv = [node, script, <planet.osm>]).
// Without it, print a short usage hint framed by blank lines and exit with a failure code.
if (!(process.argv.length >= 3)) {
  for (const line of ['', 'Usage: node scripts/collect_all.js <planet.osm>', '']) {
    console.log(line);
  }
  process.exit(1);
}
// OSM keys that are scanned for each kind of tag.
// ('public_transport' is currently disabled for operators and networks.)
const POIKEYS = [
  'amenity',
  'shop',
  'leisure',
  'tourism',
  'office',
  'craft',
  'healthcare',
];
const OPERATORKEYS = [
  'amenity',
  'healthcare',
  'emergency',
  'power',
  'route',
];
const NETWORKKEYS = [
  'amenity',
  'power',
  'route',
];

// Run the collections in order. Each entry is [tag, keys to scan, threshold];
// a combination is kept only when it occurs more than `threshold` times.
for (const [tag, fromKeys, threshold] of [
  ['name', POIKEYS, 50],
  ['brand', POIKEYS, 50],
  ['operator', OPERATORKEYS, 10],
  ['network', NETWORKKEYS, 10],
]) {
  collect(tag, fromKeys, threshold);
}
/**
 * Collects the frequently used values of `tag` from the planet file given as
 * `process.argv[2]` and writes them to `dist/collected/${tag}s_all.json`.
 *
 * The output is a dictionary of `"key/value|tagValue": count` entries, e.g.
 * `"amenity/cafe|Starbucks": 159`, passed through `sortObject` before being
 * serialized.
 *
 * @param {string} tag - Tag whose values are collected ('name', 'brand', 'operator', 'network').
 * @param {string[]} fromKeys - OSM keys to scan; the planet file is read once per key.
 * @param {number} threshold - An entry is kept only when its count is strictly greater than this.
 */
function collect(tag, fromKeys, threshold) {
  const what = `${tag}s`;  // names, brands, operators, networks
  const dir = 'dist/collected';
  const file = `${dir}/${what}_all.json`;

  const START = '🏗 ' + colors.yellow(`Collecting ${what} from OSM planet...`);
  const END = '👍 ' + colors.green(`${what} collected`);

  // 'ncn','rcn','lcn', etc.. these are special and not actual networks - ignore them.
  // Defined once here rather than inside the per-entity callback.
  const specialNetwork = /^[inrl][chw]n$/;

  console.log('');
  console.log(START);
  console.time(END);

  // Make sure the output directory exists before the scan starts, so a missing
  // directory cannot make the final write fail after all the work is done.
  fs.mkdirSync(dir, { recursive: true });

  // Start clean
  shell.rm('-f', file);
  const all = {};

  // process one key at a time to reduce memory footprint
  for (const k of fromKeys) {
    // count
    console.log(` collecting ${what} from ${k}=*`);
    const counted = {};

    // Tallies one entity under "key/value|tagValue" when it carries both `tag` and `k`.
    const countEntity = (entity) => {
      const n = entity.tags(tag);
      if (!n) return;
      if (tag === 'network' && specialNetwork.test(n)) return;

      const v = entity.tags(k);
      if (!v) return;

      const kvn = `${k}/${v}|${n}`;
      counted[kvn] = (counted[kvn] || 0) + 1;
    };

    const handler = new osmium.Handler();
    // presumably skips nodes without any tags - confirm against the osmium docs
    handler.options({ tagged_nodes_only: true });
    handler.on('node', countEntity);
    handler.on('way', countEntity);
    handler.on('relation', countEntity);

    const reader = new osmium.Reader(process.argv[2]);
    osmium.apply(reader, handler);

    // filter
    console.log(` filtering ${k}`);
    for (const kvn in counted) {
      if (counted[kvn] > threshold) {
        all[kvn] = counted[kvn];  // keep
      }
    }
  }

  fs.writeFileSync(file, stringify(sortObject(all)) + '\n');
  console.timeEnd(END);
}