Add places feed to the repository

This commit is contained in:
Ilya Zverev 2018-05-28 13:23:31 +03:00
parent ff1ac46d36
commit a8e9db4863
3 changed files with 63360 additions and 0 deletions

63303
conflate/places.bin Normal file

File diff suppressed because one or more lines are too long

16
scripts/README.md Normal file
View file

@ -0,0 +1,16 @@
# Scripts
This directory contains scripts (only one at the moment) that prepare data
for the conflator or post-process the results of conflation.
## pack_places.py
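Prepares the `places.bin` file for the geocoder and writes it to the current directory. Requires three JSON files, read from the directory given as the first argument (the current directory by default):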
* places.json
* regions.json
* countries.json
These comprise the "places feed" and can be prepared using
[these scripts](https://github.com/mapsme/geocoding_data). You can
find a link to a ready-made feed in that repository.

41
scripts/pack_places.py Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/env python3
import json
import struct
import os
import sys
def pack_coord(coord):
    """Encode a coordinate value as three little-endian bytes.

    The value is multiplied by 10000 and rounded to an integer, packed as
    a 4-byte little-endian signed integer, and the last (most significant)
    byte is discarded. Negative values therefore keep the three low-order
    bytes of their two's-complement form.
    """
    scaled = round(coord * 10000)
    packed = struct.pack('<l', scaled)
    # '<l' always yields four bytes; keep the three low-order ones.
    return packed[:3]
# Directory holding the three feed files; defaults to the current directory
# when no command-line argument is given.
path = sys.argv[1] if len(sys.argv) >= 2 else '.'

# Load regions and countries, keeping only entries with a non-empty 'iso'
# value. Each list item is (record, numeric id); the *_idx dicts map the
# numeric id to the record's position in the list, which is the index
# written to the output file. Files are read as UTF-8 explicitly so the
# result does not depend on the platform's default encoding.
with open(os.path.join(path, 'regions.json'), 'r', encoding='utf-8') as f:
    regions = [(r, int(rid)) for rid, r in json.load(f).items() if r.get('iso')]
reg_idx = {rid: i for i, (_, rid) in enumerate(regions)}

with open(os.path.join(path, 'countries.json'), 'r', encoding='utf-8') as f:
    countries = [(r, int(rid)) for rid, r in json.load(f).items() if r.get('iso')]
c_idx = {rid: i for i, (_, rid) in enumerate(countries)}

with open(os.path.join(path, 'places.json'), 'r', encoding='utf-8') as f:
    places = json.load(f)

# Output layout:
#   1 byte             number of countries
#   2 bytes each       country ISO code ('2s' pads or truncates to 2 bytes)
#   int16 LE           number of regions
#   per region         1-byte length followed by the ASCII ISO code
#   per place          3-byte lon, 3-byte lat, 1-byte country index,
#                      int16 LE region index (-1 when the region is unknown)
# The context manager guarantees the file is flushed and closed even if
# packing fails part-way through.
with open('places.bin', 'wb') as out:
    out.write(struct.pack('B', len(countries)))
    for c, _ in countries:
        out.write(struct.pack('2s', c['iso'].encode('ascii')))

    out.write(struct.pack('<h', len(regions)))
    for r, _ in regions:
        rname = r['iso'].encode('ascii')
        out.write(struct.pack('B', len(rname)))
        out.write(rname)

    for pl in places.values():
        # Places whose country was filtered out above cannot be indexed.
        if pl['country'] not in c_idx:
            continue
        out.write(pack_coord(pl['lon']))
        out.write(pack_coord(pl['lat']))
        out.write(struct.pack('B', c_idx[pl['country']]))
        out.write(struct.pack('<h', reg_idx.get(pl.get('region'), -1)))