1.3.2, also another profile

This commit is contained in:
Ilya Zverev 2018-04-19 19:32:33 +03:00
parent 284f952576
commit 5b7d2c40be
4 changed files with 172 additions and 26 deletions

View file

@ -2,6 +2,14 @@
## master branch
## 1.3.2
_Released 2018-04-19_
* Fixed bug in categories building.
* Fixed threshold for tags in duplicates check.
* Now the script prints "Done" when finished, to better measure time.
## 1.3.1
_Released 2018-03-20_

View file

@ -247,8 +247,15 @@ class OsmConflator:
for t in ('node', 'way', 'relation["type"="multipolygon"]'):
query += t + tag_str + bbox_str + ';'
if self.ref is not None:
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"];'
if not self.profile.get('bounded_update', False):
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"];'
else:
for bbox in bboxes:
bbox_str = '' if bbox is None else '(' + ','.join(
[str(x) for x in bbox]) + ')'
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"]' + bbox_str + ';'
query += '); out meta qt center;'
return query
@ -368,30 +375,28 @@ class OsmConflator:
def match_query(tags, query):
for tag in query:
if len(tag) == 1:
if tag[0] in tags:
return tag[0] in tags
else:
value = tags.get(tag[0], None)
if tag[1] is None or tag[1] == '':
return value is None
if value is None:
return False
elif tag[1] is None or tag[1] == '':
if tag[0] not in tags:
return False
else:
value = tags.get(tag[0], None)
if value is None:
return False
found = False
for t2 in tag[1:]:
if t2[0] == '~':
m = re.search(t2[1:], value)
if not m:
return False
elif t2[0] == '!':
if t2[1:].lower() in value.lower():
found = True
elif t2 == value:
found = False
for t2 in tag[1:]:
if t2[0] == '~':
m = re.search(t2[1:], value)
if not m:
return False
elif t2[0] == '!':
if t2[1:].lower() in value.lower():
found = True
if found:
break
if not found:
return False
elif t2 == value:
found = True
if found:
break
if not found:
return False
return True
def tags_to_query(tags):
@ -1094,7 +1099,7 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False):
for k in diff_tags:
if alt.data.tags.get(k) != d.tags.get(k):
tags_differ += 1
if tags_differ <= max(1, len(diff_tags) / 3):
if tags_differ <= len(diff_tags) / 3:
duplicates.add(alt.data.id)
d.exclusive_group = group
alt.data.exclusive_group = group
@ -1255,6 +1260,8 @@ def run(profile=None):
fc = {'type': 'FeatureCollection', 'features': conflator.changes}
json.dump(fc, options.changes, ensure_ascii=False, sort_keys=True, indent=1)
logging.info('Done')
if __name__ == '__main__':
run()

View file

@ -1 +1 @@
__version__ = '1.3.1'
__version__ = '1.3.2'

131
profiles/moscow_addr.py Normal file
View file

@ -0,0 +1,131 @@
import json
import logging
# --- Profile settings for conflating dit.mos.ru building addresses ---
source = 'dit.mos.ru'
no_dataset_id = True
query = [[('addr:housenumber',)], [('building',)]]
max_distance = 50
max_request_boxes = 2
master_tags = ('addr:housenumber', 'addr:street')

# When True, only multi-part ("complex") addresses are kept.
COMPLEX = False

# Moscow administrative okrugs, keyed by the numeric profile parameter.
ADMS = {
    '1': 'Северо-Западный административный округ',
    '2': 'Северный административный округ',
    '3': 'Северо-Восточный административный округ',
    '4': 'Западный административный округ',
    '5': 'Центральный административный округ',
    '6': 'Восточный административный округ',
    '7': 'Юго-Западный административный округ',
    '8': 'Южный административный округ',
    '9': 'Юго-Восточный административный округ',
    '10': 'Зеленоградский административный округ',
    '11': 'Троицкий административный округ',
    '12': 'Новомосковский административный округ',
}
# Default okrug when no parameter is given.
ADM = ADMS['2']

# "param" is supplied by the conflator framework: an optional leading
# 'c' switches on COMPLEX mode, the remainder selects the okrug.
if param:
    if param.startswith('c'):
        COMPLEX = True
        param = param[1:]
    ADM = ADMS.get(param, ADM)
def dataset(fileobj):
    """Parse a dit.mos.ru zip archive of cp1251-encoded JSON address lists.

    Each member of the zip is a JSON array of address records. Records are
    filtered to the selected administrative okrug (module-level ADM) and to
    either simple or multi-part addresses depending on COMPLEX.

    :param fileobj: a file-like object with the downloaded zip archive
    :return: list of SourcePoint(global_id, lat, lon, tags)
    """
    def find_center(geodata):
        # Return [lon, lat] for a GeoJSON-like geometry, or None when the
        # geometry is absent or of an unsupported type.
        if not geodata:
            return None
        if 'center' in geodata:
            return geodata['center'][0]
        if 'coordinates' in geodata:
            typ = geodata['type']
            lonlat = [0, 0]
            cnt = 0
            if typ == 'Polygon':
                # Average the outer ring's vertices as a cheap centroid.
                for p in geodata['coordinates'][0]:
                    lonlat[0] += p[0]
                    lonlat[1] += p[1]
                    cnt += 1
            elif typ == 'LineString':
                for p in geodata['coordinates']:
                    lonlat[0] += p[0]
                    lonlat[1] += p[1]
                    cnt += 1
            elif typ == 'Point':
                p = geodata['coordinates']
                lonlat[0] += p[0]
                lonlat[1] += p[1]
                cnt += 1
            if cnt > 0:
                return [lonlat[0] / cnt, lonlat[1] / cnt]
        return None

    import zipfile
    zf = zipfile.ZipFile(fileobj)
    data = []
    no_geodata = 0
    no_addr = 0
    count = 0
    for zname in zf.namelist():
        # Renamed from "source" to avoid shadowing the module-level
        # profile constant of the same name.
        records = json.loads(zf.read(zname).decode('cp1251'))
        for el in records:
            gid = el['global_id']
            # Best-effort per record: a malformed entry is logged and
            # skipped rather than aborting the whole import.
            try:
                adm_area = el['ADM_AREA']
                if adm_area != ADM:
                    continue
                count += 1
                lonlat = find_center(el.get('geoData'))
                if not lonlat:
                    no_geodata += 1
                street = el.get('P7')
                house = el.get('L1_VALUE')
                htype = el.get('L1_TYPE')
                corpus = el.get('L2_VALUE')
                ctype = el.get('L2_TYPE')
                stroenie = el.get('L3_VALUE')
                stype = el.get('L3_TYPE')
                if not street or not house:
                    no_addr += 1
                    continue
                if not lonlat:
                    continue
                is_complex = False
                housenumber = house.replace(' ', '')
                if htype != 'дом':
                    is_complex = True
                    if htype in ('владение', 'домовладение'):
                        housenumber = 'вл' + housenumber
                    else:
                        # logging.warn is a deprecated alias; use
                        # logging.warning like the rest of this profile.
                        logging.warning('Unknown house number type: %s', htype)
                        continue
                if corpus:
                    if ctype == 'корпус':
                        housenumber += ' к{}'.format(corpus)
                    else:
                        logging.warning('Unknown corpus type: %s', ctype)
                        continue
                if stroenie:
                    is_complex = True
                    if stype == 'строение' or stype == 'сооружение':
                        housenumber += ' с{}'.format(stroenie)
                    else:
                        logging.warning('Unknown stroenie type: %s', stype)
                        continue
                if is_complex != COMPLEX:
                    continue
                tags = {
                    'addr:street': street,
                    'addr:housenumber': housenumber,
                }
                data.append(SourcePoint(gid, lonlat[1], lonlat[0], tags))
            except Exception as e:
                logging.warning('PROFILE: Failed to get attributes for address %s: %s', gid, str(e))
                logging.warning(json.dumps(el, ensure_ascii=False))
    if no_addr + no_geodata > 0:
        # count > 0 is guaranteed here: both counters only grow after count += 1.
        logging.warning('%.2f%% of data have no centers, and %.2f%% have no streets or house numbers',
                        100 * no_geodata / count, 100 * no_addr / count)
    return data