1.3.2, also another profile
This commit is contained in:
parent
284f952576
commit
5b7d2c40be
4 changed files with 172 additions and 26 deletions
|
@ -2,6 +2,14 @@
|
|||
|
||||
## master branch
|
||||
|
||||
## 1.3.2
|
||||
|
||||
_Released 2018-04-19_
|
||||
|
||||
* Fixed bug in categories building.
|
||||
* Fixed threshold for tags in duplicates check.
|
||||
* Now the script prints "Done" when finished, to better measure time.
|
||||
|
||||
## 1.3.1
|
||||
|
||||
_Released 2018-03-20_
|
||||
|
|
|
@ -247,8 +247,15 @@ class OsmConflator:
|
|||
for t in ('node', 'way', 'relation["type"="multipolygon"]'):
|
||||
query += t + tag_str + bbox_str + ';'
|
||||
if self.ref is not None:
|
||||
for t in ('node', 'way', 'relation'):
|
||||
query += t + '["' + self.ref + '"];'
|
||||
if not self.profile.get('bounded_update', False):
|
||||
for t in ('node', 'way', 'relation'):
|
||||
query += t + '["' + self.ref + '"];'
|
||||
else:
|
||||
for bbox in bboxes:
|
||||
bbox_str = '' if bbox is None else '(' + ','.join(
|
||||
[str(x) for x in bbox]) + ')'
|
||||
for t in ('node', 'way', 'relation'):
|
||||
query += t + '["' + self.ref + '"]' + bbox_str + ';'
|
||||
query += '); out meta qt center;'
|
||||
return query
|
||||
|
||||
|
@ -368,30 +375,28 @@ class OsmConflator:
|
|||
def match_query(tags, query):
    """Check whether a tag dictionary satisfies every condition of a query.

    Each item of ``query`` is a sequence whose first element is a tag key:

    * ``(key,)`` -- the key must be present in ``tags``;
    * ``(key, None)`` or ``(key, '')`` -- the key must be absent;
    * ``(key, v1, v2, ...)`` -- the key must be present and its value must
      match at least one alternative. An alternative starting with ``~`` is
      a regular expression applied with ``re.search``; one starting with
      ``!`` is a case-insensitive substring test; anything else is compared
      for equality.

    Returns True only when all conditions hold (an empty query matches).
    """
    for tag in query:
        key = tag[0]
        if len(tag) == 1:
            # Key-only condition: the key has to exist.
            if key not in tags:
                return False
        elif tag[1] is None or tag[1] == '':
            # Empty value: the key must not be set at all.
            if key in tags:
                return False
        else:
            value = tags.get(key, None)
            if value is None:
                return False
            found = False
            for t2 in tag[1:]:
                if t2.startswith('~'):
                    # A failing regex rejects the object right away, as
                    # before; a successful one now counts as a match
                    # (previously `found` was never set on this path, so
                    # regex-only conditions could not succeed).
                    if not re.search(t2[1:], value):
                        return False
                    found = True
                elif t2.startswith('!'):
                    if t2[1:].lower() in value.lower():
                        found = True
                elif t2 == value:
                    found = True
                if found:
                    break
            if not found:
                return False
    return True
|
||||
|
||||
def tags_to_query(tags):
|
||||
|
@ -1094,7 +1099,7 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False):
|
|||
for k in diff_tags:
|
||||
if alt.data.tags.get(k) != d.tags.get(k):
|
||||
tags_differ += 1
|
||||
if tags_differ <= max(1, len(diff_tags) / 3):
|
||||
if tags_differ <= len(diff_tags) / 3:
|
||||
duplicates.add(alt.data.id)
|
||||
d.exclusive_group = group
|
||||
alt.data.exclusive_group = group
|
||||
|
@ -1255,6 +1260,8 @@ def run(profile=None):
|
|||
fc = {'type': 'FeatureCollection', 'features': conflator.changes}
|
||||
json.dump(fc, options.changes, ensure_ascii=False, sort_keys=True, indent=1)
|
||||
|
||||
logging.info('Done')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
|
|
@ -1 +1 @@
|
|||
__version__ = '1.3.1'
|
||||
__version__ = '1.3.2'
|
||||
|
|
131
profiles/moscow_addr.py
Normal file
131
profiles/moscow_addr.py
Normal file
|
@ -0,0 +1,131 @@
|
|||
import json
|
||||
import logging
|
||||
|
||||
# Profile settings. Their exact meaning is defined by the script that loads
# this profile, which is not part of this file.
source = 'dit.mos.ru'
no_dataset_id = True
query = [[('addr:housenumber',)], [('building',)]]
max_distance = 50
max_request_boxes = 2
master_tags = ('addr:housenumber', 'addr:street')

# Numeric code (as a string) -> name of a Moscow administrative okrug, as it
# is compared against the ADM_AREA field of the dataset.
ADMS = {
    '1': 'Северо-Западный административный округ',
    '2': 'Северный административный округ',
    '3': 'Северо-Восточный административный округ',
    '4': 'Западный административный округ',
    '5': 'Центральный административный округ',
    '6': 'Восточный административный округ',
    '7': 'Юго-Западный административный округ',
    '8': 'Южный административный округ',
    '9': 'Юго-Восточный административный округ',
    '10': 'Зеленоградский административный округ',
    '11': 'Троицкий административный округ',
    '12': 'Новомосковский административный округ',
}

# Defaults: okrug '2', simple house numbers only.
COMPLEX = False
ADM = ADMS['2']

# `param` is not defined in this file -- presumably injected by the loader.
# Format handled here: an optional leading 'c' switches to complex house
# numbers, the remainder selects the okrug code (unknown codes are ignored).
if param:
    COMPLEX = param[0] == 'c'
    if COMPLEX:
        param = param[1:]
    ADM = ADMS.get(param, ADM)
|
||||
|
||||
|
||||
def dataset(fileobj):
    """Build the list of address points for the selected okrug.

    ``fileobj`` is a ZIP archive (path or file-like object accepted by
    ``zipfile.ZipFile``). Every member is decoded as cp1251 and parsed as a
    JSON list of records. A record is kept when:

    * its ``ADM_AREA`` equals the module-level ``ADM``;
    * it has a street (``P7``) and a house value (``L1_VALUE``);
    * a centre can be derived from its ``geoData``;
    * all of its number-part types are recognised;
    * its "complex" flag equals the module-level ``COMPLEX``. A number is
      complex when the house type is not 'дом' or a 'строение'/'сооружение'
      part is present.

    Returns a list of ``SourcePoint`` objects carrying ``addr:street`` and
    ``addr:housenumber`` tags. ``SourcePoint`` is not defined in this file;
    it is presumably provided by the script that loads the profile.

    Records that raise while being processed are logged and skipped.
    """
    def find_center(geodata):
        """Return a two-item coordinate for a geometry dict, or None.

        Uses the first entry of ``center`` when present; otherwise averages
        the vertices of a Polygon's first ring, a LineString's points, or
        takes a Point as is. Other geometry types yield None.
        """
        if not geodata:
            return None
        if 'center' in geodata:
            return geodata['center'][0]
        if 'coordinates' not in geodata:
            return None
        typ = geodata['type']
        coords = geodata['coordinates']
        if typ == 'Polygon':
            points = coords[0]
        elif typ == 'LineString':
            points = coords
        elif typ == 'Point':
            points = [coords]
        else:
            points = []
        cnt = len(points)
        if cnt == 0:
            return None
        return [sum(p[0] for p in points) / cnt,
                sum(p[1] for p in points) / cnt]

    # Imported locally, as in the original profile.
    import zipfile

    data = []
    no_geodata = 0
    no_addr = 0
    count = 0
    # The context manager releases the archive handle even on errors.
    with zipfile.ZipFile(fileobj) as zf:
        for zname in zf.namelist():
            records = json.loads(zf.read(zname).decode('cp1251'))
            for el in records:
                # Reset so the error message never shows a stale id.
                gid = None
                try:
                    gid = el['global_id']
                    if el['ADM_AREA'] != ADM:
                        continue
                    count += 1

                    lonlat = find_center(el.get('geoData'))
                    if not lonlat:
                        no_geodata += 1

                    street = el.get('P7')
                    house = el.get('L1_VALUE')
                    if not street or not house:
                        no_addr += 1
                        continue
                    if not lonlat:
                        continue

                    htype = el.get('L1_TYPE')
                    corpus = el.get('L2_VALUE')
                    ctype = el.get('L2_TYPE')
                    stroenie = el.get('L3_VALUE')
                    stype = el.get('L3_TYPE')

                    is_complex = False
                    housenumber = house.replace(' ', '')
                    if htype != 'дом':
                        is_complex = True
                        if htype in ('владение', 'домовладение'):
                            housenumber = 'вл' + housenumber
                        else:
                            logging.warning(
                                'Unknown house number type: %s', htype)
                            continue
                    if corpus:
                        if ctype == 'корпус':
                            housenumber += ' к{}'.format(corpus)
                        else:
                            logging.warning('Unknown corpus type: %s', ctype)
                            continue
                    if stroenie:
                        is_complex = True
                        if stype in ('строение', 'сооружение'):
                            housenumber += ' с{}'.format(stroenie)
                        else:
                            logging.warning(
                                'Unknown stroenie type: %s', stype)
                            continue
                    if is_complex != COMPLEX:
                        continue

                    tags = {
                        'addr:street': street,
                        'addr:housenumber': housenumber,
                    }
                    # find_center yields [x, y]; SourcePoint receives them
                    # swapped -- presumably (id, lat, lon, tags).
                    data.append(SourcePoint(gid, lonlat[1], lonlat[0], tags))
                except Exception as e:
                    logging.warning(
                        'PROFILE: Failed to get attributes for address %s: %s',
                        gid, str(e))
                    logging.warning(json.dumps(el, ensure_ascii=False))

    # count > 0 whenever either counter is non-zero, so division is safe.
    if no_addr + no_geodata > 0:
        logging.warning('%.2f%% of data have no centers, and %.2f%% have no streets or house numbers',
                        100*no_geodata/count, 100*no_addr/count)
    return data
|
Loading…
Add table
Reference in a new issue