From 2786513ba72435fda3a9a6b6b39d25ddf560339c Mon Sep 17 00:00:00 2001 From: Ilya Zverev Date: Wed, 25 Apr 2018 15:42:22 +0300 Subject: [PATCH] Few fixes --- CHANGELOG.md | 3 +++ conflate/conflate.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d10b2f..f082171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## master branch +* Fixed processing of `''` tag value. +* More than 3 duplicate points in a single place are processed correctly. + ## 1.3.2 _Released 2018-04-19_ diff --git a/conflate/conflate.py b/conflate/conflate.py index b516eda..25fb659 100755 --- a/conflate/conflate.py +++ b/conflate/conflate.py @@ -78,6 +78,7 @@ class OSMPoint(SourcePoint): The id is compound and created from object type and object id.""" def __init__(self, ptype, pid, version, lat, lon, tags=None, categories=None): super().__init__('{}{}'.format(ptype[0], pid), lat, lon, tags) + self.tags = {k: v for k, v in self.tags.items() if v is not None and len(v) > 0} self.osm_type = ptype self.osm_id = pid self.version = version @@ -598,7 +599,7 @@ class OsmConflator: continue tags[osm_key] = v changed = True - elif osm_key in p.tags and (v == '' or retagging): + elif osm_key in tags and (v == '' or retagging): del tags[osm_key] changed = True return changed @@ -1082,7 +1083,7 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False): # And then for near-duplicate points with similar tags max_distance = profile.get('max_distance', MAX_DISTANCE) - uncond_distance = profile.get('duplicate_distance', 0) + uncond_distance = profile.get('duplicate_distance', 1) diff_tags = [k for k in tags if tags[k] == '---'] kd = kdtree.create(list(dataset)) duplicates = set() @@ -1091,9 +1092,12 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False): if d.id in duplicates: continue group += 1 - for alt, _ in kd.search_knn(d, 3): # The first one will be equal to d + dups = kd.search_knn(d, 2) # The 
first one will be equal to d + if len(dups) < 2 or dups[1][0].data.distance(d) > max_distance: + continue + for alt, _ in kd.search_knn(d, 20): dist = alt.data.distance(d) - if alt.data.id != d.id and dist < max_distance: + if alt.data.id != d.id and dist <= max_distance: tags_differ = 0 if dist > uncond_distance: for k in diff_tags: