From 2786513ba72435fda3a9a6b6b39d25ddf560339c Mon Sep 17 00:00:00 2001
From: Ilya Zverev <zverik@textual.ru>
Date: Wed, 25 Apr 2018 15:42:22 +0300
Subject: [PATCH] Few fixes

---
 CHANGELOG.md         |  3 +++
 conflate/conflate.py | 12 ++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d10b2f..f082171 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## master branch
 
+* Fixed processing of `''` tag value.
+* More than 3 duplicate points in a single place are processed correctly.
+
 ## 1.3.2
 
 _Released 2018-04-19_
diff --git a/conflate/conflate.py b/conflate/conflate.py
index b516eda..25fb659 100755
--- a/conflate/conflate.py
+++ b/conflate/conflate.py
@@ -78,6 +78,7 @@ class OSMPoint(SourcePoint):
     The id is compound and created from object type and object id."""
     def __init__(self, ptype, pid, version, lat, lon, tags=None, categories=None):
         super().__init__('{}{}'.format(ptype[0], pid), lat, lon, tags)
+        self.tags = {k: v for k, v in self.tags.items() if v is not None and len(v) > 0}
         self.osm_type = ptype
         self.osm_id = pid
         self.version = version
@@ -598,7 +599,7 @@ class OsmConflator:
                                 continue
                             tags[osm_key] = v
                             changed = True
-                        elif osm_key in p.tags and (v == '' or retagging):
+                        elif osm_key in tags and (v == '' or retagging):
                             del tags[osm_key]
                             changed = True
             return changed
@@ -1082,7 +1083,7 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False):
 
     # And then for near-duplicate points with similar tags
     max_distance = profile.get('max_distance', MAX_DISTANCE)
-    uncond_distance = profile.get('duplicate_distance', 0)
+    uncond_distance = profile.get('duplicate_distance', 1)
     diff_tags = [k for k in tags if tags[k] == '---']
     kd = kdtree.create(list(dataset))
     duplicates = set()
@@ -1091,9 +1092,12 @@ def check_dataset_for_duplicates(profile, dataset, print_all=False):
         if d.id in duplicates:
             continue
         group += 1
-        for alt, _ in kd.search_knn(d, 3):  # The first one will be equal to d
+        dups = kd.search_knn(d, 2)  # The first one will be equal to d
+        if len(dups) < 2 or dups[1][0].data.distance(d) > max_distance:
+            continue
+        for alt, _ in kd.search_knn(d, 20):
             dist = alt.data.distance(d)
-            if alt.data.id != d.id and dist < max_distance:
+            if alt.data.id != d.id and dist <= max_distance:
                 tags_differ = 0
                 if dist > uncond_distance:
                     for k in diff_tags: