From f4c112b59fcee233ba33b0308c2db9d6e49f3a9f Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Thu, 31 Mar 2016 09:51:32 +0300 Subject: [PATCH 1/6] strings::NormalizeDigits for full-width unicode numbers. --- base/base_tests/string_utils_test.cpp | 13 +++++++++++++ base/string_utils.cpp | 15 +++++++++++++++ base/string_utils.hpp | 3 +++ 3 files changed, 31 insertions(+) diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp index 55b256bacf..05c269d29c 100644 --- a/base/base_tests/string_utils_test.cpp +++ b/base/base_tests/string_utils_test.cpp @@ -612,3 +612,16 @@ UNIT_TEST(EditDistance) testUniStringEditDistance("ll", "l1", 1); testUniStringEditDistance("\u0132ij", "\u0133IJ", 3); } + +UNIT_TEST(NormalizeDigits) +{ + auto const nd = [](string str) -> string + { + strings::NormalizeDigits(str); + return str; + }; + TEST_EQUAL(nd(""), "", ()); + TEST_EQUAL(nd("z12345//"), "z12345//", ()); + TEST_EQUAL(nd("a0192 "), "a0192 ", ()); + TEST_EQUAL(nd("3456789"), "3456789", ()); +} diff --git a/base/string_utils.cpp b/base/string_utils.cpp index e9f2aa1d39..6e643d18f2 100644 --- a/base/string_utils.cpp +++ b/base/string_utils.cpp @@ -113,6 +113,21 @@ UniString Normalize(UniString const & s) return result; } +void NormalizeDigits(string & utf8) +{ + for (size_t i = 0; i + 2 < utf8.size(); ++i) + { + if (utf8[i] == '\xEF' && utf8[i + 1] == '\xBC') + { + char const n = utf8[i + 2]; + if (n < '\x90' || n > '\x99') + continue; + utf8[i] = n - 0x90 + '0'; + utf8.erase(i + 1, 2); + } + } +} + namespace { char ascii_to_lower(char in) diff --git a/base/string_utils.hpp b/base/string_utils.hpp index 4d006a0d08..a7a6a4290d 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -43,6 +43,9 @@ UniString MakeLowerCase(UniString const & s); void NormalizeInplace(UniString & s); UniString Normalize(UniString const & s); +/// Replaces "full width" unicode digits with ascii ones. +void NormalizeDigits(string & utf8); + /// Counts number of start symbols in string s (that is not lower and not normalized) that maches /// to lower and normalized string low_s. If s doen't starts with low_s then returns 0; otherwise /// returns number of start symbols in s that equivalent to lowStr From dd8be8cc6e60aa486f8250037f985ee693dd5387 Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Thu, 31 Mar 2016 10:25:35 +0300 Subject: [PATCH 2/6] Correct building:levels parsing in generator. --- .../generator_tests/metadata_parser_test.cpp | 43 ------------------- generator/generator_tests/osm2meta_test.cpp | 15 +++++++ generator/osm2meta.cpp | 19 +++++--- generator/osm2meta.hpp | 2 +- 4 files changed, 30 insertions(+), 49 deletions(-) diff --git a/generator/generator_tests/metadata_parser_test.cpp b/generator/generator_tests/metadata_parser_test.cpp index fbe11bfe07..8603f192fd 100644 --- a/generator/generator_tests/metadata_parser_test.cpp +++ b/generator/generator_tests/metadata_parser_test.cpp @@ -138,49 +138,6 @@ UNIT_TEST(Metadata_ValidateAndFormat_height) TEST_EQUAL(md.Get(Metadata::FMD_HEIGHT), "6", ()); } -UNIT_TEST(Metadata_ValidateAndFormat_building_levels) -{ - FeatureParams params; - MetadataTagProcessor p(params); - Metadata & md = params.GetMetadata(); - - p("building:levels", "0"); - TEST(md.Empty(), ()); - - p("building:levels", "0,0000"); - TEST(md.Empty(), ()); - - p("building:levels", "0.0"); - TEST(md.Empty(), ()); - - p("building:levels", "1"); - TEST_EQUAL(md.Get(Metadata::FMD_BUILDING_LEVELS), "1", ()); - md.Drop(Metadata::FMD_BUILDING_LEVELS); - - p("building:levels", "3.2"); - TEST_EQUAL(md.Get(Metadata::FMD_BUILDING_LEVELS), "3.2", ()); - md.Drop(Metadata::FMD_BUILDING_LEVELS); - - p("building:levels", "1.0"); - TEST_EQUAL(md.Get(Metadata::FMD_BUILDING_LEVELS), "1", ()); - md.Drop(Metadata::FMD_BUILDING_LEVELS); - - - p("building:levels", "1.0"); - p("height", "4.0"); - TEST_EQUAL(md.Get(Metadata::FMD_BUILDING_LEVELS), "1", ()); - md.Drop(Metadata::FMD_BUILDING_LEVELS); - - p("height", "4.0"); - p("building:levels", "1"); - TEST_EQUAL(md.Get(Metadata::FMD_BUILDING_LEVELS), "1", ()); - md.Drop(Metadata::FMD_BUILDING_LEVELS); - md.Drop(Metadata::FMD_HEIGHT); - - p("building:levels", "Level 1"); - TEST(md.Empty(), ()); -} - UNIT_TEST(Metadata_ValidateAndFormat_wikipedia) { char const * kWikiKey = "wikipedia"; diff --git a/generator/generator_tests/osm2meta_test.cpp b/generator/generator_tests/osm2meta_test.cpp index 57706624de..af4a7c6a56 100644 --- a/generator/generator_tests/osm2meta_test.cpp +++ b/generator/generator_tests/osm2meta_test.cpp @@ -40,3 +40,18 @@ UNIT_TEST(ValidateAndFormat_ele) TEST_EQUAL(tagProc.ValidateAndFormat_ele("11'"), "3.35", ()); TEST_EQUAL(tagProc.ValidateAndFormat_ele("11'4\""), "3.45", ()); } + +UNIT_TEST(ValidateAndFormat_building_levels) +{ + FeatureParams params; + MetadataTagProcessorImpl tp(params); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("4"), "4", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("4floors"), "4", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("between 1 and 4"), "", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("0"), "0", ("OSM has many zero-level buildings.")); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("0.0"), "0", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels(""), "", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("Level 1"), "", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("2.51"), "2.5", ()); + TEST_EQUAL(tp.ValidateAndFormat_building_levels("250"), "", ("Too many levels.")); +} diff --git a/generator/osm2meta.cpp b/generator/osm2meta.cpp index 0f4a0ea73d..7455729b58 100644 --- a/generator/osm2meta.cpp +++ b/generator/osm2meta.cpp @@ -9,6 +9,8 @@ #include "std/algorithm.hpp" #include "std/cctype.hpp" +#include "std/cmath.hpp" +#include "std/cstdlib.hpp" #include "std/unordered_set.hpp" namespace @@ -158,12 +160,19 @@ string MetadataTagProcessorImpl::ValidateAndFormat_height(string const & v) cons return MeasurementUtils::OSMDistanceToMetersString(v, false /*supportZeroAndNegativeValues*/, 1); } -string MetadataTagProcessorImpl::ValidateAndFormat_building_levels(string const & v) const +string MetadataTagProcessorImpl::ValidateAndFormat_building_levels(string v) const { - double d; - if (!strings::to_double(v, d) || d == 0) - return {}; - return strings::to_string_dac(d, 1); + // https://en.wikipedia.org/wiki/List_of_tallest_buildings_in_the_world + auto constexpr kMaxBuildingLevelsInTheWorld = 167; + // Some mappers use full width unicode digits. We can handle that. + strings::NormalizeDigits(v); + char * stop; + char const * s = v.c_str(); + double const levels = strtod(s, &stop); + if (s != stop && isfinite(levels) && levels >= 0 && levels <= kMaxBuildingLevelsInTheWorld) + return strings::to_string_dac(levels, 1); + + return {}; } string MetadataTagProcessorImpl::ValidateAndFormat_denomination(string const & v) const diff --git a/generator/osm2meta.hpp b/generator/osm2meta.hpp index 2e9f0969e7..2a334fe6e0 100644 --- a/generator/osm2meta.hpp +++ b/generator/osm2meta.hpp @@ -29,7 +29,7 @@ struct MetadataTagProcessorImpl string ValidateAndFormat_flats(string const & v) const; string ValidateAndFormat_internet(string v) const; string ValidateAndFormat_height(string const & v) const; - string ValidateAndFormat_building_levels(string const & v) const; + string ValidateAndFormat_building_levels(string v) const; string ValidateAndFormat_denomination(string const & v) const; string ValidateAndFormat_wikipedia(string v) const; From f5d1ff127fb68117a3f744ed732ecceaf416485b Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Thu, 31 Mar 2016 10:26:04 +0300 Subject: [PATCH 3/6] Base validation for building:levels on a client side. --- indexer/editable_map_object.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/indexer/editable_map_object.cpp b/indexer/editable_map_object.cpp index b8b12d7e21..fa7bb218f2 100644 --- a/indexer/editable_map_object.cpp +++ b/indexer/editable_map_object.cpp @@ -161,7 +161,10 @@ void EditableMapObject::SetFlats(string const & flats) void EditableMapObject::SetBuildingLevels(string const & buildingLevels) { - m_metadata.Set(feature::Metadata::FMD_BUILDING_LEVELS, buildingLevels); + auto constexpr kMaximumLevelsEditableByUsers = 50; + uint64_t levels; + if (strings::to_uint64(buildingLevels, levels) && levels <= kMaximumLevelsEditableByUsers) + m_metadata.Set(feature::Metadata::FMD_BUILDING_LEVELS, buildingLevels); } string const & EditableMapObject::GetStreet() const { return m_street; } From c535bc4ab3d5ff852ad6a14f79f5251862a0539f Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Thu, 31 Mar 2016 10:33:40 +0300 Subject: [PATCH 4/6] [editor] Correctly merge editable fields for building + any other type. --- data/editor.config | 11 +++-------- editor/editor_config.cpp | 19 ++++++++++++++++--- editor/editor_config.hpp | 3 ++- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/data/editor.config b/data/editor.config index 893912ea8f..997f09c2ca 100644 --- a/data/editor.config +++ b/data/editor.config @@ -91,8 +91,8 @@ - - + + @@ -124,6 +124,7 @@ + @@ -732,12 +733,6 @@ - - - - - - diff --git a/editor/editor_config.cpp b/editor/editor_config.cpp index eaa70d2faf..9920464136 100644 --- a/editor/editor_config.cpp +++ b/editor/editor_config.cpp @@ -37,7 +37,7 @@ static unordered_map const kNamesToFMD= { {"height", feature::Metadata::FMD_HEIGHT}, // {"", feature::Metadata::FMD_MIN_HEIGHT}, {"denomination", feature::Metadata::FMD_DENOMINATION}, - {"building_levels", feature::Metadata::FMD_BUILDING_LEVELS} + {"building:levels", feature::Metadata::FMD_BUILDING_LEVELS} // description }; @@ -120,9 +120,22 @@ EditorConfig::EditorConfig(string const & fileName) Reload(); } -bool EditorConfig::GetTypeDescription(vector const & classificatorTypes, +bool EditorConfig::GetTypeDescription(vector classificatorTypes, TypeAggregatedDescription & outDesc) const { + bool isBuilding = false; + for (auto it = classificatorTypes.begin(); it != classificatorTypes.end(); ++it) + { + if (*it == "building") + { + outDesc.m_address = isBuilding = true; + outDesc.m_editableFields.push_back(feature::Metadata::FMD_BUILDING_LEVELS); + outDesc.m_editableFields.push_back(feature::Metadata::FMD_POSTCODE); + classificatorTypes.erase(it); + break; + } + } + auto const typeNodes = GetPrioritizedTypes(m_document); auto const it = find_if(begin(typeNodes), end(typeNodes), [&classificatorTypes](pugi::xml_node const & node) @@ -131,7 +144,7 @@ bool EditorConfig::GetTypeDescription(vector const & classificatorTypes, node.attribute("id").value()) != end(classificatorTypes); }); if (it == end(typeNodes)) - return false; + return isBuilding; return TypeDescriptionFromXml(m_document, *it, outDesc); } diff --git a/editor/editor_config.hpp b/editor/editor_config.hpp index 7985185af4..399f03c10a 100644 --- a/editor/editor_config.hpp +++ b/editor/editor_config.hpp @@ -43,7 +43,8 @@ class EditorConfig public: EditorConfig(string const & fileName = "editor.config"); - bool GetTypeDescription(vector const & classificatorTypes, TypeAggregatedDescription & outDesc) const; + // TODO(mgsergio): Reduce overhead by matching uint32_t types instead of strings. + bool GetTypeDescription(vector classificatorTypes, TypeAggregatedDescription & outDesc) const; vector GetTypesThatCanBeAdded() const; bool EditingEnable() const; From 32a50c6a59b012024cc59fb71a7bcad5fe360f43 Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Fri, 1 Apr 2016 14:09:05 +0300 Subject: [PATCH 5/6] [qt] editor.config is a part of project bundle. --- qt/qt.pro | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qt/qt.pro b/qt/qt.pro index c5d8039d20..2befa2c463 100644 --- a/qt/qt.pro +++ b/qt/qt.pro @@ -59,7 +59,9 @@ OTHER_RES.path = $$DATADIR OTHER_RES.files = ../data/copyright.html ../data/eula.html ../data/welcome.html \ ../data/countries.txt \ ../data/languages.txt ../data/categories.txt \ - ../data/packed_polygons.bin res/logo.png + ../data/packed_polygons.bin res/logo.png \ + ../data/editor.config \ + CLASSIFICATOR_RES.path = $$DATADIR CLASSIFICATOR_RES.files = ../data/classificator.txt \ ../data/types.txt \ From baf4a0c0cd07bea2472e9934970a0f16778898cd Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Mon, 4 Apr 2016 17:15:45 +0300 Subject: [PATCH 6/6] Speed improvement from Yury Gorshenin. --- base/string_utils.cpp | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/base/string_utils.cpp b/base/string_utils.cpp index 6e643d18f2..857c190c9b 100644 --- a/base/string_utils.cpp +++ b/base/string_utils.cpp @@ -113,19 +113,42 @@ UniString Normalize(UniString const & s) return result; } -void NormalizeDigits(string & utf8) -{ - for (size_t i = 0; i + 2 < utf8.size(); ++i) +void NormalizeDigits(string &utf8) { + size_t const n = utf8.size(); + size_t const m = n >= 2 ? n - 2 : 0; + + size_t i = 0; + while (i < n && utf8[i] != '\xEF') + ++i; + size_t j = i; + + // Following invariant holds before/between/after loop iterations below: + // * utf8[0, i) represents a checked part of the input string. + // * utf8[0, j) represents a normalized version of the utf8[0, i). + while (i < m) { if (utf8[i] == '\xEF' && utf8[i + 1] == '\xBC') { - char const n = utf8[i + 2]; - if (n < '\x90' || n > '\x99') - continue; - utf8[i] = n - 0x90 + '0'; - utf8.erase(i + 1, 2); + auto const n = utf8[i + 2]; + if (n >= '\x90' && n <= '\x99') + { + utf8[j++] = n - 0x90 + '0'; + i += 3; + } + else + { + utf8[j++] = utf8[i++]; + utf8[j++] = utf8[i++]; + } + } + else + { + utf8[j++] = utf8[i++]; } } + while (i < n) + utf8[j++] = utf8[i++]; + utf8.resize(j); } namespace