diff --git a/generator/generator_tests/osm_type_test.cpp b/generator/generator_tests/osm_type_test.cpp index 2187b9599d..9e24d42fa0 100644 --- a/generator/generator_tests/osm_type_test.cpp +++ b/generator/generator_tests/osm_type_test.cpp @@ -317,7 +317,7 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Capital) auto const params = GetFeatureBuilderParams(tags); TEST_EQUAL(params.m_types.size(), 1, (params)); - TEST(params.IsTypeExist(GetType({"place", "city", "capital", "6"})), ()); + TEST(params.IsTypeExist(GetType({"place", "city", "capital", "6"})), (params)); } { @@ -335,18 +335,18 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Capital) { Tags const tags = { - { "place", "city" }, - { "admin_level", "4" }, - { "boundary", "administrative" }, - { "capital", "2" }, - { "place", "city" }, + {"boundary", "administrative"}, + {"capital", "2"}, + {"place", "city"}, + {"admin_level", "4"}, }; auto const params = GetFeatureBuilderParams(tags); - TEST_EQUAL(params.m_types.size(), 2, (params)); - TEST(params.IsTypeExist(GetType({"place", "city", "capital", "2"})), ()); - TEST(params.IsTypeExist(GetType({"boundary", "administrative", "4"})), ()); + TEST_EQUAL(params.m_types.size(), 3, (params)); + TEST(params.IsTypeExist(GetType({"place", "city", "capital", "2"})), (params)); + TEST(params.IsTypeExist(GetType({"boundary", "administrative", "4"})), (params)); + TEST(params.IsTypeExist(GetType({"place", "city", "capital", "4"})), (params)); } } @@ -543,7 +543,7 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Ferry) TEST(carModel.IsRoadType(type), ()); type = GetType({"route", "ferry", "motorcar"}); - TEST(params.IsTypeExist(type), ()); + TEST(params.IsTypeExist(type), (params)); TEST(carModel.IsRoadType(type), ()); type = GetType({"route", "ferry"}); @@ -655,11 +655,11 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Subway) { Tags const tags = { - { "name", "14th Street-8th Avenue (A,C,E,L)" }, - { "network", "New York City Subway" }, - { "railway", "station" }, - 
{ "wheelchair", "yes" }, - { "route", "subway" }, + {"name", "14th Street-8th Avenue (A,C,E,L)"}, + {"network", "New York City Subway"}, + {"railway", "station"}, + {"wheelchair", "yes"}, + {"transport", "subway"}, }; auto const params = GetFeatureBuilderParams(tags); @@ -771,7 +771,7 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Entrance) UNIT_CLASS_TEST(TestWithClassificator, OsmType_Moscow) { { - Tags const tags = { + Tags const tags = { { "addr:country", "RU" }, { "addr:region", "Москва" }, { "admin_level", "2" }, @@ -843,3 +843,96 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Cuisine) TEST(params.IsTypeExist(GetType({"cuisine", "coffee_shop"})), (params)); } } + +UNIT_CLASS_TEST(TestWithClassificator, OsmType_MergeTags) +{ + { + Tags const tags = { + {"amenity", "parking"}, + {"parking", "multi-storey"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 1, (params)); + TEST(params.IsTypeExist(GetType({"amenity", "parking", "multi-storey"})), (params)); + } + { + Tags const tags = { + {"amenity", "parking"}, + {"location", "underground"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 1, (params)); + TEST(params.IsTypeExist(GetType({"amenity", "parking", "underground"})), (params)); + } + { + Tags const tags = { + {"amenity", "parking_space"}, + {"parking", "underground"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 1, (params)); + TEST(params.IsTypeExist(GetType({"amenity", "parking_space", "underground"})), (params)); + } +} + +UNIT_CLASS_TEST(TestWithClassificator, OsmType_ReuseTags) +{ + { + Tags const tags = { + {"amenity", "parking"}, + {"access", "private"}, + {"fee", "yes"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 2, (params)); + TEST(params.IsTypeExist(GetType({"amenity", "parking", "private"})), (params)); + 
TEST(params.IsTypeExist(GetType({"amenity", "parking", "fee"})), (params)); + } +} + +UNIT_CLASS_TEST(TestWithClassificator, OsmType_DoNotMergeTags) +{ + { + Tags const tags = { + {"place", "unknown_place_value"}, + {"country", "unknown_country_value"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 0, (params)); + } + { + Tags const tags = { + {"amenity", "hospital"}, + {"emergency", "yes"}, + {"phone", "77777777"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 1, (params)); + TEST(params.IsTypeExist(GetType({"amenity", "hospital"})), (params)); + TEST(!params.IsTypeExist(GetType({"emergency", "phone"})), (params)); + } + { + Tags const tags = { + {"shop", "unknown_shop_value"}, + {"photo", "photo_url"}, + }; + + auto const params = GetFeatureBuilderParams(tags); + + TEST_EQUAL(params.m_types.size(), 1, (params)); + TEST(params.IsTypeExist(GetType({"shop"})), (params)); + TEST(!params.IsTypeExist(GetType({"shop", "photo"})), (params)); + } +} diff --git a/generator/osm2meta.hpp b/generator/osm2meta.hpp index 7f845d76a7..22ccb6bb08 100644 --- a/generator/osm2meta.hpp +++ b/generator/osm2meta.hpp @@ -39,13 +39,11 @@ class MetadataTagProcessor : private MetadataTagProcessorImpl public: /// Make base class constructor public. using MetadataTagProcessorImpl::MetadataTagProcessorImpl; - /// Since it is used as a functor which stops iteration in ftype::ForEachTag - /// and the is no need for interrupting it always returns false. 
/// TODO(mgsergio): Move to cpp after merge with https://github.com/mapsme/omim/pull/1314 - bool operator() (std::string const & k, std::string const & v) + void operator()(std::string const & k, std::string const & v) { if (v.empty()) - return false; + return; using feature::Metadata; Metadata & md = m_params.GetMetadata(); @@ -60,7 +58,7 @@ public: if (!md.Has(Metadata::FMD_MIN_HEIGHT)) md.Set(Metadata::FMD_MIN_HEIGHT, ValidateAndFormat_building_levels(v)); } - return false; + return; } std::string valid; @@ -102,6 +100,5 @@ public: case Metadata::FMD_COUNT: CHECK(false, (mdType, "should not be parsed from OSM.")); } md.Set(mdType, valid); - return false; } }; diff --git a/generator/osm2type.cpp b/generator/osm2type.cpp index bbcfab3dc8..f3c88897da 100644 --- a/generator/osm2type.cpp +++ b/generator/osm2type.cpp @@ -3,16 +3,20 @@ #include "generator/osm2meta.hpp" #include "generator/osm_element.hpp" #include "generator/osm_element_helpers.hpp" +#include "generator/utils.hpp" #include "indexer/classificator.hpp" #include "indexer/feature_impl.hpp" +#include "platform/platform.hpp" + #include "geometry/mercator.hpp" #include "base/assert.hpp" #include "base/stl_helpers.hpp" #include "base/string_utils.hpp" +#include #include #include #include @@ -24,89 +28,11 @@ namespace ftype { namespace { -bool NeedMatchValue(string const & k, string const & v) +template +void ForEachTag(OsmElement * p, ToDo && toDo) { - // Take numbers only for "capital" and "admin_level" now. - // NOTE! If you add a new type into classificator, which has a number in it - // (like admin_level=1 or capital=2), please don't forget to insert it here too. - // Otherwise generated data will not contain your newly added features. 
- return !strings::is_number(v) || k == "admin_level" || k == "capital"; -} - -bool IgnoreTag(string const & k, string const & v) -{ - static string const negativeValues[] = {"no", "false", "-1"}; - // If second component of these pairs is true we need to process this key else ignore it - static pair const processedKeys[] = { - {"description", true}, - // [highway=primary][cycleway=lane] parsed as [highway=cycleway] - {"cycleway", true}, - // [highway=proposed][proposed=primary] parsed as [highway=primary] - {"proposed", true}, - // [highway=primary][construction=primary] parsed as [highway=construction] - {"construction", true}, - // [wheelchair=no] should be processed - {"wheelchair", false}, - // process in any case - {"layer", false}, - // process in any case - {"oneway", false}}; - - // Ignore empty key. - if (k.empty()) - return true; - - // Process special keys. - for (auto const & key : processedKeys) - { - if (k == key.first) - return key.second; - } - - // Ignore keys with negative values. 
- for (auto const & value : negativeValues) - { - if (v == value) - return true; - } - - return false; -} - -template -Result ForEachTag(OsmElement * p, ToDo && toDo) -{ - Result res = {}; for (auto & e : p->m_tags) - { - if (IgnoreTag(e.m_key, e.m_value)) - continue; - - res = toDo(e.m_key, e.m_value); - if (res) - return res; - } - return res; -} - -template -Result ForEachTagEx(OsmElement * p, set & skipTags, ToDo && toDo) -{ - int id = 0; - return ForEachTag(p, [&](string const & k, string const & v) { - int currentId = id++; - if (skipTags.count(currentId) != 0) - return Result(); - if (string::npos != k.find("name")) - { - skipTags.insert(currentId); - return Result(); - } - Result res = toDo(k, v); - if (res) - skipTags.insert(currentId); - return res; - }); + toDo(e.m_key, e.m_value); } class NamesExtractor @@ -145,16 +71,15 @@ public: return m_savedNames.insert(lang).second; } - bool operator()(string & k, string & v) + void operator()(string & k, string & v) { string lang; if (v.empty() || !GetLangByKey(k, lang)) - return false; + return; m_params.AddName(lang, v); k.clear(); v.clear(); - return false; } private: @@ -312,83 +237,56 @@ private: buffer_vector(Type::Count)> m_types; }; -// This function tries to match osm tags with classificator types: -// amenity=parking + parking=underground + fee=yes -> amenity-parking-underground-fee. -// Now it works wrong with some tags combinations: -// place=quarter + country=MX -> place-country -// emergency=yes + phone=7777777 -> emergency-phone -// route=ferry + car=yes + foot=yes -> route-ferry-car -// See https://jira.mail.ru/browse/MAPSME-10611. 
-void MatchTypes(OsmElement * p, FeatureBuilderParams & params, function filterType) +void LeaveLongestTypes(vector & matchedTypes) { - set skipRows; - vector path; - ClassifObject const * current = nullptr; + auto const less = [](auto const & lhs, auto const & rhs) { + for (auto lhsIt = lhs.begin(), rhsIt = rhs.begin();; ++lhsIt, ++rhsIt) + { + if (lhsIt == lhs.end() && rhsIt == rhs.end()) + return false; + if (lhsIt == lhs.end() && rhsIt != rhs.end()) + return false; + if (lhsIt != lhs.end() && rhsIt == rhs.end()) + return true; - auto matchTagToClassificator = [&path, ¤t](string const & k, string const & v) -> bool { - // First try to match key. - ClassifObjectPtr elem = current->BinaryFind(k); - if (!elem) - return false; - - path.push_back(elem); - - // Now try to match correspondent value. - if (!NeedMatchValue(k, v)) - return true; - - if (ClassifObjectPtr velem = elem->BinaryFind(v)) - path.push_back(velem); + if (*lhsIt != *rhsIt) + return *lhsIt < *rhsIt; + } + }; + auto const equals = [](auto const & lhs, auto const & rhs) { + for (auto lhsIt = lhs.begin(), rhsIt = rhs.begin(); lhsIt != lhs.end() && rhsIt != rhs.end(); + ++lhsIt, ++rhsIt) + { + if (*lhsIt != *rhsIt) + return false; + } return true; }; - do + base::SortUnique(matchedTypes, less, equals); +} + +void MatchTypes(OsmElement * p, FeatureBuilderParams & params, function filterType) +{ + auto static const rules = generator::ParseMapCSS(GetPlatform().GetReader("mapcss-mapping.csv")); + + vector matchedTypes; + for (auto const & rule : rules) { - current = classif().GetRoot(); - path.clear(); + if (rule.second.Matches(p->m_tags)) + matchedTypes.push_back(rule.first); + } - // Find first root object by key. - if (!ForEachTagEx(p, skipRows, matchTagToClassificator)) - break; - CHECK(!path.empty(), ()); + LeaveLongestTypes(matchedTypes); - do - { - // Continue find path from last element. - current = path.back().get(); - - // Next objects trying to find by value first. 
- // Prevent merging different tags (e.g. shop=pet from shop=abandoned, was:shop=pet). - - ClassifObjectPtr pObj; - if (path.size() != 1) - { - pObj = ForEachTagEx( - p, skipRows, [¤t](string const & k, string const & v) { - return NeedMatchValue(k, v) ? current->BinaryFind(v) : ClassifObjectPtr(); - }); - } - - if (pObj) - { - path.push_back(pObj); - } - else if (!ForEachTagEx(p, skipRows, matchTagToClassificator)) - { - // If no - try find object by key (in case of k = "area", v = "yes"). - break; - } - } while (true); - - // Assign type. - uint32_t t = ftype::GetEmptyValue(); - for (auto const & e : path) - ftype::PushValue(t, e.GetIndex()); + for (auto const & path : matchedTypes) + { + uint32_t const t = classif().GetTypeByPath(path); if (filterType(t)) params.AddType(t); - } while (true); + } } string MatchCity(OsmElement const * p) @@ -840,7 +738,7 @@ void GetNameAndType(OsmElement * p, FeatureBuilderParams & params, PreprocessElement(p); // Stage2: Process feature name on all languages. - ForEachTag(p, NamesExtractor(params)); + ForEachTag(p, NamesExtractor(params)); // Stage3: Process base feature tags. TagProcessor(p).ApplyRules({ @@ -904,6 +802,6 @@ void GetNameAndType(OsmElement * p, FeatureBuilderParams & params, // Stage6: Collect additional information about feature such as // hotel stars, opening hours, cuisine, ... 
- ForEachTag(p, MetadataTagProcessor(params)); + ForEachTag(p, MetadataTagProcessor(params)); } } // namespace ftype diff --git a/generator/utils.cpp b/generator/utils.cpp index baa6f63762..940c3182aa 100644 --- a/generator/utils.cpp +++ b/generator/utils.cpp @@ -10,6 +10,8 @@ #include "platform/local_country_file_utils.hpp" #include "platform/platform.hpp" +#include "coding/reader_streambuf.hpp" + #include "base/assert.hpp" #include "base/cancellable.hpp" #include "base/exception.hpp" @@ -138,4 +140,117 @@ search::CBV GetLocalities(std::string const & dataPath) base::Cancellable const cancellable; return search::CategoriesCache(search::LocalitiesSource{}, cancellable).Get(context); } + +bool MapcssRule::Matches(std::vector const & tags) const +{ + for (auto const & tag : m_tags) + { + if (!std::any_of(tags.begin(), tags.end(), [&](auto const & t) { return t == tag; })) + return false; + } + for (auto const & key : m_mandatoryKeys) + { + if (!std::any_of(tags.begin(), tags.end(), + [&](auto const & t) { return t.m_key == key && t.m_value != "no"; })) + { + return false; + } + } + for (auto const & key : m_forbiddenKeys) + { + if (!std::all_of(tags.begin(), tags.end(), + [&](auto const & t) { return t.m_key != key || t.m_value == "no"; })) + { + return false; + } + } + return true; +} + +MapcssRules ParseMapCSS(std::unique_ptr reader) +{ + ReaderStreamBuf buffer(std::move(reader)); + std::istream data(&buffer); + data.exceptions(std::fstream::badbit); + + MapcssRules rules; + + auto const processShort = [&rules](std::string const & typeString) { + auto const typeTokens = strings::Tokenize(typeString, "|"); + CHECK(typeTokens.size() == 2, (typeString)); + MapcssRule rule; + rule.m_tags = {{typeTokens[0], typeTokens[1]}}; + rules.push_back({typeTokens, rule}); + }; + + auto const processFull = [&rules](std::string const & typeString, + std::string const & selectorsString) { + auto const typeTokens = strings::Tokenize(typeString, "|"); + for (auto const & selector : 
strings::Tokenize(selectorsString, ",")) + { + CHECK(!selector.empty(), (selectorsString)); + CHECK_EQUAL(selector[0], '[', (selectorsString)); + CHECK_EQUAL(selector.back(), ']', (selectorsString)); + + MapcssRule rule; + auto tags = strings::Tokenize(selector, "["); + for (auto & rawTag : tags) + { + strings::Trim(rawTag, "]"); + CHECK(!rawTag.empty(), (selector, tags)); + auto tag = strings::Tokenize(rawTag, "="); + if (tag.size() == 1) + { + CHECK(!tag[0].empty(), (rawTag)); + auto const forbidden = tag[0][0] == '!'; + strings::Trim(tag[0], "?!"); + if (forbidden) + rule.m_forbiddenKeys.push_back(tag[0]); + else + rule.m_mandatoryKeys.push_back(tag[0]); + } + else + { + CHECK_EQUAL(tag.size(), 2, (tag)); + rule.m_tags.push_back({tag[0], tag[1]}); + } + } + rules.push_back({typeTokens, rule}); + } + }; + + // Mapcss-mapping maps tags to types. + // Types can be marked obsolete or replaced with a different type. + // + // Example row: highway|bus_stop;[highway=bus_stop];;name;int_name;22; + // It contains: + // - type name: "highway|bus_stop" ('|' is converted to '-' internally) + // - mapcss selectors for tags: "[highway=bus_stop]", multiple selectors are separated with comma + // - "x" for an obsolete type or an empty cell otherwise + // - primary title tag (usually "name") + // - secondary title tag (usually "int_name") + // - type id, sequential starting from 1 + // - replacement type for an obsolete tag, if exists + // + // A shorter format for above example: highway|bus_stop;22; + // It leaves only columns 1, 6 and 7. For obsolete types with no replacement put "x" into the last + // column. It works only for simple types that are produced from tags replacing '=' with '|'. + + std::string line; + while (getline(data, line)) + { + std::vector fields; + strings::ParseCSVRow(line, ';', fields); + CHECK(fields.size() == 3 || fields.size() == 7, (fields.size(), fields, line)); + // Short format without replacement. 
+ if (fields.size() == 3 && fields[2].empty()) + processShort(fields[0]); + + // Full format, not obsolete. + if (fields.size() == 7 && fields[2] != "x") + processFull(fields[0], fields[1]); + } + + return rules; +} } // namespace generator diff --git a/generator/utils.hpp b/generator/utils.hpp index 5fc4ed9b57..b8c2108863 100644 --- a/generator/utils.hpp +++ b/generator/utils.hpp @@ -1,6 +1,7 @@ #pragma once #include "generator/gen_mwm_info.hpp" +#include "generator/osm_element.hpp" #include "search/cbv.hpp" @@ -93,4 +94,18 @@ bool ParseFeatureIdToTestIdMapping(std::string const & path, std::unordered_map & mapping); search::CBV GetLocalities(std::string const & dataPath); + +struct MapcssRule +{ + bool Matches(std::vector const & tags) const; + + std::vector m_tags; + std::vector m_mandatoryKeys; + std::vector m_forbiddenKeys; +}; + +using TypeStrings = std::vector; +using MapcssRules = std::vector>; + +MapcssRules ParseMapCSS(std::unique_ptr reader); } // namespace generator diff --git a/indexer/feature_data.cpp b/indexer/feature_data.cpp index 596e6c5486..9cb463f050 100644 --- a/indexer/feature_data.cpp +++ b/indexer/feature_data.cpp @@ -412,42 +412,7 @@ void FeatureParams::AddTypes(FeatureParams const & rhs, uint32_t skipType2) bool FeatureParams::FinishAddingTypes() { - static uint32_t const boundary = classif().GetTypeByPath({ "boundary", "administrative" }); - - vector newTypes; - newTypes.reserve(m_types.size()); - - for (size_t i = 0; i < m_types.size(); ++i) - { - uint32_t candidate = m_types[i]; - - // Assume that classificator types are equal if they are equal for 2-arity dimension - // (e.g. "place-city-capital" is equal to "place-city" and we leave the longest one "place-city-capital"). - // The only exception is "boundary-administrative" type. - - uint32_t type = m_types[i]; - ftype::TruncValue(type, 2); - if (type != boundary) - { - // Find all equal types (2-arity).
- auto j = base::RemoveIfKeepValid(m_types.begin() + i + 1, m_types.end(), [type] (uint32_t t) - { - ftype::TruncValue(t, 2); - return (type == t); - }); - - // Choose the best type from equals by arity level. - for (auto k = j; k != m_types.end(); ++k) - if (ftype::GetLevel(*k) > ftype::GetLevel(candidate)) - candidate = *k; - - // Delete equal types. - m_types.erase(j, m_types.end()); - } - - newTypes.push_back(candidate); - } - + vector newTypes = m_types; base::SortUnique(newTypes); if (newTypes.size() > kMaxTypesCount)