From 8a7d2dec2cb3c5fa7d128fe621423fc78accb2b3 Mon Sep 17 00:00:00 2001 From: Viktor Govako Date: Thu, 19 Oct 2023 10:03:50 -0300 Subject: [PATCH] [generator] Added "brand" meta. Choose one (best) value for "operator" and "brand". Signed-off-by: Viktor Govako --- base/string_utils.cpp | 29 ++----------- base/string_utils.hpp | 8 +--- coding/string_utf8_multilang.cpp | 2 +- generator/feature_builder.cpp | 18 ++++++++ .../generator_tests/metadata_parser_test.cpp | 15 +++---- generator/generator_tests/osm_type_test.cpp | 35 +++++++++++++--- generator/maxspeeds_parser.cpp | 5 +-- generator/osm2meta.cpp | 41 ++++++++++++++----- generator/osm2meta.hpp | 32 +++++++++++++++ indexer/brands_holder.hpp | 3 +- indexer/feature_meta.cpp | 4 +- 11 files changed, 130 insertions(+), 62 deletions(-) diff --git a/base/string_utils.cpp b/base/string_utils.cpp index 079cc8daeb..226f87319b 100644 --- a/base/string_utils.cpp +++ b/base/string_utils.cpp @@ -307,12 +307,7 @@ bool StartsWith(UniString const & s, UniString const & p) return StartsWith(s.begin(), s.end(), p.begin(), p.end()); } -bool StartsWith(std::string const & s1, char const * s2) -{ - return (s1.compare(0, strlen(s2), s2) == 0); -} - -bool StartsWith(std::string const & s1, std::string_view s2) +bool StartsWith(std::string_view s1, std::string_view s2) { return (s1.compare(0, s2.length(), s2) == 0); } @@ -322,11 +317,6 @@ bool StartsWith(std::string const & s, std::string::value_type c) return s.empty() ? false : s.front() == c; } -bool StartsWith(std::string const & s1, std::string const & s2) -{ - return (s1.compare(0, s2.length(), s2) == 0); -} - bool EndsWith(UniString const & s1, UniString const & s2) { if (s1.size() < s2.size()) @@ -335,12 +325,7 @@ bool EndsWith(UniString const & s1, UniString const & s2) return std::equal(s1.end() - s2.size(), s1.end(), s2.begin()); } -bool EndsWith(std::string const & s1, char const * s2) -{ - return EndsWith(s1, std::string_view(s2)); -} - -bool EndsWith(std::string const & s1, std::string_view s2) +bool EndsWith(std::string_view s1, std::string_view s2) { size_t const n = s1.size(); size_t const m = s2.size(); @@ -354,11 +339,6 @@ bool EndsWith(std::string const & s, std::string::value_type c) return s.empty() ? false : s.back() == c; } -bool EndsWith(std::string const & s1, std::string const & s2) -{ - return s1.size() >= s2.size() && s1.compare(s1.size() - s2.size(), s2.size(), s2) == 0; -} - bool EatPrefix(std::string & s, std::string const & prefix) { if (!StartsWith(s, prefix)) @@ -417,7 +397,7 @@ std::string to_string_dac(double d, int dac) bool IsHTML(std::string const & utf8) { - std::string::const_iterator it = utf8.begin(); + auto it = utf8.begin(); size_t ltCount = 0; size_t gtCount = 0; while (it != utf8.end()) @@ -433,8 +413,7 @@ bool IsHTML(std::string const & utf8) bool AlmostEqual(std::string const & str1, std::string const & str2, size_t mismatchedCount) { - std::pair mis(str1.begin(), - str2.begin()); + std::pair mis(str1.begin(), str2.begin()); auto const str1End = str1.end(); auto const str2End = str2.end(); diff --git a/base/string_utils.hpp b/base/string_utils.hpp index 78054c30fa..28a7ac1fb7 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -591,16 +591,12 @@ bool StartsWith(IterT1 beg, IterT1 end, IterT2 begPrefix, IterT2 endPrefix) } bool StartsWith(UniString const & s, UniString const & p); -bool StartsWith(std::string const & s1, char const * s2); -bool StartsWith(std::string const & s1, std::string_view s2); +bool StartsWith(std::string_view s1, std::string_view s2); bool StartsWith(std::string const & s, std::string::value_type c); -bool StartsWith(std::string const & s1, std::string const & s2); bool EndsWith(UniString const & s1, UniString const & s2); -bool EndsWith(std::string const & s1, char const * s2); -bool EndsWith(std::string const & s1, std::string_view s2); +bool EndsWith(std::string_view s1, std::string_view s2); bool EndsWith(std::string const & s, std::string::value_type c); -bool EndsWith(std::string const & s1, std::string const & s2); // If |s| starts with |prefix|, deletes it from |s| and returns true. // Otherwise, leaves |s| unmodified and returns false. diff --git a/coding/string_utf8_multilang.cpp b/coding/string_utf8_multilang.cpp index d83a33cb58..d37ba9f32e 100644 --- a/coding/string_utf8_multilang.cpp +++ b/coding/string_utf8_multilang.cpp @@ -109,7 +109,7 @@ StringUtf8Multilang::Languages const & StringUtf8Multilang::GetSupportedLanguage } // static -int8_t StringUtf8Multilang::GetLangIndex(std::string_view const lang) +int8_t StringUtf8Multilang::GetLangIndex(std::string_view lang) { if (lang == kReservedLang) return kUnsupportedLanguageCode; diff --git a/generator/feature_builder.cpp b/generator/feature_builder.cpp index b23b5961d6..878a82bd92 100644 --- a/generator/feature_builder.cpp +++ b/generator/feature_builder.cpp @@ -270,6 +270,24 @@ bool FeatureBuilder::PreSerialize() return false; } + // Stats shows that 1706197 POIs out of 2258011 have name == brand. + // Can remove duplicates, since we use "brand" only in search. + /// @todo Remove, when we will make valid localized brands and store brand-id instead raw name. + auto & meta = GetMetadata(); + auto const brand = meta.Get(Metadata::FMD_BRAND); + if (!brand.empty()) + { + m_params.name.ForEach([brand, &meta](int8_t, std::string_view name) + { + if (brand == name) + { + meta.Drop(Metadata::FMD_BRAND); + return base::ControlFlow::Break; + } + return base::ControlFlow::Continue; + }); + } + return true; } diff --git a/generator/generator_tests/metadata_parser_test.cpp b/generator/generator_tests/metadata_parser_test.cpp index e1f429fc86..7d75c33b02 100644 --- a/generator/generator_tests/metadata_parser_test.cpp +++ b/generator/generator_tests/metadata_parser_test.cpp @@ -89,20 +89,17 @@ UNIT_CLASS_TEST(TestWithClassificator, Metadata_ValidateAndFormat_operator) TEST(md.Empty(), ()); params.SetType(typeAtm); - p("operator", "Some"); - TEST_EQUAL(md.Get(Metadata::FMD_OPERATOR), "Some", ()); - md.Drop(Metadata::FMD_OPERATOR); + p("operator", "Some1"); + TEST_EQUAL(md.Get(Metadata::FMD_OPERATOR), "Some1", ()); params.SetType(typeFuel); - p("operator", "Some"); - TEST_EQUAL(md.Get(Metadata::FMD_OPERATOR), "Some", ()); - md.Drop(Metadata::FMD_OPERATOR); + p("operator", "Some2"); + TEST_EQUAL(md.Get(Metadata::FMD_OPERATOR), "Some2", ()); params.SetType(typeCarSharing); params.AddType(typeCarRental); - p("operator", "Some"); - TEST_EQUAL(md.Get(Metadata::FMD_OPERATOR), "Some", ()); - md.Drop(Metadata::FMD_OPERATOR); + p("operator", "Some3"); + TEST_EQUAL(md.Get(Metadata::FMD_OPERATOR), "Some3", ()); } UNIT_TEST(Metadata_ValidateAndFormat_height) diff --git a/generator/generator_tests/osm_type_test.cpp b/generator/generator_tests/osm_type_test.cpp index 72c95a69e7..b51cc32c90 100644 --- a/generator/generator_tests/osm_type_test.cpp +++ b/generator/generator_tests/osm_type_test.cpp @@ -1801,6 +1801,17 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Recycling) UNIT_CLASS_TEST(TestWithClassificator, OsmType_Metadata) { + auto const getDescr = [](FeatureBuilderParams const & params, std::string_view lang) + { + std::string buffer(params.GetMetadata().Get(feature::Metadata::FMD_DESCRIPTION)); + TEST(!buffer.empty(), ()); + auto const mlStr = StringUtf8Multilang::FromBuffer(std::move(buffer)); + + std::string_view desc; + mlStr.GetString(StringUtf8Multilang::GetLangIndex(lang), desc); + return std::string(desc); + }; + { Tags const tags = { {"amenity", "restaurant" }, @@ -1811,14 +1822,26 @@ UNIT_CLASS_TEST(TestWithClassificator, OsmType_Metadata) TEST_EQUAL(params.m_types.size(), 1, (params)); TEST(params.IsTypeExist(GetType({"amenity", "restaurant"})), (params)); + TEST_EQUAL(getDescr(params, "ru"), "Хорошие настойки", ()); + } - std::string buffer(params.GetMetadata().Get(feature::Metadata::FMD_DESCRIPTION)); - TEST(!buffer.empty(), ()); - auto const mlStr = StringUtf8Multilang::FromBuffer(std::move(buffer)); + { + Tags const tags = { + {"amenity", "atm" }, + {"operator", "Default"}, + {"operator:en", "English"}, + {"brand::kk", "KK language"}, + {"brand:en", "English"}, + {"description", "Default"}, + {"description::kk", "KK language"}, + }; - std::string_view desc; - mlStr.GetString(StringUtf8Multilang::GetLangIndex("ru"), desc); - TEST_EQUAL(desc, "Хорошие настойки", ()); + auto const params = GetFeatureBuilderParams(tags); + TEST_EQUAL(params.m_types.size(), 1, (params)); + TEST(params.IsTypeExist(GetType({"amenity", "atm"})), (params)); + TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_OPERATOR), "Default", ()); + TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_BRAND), "English", ()); + TEST_EQUAL(getDescr(params, "default"), "Default", ()); } } diff --git a/generator/maxspeeds_parser.cpp b/generator/maxspeeds_parser.cpp index 8710ae06fe..e27502117f 100644 --- a/generator/maxspeeds_parser.cpp +++ b/generator/maxspeeds_parser.cpp @@ -176,8 +176,7 @@ bool ParseMaxspeedTag(std::string const & maxspeedValue, routing::SpeedInUnits & while (i < maxspeedValue.size() && isspace(maxspeedValue[i])) ++i; - if (maxspeedValue.size() == i || - strings::StartsWith({maxspeedValue.begin() + i, maxspeedValue.end()}, "kmh")) + if (maxspeedValue.size() == i || strings::StartsWith(maxspeedValue.substr(i), "kmh")) { uint64_t kmph = 0; if (!strings::to_uint64(speedStr.c_str(), kmph) || kmph == 0 || kmph > std::numeric_limits::max()) @@ -188,7 +187,7 @@ bool ParseMaxspeedTag(std::string const & maxspeedValue, routing::SpeedInUnits & return true; } - if (strings::StartsWith({maxspeedValue.begin() + i, maxspeedValue.end()}, "mph")) + if (strings::StartsWith(maxspeedValue.substr(i), "mph")) { uint64_t mph = 0; if (!strings::to_uint64(speedStr.c_str(), mph) || mph == 0 || mph > std::numeric_limits::max()) diff --git a/generator/osm2meta.cpp b/generator/osm2meta.cpp index 394e70be18..7742de2e20 100644 --- a/generator/osm2meta.cpp +++ b/generator/osm2meta.cpp @@ -321,6 +321,11 @@ std::string MetadataTagProcessorImpl::ValidateAndFormat_airport_iata(std::string return str; } +std::string MetadataTagProcessorImpl::ValidateAndFormat_brand(std::string const & v) +{ + return v; +} + std::string MetadataTagProcessorImpl::ValidateAndFormat_duration(std::string const & v) const { if (!ftypes::IsWayWithDurationChecker::Instance()(m_params.m_types)) @@ -442,19 +447,27 @@ void MetadataTagProcessor::operator()(std::string const & k, std::string const & using feature::Metadata; Metadata & md = m_params.GetMetadata(); + auto const getLang = [view = std::string_view(k)]() + { + size_t const i = view.find(':'); + if (i != std::string_view::npos) + return view.substr(i + 1); + return std::string_view(); + }; + if (strings::StartsWith(k, "description")) { - // Process description tags. - int8_t lang = StringUtf8Multilang::kDefaultCode; - size_t const i = k.find(':'); - if (i != std::string::npos) + // Separate description tags processing. + int8_t langIdx = StringUtf8Multilang::kDefaultCode; + auto const lang = getLang(); + if (!lang.empty()) { - int8_t const l = StringUtf8Multilang::GetLangIndex(k.substr(i+1)); - if (l != StringUtf8Multilang::kUnsupportedLanguageCode) - lang = l; + langIdx = StringUtf8Multilang::GetLangIndex(lang); + if (langIdx == StringUtf8Multilang::kUnsupportedLanguageCode) + return; } - m_description.AddString(lang, v); + m_description.AddString(langIdx, v); return; } @@ -469,7 +482,11 @@ void MetadataTagProcessor::operator()(std::string const & k, std::string const & case Metadata::FMD_FAX_NUMBER: // The same validator as for phone. case Metadata::FMD_PHONE_NUMBER: valid = ValidateAndFormat_phone(v); break; case Metadata::FMD_STARS: valid = ValidateAndFormat_stars(v); break; - case Metadata::FMD_OPERATOR: valid = ValidateAndFormat_operator(v); break; + case Metadata::FMD_OPERATOR: + if (!m_operatorF.Add(getLang())) + return; + valid = ValidateAndFormat_operator(v); + break; case Metadata::FMD_WEBSITE: valid = ValidateAndFormat_url(v); break; case Metadata::FMD_CONTACT_FACEBOOK: valid = osm::ValidateAndFormat_facebook(v); break; case Metadata::FMD_CONTACT_INSTAGRAM: valid = osm::ValidateAndFormat_instagram(v); break; @@ -496,10 +513,14 @@ void MetadataTagProcessor::operator()(std::string const & k, std::string const & case Metadata::FMD_BUILDING_LEVELS: valid = ValidateAndFormat_building_levels(v); break; case Metadata::FMD_LEVEL: valid = ValidateAndFormat_level(v); break; case Metadata::FMD_AIRPORT_IATA: valid = ValidateAndFormat_airport_iata(v); break; + case Metadata::FMD_BRAND: + if (!m_brandF.Add(getLang())) + return; + valid = ValidateAndFormat_brand(v); + break; case Metadata::FMD_DURATION: valid = ValidateAndFormat_duration(v); break; // Metadata types we do not get from OSM. case Metadata::FMD_CUISINE: - case Metadata::FMD_BRAND: case Metadata::FMD_DESCRIPTION: // processed separately case Metadata::FMD_TEST_ID: case Metadata::FMD_CUSTOM_IDS: diff --git a/generator/osm2meta.hpp b/generator/osm2meta.hpp index 5895eea649..231c25763b 100644 --- a/generator/osm2meta.hpp +++ b/generator/osm2meta.hpp @@ -33,6 +33,7 @@ struct MetadataTagProcessorImpl static std::string ValidateAndFormat_wikipedia(std::string v) ; static std::string ValidateAndFormat_wikimedia_commons(std::string v) ; std::string ValidateAndFormat_airport_iata(std::string const & v) const; + static std::string ValidateAndFormat_brand(std::string const & v); std::string ValidateAndFormat_duration(std::string const & v) const; protected: @@ -43,6 +44,37 @@ class MetadataTagProcessor : private MetadataTagProcessorImpl { StringUtf8Multilang m_description; + struct LangFlags + { + // Select one value with priority: + // - Default + // - English + // - Others + bool Add(std::string_view lang) + { + if (lang.empty()) + { + m_defAdded = true; + return true; + } + else if (lang == "en") + { + if (m_defAdded) + return false; + m_enAdded = true; + return true; + } + + return !m_defAdded && !m_enAdded; + } + + bool m_defAdded = false; + bool m_enAdded = false; + }; + + /// @todo Make operator and brand multilangual like description. + LangFlags m_operatorF, m_brandF; + public: /// Make base class constructor public. using MetadataTagProcessorImpl::MetadataTagProcessorImpl; diff --git a/indexer/brands_holder.hpp b/indexer/brands_holder.hpp index f3239c4866..f41463536a 100644 --- a/indexer/brands_holder.hpp +++ b/indexer/brands_holder.hpp @@ -67,7 +67,8 @@ BrandsHolder const & GetDefaultBrands(); template void ForEachLocalizedBrands(std::string_view brand, FnT && fn) { bool processed = false; - /// @todo Wait for the new cpp standard. + /// @todo Remove temporary string with the new cpp standard. + /// Localized brands are not working as expected now because we store raw names from OSM, not brand IDs. GetDefaultBrands().ForEachNameByKey(std::string(brand), [&fn, &processed](auto const & name) { fn(name); diff --git a/indexer/feature_meta.cpp b/indexer/feature_meta.cpp index cdd5c0e3f0..c5202e2e71 100644 --- a/indexer/feature_meta.cpp +++ b/indexer/feature_meta.cpp @@ -63,7 +63,7 @@ bool Metadata::TypeFromString(string_view k, Metadata::EType & outType) outType = Metadata::EType::FMD_FAX_NUMBER; else if (k == "stars") outType = Metadata::FMD_STARS; - else if (k == "operator") + else if (strings::StartsWith(k, "operator")) outType = Metadata::FMD_OPERATOR; else if (k == "url" || k == "website" || k == "contact:website") outType = Metadata::FMD_WEBSITE; @@ -118,6 +118,8 @@ bool Metadata::TypeFromString(string_view k, Metadata::EType & outType) outType = Metadata::FMD_LEVEL; else if (k == "iata") outType = Metadata::FMD_AIRPORT_IATA; + else if (strings::StartsWith(k, "brand")) + outType = Metadata::FMD_BRAND; else if (k == "duration") outType = Metadata::FMD_DURATION; else