# Patch summary (text before the first "diff --git" header is not part of any hunk).
#
#   * search/house_numbers_matcher.{hpp,cpp}: add a std::string overload of
#     LooksLikeHouseNumber() that converts its argument with strings::MakeUniString()
#     and forwards to the existing UniString overload.
#   * search/search_tests/house_numbers_matcher_test.cpp: remove the test-local helper
#     that had the same signature and did the same conversion.
#   * search/search_quality/samples_generation_tool/samples_generation_tool.cpp:
#       - move the DEFINE_string flags below the k* constants and add the flags
#         max_distance_to_object, min_viewport_size, max_viewport_size,
#         max_samples_per_mwm (defaults are the existing k* constants) and
#         add_misprints, add_cafe_address, add_cafe_type (all default to false);
#       - AddMisprints() returns early unless --add_misprints is set;
#       - GetBuildingInfo() additionally rejects features whose house number does not
#         pass LooksLikeHouseNumber(houseNumber, false);
#       - ModifyCafe() / GenerateRequest() append the cafe type / address only when
#         the corresponding flag is set.
#
# NOTE(review): the line structure of this patch was restored from a copy in which
# newlines and indentation had been collapsed. Hunk line counts match the @@ headers,
# but leading whitespace (two-space style, and the nesting depth of the context in the
# main() hunk) is reconstructed -- confirm against the target files or apply with a
# whitespace-tolerant option.
# NOTE(review): no hunk shown here validates the new numeric flags. They are passed
# directly to uniform_real_distribution, which requires a <= b, so
# --min_viewport_size > --max_viewport_size or a negative --max_distance_to_object
# would violate that precondition -- confirm validation exists elsewhere or add it.
diff --git a/search/house_numbers_matcher.cpp b/search/house_numbers_matcher.cpp
index e813f56c8c..2e48c07174 100644
--- a/search/house_numbers_matcher.cpp
+++ b/search/house_numbers_matcher.cpp
@@ -567,6 +567,11 @@ bool LooksLikeHouseNumber(strings::UniString const & s, bool isPrefix)
   return classifier.LooksGood(s, isPrefix);
 }
 
+bool LooksLikeHouseNumber(string const & s, bool isPrefix)
+{
+  return LooksLikeHouseNumber(strings::MakeUniString(s), isPrefix);
+}
+
 string DebugPrint(Token::Type type)
 {
   switch (type)
diff --git a/search/house_numbers_matcher.hpp b/search/house_numbers_matcher.hpp
index 8faac42070..6214c8b77a 100644
--- a/search/house_numbers_matcher.hpp
+++ b/search/house_numbers_matcher.hpp
@@ -72,6 +72,7 @@ bool HouseNumbersMatch(strings::UniString const & houseNumber,
 
 // Returns true if |s| looks like a house number.
 bool LooksLikeHouseNumber(strings::UniString const & s, bool isPrefix);
+bool LooksLikeHouseNumber(std::string const & s, bool isPrefix);
 
 std::string DebugPrint(Token::Type type);
 
diff --git a/search/search_quality/samples_generation_tool/samples_generation_tool.cpp b/search/search_quality/samples_generation_tool/samples_generation_tool.cpp
index b779129751..b4d4187a98 100644
--- a/search/search_quality/samples_generation_tool/samples_generation_tool.cpp
+++ b/search/search_quality/samples_generation_tool/samples_generation_tool.cpp
@@ -1,4 +1,5 @@
 #include "search/categories_cache.hpp"
+#include "search/house_numbers_matcher.hpp"
 #include "search/mwm_context.hpp"
 #include "search/reverse_geocoder.hpp"
 #include "search/search_quality/helpers.hpp"
@@ -38,16 +39,25 @@ using namespace search::search_quality;
 using namespace search;
 using namespace std;
 
-DEFINE_string(data_path, "", "Path to data directory (resources dir)");
-DEFINE_string(mwm_path, "", "Path to mwm files (writable dir)");
-DEFINE_string(out_buildings_path, "buildings.json", "Path to output file for buildings samples");
-DEFINE_string(out_cafes_path, "cafes.json", "Path to output file for cafes samples");
-
 double constexpr kMaxDistanceToObjectM = 7500.0;
 double constexpr kMinViewportSizeM = 100.0;
 double constexpr kMaxViewportSizeM = 5000.0;
 size_t constexpr kMaxSamplesPerMwm = 20;
 
+DEFINE_string(data_path, "", "Path to data directory (resources dir).");
+DEFINE_string(mwm_path, "", "Path to mwm files (writable dir).");
+DEFINE_string(out_buildings_path, "buildings.json", "Path to output file for buildings samples.");
+DEFINE_string(out_cafes_path, "cafes.json", "Path to output file for cafes samples.");
+DEFINE_double(max_distance_to_object, kMaxDistanceToObjectM,
+              "Maximal distance from user position to object (meters).");
+DEFINE_double(min_viewport_size, kMinViewportSizeM, "Minimal size of viewport (meters).");
+DEFINE_double(max_viewport_size, kMaxViewportSizeM, "Maximal size of viewport (meters).");
+DEFINE_uint64(max_samples_per_mwm, kMaxSamplesPerMwm,
+              "Maximal number of samples of each type (buildings/cafes) per mwm.");
+DEFINE_bool(add_misprints, false, "Add random misprints.");
+DEFINE_bool(add_cafe_address, false, "Add address.");
+DEFINE_bool(add_cafe_type, false, "Add cafe type (restaurant/cafe/bar) in local language.");
+
 std::random_device g_rd;
 mt19937 g_rng(g_rd());
 
@@ -95,6 +105,9 @@ void AddRandomMisprint(strings::UniString & str)
 
 void AddMisprints(string & str)
 {
+  if (!FLAGS_add_misprints)
+    return;
+
   auto tokens = strings::Tokenize(str, " -&");
   str.clear();
   for (size_t i = 0; i < tokens.size(); ++i)
@@ -192,20 +205,25 @@ void ModifyHouse(uint8_t lang, string & str)
 
 m2::PointD GenerateNearbyPosition(m2::PointD const & point)
 {
-  uniform_real_distribution dis(-kMaxDistanceToObjectM, kMaxDistanceToObjectM);
+  auto const maxDistance = FLAGS_max_distance_to_object;
+  uniform_real_distribution dis(-maxDistance, maxDistance);
   return MercatorBounds::GetSmPoint(point, dis(g_rng) /* dX */, dis(g_rng) /* dY */);
 }
 
 m2::RectD GenerateNearbyViewport(m2::PointD const & point)
 {
-  uniform_real_distribution dis(kMinViewportSizeM, kMaxViewportSizeM);
+  uniform_real_distribution dis(FLAGS_min_viewport_size, FLAGS_max_viewport_size);
   return MercatorBounds::RectByCenterXYAndSizeInMeters(GenerateNearbyPosition(point), dis(g_rng));
 }
 
 bool GetBuildingInfo(FeatureType & ft, search::ReverseGeocoder const & coder, string & street)
 {
-  if (ft.GetHouseNumber().empty())
+  auto const houseNumber = ft.GetHouseNumber();
+  if (houseNumber.empty() ||
+      !search::house_numbers::LooksLikeHouseNumber(houseNumber, false /* prefix */))
+  {
     return false;
+  }
 
   street = coder.GetFeatureStreetName(ft);
   if (street.empty())
@@ -262,7 +280,7 @@ string CombineRandomly(string const & mandatory, string const & optional)
 
 void ModifyCafe(string const & name, string const & type, string & out)
 {
-  out = CombineRandomly(name, type);
+  out = FLAGS_add_cafe_type ? CombineRandomly(name, type) : name;
   AddMisprints(out);
 }
 
@@ -312,7 +330,9 @@ boost::optional GenerateRequest(
   auto const house = ft.GetHouseNumber();
   auto const featureCenter = feature::GetCenter(ft);
   auto const address = ModifyAddress(street, house, lang);
-  auto const query = cafeStr.empty() ? address : CombineRandomly(cafeStr, address);
+  auto query = address;
+  if (!cafeStr.empty())
+    query = FLAGS_add_cafe_address ? CombineRandomly(cafeStr, address) : cafeStr;
 
   Sample sample;
   sample.m_query = strings::MakeUniString(query);
@@ -430,7 +450,7 @@ int main(int argc, char * argv[])
     size_t numSamples = 0;
     for (auto const fid : fids)
     {
-      if (numSamples >= kMaxSamplesPerMwm)
+      if (numSamples >= FLAGS_max_samples_per_mwm)
         break;
 
       auto ft = g.GetFeatureByIndex(fid);
diff --git a/search/search_tests/house_numbers_matcher_test.cpp b/search/search_tests/house_numbers_matcher_test.cpp
index 3272855c07..e467528278 100644
--- a/search/search_tests/house_numbers_matcher_test.cpp
+++ b/search/search_tests/house_numbers_matcher_test.cpp
@@ -65,11 +65,6 @@ bool CheckParser(string const & utf8s, string const & expected)
 
   return true;
 }
-
-bool LooksLikeHouseNumber(string const & s, bool isPrefix)
-{
-  return house_numbers::LooksLikeHouseNumber(MakeUniString(s), isPrefix);
-}
 } // namespace
 
 UNIT_TEST(HouseNumberTokenizer_Smoke)