diff --git a/base/stl_helpers.hpp b/base/stl_helpers.hpp index f4f3ca3b97..3375867f75 100644 --- a/base/stl_helpers.hpp +++ b/base/stl_helpers.hpp @@ -45,12 +45,18 @@ struct Comparer // Sorts and removes duplicate entries from |v|. template -void SortUnique(std::vector & v) +void SortUnique(vector & v) { sort(v.begin(), v.end()); v.erase(unique(v.begin(), v.end()), v.end()); } +template +void EraseIf(vector & v, TFn && fn) +{ + v.erase(remove_if(v.begin(), v.end(), forward(fn)), v.end()); +} + // Creates a comparer being able to compare two instances of class C // (given by reference or pointer) by a field or const method of C. // For example, to create comparer that is able to compare pairs of diff --git a/data/synonyms.txt b/data/synonyms.txt index c6efad4712..950ed0ee34 100644 --- a/data/synonyms.txt +++ b/data/synonyms.txt @@ -1,6 +1,6 @@ United Kingdom: UK -United States of America: US, USA +United States of America: US, USA, США Alabama: AL Alaska: AK Arizona: AZ diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index f129fe0ca5..47b6fb1657 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -177,10 +177,12 @@ public: // the search phase this part of the query will be matched against the // "street" in the categories branch of the search index. // However, we still add it when there are two or more street tokens - // ("industrial st", "улица набережная"). + // ("avenue st", "улица набережная"). + + size_t const tokensCount = tokens.size(); size_t numStreetTokens = 0; - vector isStreet(tokens.size()); - for (size_t i = 0; i < tokens.size(); ++i) + vector isStreet(tokensCount); + for (size_t i = 0; i < tokensCount; ++i) { if (search::IsStreetSynonym(tokens[i])) { @@ -189,11 +191,11 @@ public: } } - for (size_t i = 0; i < tokens.size(); ++i) + for (size_t i = 0; i < tokensCount; ++i) { if (numStreetTokens == 1 && isStreet[i] && m_hasStreetType) { - LOG(LDEBUG, ("skipping token:", tokens[i], "in", name)); + //LOG(LDEBUG, ("Skipping token:", tokens[i], "in", name)); continue; } AddToken(lang, tokens[i]); @@ -334,8 +336,7 @@ void BuildAddressTable(FilesContainerR & container, Writer & writer) size_t streetIndex; bool streetMatched = false; - string street; - search::GetStreetNameAsKey(data.Get(feature::AddressData::STREET), street); + strings::UniString const street = search::GetStreetNameAsKey(data.Get(feature::AddressData::STREET)); if (!street.empty()) { FeatureType ft; diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp index f84ff8f33b..27f8ef7cb3 100644 --- a/indexer/indexer_tests/search_string_utils_test.cpp +++ b/indexer/indexer_tests/search_string_utils_test.cpp @@ -4,9 +4,20 @@ #include "base/string_utils.hpp" +using namespace search; +using namespace strings; + +namespace +{ +bool TestStreetPrefixMatch(char const * s) +{ + return IsStreetSynonymPrefix(MakeUniString(s)); +} +} // namespace + UNIT_TEST(FeatureTypeToString) { - TEST_EQUAL("!type:123", strings::ToUtf8(search::FeatureTypeToString(123)), ()); + TEST_EQUAL("!type:123", ToUtf8(FeatureTypeToString(123)), ()); } UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) @@ -37,18 +48,29 @@ UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) }; for (size_t i = 0; i < ARRAY_SIZE(arr); i += 2) - TEST_EQUAL(arr[i + 1], strings::ToUtf8(search::NormalizeAndSimplifyString(arr[i])), (i)); + TEST_EQUAL(arr[i + 1], ToUtf8(NormalizeAndSimplifyString(arr[i])), (i)); } UNIT_TEST(Contains) { constexpr char const * kTestStr = "ØøÆ挜 Ўвага!"; - TEST(search::ContainsNormalized(kTestStr, ""), ()); - TEST(!search::ContainsNormalized("", "z"), ()); - TEST(search::ContainsNormalized(kTestStr, "ooae"), ()); - TEST(search::ContainsNormalized(kTestStr, " у"), ()); - TEST(search::ContainsNormalized(kTestStr, "Ў"), ()); - TEST(search::ContainsNormalized(kTestStr, "ўв"), ()); - TEST(!search::ContainsNormalized(kTestStr, "ага! "), ()); - TEST(!search::ContainsNormalized(kTestStr, "z"), ()); + TEST(ContainsNormalized(kTestStr, ""), ()); + TEST(!ContainsNormalized("", "z"), ()); + TEST(ContainsNormalized(kTestStr, "ooae"), ()); + TEST(ContainsNormalized(kTestStr, " у"), ()); + TEST(ContainsNormalized(kTestStr, "Ў"), ()); + TEST(ContainsNormalized(kTestStr, "ўв"), ()); + TEST(!ContainsNormalized(kTestStr, "ага! "), ()); + TEST(!ContainsNormalized(kTestStr, "z"), ()); +} + +UNIT_TEST(StreetPrefixMatch) +{ + TEST(TestStreetPrefixMatch("п"), ()); + TEST(TestStreetPrefixMatch("пр"), ()); + TEST(TestStreetPrefixMatch("про"), ()); + TEST(TestStreetPrefixMatch("прое"), ()); + TEST(TestStreetPrefixMatch("проез"), ()); + TEST(TestStreetPrefixMatch("проезд"), ()); + TEST(!TestStreetPrefixMatch("проездд"), ()); } diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 1839e90dd2..24aaaa3764 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -1,121 +1,226 @@ #include "search_string_utils.hpp" -#include "std/set.hpp" +#include "base/macros.hpp" +#include "base/stl_helpers.hpp" + +#include "std/algorithm.hpp" #include "std/transform_iterator.hpp" -#include "base/macros.hpp" +using namespace strings; -char const * STREET_TOKENS_SEPARATOR = "\t -,."; - -strings::UniString search::FeatureTypeToString(uint32_t type) +namespace search { - string const s = "!type:" + strings::to_string(type); - return strings::UniString(s.begin(), s.end()); + +UniString NormalizeAndSimplifyString(string const & s) +{ + UniString uniString = MakeUniString(s); + for (size_t i = 0; i < uniString.size(); ++i) + { + UniChar & c = uniString[i]; + switch (c) + { + // Replace "d with stroke" to simple d letter. Used in Vietnamese. + // (unicode-compliant implementation leaves it unchanged) + case 0x0110: + case 0x0111: c = 'd'; break; + // Replace small turkish dotless 'ı' with dotted 'i'. + // Our own invented hack to avoid well-known Turkish I-letter bug. + case 0x0131: c = 'i'; break; + // Replace capital turkish dotted 'İ' with dotted lowercased 'i'. + // Here we need to handle this case manually too, because default unicode-compliant implementation + // of MakeLowerCase converts 'İ' to 'i' + 0x0307. + case 0x0130: c = 'i'; break; + // Some Danish-specific hacks. + case 0x00d8: // Ø + case 0x00f8: c = 'o'; break; // ø + case 0x0152: // Œ + case 0x0153: // œ + c = 'o'; + uniString.insert(uniString.begin() + (i++) + 1, 'e'); + break; + case 0x00c6: // Æ + case 0x00e6: // æ + c = 'a'; + uniString.insert(uniString.begin() + (i++) + 1, 'e'); + break; + } + } + + MakeLowerCaseInplace(uniString); + NormalizeInplace(uniString); + + // Remove accents that can appear after NFKD normalization. + uniString.erase_if([](UniChar const & c) + { + // ̀ COMBINING GRAVE ACCENT + // ́ COMBINING ACUTE ACCENT + return (c == 0x0300 || c == 0x0301); + }); + + return uniString; + + /// @todo Restore this logic to distinguish и-й in future. + /* + // Just after lower casing is a correct place to avoid normalization for specific chars. + static auto const isSpecificChar = [](UniChar c) -> bool + { + return c == 0x0439; // й + }; + UniString result; + result.reserve(uniString.size()); + for (auto i = uniString.begin(), end = uniString.end(); i != end;) + { + auto j = find_if(i, end, isSpecificChar); + // We don't check if (j != i) because UniString and Normalize handle it correctly. + UniString normString(i, j); + NormalizeInplace(normString); + result.insert(result.end(), normString.begin(), normString.end()); + if (j == end) + break; + result.push_back(*j); + i = j + 1; + } + return result; + */ } +UniString FeatureTypeToString(uint32_t type) +{ + string const s = "!type:" + to_string(type); + return UniString(s.begin(), s.end()); +} + +namespace +{ +char const * kStreetTokensSeparator = "\t -,."; + /// @todo Move prefixes, suffixes into separate file (autogenerated). -/// "Набережная" улица встречается в городах +/// It's better to distinguish synonyms comparison according to language/region. -char const * affics[] = +class StreetsSynonymsHolder { - // Russian - "аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр", + vector m_synonyms; +public: + StreetsSynonymsHolder() + { + char const * affics[] = + { + // Russian + "аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр", - // English - "street", "avenue", "square", "road", "boulevard", "drive", "highway", "lane", "way", "close", "court", "crescent", "industrial", "st", "av", "ave", "sq", "rd", "blvd", "dr", "hwy", "ln", "cl", "ct", "cres", "ind", "wy", + // English + "street", "avenue", "square", "road", "boulevard", "drive", "highway", "lane", "way", "circle", "st", "av", "ave", "sq", "rd", "blvd", "dr", "hwy", "ln", - // German - "strasse", "weg", "platz", + // German + "strasse", "weg", "platz", - // Lithuanian - "g", "pr", "pl", "kel", + // Lithuanian + "g", "pr", "pl", "kel", - // Български език - Bulgarian - "Блок", "бл", "Булевард", "Вход", "вх", "Генерал", "ген", "Град", "гр", "Доктор", "д-р", "Доцент", "доц", "Капитан", "кап", "Митрополит", "мит", "Площад", "Професор", "проф", "Свети", "Св", "Улица", "Село", "с", "Квартал", "кв", "Жилищен Комплекс", "ж.к", + // Български език - Bulgarian + "булевард", "бул", "площад", "пл", "улица", "ул", "квартал", "кв", - // Canada - Canada - "abbey", "acres", "allée", "allÉe", "alley", "autoroute", "aut", "bay", "beach", "bend", "boul", "by-pass", "bypass", "byway", "campus", "cape", "carré", "car", "carrefour", "carref", "centre", "ctr", "c", "cercle", "chase", "chemin", "ch", "circle", "cir", "circuit", "circt", "common", "concession", "conc", "corners", "crnrs", "côte", "cÔte", "cour", "cours", "crt", "cove", "croissant", "crois", "crossing", "cross", "cul-de-sac", "cds", "dale", "dell", "diversion", "divers", "downs", "Échangeur", "Éch", "end", "esplanade", "espl", "estates", "estate", "expressway", "expy", "extension", "exten", "farm", "field", "forest", "freeway", "fwy", "front", "gardens", "gdns", "gate", "glade", "glen", "green", "grounds", "grnds", "grove", "harbour", "harbr", "heath", "heights", "hts", "highlands", "hghlds", "hill", "hollow", "Île", "impasse", "imp", "inlet", "island", "key", "knoll", "landing", "landng", "limits", "lmts", "line", "link", "lookout", "lkout", "loop", "mall", "manor", "maze", "meadow", "mews", "montée", "montÉe", "moor", "mount", "mountain", "mtn", "orchard", "orch", "parade", "parc", "park", "pk", "parkway", "pky", "passage", "pass", "path", "pathway", "ptway", "pines", "place", "plateau", "plat", "plaza", "point", "pt", "pointe", "port", "private", "pvt", "promenade", "prom", "quai", "quay", "ramp", "rang", "range", "rg", "ridge", "rise", "rond-point", "rdpt", "route", "rte", "row", "rue", "ruelle", "rle", "run", "sentier", "sent", "subdivision", "subdiv", "terrace", "terr", "terrasse", "tsse", "thicket", "thick", "towers", "townline", "tline", "trail", "turnabout", "trnabt", "vale", "via", "view", "village", "villge", "villas", "vista", "voie", "walk", "wharf", "wood", "wynd", + // Canada - Canada + "allee", "alley", "autoroute", "aut", "bypass", "byway", "carrefour", "carref", "chemin", "cercle", "circle", "côte", "crossing", "cross", "expressway", "freeway", "fwy", "line", "link", "loop", "parkway", "pky", "pkwy", "path", "pathway", "ptway", "route", "rue", "rte", "trail", "walk", - // Cesky - Czech - "ulice", "ul", "náměstí", "nám", + // Cesky - Czech + "ulice", "ul", "náměstí", "nám", - // Dansk - Danish - "lille", "ll", "nordre", "ndr", "søndre", "sdr", "store", "gammel", "gl", + // Deutsch - German + "allee", "al", "brücke", "br", "chaussee", "gasse", "gr", "pfad", "straße", "str", - // Deutsch - German - "am", "a", "an der", "a.d", "auf der", "allee", "al", "berg", "bg", "burg", "bahnhof", "bhf", "bf", "brücke", "br", "bürgermeister", "bgm", "chaussee", "dorf", "df", "damm", "d", "doktor", "gasse", "großer", "große", "großes", "gr", "hauptbahnhof", "hbf", "heiligen", "hl", "hof", "h", "im", "in", "in der", "kamp", "k", "kleiner", "kleine", "kleines", "kl", "ob", "oberer", "obere", "oberes", "pfad", "p", "quelle", "qu", "rhein", "rh", "sankt", "straße", "str", "unterer", "untere", "unteres", "von", "von der", "vor der", + // Español - Spanish + "avenida", "avd", "avda", "bulevar", "bulev", "calle", "calleja", "cllja", "callejón", "callej", "cjon", "cllon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "costera", "coste", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", - // Español - Spanish - "acceso", "acces", "acequia", "aceq", "alameda", "alam", "alquería", "alque", "andador", "andad", "angosta", "angta", "apartamentos", "aptos", "apeadero", "apdro", "arboleda", "arb", "arrabal", "arral", "arroyo", "arry", "autopista", "auto", "autovía", "autov", "avenida", "avd", "avda", "bajada", "bjada", "balneario", "balnr", "banda", "b", "bda", "barranco", "branc", "barranquil", "bqllo", "barriada", "barda", "barrio", "barro", "bo", "bloque", "blque", "brazal", "brzal", "bulevar", "bulev", "calle", "c/", "calleja", "cllja", "callejón", "callej", "cjon", "cllon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "camino", "cno", "cmno", "camino hondo", "c.h", "camino nuevo", "c.n", "camino viejo", "c.v", "camping", "campg", "cantera", "cantr", "cantina", "canti", "cantón", "cant", "carrera", "cra", "carrero", "cro", "carretera", "ctra", "carreterín", "ctrin", "carretil", "crtil", "carril", "crril", "caserío", "csrio", "chalet", "chlet", "cinturón", "cint", "circunvalación", "ccvcn", "cobertizo", "cbtiz", "colonia", "col", "complejo", "compj", "conjunto", "cjto", "convento", "cnvto", "cooperativa", "coop", "corral", "crral", "corralillo", "crrlo", "corredor", "crrdo", "cortijo", "crtjo", "costanilla", "cstan", "costera", "coste", "cuadra", "cuadr", "cuesta", "custa", "dehesa", "dhsa", "demarcación", "demar", "diagonal", "diag", "diseminado", "disem", "edificio", "edifc", "empresa", "empr", "entrada", "entd", "escalera", "esca", "escalinata", "escal", "espalda", "eslda", "estación", "estcn", "estrada", "estda", "explanada", "expla", "extramuros", "extrm", "extrarradio", "extrr", "fábrica", "fca", "fbrca", "galería", "gale", "glorieta", "gta", "gran vía", "g.v", "granja", "granj", "hipódromo", "hipod", "jardín", "jdin", "ladera", "ldera", "llanura", "llnra", "malecón", "malec", "mercado", "merc", "mirador", "mrdor", "monasterio", "mtrio", "muelle", "muell", "núcleo", "ncleo", "palacio", "palac", "pantano", "pant", "paraje", "praje", "parque", "pque", "particular", "parti", "partida", "ptda", "pasadizo", "pzo", "pasaje", "psaje", "paseo", "pº", "paseo marítimo", "psmar", "pasillo", "psllo", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "poblado", "pbdo", "polígono", "polig", "polígono industrial", "pgind", "p.i", "portal", "prtal", "pórtico", "prtco", "portillo", "ptilo", "prazuela", "przla", "prolongación", "prol", "pueblo", "pblo", "puente", "pnte", "puerta", "pta", "puerto", "pto", "punto kilométrico", "p.k", "rambla", "rbla", "rampla", "rampa", "residencial", "resid", "ribera", "rbra", "rincón", "rcon", "rinconada", "rcda", "rotonda", "rtda", "san", "s", "sanatorio", "sanat", "santa", "sta", "santuario", "santu", "sector", "sect", "sendera", "sedra", "sendero", "send", "subida", "sbida", "torrente", "trrnt", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", "urbanización", "urb", "vecindario", "vecin", "vereda", "vreda", "viaducto", "vcto", "viviendas", "vvdas", + // Français - French + "rue", "avenue", "carré", "cercle", "route", "boulevard", "drive", "autoroute", "lane", "chemin", - // Français - French - "abbaye", "abe", "agglomération", "agl", "aire", "aires", "all", "allées", "ancien chemin", "ach", "ancienne route", "art", "anciennes routes", "anse", "arcade", "arc", "arcades", "barrière", "bre", "barrières", "bas chemin", "bch", "bastide", "bstd", "baston", "bast", "béguinage", "begi", "béguinages", "berge", "ber", "berges", "bois", "boucle", "bcle", "bd", "bourg", "brg", "butte", "but", "cité", "cite", "cités", "cote", "côteau", "cale", "camp", "campagne", "cgne", "cpg", "carreau", "cau", "carrière", "care", "carrières", "carr", "castel", "cst", "cavée", "cav", "central", "ctre", "chl", "chapelle", "chp", "charmille", "chi", "chaussée", "chs", "chaussées", "che", "chemin vicinal", "chv", "cheminement", "chem", "cheminements", "chemins", "chemins vicinaux", "chez", "château", "cht", "cloître", "cloi", "clos", "colline", "coli", "collines", "contour", "corniche", "cor", "corniches", "cottage", "cott", "cottages", "crs", "darse", "dars", "degré", "deg", "degrés", "descente", "dsg", "descentes", "digue", "dig", "digues", "domaine", "dom", "domaines", "Écluse", "ecl", "Écl", "Écluses", "Église", "egl", "Égl", "enceinte", "en", "enclave", "env", "enclos", "enc", "escalier", "esc", "escaliers", "espace", "espa", "esp", "esplanades", "Étang", "etang", "faubourg", "fg", "ferme", "frm", "fermes", "fontaine", "fon", "fort", "forum", "form", "fosse", "fos", "fosses", "foyer", "foyr", "galerie", "gal", "galeries", "gare", "garenne", "garn", "grand boulevard", "gbd", "grand ensemble", "gden", "grand’rue", "grande rue", "grandes rues", "grands ensembles", "grille", "gri", "grimpette", "grim", "groupe", "gpe", "groupement", "gpt", "groupes", "halle", "hle", "halles", "hameau", "ham", "hameaux", "haut chemin", "hch", "hauts chemins", "hippodrome", "hip", "hlm", "ile", "immeuble", "imm", "immeubles", "impasses", "jardin", "jard", "jardins", "jetée", "jte", "jetées", "levée", "leve", "lieu-dit", "ld", "lotissement", "lot", "lotissements", "mail", "maison forestière", "mf", "manoir", "man", "marche", "mar", "marches", "mas", "monseigneur", "mgr", "mont", "mt", "mte", "montées", "moulin", "mln", "moulins", "musée", "mus", "métro", "met", "mÉt", "nouvelle route", "nte", "palais", "pal", "parcs", "parking", "pkg", "parvis", "prv", "pas", "passage à niveau", "pn", "passe", "passerelle", "ple", "passerelles", "passes", "patio", "pat", "pavillon", "pav", "pavillons", "petit chemin", "pch", "petite allée", "petite avenue", "pae", "petite impasse", "pim", "petite route", "prt", "petite rue", "ptr", "petites allées", "placis", "plci", "plage", "plag", "plages", "plaine", "pln", "plan", "plt", "plateaux", "pnt", "pont", "ponts", "porche", "porte", "pte", "portique", "porq", "portiques", "poterne", "pot", "pourtour", "pour", "presqu’île", "prq", "pré", "pre", "prÉ", "périphérique", "peri", "péristyle", "psty", "quartier", "qua", "raccourci", "rac", "raidillon", "raid", "rampe", "rpe", "rempart", "rem", "roc", "rocade", "rond point", "rpt", "roquet", "roqt", "rotonde", "rtd", "routes", "r", "ruelles", "rues", "résidence", "res", "résidences", "saint", "sainte", "ste", "sente", "sen", "sentes", "sentiers", "stade", "stde", "station", "terrain", "trn", "terrasses", "terre plein", "tpl", "tertre", "trt", "tertres", "tour", "traverse", "tra", "vallon", "val", "vallée", "venelle", "ven", "venelles", "vieille route", "vte", "vieux chemin", "vche", "villa", "vla", "vge", "villages", "voi", "voies", "zone", "zone artisanale", "za", "zone d'aménagement concerté", "zac", "zone d'aménagement différé", "zad", "zone industrielle", "zi", "zone à urbaniser en priorité", "zup", + // Nederlands - Dutch + "laan", "ln.", "straat", "steenweg", "stwg", "st", - // Nederlands - Dutch - "broeder", "burgemeester", "commandant", "cmdt", "doctor", "dokter", "dominee", "ds", "gebroeders", "gebr", "generaal", "gen", "gracht", "ingenieur", "ir.", "ir", "jonkheer", "jhr", "kolonel", "kol", "kanunnik", "kan", "kardinaal", "kard", "kort(e)", "kte.", "koning", "kon", "koningin", "laan", "ln.", "lange", "l", "luitenant", "luit", "markt", "mkt", "meester", "mr.", "mr", "mevrouw", "mevr", "mgr.", "onze-lieve-vrouw(e)-", "o.l.v.-", "o.l.v", "olv-", "olv", "pastoor", "past", "plein", "pln.", "president", "pres", "prins", "prinses", "professor", "prof", "straat", "steenweg", "stwg", "sint-", "st.-", "st.", "van", "v", "van de", "v. d", "vd", "vliet", "vlt", + // Norsk - Norwegian + "vei", "veien", "vn", "gaten", "gata", "gt", "plass", "plassen", "sving", "svingen", "sv", - // Norsk - Norwegian - "vei", "veien", "vn", "gaten", "gata", "gt", "plass", "plassen", "sving", "svingen", "sv", + // Polski - Polish + "aleja", "aleje", "aleji", "alejach", "aleją", "plac", "placu", "placem", "ulica", "ulicy", - // Polski - Polish - "aleja", "aleje", "aleji", "alejach", "aleją", "dolny", "dolna", "dolne", "dln", "drugi", "druga", "drugie", "ii", "duży", "duża", "duże", "dz", "dż", "górny", "górna", "górne", "grn", "kolonia", "koło", "kolo", "mały", "mała", "małe", "ml", "mł", "mazowiecka", "mazowiecki", "mazowieckie", "maz", "miasto", "m", "nowy", "nowa", "nowe", "nw", "n", "osiedle", "osiedlu", "os", "no", "pierwszy", "pierwsza", "pierwsze", "plac", "placu", "placem", "stary", "stara", "stare", "Świętego", "Świętej", "Świętych", "św", "trzeci", "trzecia", "trzecie", "iii", "ulica", "ulicą", "ulicy", "wielki", "wielka", "wielkie", "wlk", "wielkopolski", "wielkopolska", "wielkopolskie", "wlkp", "województwo", "województwie", "woj", + // Português - Portuguese + "street", "avenida", "quadrado", "estrada", "boulevard", "carro", "auto-estrada", "lane", "caminho", - // Português - Portuguese - "associação", "ass", "alferes", "alf", "almirante", "alm", "arquitecto", "arq", "arqº", "arquiteto", "auto-estrada", "avª", "azinhaga", "az", "bairro", "bº", "beco", "bc", "bco", "bloco", "bl", "bombeiros voluntários", "bv", "b.v", "brigadeiro", "cacique", "cac", "calçada", "cc", "calçadinha", "ccnh", "câmara municipal", "cm", "c.m", "c. m", "caminho", "cam", "capitão", "cap", "casal", "csl", "cave", "cv", "centro comercial", "c.c", "ciclo do ensino básico", "ceb", "c.e.b", "c. e. b", "comandante", "comendador", "comend", "companhia", "cª", "conselheiro", "cons", "coronel", "cel", "de", "d´", "d'", "deputado", "dep", "direito", "dto", "dona", "dª", "doutor", "doutora", "drª", "dra", "duque", "dq", "edifício", "ed", "edf", "embaixador", "emb", "empresa pública", "ep", "e.p", "enfermeiro", "enfo", "enfº", "enf", "engenheiro", "eng", "engº", "engenheira", "engª", "escadas", "escadinhas", "escnh", "escola básica", "eb", "e.b", "e. b", "esquerdo", "esq", "estação de tratamento de Águas residuais", "etar", "e.t.a.r", "estr", "estrada municipal", "em", "estrada nacional", "estrada regional", "er", "frei", "fr", "frente", "ft", "futebol clube", "fc", "f.c", "f. c", "guarda nacional republicana", "gnr", "general", "habitação", "hab", "infante", "inf", "instituto", "inst", "irmã", "ima", "imª", "irmão", "imo", "imº", "itinerário complementar", "ic", "itinerário principal", "ip", "jardim", "jrd", "júnior", "jr", "largo", "lg", "limitada", "lda", "loja", "lj", "lote", "lt", "loteamento", "loteam", "lugar", "lug", "maestro", "mto", "major", "maj", "marechal", "mal", "marquês", "mq", "madre", "me", "mestre", "ministério", "min", "monsenhor", "mons", "municipal", "nacional", "nossa", "nª", "nossa senhora", "ns", "nosso", "número", "nº", "padre", "pe", "pq", "part", "pátio", "pavilhão", "polícia de segurança pública", "psp", "p.s.p", "polícia judiciária", "pj", "p.j", "praça", "pc", "pç", "praceta", "pct", "pctª", "presidente", "presid", "primeiro", "1º", "professora", "profª", "projectada", "proj", "projetada", "prolongamento", "prolng", "quadra", "q", "qd", "quinta", "qta", "regional", "rés-do-chão", "r/c", "rc", "rotunda", "rot", "ribeira", "rª", "rib", "ribª", "rio", "rua", "stª", "santo", "sto", "stº", "são", "sargento", "sarg", "sem número", "s/n", "sn", "senhor", "sr", "senhora", "sª", "srª", "sr.ª", "s.ra", "sra", "sobre-loja", "slj", "sociedade", "soc", "sociedade anónima", "sa", "s.a", "sport clube", "sc", "sub-cave", "scv", "superquadra", "tenente", "ten", "torre", "tr", "transv", "travessa", "trav", "trv", "tv", "universidade", "univ", "urbanização", "vila", "vl", "visconde", "visc", "vivenda", "vv", "zona", "zn", + // Română - Romanian + "bul", "bdul", "blv", "bulevard", "bulevardu", "calea", "cal", "piața", "pţa", "pța", "strada", "stra", "stradela", "sdla", "stradă", "unitate", "autostradă", "lane", - // Română - Romanian - "aleea", "ale", "alea", "bulevardul", "bul", "bdul", "blv", "b-dul", "b.dul", "bulevard", "bulevardu", "calea", "cal", "fundătura", "fnd", "fundacul", "fdc", "intrarea", "int", "intr", "piața", "pţa", "pța", "p-ta", "p-ţa", "p-ța", "piaţa", "piață", "piaţă", "piată", "piata", "strada", "stra", "stradela", "str-la", "sdla", "Șoseaua", "sos", "soseaua", "splaiul", "sp", "spl", "vârful", "vf", "virful", "vîrful", "varful", "virf", "varf", "muntele", "m-tele", "m-te", "mnt", + // Slovenščina - Slovenian + "cesta", - // Slovenščina - Slovenian - "cesta", "spodnja", "spodnje", "spodnji", "srednja", "srednje", "srednji", "sveta", "sveti", "velika", "velike", "veliki", "veliko", "zgornja", "zg", "zgornje", "zgornji", + // Suomi - Finnish + "kaari", "kri", "katu", "kuja", "kj", "kylä", "polku", "tie", "t", "tori", "väylä", "vlä", - // Suomi - Finnish - "alue", "aukio", "auk", "kaari", "kri", "katu", "kuja", "kj", "kylä", "penger", "pgr", "polku", "puistikko", "pko", "puisto", "ps", "raitti", "ranta", "rt", "rinne", "rn", "taival", "tvl", "tie", "t", "tori", "väylä", "vlä", + // Svenska - Swedish + "väg", "vägen", "gatan", "gränd", "gränden", "stig", "stigen", "plats", "platsen", - // Svenska - Swedish - "väg", "vägen", "gatan", "gränd", "gränden", "gamla", "g:la", "södra", "s:a", "norra", "n:a", "östra", "västra", "v:a", "stig", "stigen", "sankta", "plats", "platsen", "lilla", "stora", + // Türkçe - Turkish + "sokak", "sk", "sok", "sokağı", "cadde", "cd", "caddesi", "bulvar", "bulvarı", - // Türkçe - Turkish - "sokak", "sk", "sok", "sokağı", "cadde", "cd", "caddesi", "bulvar", "bulvarı", + // Tiếng Việt – Vietnamese + "quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "Đường sắt", "Đs", "Đường phố", "Đp", "vuông", "con Đường", "Đại lộ", "Đường cao tốc", - // Tiếng Việt – Vietnamese - "thành phố", "tp", "thị xã", "tx", "thị trấn", "tt", "quận", "phường", "ph", "quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "công trường", "quảng trường", "qt", "sân bay", "sb", "sân bay quốc tế", "sbqt", "phi trường", "Đường sắt", "Đs", "trung tâm", "trung tâm thương mại", "tttm", "khách sạn", "ks", "k/s", "bưu điện", "bĐ", "Đại học", "Đh", "cao đẳng", "cĐ", "trung học phổ thông", "thpt", "trung học cơ sở", "thcs", "tiểu học", "th", "khu công nghiệp", "kcn", "khu nghỉ mát", "knm", "khu du lịch", "kdl", "công viên văn hóa", "cvvh", "công viên", "vươn quốc gia", "vqg", "viện bảo tàng", "vbt", "sân vận động", "svĐ", "nhà thi đấu", "ntĐ", "câu lạc bộ", "clb", "nhà thờ", "nt", "nhà hát", "nh", "rạp hát", "công ty", "cty", "tổng công ty", "tcty", "tct", "công ty cổ phần", "ctcp", "cty cp", "căn cứ không quân", "cckq", + // Українська - Ukrainian + "дорога", "провулок", "площа", "шосе", "вулиция", "дор", "пров", "вул" + }; - // Українська - Ukrainian - "дорога", "провулок", "площа", "шосе", "вулиция", "дор", "пров", "вул" + m_synonyms.assign(make_transform_iterator(affics, &NormalizeAndSimplifyString), + make_transform_iterator(affics + ARRAY_SIZE(affics), &NormalizeAndSimplifyString)); + my::SortUnique(m_synonyms); + } + + bool MatchPrefix(UniString const & prefix) const + { + auto const it = lower_bound(m_synonyms.begin(), m_synonyms.end(), prefix); + return (it != m_synonyms.end() && StartsWith(*it, prefix)); + } + + bool FullMatch(UniString const & name) const + { + return binary_search(m_synonyms.begin(), m_synonyms.end(), name); + } }; -void search::GetStreetName(strings::SimpleTokenizer iter, string & streetName) +StreetsSynonymsHolder g_streets; + +} // namespace + +UniString GetStreetNameAsKey(string const & name) { + UniString res; + SimpleTokenizer iter(name, kStreetTokensSeparator); while (iter) { - string const s = strings::MakeLowerCase(*iter); + UniString const s = NormalizeAndSimplifyString(*iter); ++iter; - char const ** end = affics + ARRAY_SIZE(affics); - - if (find(affics, end, s) == end) - streetName += s; + if (!g_streets.FullMatch(s)) + res.append(s); } + + // In case when street name has only synonym tokens, but we should return valid key. + return (res.empty() ? NormalizeAndSimplifyString(name) : res); } -void search::GetStreetNameAsKey(string const & name, string & res) +bool IsStreetSynonym(UniString const & s) { - strings::SimpleTokenizer iter(name, STREET_TOKENS_SEPARATOR); - GetStreetName(iter, res); + return g_streets.FullMatch(s); } -bool search::IsStreetSynonym(strings::UniString const & s) +bool IsStreetSynonymPrefix(UniString const & s) { - static set const kSynonyms( - make_transform_iterator(affics, &search::NormalizeAndSimplifyString), - make_transform_iterator(affics + ARRAY_SIZE(affics), &search::NormalizeAndSimplifyString)); - return kSynonyms.count(s) != 0; + return g_streets.MatchPrefix(s); } -bool search::ContainsNormalized(string const & str, string const & substr) +bool ContainsNormalized(string const & str, string const & substr) { - strings::UniString const ustr = search::NormalizeAndSimplifyString(str); - strings::UniString const usubstr = search::NormalizeAndSimplifyString(substr); + UniString const ustr = NormalizeAndSimplifyString(str); + UniString const usubstr = NormalizeAndSimplifyString(substr); return std::search(ustr.begin(), ustr.end(), usubstr.begin(), usubstr.end()) != ustr.end(); } +} // namespace search diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index afa4459f7a..5d1365c084 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -8,78 +8,7 @@ namespace search // This function should be used for all search strings normalization. // It does some magic text transformation which greatly helps us to improve our search. -inline strings::UniString NormalizeAndSimplifyString(string const & s) -{ - strings::UniString uniString = strings::MakeUniString(s); - for (size_t i = 0; i < uniString.size(); ++i) - { - strings::UniChar & c = uniString[i]; - switch (c) - { - // Replace "d with stroke" to simple d letter. Used in Vietnamese. - // (unicode-compliant implementation leaves it unchanged) - case 0x0110: - case 0x0111: c = 'd'; break; - // Replace small turkish dotless 'ı' with dotted 'i'. - // Our own invented hack to avoid well-known Turkish I-letter bug. - case 0x0131: c = 'i'; break; - // Replace capital turkish dotted 'İ' with dotted lowercased 'i'. - // Here we need to handle this case manually too, because default unicode-compliant implementation - // of MakeLowerCase converts 'İ' to 'i' + 0x0307. - case 0x0130: c = 'i'; break; - // Some Danish-specific hacks. - case 0x00d8: // Ø - case 0x00f8: c = 'o'; break; // ø - case 0x0152: // Œ - case 0x0153: // œ - c = 'o'; - uniString.insert(uniString.begin() + (i++) + 1, 'e'); - break; - case 0x00c6: // Æ - case 0x00e6: // æ - c = 'a'; - uniString.insert(uniString.begin() + (i++) + 1, 'e'); - break; - } - } - - MakeLowerCaseInplace(uniString); - NormalizeInplace(uniString); - - // Remove accents that can appear after NFKD normalization. - uniString.erase_if([](strings::UniChar const & c) - { - // ̀ COMBINING GRAVE ACCENT - // ́ COMBINING ACUTE ACCENT - return (c == 0x0300 || c == 0x0301); - }); - - return uniString; - - /// @todo Restore this logic to distinguish и-й in future. - /* - // Just after lower casing is a correct place to avoid normalization for specific chars. - static auto const isSpecificChar = [](UniChar c) -> bool - { - return c == 0x0439; // й - }; - UniString result; - result.reserve(uniString.size()); - for (auto i = uniString.begin(), end = uniString.end(); i != end;) - { - auto j = find_if(i, end, isSpecificChar); - // We don't check if (j != i) because UniString and Normalize handle it correctly. - UniString normString(i, j); - NormalizeInplace(normString); - result.insert(result.end(), normString.begin(), normString.end()); - if (j == end) - break; - result.push_back(*j); - i = j + 1; - } - return result; - */ -} +strings::UniString NormalizeAndSimplifyString(string const & s); template void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims) @@ -110,10 +39,10 @@ bool TokenizeStringAndCheckIfLastTokenIsPrefix(string const & s, delimiter); } -void GetStreetName(strings::SimpleTokenizer iter, string & streetName); -void GetStreetNameAsKey(string const & name, string & res); +strings::UniString GetStreetNameAsKey(string const & name); bool IsStreetSynonym(strings::UniString const & s); +bool IsStreetSynonymPrefix(strings::UniString const & s); /// Normalizes both str and substr, and then returns true if substr is found in str. /// Used in native platform code for search in localized strings (cuisines, categories, strings etc.). diff --git a/map/map_tests/address_tests.cpp b/map/map_tests/address_tests.cpp index 6c8c651afd..d8423812db 100644 --- a/map/map_tests/address_tests.cpp +++ b/map/map_tests/address_tests.cpp @@ -19,8 +19,7 @@ void TestAddress(ReverseGeocoder & coder, ms::LatLon const & ll, ReverseGeocoder::Address addr; coder.GetNearbyAddress(MercatorBounds::FromLatLon(ll), addr); - string key; - GetStreetNameAsKey(addr.m_street.m_name, key); + string const key = strings::ToUtf8(GetStreetNameAsKey(addr.m_street.m_name)); TEST_EQUAL(stName, key, (addr)); TEST_EQUAL(hNumber, addr.m_building.m_name, (addr)); diff --git a/search/house_detector.cpp b/search/house_detector.cpp index 49072ee010..8ce620fb8f 100644 --- a/search/house_detector.cpp +++ b/search/house_detector.cpp @@ -287,7 +287,7 @@ double Street::GetPrefixLength(size_t numSegs) const void Street::SetName(string const & name) { m_name = name; - GetStreetNameAsKey(name, m_processedName); + m_processedName = strings::ToUtf8(GetStreetNameAsKey(name)); } namespace diff --git a/search/reverse_geocoder.cpp b/search/reverse_geocoder.cpp index bc30412ccc..87f74b31a0 100644 --- a/search/reverse_geocoder.cpp +++ b/search/reverse_geocoder.cpp @@ -62,11 +62,9 @@ void ReverseGeocoder::GetNearbyStreets(FeatureType & ft, vector & street } // static -size_t ReverseGeocoder::GetMatchedStreetIndex(string const & keyName, +size_t ReverseGeocoder::GetMatchedStreetIndex(strings::UniString const & keyName, vector const & streets) { - strings::UniString const expected = strings::MakeUniString(keyName); - // Find the exact match or the best match in kSimilarityTresholdPercent limit. size_t const count = streets.size(); size_t result = count; @@ -74,12 +72,10 @@ size_t ReverseGeocoder::GetMatchedStreetIndex(string const & keyName, for (size_t i = 0; i < count; ++i) { - string key; - search::GetStreetNameAsKey(streets[i].m_name, key); - strings::UniString const actual = strings::MakeUniString(key); + strings::UniString const actual = GetStreetNameAsKey(streets[i].m_name); - size_t const editDistance = - strings::EditDistance(expected.begin(), expected.end(), actual.begin(), actual.end()); + size_t const editDistance = strings::EditDistance(keyName.begin(), keyName.end(), + actual.begin(), actual.end()); if (editDistance == 0) return i; diff --git a/search/reverse_geocoder.hpp b/search/reverse_geocoder.hpp index 3751a78f35..2984a888f6 100644 --- a/search/reverse_geocoder.hpp +++ b/search/reverse_geocoder.hpp @@ -4,6 +4,8 @@ #include "indexer/feature_decl.hpp" +#include "base/string_utils.hpp" + #include "std/string.hpp" #include "std/utility.hpp" #include "std/vector.hpp" @@ -58,7 +60,8 @@ public: } }; - static size_t GetMatchedStreetIndex(string const & keyName, vector const & streets); + static size_t GetMatchedStreetIndex(strings::UniString const & keyName, + vector const & streets); struct Address { diff --git a/search/search_integration_tests/helpers.hpp b/search/search_integration_tests/helpers.hpp index 30a01159ef..0d3a9dcb37 100644 --- a/search/search_integration_tests/helpers.hpp +++ b/search/search_integration_tests/helpers.hpp @@ -71,8 +71,6 @@ protected: Platform & m_platform; my::ScopedLogLevelChanger m_scopedLog; vector m_files; - vector m_countries; - unique_ptr m_infoGetter; tests_support::TestSearchEngine m_engine; m2::RectD m_viewport; }; diff --git a/search/search_integration_tests/search_query_v2_test.cpp b/search/search_integration_tests/search_query_v2_test.cpp index e4242b811f..d18d2a51a7 100644 --- a/search/search_integration_tests/search_query_v2_test.cpp +++ b/search/search_integration_tests/search_query_v2_test.cpp @@ -26,7 +26,8 @@ class SearchQueryV2Test : public SearchTest UNIT_CLASS_TEST(SearchQueryV2Test, Smoke) { - TestCountry wonderlandCountry(m2::PointD(10, 10), "Wonderland", "en"); + string const countryName = "Wonderland"; + TestCountry wonderlandCountry(m2::PointD(10, 10), countryName, "en"); TestCity losAlamosCity(m2::PointD(10, 10), "Los Alamos", "en", 100 /* rank */); TestCity mskCity(m2::PointD(0, 0), "Moscow", "en", 100 /* rank */); @@ -72,7 +73,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke) builder.Add(mskCity); }); auto wonderlandId = - BuildMwm("wonderland", feature::DataHeader::country, [&](TestMwmBuilder & builder) + BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder) { builder.Add(losAlamosCity); builder.Add(mskCity); @@ -99,7 +100,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke) builder.Add(lantern2); }); - RegisterCountry("wonderland", m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(10.1, 10.1))); + RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(10.1, 10.1))); SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0))); @@ -169,7 +170,8 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke) UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld) { - TestCountry wonderland(m2::PointD(0, 0), "Wonderland", "en"); + string const countryName = "Wonderland"; + TestCountry wonderland(m2::PointD(0, 0), countryName, "en"); TestCity losAlamos(m2::PointD(0, 0), "Los Alamos", "en", 100 /* rank */); auto testWorldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder) @@ -178,7 +180,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld) builder.Add(losAlamos); }); - RegisterCountry("Wonderland", m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0))); + RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0))); SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.5, -0.5))); { @@ -197,6 +199,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld) UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName) { + string const countryName = "Wonderland"; TestCity london(m2::PointD(1, 1), "London", "en", 100 /* rank */); TestPark hydePark(vector{m2::PointD(0.5, 0.5), m2::PointD(1.5, 0.5), m2::PointD(1.5, 1.5), m2::PointD(0.5, 1.5)}, @@ -208,12 +211,12 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName) builder.Add(london); }); auto wonderlandId = - BuildMwm("wonderland", feature::DataHeader::country, [&](TestMwmBuilder & builder) + BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder) { builder.Add(hydePark); builder.Add(cafe); }); - RegisterCountry("Wonderland", m2::RectD(m2::PointD(0, 0), m2::PointD(2, 2))); + RegisterCountry(countryName, m2::RectD(m2::PointD(0, 0), m2::PointD(2, 2))); SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.9, -0.9))); diff --git a/search/search_tests/house_detector_tests.cpp b/search/search_tests/house_detector_tests.cpp index 38677ec996..d8e8a6e100 100644 --- a/search/search_tests/house_detector_tests.cpp +++ b/search/search_tests/house_detector_tests.cpp @@ -57,7 +57,7 @@ class CollectStreetIDs static bool GetKey(string const & name, string & key) { TEST(!name.empty(), ()); - search::GetStreetNameAsKey(name, key); + key = strings::ToUtf8(search::GetStreetNameAsKey(name)); if (key.empty()) { @@ -331,19 +331,15 @@ UNIT_TEST(HS_StreetsCompare) namespace { - string GetStreetKey(string const & name) { - string res; - search::GetStreetNameAsKey(name, res); - return res; -} - + return strings::ToUtf8(search::GetStreetNameAsKey(name)); } +} // namespace UNIT_TEST(HS_StreetKey) { - TEST_EQUAL("крупской", GetStreetKey("улица Крупской"), ()); + TEST_EQUAL("крупскои", GetStreetKey("улица Крупской"), ()); TEST_EQUAL("уручская", GetStreetKey("Уручская ул."), ()); TEST_EQUAL("газетыправда", GetStreetKey("Пр. Газеты Правда"), ()); TEST_EQUAL("якупалы", GetStreetKey("улица Я. Купалы"), ()); diff --git a/search/search_tests_support/test_feature.cpp b/search/search_tests_support/test_feature.cpp index 43491f9022..61acd9dd82 100644 --- a/search/search_tests_support/test_feature.cpp +++ b/search/search_tests_support/test_feature.cpp @@ -66,6 +66,9 @@ void TestCountry::Serialize(FeatureBuilder1 & fb) const TestFeature::Serialize(fb); auto const & classificator = classif(); fb.SetType(classificator.GetTypeByPath({"place", "country"})); + + // Localities should have default name too. + fb.AddName("default", m_name); } string TestCountry::ToString() const diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 30c434c0ed..a64c3cdb18 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -416,10 +416,10 @@ void Geocoder::SetParams(Params const & params) if (m_params.m_tokens.size() > 1) { for (auto & v : m_params.m_tokens) - v.erase(remove_if(v.begin(), v.end(), &IsStopWord), v.end()); + my::EraseIf(v, &IsStopWord); auto & v = m_params.m_tokens; - v.erase(remove_if(v.begin(), v.end(), mem_fn(&Params::TSynonymsVector::empty)), v.end()); + my::EraseIf(v, mem_fn(&Params::TSynonymsVector::empty)); // If all tokens are stop words - give up. if (m_params.m_tokens.empty()) @@ -488,10 +488,10 @@ void Geocoder::GoInViewport(TResultList & results) vector> infos; m_index.GetMwmsInfo(infos); - infos.erase(remove_if(infos.begin(), infos.end(), [this](shared_ptr const & info) + my::EraseIf(infos, [this](shared_ptr const & info) { return !m_params.m_pivot.IsIntersect(info->m_limitRect); - }), infos.end()); + }); GoImpl(infos, true /* inViewport */); } @@ -998,7 +998,7 @@ void Geocoder::GreedilyMatchStreets() for (; curToken < m_numTokens && !m_usedTokens[curToken]; ++curToken) { auto const & token = m_params.GetTokens(curToken).front(); - if (IsStreetSynonym(token)) + if (IsStreetSynonymPrefix(token)) continue; if (feature::IsHouseNumber(token)) @@ -1129,7 +1129,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) return !features->GetBit(featureId); }; for (auto & cluster : clusters) - cluster.erase(remove_if(cluster.begin(), cluster.end(), noFeature), cluster.end()); + my::EraseIf(cluster, noFeature); } for (size_t i = 0; i < ARRAY_SIZE(clusters); ++i) diff --git a/storage/country_info_getter.cpp b/storage/country_info_getter.cpp index 16caa4b01a..95100a1fdf 100644 --- a/storage/country_info_getter.cpp +++ b/storage/country_info_getter.cpp @@ -322,6 +322,16 @@ void CountryInfoGetterForTesting::AddCountry(CountryDef const & country) m_id2info[name].m_name = name; } +void CountryInfoGetterForTesting::GetMatchedRegions(string const & affiliation, + IdSet & regions) const +{ + for (size_t i = 0; i < m_countries.size(); ++i) + { + if (m_countries[i].m_name == affiliation) + regions.push_back(i); + } +} + void CountryInfoGetterForTesting::ClearCachesImpl() const {} bool CountryInfoGetterForTesting::IsBelongToRegionImpl(size_t id, diff --git a/storage/country_info_getter.hpp b/storage/country_info_getter.hpp index e45962f990..3bb974d9d1 100644 --- a/storage/country_info_getter.hpp +++ b/storage/country_info_getter.hpp @@ -63,7 +63,7 @@ public: m2::RectD GetLimitRectForLeaf(TCountryId const & leafCountryId) const; // Returns identifiers for all regions matching to correspondent |affiliation|. - void GetMatchedRegions(string const & affiliation, IdSet & regions) const; + virtual void GetMatchedRegions(string const & affiliation, IdSet & regions) const; // Returns true when |pt| belongs to at least one of the specified // |regions|. @@ -163,6 +163,9 @@ public: void AddCountry(CountryDef const & country); + // CountryInfoGetter overrides: + void GetMatchedRegions(string const & affiliation, IdSet & regions) const override; + protected: // CountryInfoGetter overrides: void ClearCachesImpl() const override; diff --git a/storage/storage.cpp b/storage/storage.cpp index 04f52d3a6a..e83cb5a8a0 100644 --- a/storage/storage.cpp +++ b/storage/storage.cpp @@ -33,11 +33,6 @@ namespace storage { namespace { -template -void RemoveIf(vector & v, function const & p) -{ - v.erase(remove_if(v.begin(), v.end(), p), v.end()); -} uint64_t GetLocalSize(shared_ptr file, MapOptions opt) {