From b1fdd9c8806fd7c389338c58e8829c26fdee17ec Mon Sep 17 00:00:00 2001 From: vng Date: Fri, 7 Feb 2014 12:10:36 +0300 Subject: [PATCH] [search] Refactoring of MWM full test. --- search/ftypes_matcher.cpp | 47 ++++ search/ftypes_matcher.hpp | 30 +++ search/house_detector.cpp | 32 ++- search/house_detector.hpp | 4 + search/intermediate_result.cpp | 42 +--- search/search.pro | 6 +- search/search_tests/house_detector_tests.cpp | 236 ++++++++++++------- 7 files changed, 263 insertions(+), 134 deletions(-) create mode 100644 search/ftypes_matcher.cpp create mode 100644 search/ftypes_matcher.hpp diff --git a/search/ftypes_matcher.cpp b/search/ftypes_matcher.cpp new file mode 100644 index 0000000000..367f5b0250 --- /dev/null +++ b/search/ftypes_matcher.cpp @@ -0,0 +1,47 @@ +#include "ftypes_matcher.hpp" + +#include "../indexer/feature.hpp" +#include "../indexer/feature_data.hpp" +#include "../indexer/classificator.hpp" + + +namespace ftypes +{ + +bool BaseChecker::operator() (feature::TypesHolder const & types) const +{ + for (size_t i = 0; i < types.Size(); ++i) + { + uint32_t t = types[i]; + ftype::TruncValue(t, 2); + + if (find(m_types.begin(), m_types.end(), t) != m_types.end()) + return true; + } + return false; +} + +bool BaseChecker::operator() (FeatureType const & ft) const +{ + return this->operator() (feature::TypesHolder(ft)); +} + +IsStreetChecker::IsStreetChecker() +{ + Classificator const & c = classif(); + char const * arr[][2] = { + { "highway", "trunk" }, + { "highway", "primary" }, + { "highway", "secondary" }, + { "highway", "residential" }, + { "highway", "pedestrian" }, + { "highway", "tertiary" }, + { "highway", "construction" }, + { "highway", "living_street" } + }; + + for (size_t i = 0; i < ARRAY_SIZE(arr); ++i) + m_types.push_back(c.GetTypeByPath(vector(arr[i], arr[i] + 2))); +} + +} diff --git a/search/ftypes_matcher.hpp b/search/ftypes_matcher.hpp new file mode 100644 index 0000000000..257ad4dec5 --- /dev/null +++ b/search/ftypes_matcher.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include "../base/base.hpp" + +#include "../std/vector.hpp" + + +namespace feature { class TypesHolder; } +class FeatureType; + +namespace ftypes +{ + +class BaseChecker +{ +protected: + vector m_types; + +public: + bool operator() (feature::TypesHolder const & types) const; + bool operator() (FeatureType const & ft) const; +}; + +class IsStreetChecker : public BaseChecker +{ +public: + IsStreetChecker(); +}; + +} diff --git a/search/house_detector.cpp b/search/house_detector.cpp index 15b68ee658..f7948f6eca 100644 --- a/search/house_detector.cpp +++ b/search/house_detector.cpp @@ -93,37 +93,41 @@ public: #endif /// @todo Move prefixes, suffixes into separate file (autogenerated). +/// "Набережная" улица встречается в городах string affics1[] = { - "аллея", "бульвар", "набережная", - "переулок", "площадь", "проезд", - "проспект", "шоссе", "тупик", "улица" + "аллея", "бульвар", "набережная", "переулок", + "площадь", "проезд", "проспект", "шоссе", + "тупик", "улица", "тракт" }; string affics2[] = { - "ал.", "бул.", "наб.", "пер.", - "пл.", "пр.", "просп.", "ш.", - "туп.", "ул." + "ал", "бул", "наб", "пер", + "пл", "пр", "просп", "ш", + "туп", "ул", "тр" }; void GetStreetName(strings::SimpleTokenizer iter, string & streetName) { while (iter) { + string const s = strings::MakeLowerCase(*iter); + ++iter; + bool flag = true; for (size_t i = 0; i < ARRAY_SIZE(affics2); ++i) { - if (*iter == affics2[i] || *iter == affics1[i]) + if (s == affics2[i] || s == affics1[i]) { flag = false; break; } } + if (flag) - streetName += *iter; - ++iter; + streetName += s; } } @@ -150,6 +154,12 @@ public: } +void GetStreetNameAsKey(string const & name, string & res) +{ + strings::SimpleTokenizer iter(name, "\t -,."); + GetStreetName(iter, res); +} + void House::InitHouseNumber() { strings::SimpleTokenizer it(m_number, ",-; "); @@ -246,9 +256,7 @@ m2::RectD Street::GetLimitRect(double offsetMeters) const void Street::SetName(string const & name) { m_name = name; - strings::SimpleTokenizer iter(name, "\t -,."); - GetStreetName(iter, m_processedName); - strings::MakeLowerCase(m_processedName); + GetStreetNameAsKey(name, m_processedName); } namespace diff --git a/search/house_detector.hpp b/search/house_detector.hpp index 34db07344e..d6ed7a8658 100644 --- a/search/house_detector.hpp +++ b/search/house_detector.hpp @@ -11,6 +11,10 @@ namespace search { + +void GetStreetNameAsKey(string const & name, string & res); + + class FeatureLoader { Index const * m_pIndex; diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp index 8c43c8a565..5e289b171d 100644 --- a/search/intermediate_result.cpp +++ b/search/intermediate_result.cpp @@ -1,4 +1,5 @@ #include "intermediate_result.hpp" +#include "ftypes_matcher.hpp" #include "../storage/country_info.hpp" @@ -336,43 +337,6 @@ namespace return (find(m_index, e, FirstLevelIndex(type)) != e); } }; - - class IsStreetChecker - { - vector m_types; - - public: - IsStreetChecker() - { - Classificator const & c = classif(); - char const * arr[][2] = { - { "highway", "trunk" }, - { "highway", "primary" }, - { "highway", "secondary" }, - { "highway", "residential" }, - { "highway", "pedestrian" }, - { "highway", "tertiary" }, - { "highway", "construction" }, - { "highway", "living_street" } - }; - - for (size_t i = 0; i < ARRAY_SIZE(arr); ++i) - m_types.push_back(c.GetTypeByPath(vector(arr[i], arr[i] + 2))); - } - - bool IsMy(feature::TypesHolder const & types) const - { - for (size_t i = 0; i < types.Size(); ++i) - { - uint32_t t = types[i]; - ftype::TruncValue(t, 2); - - if (find(m_types.begin(), m_types.end(), t) != m_types.end()) - return true; - } - return false; - } - }; } bool PreResult2::LessLinearTypesF::operator() (PreResult2 const & r1, PreResult2 const & r2) const @@ -414,8 +378,8 @@ bool PreResult2::EqualLinearTypesF::operator() (PreResult2 const & r1, PreResult bool PreResult2::IsStreet() const { - static IsStreetChecker checker; - return checker.IsMy(m_types); + static ftypes::IsStreetChecker checker; + return checker(m_types); } string PreResult2::DebugPrint() const diff --git a/search/search.pro b/search/search.pro index d11ddab37c..3ef0c5d361 100644 --- a/search/search.pro +++ b/search/search.pro @@ -20,7 +20,8 @@ HEADERS += \ feature_offset_match.hpp \ keyword_lang_matcher.hpp \ params.hpp \ - house_detector.hpp + house_detector.hpp \ + ftypes_matcher.hpp \ SOURCES += \ search_engine.cpp \ @@ -32,4 +33,5 @@ SOURCES += \ approximate_string_match.cpp \ keyword_lang_matcher.cpp \ params.cpp \ - house_detector.cpp + house_detector.cpp \ + ftypes_matcher.cpp \ diff --git a/search/search_tests/house_detector_tests.cpp b/search/search_tests/house_detector_tests.cpp index f73a8b5d6c..9d595a4725 100644 --- a/search/search_tests/house_detector_tests.cpp +++ b/search/search_tests/house_detector_tests.cpp @@ -1,5 +1,8 @@ #include "../../testing/testing.hpp" +#include "../house_detector.hpp" +#include "../ftypes_matcher.hpp" + #include "../../base/logging.hpp" #include "../../platform/platform.hpp" @@ -8,12 +11,11 @@ #include "../../indexer/index.hpp" #include "../../indexer/classificator_loader.hpp" -#include "../house_detector.hpp" - #include "../../std/iostream.hpp" #include "../../std/fstream.hpp" -UNIT_TEST(LESS_WITH_EPSILON) + +UNIT_TEST(HS_LessPoints) { double q = 3.0 * 360.0 / 40.0E06; search::HouseDetector::LessWithEpsilon compare(&q); @@ -73,7 +75,7 @@ UNIT_TEST(LESS_WITH_EPSILON) } } -class Process +class StreetIDsByName { vector vect; @@ -85,15 +87,11 @@ public: if (f.GetFeatureType() == feature::GEOM_LINE) { string name; - if (f.GetName(0, name)) - { - for (size_t i = 0; i < streetNames.size(); ++i) - if (name == streetNames[i]) - { - vect.push_back(f.GetID()); - break; - } - } + if (f.GetName(0, name) && + find(streetNames.begin(), streetNames.end(), name) != streetNames.end()) + { + vect.push_back(f.GetID()); + } } } @@ -106,7 +104,60 @@ public: } }; -UNIT_TEST(STREET_MERGE_TEST) +class CollectStreetIDs +{ + static bool GetKey(string const & name, string & key) + { + TEST(!name.empty(), ()); + search::GetStreetNameAsKey(name, key); + + if (key.empty()) + { + LOG(LWARNING, ("Empty street key for name", name)); + return false; + } + return true; + } + + typedef map > ContT; + ContT m_ids; + vector m_empty; + +public: + void operator() (FeatureType const & f) + { + static ftypes::IsStreetChecker checker; + + if (f.GetFeatureType() == feature::GEOM_LINE) + { + string name; + if (f.GetName(0, name) && checker(f)) + { + string key; + if (GetKey(name, key)) + m_ids[key].push_back(f.GetID()); + } + } + } + + void Finish() + { + for (ContT::iterator i = m_ids.begin(); i != m_ids.end(); ++i) + sort(i->second.begin(), i->second.end()); + } + + vector const & Get(string const & name) const + { + string key; + if (!GetKey(name, key)) + return m_empty; + + ContT::const_iterator i = m_ids.find(key); + return (i == m_ids.end() ? m_empty : i->second); + } +}; + +UNIT_TEST(HS_StreetsMerge) { classificator::Load(); @@ -117,7 +168,7 @@ UNIT_TEST(STREET_MERGE_TEST) { search::HouseDetector houser(&index); - Process toDo; + StreetIDsByName toDo; toDo.streetNames.push_back("улица Володарского"); index.ForEachInScale(toDo, scales::GetUpperScale()); houser.LoadStreets(toDo.GetFeatureIDs()); @@ -126,7 +177,7 @@ UNIT_TEST(STREET_MERGE_TEST) { search::HouseDetector houser(&index); - Process toDo; + StreetIDsByName toDo; toDo.streetNames.push_back("Московская улица"); index.ForEachInScale(toDo, scales::GetUpperScale()); houser.LoadStreets(toDo.GetFeatureIDs()); @@ -135,7 +186,7 @@ UNIT_TEST(STREET_MERGE_TEST) { search::HouseDetector houser(&index); - Process toDo; + StreetIDsByName toDo; toDo.streetNames.push_back("проспект Независимости"); toDo.streetNames.push_back("Московская улица"); index.ForEachInScale(toDo, scales::GetUpperScale()); @@ -145,7 +196,7 @@ UNIT_TEST(STREET_MERGE_TEST) { search::HouseDetector houser(&index); - Process toDo; + StreetIDsByName toDo; toDo.streetNames.push_back("проспект Независимости"); toDo.streetNames.push_back("Московская улица"); toDo.streetNames.push_back("Вишнёвый переулок"); @@ -158,7 +209,7 @@ UNIT_TEST(STREET_MERGE_TEST) { search::HouseDetector houser(&index); - Process toDo; + StreetIDsByName toDo; toDo.streetNames.push_back("проспект Независимости"); toDo.streetNames.push_back("Московская улица"); toDo.streetNames.push_back("улица Кирова"); @@ -177,7 +228,7 @@ m2::PointD FindHouse(Index & index, vector const & streets, { search::HouseDetector houser(&index); - Process toDo; + StreetIDsByName toDo; toDo.streetNames = streets; index.ForEachInScale(toDo, scales::GetUpperScale()); @@ -195,7 +246,7 @@ m2::PointD FindHouse(Index & index, vector const & streets, } -UNIT_TEST(SEARCH_HOUSE_NUMBER_SMOKE_TEST) +UNIT_TEST(HS_FindHouseSmoke) { classificator::Load(); @@ -221,7 +272,7 @@ UNIT_TEST(SEARCH_HOUSE_NUMBER_SMOKE_TEST) } -UNIT_TEST(STREET_COMPARE_TEST) +UNIT_TEST(HS_StreetsCompare) { search::Street A, B; TEST(search::Street::IsSameStreets(&A, &B), ()); @@ -250,15 +301,22 @@ bool LessHouseNumber(search::House const & h1, search::House const & h2) return search::House::LessHouseNumber()(&h1, &h2); } +string GetStreetKey(string const & name) +{ + string res; + search::GetStreetNameAsKey(name, res); + return res; } -UNIT_TEST(HOUSE_COMPARE_TEST) +} + +UNIT_TEST(HS_HousesCompare) { m2::PointD p(1,1); TEST(LessHouseNumber(search::House("1", p), search::House("2", p)), ()); -// TEST(LessHouseNumber(search::House("18a", p), search::House("18b", p)), ()); -// TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7A", p)), ()); -// TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7B", p)), ()); + TEST(LessHouseNumber(search::House("18a", p), search::House("18b", p)), ()); + TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7A", p)), ()); + TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7B", p)), ()); TEST(!LessHouseNumber(search::House("4", p), search::House("4", p)), ()); TEST(!LessHouseNumber(search::House("95", p), search::House("82-b", p)), ()); @@ -269,95 +327,111 @@ UNIT_TEST(HOUSE_COMPARE_TEST) TEST(!LessHouseNumber(search::House("120 7B", p), search::House("120 1A", p)), ()); } -UNIT_TEST(VNG_TEST) +UNIT_TEST(HS_StreetKey) { - search::House h1("32", m2::PointD(1,1)); - search::House h2("32А", m2::PointD(1,1)); - if (search::House::LessHouseNumber()(&h1, &h2)) - cout << "Success" << endl; + TEST_EQUAL("крупской", GetStreetKey("улица Крупской"), ()); + TEST_EQUAL("уручская", GetStreetKey("Уручская ул."), ()); + TEST_EQUAL("газетыправда", GetStreetKey("Пр. Газеты Правда"), ()); + TEST_EQUAL("якупалы", GetStreetKey("улица Я. Купалы"), ()); + TEST_EQUAL("францискаскорины", GetStreetKey("Франциска Скорины Тракт"), ()); } -bool cmp(double a, double b) +UNIT_TEST(HS_MWMSearch) { - return fabs(a - b) <= 1e-4; -} - -UNIT_TEST(ALGORITHM_TEST) -{ - string const path = GetPlatform().WritableDir() + "adresses.txt"; + string const path = GetPlatform().WritableDir() + "minsk-pass.addr"; ifstream file(path.c_str()); if (!file.good()) { - TEST(false, ("Can't open file")); + LOG(LWARNING, ("Address file not found")); return; } - string line; + Index index; m2::RectD rect; - index.Add("my_minsk.mwm", rect); - int all = 0; - set strset; + if (!index.Add("minsk-pass.mwm", rect)) + { + LOG(LWARNING, ("MWM file not found")); + return; + } - vector match; - vector not_match; + CollectStreetIDs streetIDs; + index.ForEachInScale(streetIDs, scales::GetUpperScale()); + streetIDs.Finish(); + + search::HouseDetector detector(&index); + + size_t all = 0, matched = 0, notMatched = 0; + set addrSet; + + string line; while (file.good()) { getline(file, line); if (line.empty()) continue; + vector v; strings::Tokenize(line, "|", MakeBackInsertFunctor(v)); - vector houseNumber; - //House number is in v[1], sometime it contains house name - strings::Tokenize(v[1], ",", MakeBackInsertFunctor(houseNumber)); - v[1] = houseNumber[0]; - if (strset.find(v[0] + v[1]) != strset.end()) + + // House number is in v[1], sometime it contains house name after comma. + strings::SimpleTokenizer house(v[1], ","); + TEST(house, ()); + v[1] = *house; + + TEST(!v[0].empty(), ()); + TEST(!v[1].empty(), ()); + + if (!addrSet.insert(v[0] + v[1]).second) continue; - strset.insert(v[0]+v[1]); - ++all; - search::HouseDetector houser(&index); - Process toDo; - toDo.streetNames.push_back(v[0]); - index.ForEachInScale(toDo, scales::GetUpperScale()); - houser.LoadStreets(toDo.GetFeatureIDs()); - - houser.MergeStreets(); - houser.ReadAllHouses(200); - vector houses; - houser.GetHouseForName(v[1], houses); - if (houses.empty()) + vector const & streets = streetIDs.Get(v[0]); + if (streets.empty()) { - LOG(LINFO, ("Empty", v[0], v[1])); + LOG(LWARNING, ("Missing street in mwm", v[0])); continue; } - double lon; - strings::to_double(v[2], lon); - double lat; - strings::to_double(v[3], lat); - bool flag = false; - for (size_t i = 0; i < houses.size(); ++i) + ++all; + + detector.LoadStreets(streets); + detector.MergeStreets(); + detector.ReadAllHouses(200); + + vector houses; + detector.GetHouseForName(v[1], houses); + if (houses.empty()) + { + LOG(LINFO, ("No houses", v[0], v[1])); + continue; + } + + double lat, lon; + TEST(strings::to_double(v[2], lat), (v[2])); + TEST(strings::to_double(v[3], lon), (v[3])); + + size_t i = 0; + size_t const count = houses.size(); + for (; i < count; ++i) { m2::PointD p = houses[i]->GetPosition(); p.x = MercatorBounds::XToLon(p.x); p.y = MercatorBounds::YToLat(p.y); - if (!cmp(p.x, lat) || !cmp(p.y, lon)) + + double const eps = 1.0E-4; + if (fabs(p.x - lon) < eps && fabs(p.y - lat) < eps) { - continue; + ++matched; + break; } - flag = true; - match.push_back(v[0] + " " + v[1]); - break; } - if (!flag) + + if (i == count) { - not_match.push_back(v[0] + " " + v[1]); - LOG(LINFO, ("No match", v[0], v[1]));//, lat, lon, p.x, p.y)); + ++notMatched; + LOG(LINFO, ("Bad matched", v[0], v[1])); } } - LOG(LINFO, (match.size(), not_match.size(), all - match.size() - not_match.size())); - double t = double(match.size()) / double(all); - LOG(LINFO, (all, t)); + LOG(LINFO, ("Matched =", matched, "Not matched =", notMatched, "Not found =", all - matched - notMatched)); + LOG(LINFO, ("All count =", all, "Percent matched =", matched / double(all))); }