[search] Refactoring of MWM full test.

This commit is contained in:
vng 2014-02-07 12:10:36 +03:00 committed by Alex Zolotarev
parent 610b4b2a67
commit b1fdd9c880
7 changed files with 263 additions and 134 deletions

47
search/ftypes_matcher.cpp Normal file
View file

@ -0,0 +1,47 @@
#include "ftypes_matcher.hpp"
#include "../indexer/feature.hpp"
#include "../indexer/feature_data.hpp"
#include "../indexer/classificator.hpp"
namespace ftypes
{
/// Tests whether at least one type from @p types is present in m_types.
/// Every type is passed through ftype::TruncValue(t, 2) before it is compared.
/// @param types  Holder whose types are examined in index order.
/// @return true as soon as a match is found; false if no type matches
///         (which includes the case of an empty holder).
bool BaseChecker::operator() (feature::TypesHolder const & types) const
{
  size_t const count = types.Size();
  for (size_t idx = 0; idx != count; ++idx)
  {
    // Work on a copy: TruncValue modifies its argument in place.
    uint32_t truncated = types[idx];
    ftype::TruncValue(truncated, 2);

    bool const known = (find(m_types.begin(), m_types.end(), truncated) != m_types.end());
    if (known)
      return true;
  }

  return false;
}
/// Convenience overload: builds a feature::TypesHolder from @p ft and forwards
/// to the TypesHolder overload.
/// @return Whatever the TypesHolder overload returns for the feature's types.
bool BaseChecker::operator() (FeatureType const & ft) const
{
  feature::TypesHolder const holder(ft);
  return (*this)(holder);
}
/// Fills m_types with the classificator types of the highway kinds listed below.
/// Types are appended in the same order as the table rows.
IsStreetChecker::IsStreetChecker()
{
  // Each row is a two-component classificator path.
  char const * paths[][2] = {
    { "highway", "trunk" },
    { "highway", "primary" },
    { "highway", "secondary" },
    { "highway", "residential" },
    { "highway", "pedestrian" },
    { "highway", "tertiary" },
    { "highway", "construction" },
    { "highway", "living_street" }
  };

  Classificator const & cl = classif();

  size_t const count = ARRAY_SIZE(paths);
  for (size_t idx = 0; idx != count; ++idx)
  {
    vector<string> const path(paths[idx], paths[idx] + 2);
    m_types.push_back(cl.GetTypeByPath(path));
  }
}
}

30
search/ftypes_matcher.hpp Normal file
View file

@ -0,0 +1,30 @@
#pragma once
#include "../base/base.hpp"
#include "../std/vector.hpp"
namespace feature { class TypesHolder; }
class FeatureType;
namespace ftypes
{
/// Function object that answers "does this feature / type set contain one of
/// my types?". The set of types lives in m_types; this class declares no code
/// that fills it, so a derived class is expected to populate it.
class BaseChecker
{
public:
  /// Checks the types stored in a feature::TypesHolder.
  bool operator() (feature::TypesHolder const & types) const;

  /// Checks the types of a feature.
  bool operator() (FeatureType const & ft) const;

protected:
  /// Types that operator() compares against.
  vector<uint32_t> m_types;
};
/// BaseChecker specialization for streets. It adds nothing but a constructor,
/// which is declared here and defined out of line.
class IsStreetChecker : public BaseChecker
{
public:
/// Default constructor (defined in the implementation file).
IsStreetChecker();
};
}

View file

@ -93,37 +93,41 @@ public:
#endif
/// @todo Move prefixes, suffixes into separate file (autogenerated).
/// Note: a street named "Набережная" ("Embankment") does occur in cities.
string affics1[] =
{
"аллея", "бульвар", "набережная",
"переулок", "площадь", "проезд",
"проспект", "шоссе", "тупик", "улица"
"аллея", "бульвар", "набережная", "переулок",
"площадь", "проезд", "проспект", "шоссе",
"тупик", "улица", "тракт"
};
string affics2[] =
{
"ал.", "бул.", "наб.", "пер.",
"пл.", "пр.", "просп.", "ш.",
"туп.", "ул."
"ал", "бул", "наб", "пер",
"пл", "пр", "просп", "ш",
"туп", "ул", "тр"
};
void GetStreetName(strings::SimpleTokenizer iter, string & streetName)
{
while (iter)
{
string const s = strings::MakeLowerCase(*iter);
++iter;
bool flag = true;
for (size_t i = 0; i < ARRAY_SIZE(affics2); ++i)
{
if (*iter == affics2[i] || *iter == affics1[i])
if (s == affics2[i] || s == affics1[i])
{
flag = false;
break;
}
}
if (flag)
streetName += *iter;
++iter;
streetName += s;
}
}
@ -150,6 +154,12 @@ public:
}
/// Builds a street key from a street name.
/// The name is split on tab, space, '-', ',' and '.', and the token sequence is
/// handed to GetStreetName, which writes into @p res.
/// @param name  Street name to process.
/// @param res   Output string passed through to GetStreetName.
void GetStreetNameAsKey(string const & name, string & res)
{
  GetStreetName(strings::SimpleTokenizer(name, "\t -,."), res);
}
void House::InitHouseNumber()
{
strings::SimpleTokenizer it(m_number, ",-; ");
@ -246,9 +256,7 @@ m2::RectD Street::GetLimitRect(double offsetMeters) const
void Street::SetName(string const & name)
{
m_name = name;
strings::SimpleTokenizer iter(name, "\t -,.");
GetStreetName(iter, m_processedName);
strings::MakeLowerCase(m_processedName);
GetStreetNameAsKey(name, m_processedName);
}
namespace

View file

@ -11,6 +11,10 @@
namespace search
{
void GetStreetNameAsKey(string const & name, string & res);
class FeatureLoader
{
Index const * m_pIndex;

View file

@ -1,4 +1,5 @@
#include "intermediate_result.hpp"
#include "ftypes_matcher.hpp"
#include "../storage/country_info.hpp"
@ -336,43 +337,6 @@ namespace
return (find(m_index, e, FirstLevelIndex(type)) != e);
}
};
class IsStreetChecker
{
vector<uint32_t> m_types;
public:
IsStreetChecker()
{
Classificator const & c = classif();
char const * arr[][2] = {
{ "highway", "trunk" },
{ "highway", "primary" },
{ "highway", "secondary" },
{ "highway", "residential" },
{ "highway", "pedestrian" },
{ "highway", "tertiary" },
{ "highway", "construction" },
{ "highway", "living_street" }
};
for (size_t i = 0; i < ARRAY_SIZE(arr); ++i)
m_types.push_back(c.GetTypeByPath(vector<string>(arr[i], arr[i] + 2)));
}
bool IsMy(feature::TypesHolder const & types) const
{
for (size_t i = 0; i < types.Size(); ++i)
{
uint32_t t = types[i];
ftype::TruncValue(t, 2);
if (find(m_types.begin(), m_types.end(), t) != m_types.end())
return true;
}
return false;
}
};
}
bool PreResult2::LessLinearTypesF::operator() (PreResult2 const & r1, PreResult2 const & r2) const
@ -414,8 +378,8 @@ bool PreResult2::EqualLinearTypesF::operator() (PreResult2 const & r1, PreResult
bool PreResult2::IsStreet() const
{
static IsStreetChecker checker;
return checker.IsMy(m_types);
static ftypes::IsStreetChecker checker;
return checker(m_types);
}
string PreResult2::DebugPrint() const

View file

@ -20,7 +20,8 @@ HEADERS += \
feature_offset_match.hpp \
keyword_lang_matcher.hpp \
params.hpp \
house_detector.hpp
house_detector.hpp \
ftypes_matcher.hpp \
SOURCES += \
search_engine.cpp \
@ -32,4 +33,5 @@ SOURCES += \
approximate_string_match.cpp \
keyword_lang_matcher.cpp \
params.cpp \
house_detector.cpp
house_detector.cpp \
ftypes_matcher.cpp \

View file

@ -1,5 +1,8 @@
#include "../../testing/testing.hpp"
#include "../house_detector.hpp"
#include "../ftypes_matcher.hpp"
#include "../../base/logging.hpp"
#include "../../platform/platform.hpp"
@ -8,12 +11,11 @@
#include "../../indexer/index.hpp"
#include "../../indexer/classificator_loader.hpp"
#include "../house_detector.hpp"
#include "../../std/iostream.hpp"
#include "../../std/fstream.hpp"
UNIT_TEST(LESS_WITH_EPSILON)
UNIT_TEST(HS_LessPoints)
{
double q = 3.0 * 360.0 / 40.0E06;
search::HouseDetector::LessWithEpsilon compare(&q);
@ -73,7 +75,7 @@ UNIT_TEST(LESS_WITH_EPSILON)
}
}
class Process
class StreetIDsByName
{
vector<FeatureID> vect;
@ -85,15 +87,11 @@ public:
if (f.GetFeatureType() == feature::GEOM_LINE)
{
string name;
if (f.GetName(0, name))
{
for (size_t i = 0; i < streetNames.size(); ++i)
if (name == streetNames[i])
{
vect.push_back(f.GetID());
break;
}
}
if (f.GetName(0, name) &&
find(streetNames.begin(), streetNames.end(), name) != streetNames.end())
{
vect.push_back(f.GetID());
}
}
}
@ -106,7 +104,60 @@ public:
}
};
UNIT_TEST(STREET_MERGE_TEST)
class CollectStreetIDs
{
static bool GetKey(string const & name, string & key)
{
TEST(!name.empty(), ());
search::GetStreetNameAsKey(name, key);
if (key.empty())
{
LOG(LWARNING, ("Empty street key for name", name));
return false;
}
return true;
}
typedef map<string, vector<FeatureID> > ContT;
ContT m_ids;
vector<FeatureID> m_empty;
public:
void operator() (FeatureType const & f)
{
static ftypes::IsStreetChecker checker;
if (f.GetFeatureType() == feature::GEOM_LINE)
{
string name;
if (f.GetName(0, name) && checker(f))
{
string key;
if (GetKey(name, key))
m_ids[key].push_back(f.GetID());
}
}
}
void Finish()
{
for (ContT::iterator i = m_ids.begin(); i != m_ids.end(); ++i)
sort(i->second.begin(), i->second.end());
}
vector<FeatureID> const & Get(string const & name) const
{
string key;
if (!GetKey(name, key))
return m_empty;
ContT::const_iterator i = m_ids.find(key);
return (i == m_ids.end() ? m_empty : i->second);
}
};
UNIT_TEST(HS_StreetsMerge)
{
classificator::Load();
@ -117,7 +168,7 @@ UNIT_TEST(STREET_MERGE_TEST)
{
search::HouseDetector houser(&index);
Process toDo;
StreetIDsByName toDo;
toDo.streetNames.push_back("улица Володарского");
index.ForEachInScale(toDo, scales::GetUpperScale());
houser.LoadStreets(toDo.GetFeatureIDs());
@ -126,7 +177,7 @@ UNIT_TEST(STREET_MERGE_TEST)
{
search::HouseDetector houser(&index);
Process toDo;
StreetIDsByName toDo;
toDo.streetNames.push_back("Московская улица");
index.ForEachInScale(toDo, scales::GetUpperScale());
houser.LoadStreets(toDo.GetFeatureIDs());
@ -135,7 +186,7 @@ UNIT_TEST(STREET_MERGE_TEST)
{
search::HouseDetector houser(&index);
Process toDo;
StreetIDsByName toDo;
toDo.streetNames.push_back("проспект Независимости");
toDo.streetNames.push_back("Московская улица");
index.ForEachInScale(toDo, scales::GetUpperScale());
@ -145,7 +196,7 @@ UNIT_TEST(STREET_MERGE_TEST)
{
search::HouseDetector houser(&index);
Process toDo;
StreetIDsByName toDo;
toDo.streetNames.push_back("проспект Независимости");
toDo.streetNames.push_back("Московская улица");
toDo.streetNames.push_back("Вишнёвый переулок");
@ -158,7 +209,7 @@ UNIT_TEST(STREET_MERGE_TEST)
{
search::HouseDetector houser(&index);
Process toDo;
StreetIDsByName toDo;
toDo.streetNames.push_back("проспект Независимости");
toDo.streetNames.push_back("Московская улица");
toDo.streetNames.push_back("улица Кирова");
@ -177,7 +228,7 @@ m2::PointD FindHouse(Index & index, vector<string> const & streets,
{
search::HouseDetector houser(&index);
Process toDo;
StreetIDsByName toDo;
toDo.streetNames = streets;
index.ForEachInScale(toDo, scales::GetUpperScale());
@ -195,7 +246,7 @@ m2::PointD FindHouse(Index & index, vector<string> const & streets,
}
UNIT_TEST(SEARCH_HOUSE_NUMBER_SMOKE_TEST)
UNIT_TEST(HS_FindHouseSmoke)
{
classificator::Load();
@ -221,7 +272,7 @@ UNIT_TEST(SEARCH_HOUSE_NUMBER_SMOKE_TEST)
}
UNIT_TEST(STREET_COMPARE_TEST)
UNIT_TEST(HS_StreetsCompare)
{
search::Street A, B;
TEST(search::Street::IsSameStreets(&A, &B), ());
@ -250,15 +301,22 @@ bool LessHouseNumber(search::House const & h1, search::House const & h2)
return search::House::LessHouseNumber()(&h1, &h2);
}
/// Test helper: returns the key that search::GetStreetNameAsKey produces for
/// @p name, as a value instead of through an output parameter.
string GetStreetKey(string const & name)
{
  string key;
  search::GetStreetNameAsKey(name, key);
  return key;
}
UNIT_TEST(HOUSE_COMPARE_TEST)
}
UNIT_TEST(HS_HousesCompare)
{
m2::PointD p(1,1);
TEST(LessHouseNumber(search::House("1", p), search::House("2", p)), ());
// TEST(LessHouseNumber(search::House("18a", p), search::House("18b", p)), ());
// TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7A", p)), ());
// TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7B", p)), ());
TEST(LessHouseNumber(search::House("18a", p), search::House("18b", p)), ());
TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7A", p)), ());
TEST(LessHouseNumber(search::House("120 1A", p), search::House("120 7B", p)), ());
TEST(!LessHouseNumber(search::House("4", p), search::House("4", p)), ());
TEST(!LessHouseNumber(search::House("95", p), search::House("82-b", p)), ());
@ -269,95 +327,111 @@ UNIT_TEST(HOUSE_COMPARE_TEST)
TEST(!LessHouseNumber(search::House("120 7B", p), search::House("120 1A", p)), ());
}
UNIT_TEST(VNG_TEST)
UNIT_TEST(HS_StreetKey)
{
search::House h1("32", m2::PointD(1,1));
search::House h2("32А", m2::PointD(1,1));
if (search::House::LessHouseNumber()(&h1, &h2))
cout << "Success" << endl;
TEST_EQUAL("крупской", GetStreetKey("улица Крупской"), ());
TEST_EQUAL("уручская", GetStreetKey("Уручская ул."), ());
TEST_EQUAL("газетыправда", GetStreetKey("Пр. Газеты Правда"), ());
TEST_EQUAL("якупалы", GetStreetKey("улица Я. Купалы"), ());
TEST_EQUAL("францискаскорины", GetStreetKey("Франциска Скорины Тракт"), ());
}
bool cmp(double a, double b)
UNIT_TEST(HS_MWMSearch)
{
return fabs(a - b) <= 1e-4;
}
UNIT_TEST(ALGORITHM_TEST)
{
string const path = GetPlatform().WritableDir() + "adresses.txt";
string const path = GetPlatform().WritableDir() + "minsk-pass.addr";
ifstream file(path.c_str());
if (!file.good())
{
TEST(false, ("Can't open file"));
LOG(LWARNING, ("Address file not found"));
return;
}
string line;
Index index;
m2::RectD rect;
index.Add("my_minsk.mwm", rect);
int all = 0;
set <string> strset;
if (!index.Add("minsk-pass.mwm", rect))
{
LOG(LWARNING, ("MWM file not found"));
return;
}
vector <string> match;
vector <string> not_match;
CollectStreetIDs streetIDs;
index.ForEachInScale(streetIDs, scales::GetUpperScale());
streetIDs.Finish();
search::HouseDetector detector(&index);
size_t all = 0, matched = 0, notMatched = 0;
set<string> addrSet;
string line;
while (file.good())
{
getline(file, line);
if (line.empty())
continue;
vector<string> v;
strings::Tokenize(line, "|", MakeBackInsertFunctor(v));
vector <string> houseNumber;
//House number is in v[1], sometime it contains house name
strings::Tokenize(v[1], ",", MakeBackInsertFunctor(houseNumber));
v[1] = houseNumber[0];
if (strset.find(v[0] + v[1]) != strset.end())
// House number is in v[1], sometime it contains house name after comma.
strings::SimpleTokenizer house(v[1], ",");
TEST(house, ());
v[1] = *house;
TEST(!v[0].empty(), ());
TEST(!v[1].empty(), ());
if (!addrSet.insert(v[0] + v[1]).second)
continue;
strset.insert(v[0]+v[1]);
++all;
search::HouseDetector houser(&index);
Process toDo;
toDo.streetNames.push_back(v[0]);
index.ForEachInScale(toDo, scales::GetUpperScale());
houser.LoadStreets(toDo.GetFeatureIDs());
houser.MergeStreets();
houser.ReadAllHouses(200);
vector<search::House const *> houses;
houser.GetHouseForName(v[1], houses);
if (houses.empty())
vector<FeatureID> const & streets = streetIDs.Get(v[0]);
if (streets.empty())
{
LOG(LINFO, ("Empty", v[0], v[1]));
LOG(LWARNING, ("Missing street in mwm", v[0]));
continue;
}
double lon;
strings::to_double(v[2], lon);
double lat;
strings::to_double(v[3], lat);
bool flag = false;
for (size_t i = 0; i < houses.size(); ++i)
++all;
detector.LoadStreets(streets);
detector.MergeStreets();
detector.ReadAllHouses(200);
vector<search::House const *> houses;
detector.GetHouseForName(v[1], houses);
if (houses.empty())
{
LOG(LINFO, ("No houses", v[0], v[1]));
continue;
}
double lat, lon;
TEST(strings::to_double(v[2], lat), (v[2]));
TEST(strings::to_double(v[3], lon), (v[3]));
size_t i = 0;
size_t const count = houses.size();
for (; i < count; ++i)
{
m2::PointD p = houses[i]->GetPosition();
p.x = MercatorBounds::XToLon(p.x);
p.y = MercatorBounds::YToLat(p.y);
if (!cmp(p.x, lat) || !cmp(p.y, lon))
double const eps = 1.0E-4;
if (fabs(p.x - lon) < eps && fabs(p.y - lat) < eps)
{
continue;
++matched;
break;
}
flag = true;
match.push_back(v[0] + " " + v[1]);
break;
}
if (!flag)
if (i == count)
{
not_match.push_back(v[0] + " " + v[1]);
LOG(LINFO, ("No match", v[0], v[1]));//, lat, lon, p.x, p.y));
++notMatched;
LOG(LINFO, ("Bad matched", v[0], v[1]));
}
}
LOG(LINFO, (match.size(), not_match.size(), all - match.size() - not_match.size()));
double t = double(match.size()) / double(all);
LOG(LINFO, (all, t));
LOG(LINFO, ("Matched =", matched, "Not matched =", notMatched, "Not found =", all - matched - notMatched));
LOG(LINFO, ("All count =", all, "Percent matched =", matched / double(all)));
}