[search] house number recognition

This commit is contained in:
Kirill Zhdanovich 2014-03-18 17:47:12 +03:00 committed by Alex Zolotarev
parent e415ecf457
commit 87a6919eba
6 changed files with 120 additions and 40 deletions

View file

@ -1,6 +1,7 @@
#include "feature_impl.hpp"
#include "../base/string_utils.hpp"
#include "../base/logging.hpp"
namespace feature
@ -22,12 +23,43 @@ bool IsNumber(strings::UniString const & s)
return true;
}
/// Check that token can be house number.
bool IsHouseNumber(strings::UniString const & s)
/// Checks whether a token ends with an English ordinal suffix ("st", "nd",
/// "rd" or "th"), i.e. looks like a numbered street name such as "1st" or "42nd".
/// Only the last two characters are inspected; this function does not verify
/// that the rest of the token is numeric.
/// @param s  Token to test.
/// @return true if @a s has at least two characters and its last two characters
///         equal one of the known suffixes, false otherwise.
bool IsStreetNumber(strings::UniString const & s)
{
  /// add different localities in future, if it's a problem.
  static char const streetEndings[][3] = {"st", "nd", "rd", "th"};
  size_t const endingSize = 2;

  size_t const count = s.size();
  if (count < endingSize)
    return false;

  size_t const start = count - endingSize;
  for (size_t i = 0; i < ARRAY_SIZE(streetEndings); ++i)
  {
    bool matched = true;
    for (size_t j = 0; j < endingSize; ++j)
    {
      if (streetEndings[i][j] != s[start + j])
      {
        matched = false;
        break;
      }
    }

    // A single matching suffix is sufficient. Bailing out on the first
    // mismatching suffix instead would make the function return false for
    // every input, because no token can end with all suffixes at once.
    if (matched)
      return true;
  }
  return false;
}
/// Stricter house-number check for a single token.
/// A token is accepted when it is non-empty, shorter than 8 characters, starts
/// with a digit and is not recognized by IsStreetNumber().
/// @param s  Token to test.
/// @return true if @a s may be a house number.
bool IsHouseNumberDeepCheck(strings::UniString const & s)
{
  size_t const count = s.size();
  /// @todo Probably, call some check function from House::
  // The size check comes first so that s[0] below is never read on an empty token.
  if (count == 0 || count >= 8)
    return false;
  if (!IsDigit(s[0]))
    return false;
  // Tokens that IsStreetNumber() accepts are treated as street names, not house numbers.
  return !IsStreetNumber(s);
}
bool IsHouseNumber(string const & s)

View file

@ -29,6 +29,7 @@ namespace feature
}
bool IsNumber(strings::UniString const & s);
bool IsHouseNumber(strings::UniString const & s);
bool IsHouseNumber(string const & s);
bool IsHouseNumberDeepCheck(strings::UniString const & s);
}

View file

@ -686,6 +686,12 @@ string const & MergedStreet::GetDbgName() const
return m_cont.front()->GetDbgName();
}
/// Returns the name reported by the first element of m_cont.
/// The container must not be empty (checked by ASSERT).
string const & MergedStreet::GetName() const
{
  ASSERT(!m_cont.empty(), ());
  string const & firstName = m_cont.front()->GetName();
  return firstName;
}
bool MergedStreet::IsHousesReaded() const
{
ASSERT(!m_cont.empty(), ());
@ -803,6 +809,8 @@ void HouseDetector::ReadHouse(FeatureType const & f, Street * st, ProjectionCalc
static ftypes::IsBuildingChecker checker;
string const houseNumber = f.GetHouseNumber();
/// @todo After new data generation we can skip IsHouseNumber check here.
if (checker(f) && feature::IsHouseNumber(houseNumber))
{
HouseMapT::iterator const it = m_id2house.find(f.GetID());
@ -961,11 +969,11 @@ public:
m_results[ind] = ScoredHouse(p.m_house, p.m_distance);
}
void FlushResults(vector<House const *> & res) const
/// Appends the stored houses to @a res, each paired with the street @a st.
/// Slots of m_results whose house pointer is null are skipped.
/// @param res  Output vector; results are appended, existing content is kept.
/// @param st   Street the houses belong to. Its address is stored in every
///             appended result, so @a st must outlive the use of @a res.
void FlushResults(vector<AddressSearchResult> & res, MergedStreet const & st) const
{
  // NOTE(review): the last slot of m_results is intentionally left out by the
  // "- 1" bound, exactly as before; the reason is not visible here - confirm.
  size_t const count = ARRAY_SIZE(m_results) - 1;
  for (size_t i = 0; i < count; ++i)
  {
    if (m_results[i].house)
      res.push_back(AddressSearchResult(m_results[i].house, &st));
  }
}
House const * GetBestMatchHouse() const
@ -1015,7 +1023,7 @@ void AddToQueue(int houseNumber, queue<int> & q)
struct HouseChain
{
vector<HouseProjection const *> houses;
set<string> s;
set<string> chainHouses;
double score;
int minHouseNumber;
int maxHouseNumber;
@ -1034,7 +1042,7 @@ struct HouseChain
void Add(HouseProjection const * h)
{
if (s.insert(h->m_house->GetNumber()).second)
if (chainHouses.insert(h->m_house->GetNumber()).second)
{
int num = h->m_house->GetIntNumber();
if (num < minHouseNumber)
@ -1047,7 +1055,7 @@ struct HouseChain
/// Tells whether a house number is already stored in chainHouses.
/// @param str  House number string to look up.
/// @return true if @a str is present in chainHouses.
bool Find(string const & str)
{
  return chainHouses.count(str) != 0;
}
void CountScore()
@ -1196,7 +1204,6 @@ void ProccessHouses(vector<HouseProjection const *> const & st, ResultAccumulato
void GetBestHouseWithNumber(MergedStreet const & st, double offsetMeters, ResultAccumulator & acc)
{
vector<HouseProjection const *> v;
for (MergedStreet::Index i = st.Begin(); !st.IsEnd(i); st.Inc(i))
{
@ -1252,7 +1259,7 @@ void GetLSHouse(MergedStreet const & st, double offsetMeters, ResultAccumulator
}
void ProduceVoting(vector <ResultAccumulator> const & acc, vector<House const *> & res)
void ProduceVoting(vector <ResultAccumulator> const & acc, vector<AddressSearchResult> & res, MergedStreet const & st)
{
vector < pair<House const *, vector<size_t> > > voting;
for (size_t i = 0; i < acc.size(); ++i)
@ -1272,12 +1279,12 @@ void ProduceVoting(vector <ResultAccumulator> const & acc, vector<House const *>
{
if (voting[0].second.size() > 1)
{
acc[voting[0].second.front()].FlushResults(res);
acc[voting[0].second.front()].FlushResults(res, st);
return;
}
if (voting.size() > 1 && voting[1].second.size() > 1)
{
acc[voting[1].second.front()].FlushResults(res);
acc[voting[1].second.front()].FlushResults(res, st);
return;
}
}
@ -1287,12 +1294,13 @@ void ProduceVoting(vector <ResultAccumulator> const & acc, vector<House const *>
for (size_t i = 0; i < acc.size(); ++i)
if (acc[i].HasBestMatch())
{
acc[i].FlushResults(res);
break;
acc[i].FlushResults(res, st);
return;
}
acc[0].FlushResults(res, st);
}
void HouseDetector::GetHouseForName(string const & houseNumber, vector<House const *> & res)
void HouseDetector::GetHouseForName(string const & houseNumber, vector<AddressSearchResult> &res)
{
size_t const count = m_streets.size();
res.reserve(count);
@ -1332,7 +1340,7 @@ void HouseDetector::GetHouseForName(string const & houseNumber, vector<House con
break;
}
ProduceVoting(acc, res);
ProduceVoting(acc, res, m_streets[i]);
for (size_t j = 0; j < acc.size(); ++j)
acc[j].Reset();
}

View file

@ -130,6 +130,7 @@ public:
}
/// Debug name accessor: exposes m_processedName by const reference.
inline string const & GetDbgName() const
{
  return m_processedName;
}
/// Name accessor: exposes m_name by const reference.
inline string const & GetName() const
{
  return m_name;
}
};
class MergedStreet
@ -141,6 +142,7 @@ public:
/// Default constructor: starts with m_length set to zero.
MergedStreet()
  : m_length(0.0)
{
}
string const & GetDbgName() const;
string const & GetName() const;
bool IsHousesReaded() const;
void FinishReadingHouses();
@ -204,6 +206,8 @@ inline void swap(MergedStreet & s1, MergedStreet & s2)
s1.Swap(s2);
}
struct AddressSearchResult;
class HouseDetector
{
FeatureLoader m_loader;
@ -242,9 +246,30 @@ public:
static int const DEFAULT_OFFSET_M = 200;
void ReadAllHouses(double offsetMeters = DEFAULT_OFFSET_M);
void GetHouseForName(string const & houseNumber, vector<House const *> & res);
void GetHouseForName(string const & houseNumber, vector<AddressSearchResult> &res);
void ClearCaches();
};
/// A found house together with the merged street it was matched on.
/// Both members are plain pointers stored as passed to the constructor;
/// nothing in this struct takes ownership of the pointed-to objects.
struct AddressSearchResult
{
  House const * m_house;
  MergedStreet const * m_street;

  AddressSearchResult(House const * house, MergedStreet const * street)
    : m_house(house), m_street(street)
  {
  }

  /// Orders results by house pointer first, then by street pointer.
  bool operator<(AddressSearchResult const & rhs) const
  {
    if (m_house != rhs.m_house)
      return m_house < rhs.m_house;
    return m_street < rhs.m_street;
  }

  /// Two results are equal when both the house and the street pointers match.
  bool operator==(AddressSearchResult const & rhs) const
  {
    if (m_house != rhs.m_house)
      return false;
    return m_street == rhs.m_street;
  }
};
}

View file

@ -277,6 +277,32 @@ void Query::SetQuery(string const & query)
search::Delimiters delims;
SplitUniString(NormalizeAndSimplifyString(query), MakeBackInsertFunctor(m_tokens), delims);
#ifdef HOUSE_SEARCH_TEST
int tokenIndex = m_tokens.size() - 1;
while(tokenIndex >= 0)
{
if (feature::IsHouseNumberDeepCheck(m_tokens[tokenIndex]))
{
if (m_tokens.size() > 1)
{
m_house.swap(m_tokens[tokenIndex]);
m_tokens[tokenIndex].swap(m_tokens.back());
m_tokens.pop_back();
}
break;
}
--tokenIndex;
}
if (!m_tokens.empty() && m_house.empty() && feature::IsHouseNumberDeepCheck(m_prefix))
{
m_house.swap(m_prefix);
m_prefix.clear();
}
#endif
if (!m_tokens.empty() && !delims(strings::LastUniChar(query)))
{
m_prefix.swap(m_tokens.back());
@ -309,20 +335,6 @@ void Query::Search(Results & res, bool searchAddress)
if (m_cancel) return;
SuggestStrings(res);
#ifdef HOUSE_SEARCH_TEST
/// @todo Select best token for house number.
if (m_tokens.size() > 1 && feature::IsHouseNumber(m_tokens.back()))
{
m_house.swap(m_tokens.back());
m_tokens.pop_back();
}
else if (feature::IsHouseNumber(m_prefix))
{
m_house.swap(m_prefix);
m_prefix.clear();
}
#endif
if (m_cancel) return;
if (searchAddress)
SearchAddress();
@ -545,14 +557,15 @@ void Query::FlushResults(Results & res, void (Results::*pAddFn)(Result const &))
m_houseDetector.ReadAllHouses();
vector<search::House const *> houses;
vector<search::AddressSearchResult> houses;
m_houseDetector.GetHouseForName(strings::ToUtf8(m_house), houses);
for (size_t i = 0; i < houses.size(); ++i)
{
(res.*pAddFn)(Result(houses[i]->GetPosition(), houses[i]->GetNumber(),
House const * h = houses[i].m_house;
(res.*pAddFn)(Result(h->GetPosition(), h->GetNumber() + ", " + houses[i].m_street->GetName(),
string(), string(),
IsValidPosition() ? houses[i]->GetPosition().Length(m_position) : -1.0));
IsValidPosition() ? h->GetPosition().Length(m_position) : -1.0));
}
}
#endif

View file

@ -259,11 +259,11 @@ m2::PointD FindHouse(Index & index, vector<string> const & streets,
houser.ReadAllHouses(offset);
vector<search::House const *> houses;
vector<search::AddressSearchResult> houses;
houser.GetHouseForName(houseName, houses);
TEST_EQUAL(houses.size(), 1, ());
return houses[0]->GetPosition();
return houses[0].m_house->GetPosition();
}
}
@ -442,7 +442,7 @@ UNIT_TEST(HS_MWMSearch)
detector.MergeStreets();
detector.ReadAllHouses();
vector<search::House const *> houses;
vector<search::AddressSearchResult> houses;
detector.GetHouseForName(a.m_house, houses);
if (houses.empty())
{
@ -454,7 +454,8 @@ UNIT_TEST(HS_MWMSearch)
size_t const count = houses.size();
for (; j < count; ++j)
{
m2::PointD p = houses[j]->GetPosition();
search::House const * h = houses[j].m_house;
m2::PointD p = h->GetPosition();
p.x = MercatorBounds::XToLon(p.x);
p.y = MercatorBounds::YToLat(p.y);