[geocoder] Fix ranking of eponymous Town and Hamlet: calculate result ranks by the kind field (in jsonl)

This commit is contained in:
Anatoly Serdtcev 2019-11-27 14:06:03 +03:00 committed by LaGrunge
parent 1711eb5454
commit fd63ea617c
12 changed files with 365 additions and 129 deletions

View file

@ -158,6 +158,7 @@ geocore_add_library(${PROJECT_NAME} ${SRC})
geocore_link_libraries(${PROJECT_NAME}
base
coding
geocoder
indexer
platform
geometry

View file

@ -6,6 +6,8 @@
#include "generator/regions/collector_region_info.hpp"
#include "generator/regions/region_info.hpp"
#include "geocoder/types.hpp"
#include "platform/platform.hpp"
#include "base/geo_object_id.hpp"
@ -165,3 +167,15 @@ UNIT_TEST(RegionInfoCollector_MergeAndSave)
TEST(!rg.GetIsoCodeAlphaNumeric(), ());
}
}
// PlaceType tests --------------------------------------------------------------------------------
// Checks that the string produced by StringifyPlaceType() for every place type other than
// Unknown is recognised by geocoder::KindFromString(), i.e. does not map to Kind::Unknown.
UNIT_TEST(CollectorRegionInfo_PlaceType2KindTest)
{
  // The loop below starts from 1 in order to skip Unknown, so Unknown has to be 0.
  TEST_EQUAL(static_cast<int>(PlaceType::Unknown), 0, ());

  // NOTE(review): this walks every integer in (Unknown, Count) and therefore assumes that the
  // enumerator values are contiguous - confirm against the PlaceType declaration.
  auto const placeTypesEnd = static_cast<int>(PlaceType::Count);
  for (auto rawValue = 1; rawValue < placeTypesEnd; ++rawValue)
  {
    auto const kindName = StringifyPlaceType(static_cast<PlaceType>(rawValue));
    TEST_NOT_EQUAL(geocoder::KindFromString(kindName), geocoder::Kind::Unknown, ());
  }
}

View file

@ -77,6 +77,8 @@ char const * StringifyPlaceType(PlaceType placeType)
return "neighbourhood";
case PlaceType::Unknown:
return "unknown";
case PlaceType::Count:
UNREACHABLE();
};
UNREACHABLE();

View file

@ -64,6 +64,8 @@ enum class PlaceType: uint8_t
Suburb = 12,
Quarter = 13,
Neighbourhood = 14,
Count
};
PlaceType EncodePlaceType(std::string const & place);

View file

@ -61,6 +61,8 @@ PlaceLevel CountrySpecifier::GetLevel(PlaceType placeType)
return PlaceLevel::Sublocality;
case PlaceType::Unknown:
break;
case PlaceType::Count:
UNREACHABLE();
}
return PlaceLevel::Unknown;

View file

@ -59,18 +59,44 @@ double GetWeight(Type t)
switch (t)
{
case Type::Country: return 10.0;
case Type::Locality: return 5.0;
case Type::Region: return 4.0;
case Type::Subregion: return 4.0;
case Type::Locality: return 5.0;
case Type::Street: return 2.0;
case Type::Suburb: return 1.0;
case Type::Sublocality: return 1.0;
case Type::Street: return 2.0;
case Type::Building: return 0.1;
case Type::Count: return 0.0;
}
UNREACHABLE();
}
// Returns the certainty weight of an entry of the given |kind|.
//
// Callers in this file multiply the value by the number of matched subquery tokens.
// The small fractional offsets order kinds that would otherwise tie,
// e.g. City (5.05) is ranked above Town (5.04).
// Kind::Unknown and Kind::Count carry no weight.
double GetWeight(Kind kind)
{
  switch (kind)
  {
  case Kind::Country:
    return 10.0;

  case Kind::City:
    return 5.05;
  case Kind::Town:
    return 5.04;

  case Kind::State:
    return 4.05;
  case Kind::Province:
  case Kind::District:
  case Kind::County:
    return 4.01;
  case Kind::Municipality:
    return 4.0;

  case Kind::Village:
    return 3.0;
  case Kind::Street:
    return 2.0;

  case Kind::Hamlet:
    return 1.06;
  case Kind::Suburb:
    return 1.05;
  case Kind::Quarter:
    return 1.01;
  case Kind::Neighbourhood:
    return 1.0;

  case Kind::IsolatedDwelling:
    return 0.5;
  case Kind::Building:
    return 0.1;

  case Kind::Unknown:
  case Kind::Count:
    return 0.0;
  }
  UNREACHABLE();
}
// todo(@m) This is taken from search/geocoder.hpp. Refactor.
class ScopedMarkTokens
{
@ -126,6 +152,20 @@ strings::UniString MakeHouseNumber(Tokens const & tokens)
}
} // namespace
// Geocoder::Layer ---------------------------------------------------------------------------------
// Creates a layer of the given |type| with an empty candidate list.
Geocoder::Layer::Layer(Type type) : m_type(type) {}
// Takes over |candidates| and stores them ordered by total certainty, most certain first.
//
// FindMaxCertaintyInParentCandidates() returns the certainty of the first parent it meets
// while walking this list, so the list has to be in descending order for that value to be
// the maximum; an ascending order would make it return the minimum instead.
void Geocoder::Layer::SetCandidates(std::vector<Candidate> && candidates)
{
  std::sort(candidates.begin(), candidates.end(), [](auto const & lhs, auto const & rhs) {
    return lhs.m_totalCertainty > rhs.m_totalCertainty;
  });
  m_candidatesByCertainty = std::move(candidates);
}
// Geocoder::Context -------------------------------------------------------------------------------
Geocoder::Context::Context(string const & query) : m_beam(kMaxResults)
{
@ -378,8 +418,7 @@ void Geocoder::Go(Context & ctx, Type type) const
subquery.push_back(ctx.GetToken(j));
subqueryTokenIds.push_back(j);
Layer curLayer;
curLayer.m_type = type;
Layer curLayer{type};
// Buildings are indexed separately.
if (type == Type::Building)
@ -391,7 +430,7 @@ void Geocoder::Go(Context & ctx, Type type) const
FillRegularLayer(ctx, type, subquery, curLayer);
}
if (curLayer.m_entries.empty())
if (curLayer.GetCandidatesByCertainty().empty())
continue;
ScopedMarkTokens mark(ctx, type, i, j + 1);
@ -400,7 +439,7 @@ void Geocoder::Go(Context & ctx, Type type) const
if (type == Type::Street)
MarkStreetSynonym(ctx, streetSynonymMark);
AddResults(ctx, curLayer.m_entries);
AddResults(ctx, curLayer.GetCandidatesByCertainty());
ctx.GetLayers().emplace_back(move(curLayer));
SCOPE_GUARD(pop, [&] { ctx.GetLayers().pop_back(); });
@ -425,7 +464,7 @@ void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, vector
for (auto const & layer : boost::adaptors::reverse(ctx.GetLayers()))
{
if (layer.m_type != Type::Street && layer.m_type != Type::Locality)
if (layer.GetType() != Type::Street && layer.GetType() != Type::Locality)
continue;
// We've already filled a street/location layer and now see something that resembles
@ -433,11 +472,14 @@ void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, vector
// let's stay on the safer side and mark the tokens as potential house number.
ctx.MarkHouseNumberPositionsInQuery(subqueryTokenIds);
auto candidates = std::vector<Candidate>{};
auto const & lastLayer = ctx.GetLayers().back();
auto const forSublocalityLayer =
lastLayer.m_type == Type::Suburb || lastLayer.m_type == Type::Sublocality;
for (auto const & docId : layer.m_entries)
lastLayer.GetType() == Type::Suburb || lastLayer.GetType() == Type::Sublocality;
for (auto const & buildingOwnerCandidate : layer.GetCandidatesByCertainty())
{
auto const & docId = buildingOwnerCandidate.m_entry;
m_index.ForEachRelatedBuilding(docId, [&](Index::DocId const & buildingDocId) {
auto const & building = m_index.GetDoc(buildingDocId);
auto const & multipleHN = building.GetNormalizedMultipleNames(
@ -447,14 +489,23 @@ void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, vector
if (search::house_numbers::HouseNumbersMatch(realHNUniStr, subqueryHN,
false /* queryIsPrefix */))
{
if (forSublocalityLayer && !HasParent(ctx.GetLayers(), building))
auto && parentCandidateCertainty =
forSublocalityLayer ? FindMaxCertaintyInParentCandidates(ctx.GetLayers(), building)
: boost::optional<double>{buildingOwnerCandidate.m_totalCertainty};
if (!parentCandidateCertainty)
return;
curLayer.m_entries.emplace_back(buildingDocId);
static auto const buildingTokenWeight = GetWeight(Kind::Building);
auto totalCertainty =
*parentCandidateCertainty + buildingTokenWeight * subqueryTokenIds.size();
candidates.push_back({buildingDocId, totalCertainty});
}
});
}
if (!candidates.empty())
curLayer.SetCandidates(std::move(candidates));
break;
}
}
@ -462,30 +513,38 @@ void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, vector
void Geocoder::FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery,
Layer & curLayer) const
{
auto candidates = std::vector<Candidate>{};
m_index.ForEachDocId(subquery, [&](Index::DocId const & docId) {
auto const & d = m_index.GetDoc(docId);
if (d.m_type != type)
return;
if (ctx.GetLayers().empty() || HasParent(ctx.GetLayers(), d))
{
if (type > Type::Locality && !IsRelevantLocalityMember(ctx, d, subquery))
return;
auto && parentCandidateCertainty = FindMaxCertaintyInParentCandidates(ctx.GetLayers(), d);
if (!parentCandidateCertainty)
return;
curLayer.m_entries.emplace_back(docId);
}
if (type > Type::Locality && !IsRelevantLocalityMember(ctx, d, subquery))
return;
auto subqueryWeight =
(d.m_kind != Kind::Unknown ? GetWeight(d.m_kind) : GetWeight(d.m_type)) * subquery.size();
auto totalCertainty = *parentCandidateCertainty + subqueryWeight;
candidates.push_back({docId, totalCertainty});
});
if (!candidates.empty())
curLayer.SetCandidates(std::move(candidates));
}
void Geocoder::AddResults(Context & ctx, std::vector<Index::DocId> const & entries) const
void Geocoder::AddResults(Context & ctx, std::vector<Candidate> const & candidates) const
{
double certainty = 0;
vector<size_t> tokenIds;
vector<Type> allTypes;
for (size_t tokId = 0; tokId < ctx.GetNumTokens(); ++tokId)
{
auto const t = ctx.GetTokenType(tokId);
certainty += GetWeight(t);
if (t != Type::Count)
{
tokenIds.push_back(tokId);
@ -493,11 +552,11 @@ void Geocoder::AddResults(Context & ctx, std::vector<Index::DocId> const & entri
}
}
for (auto const & docId : entries)
for (auto const & candidate : candidates)
{
auto const & docId = candidate.m_entry;
auto const & entry = m_index.GetDoc(docId);
auto entryCertainty = certainty;
auto entryCertainty = candidate.m_totalCertainty;
if (InCityState(entry))
{
@ -536,19 +595,23 @@ bool Geocoder::InCityState(Hierarchy::Entry const & entry) const
return false;
}
bool Geocoder::HasParent(vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const
boost::optional<double> Geocoder::FindMaxCertaintyInParentCandidates(
vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const
{
CHECK(!layers.empty(), ());
if (layers.empty())
return 0;
auto const & layer = layers.back();
for (auto const & docId : layer.m_entries)
for (auto const & candidate : layer.GetCandidatesByCertainty())
{
auto const & docId = candidate.m_entry;
// Note that the relationship is somewhat inverted: every ancestor
// is stored in the address but the nodes have no information
// about their children.
if (m_hierarchy.IsParentTo(m_index.GetDoc(docId), e))
return true;
return candidate.m_totalCertainty;
}
return false;
return {};
}
bool Geocoder::IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member,
@ -562,14 +625,15 @@ bool Geocoder::HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry
{
for (auto const & layer : ctx.GetLayers())
{
auto const layerType = layer.m_type;
auto const layerType = layer.GetType();
if (layerType > Type::Locality)
break;
if (layerType != Type::Locality)
continue;
for (auto const docId : layer.m_entries)
for (auto const & candidate : layer.GetCandidatesByCertainty())
{
auto const & docId = candidate.m_entry;
auto const & matchedEntry = m_index.GetDoc(docId);
if (m_hierarchy.IsParentTo(matchedEntry, member))
return true;

View file

@ -18,6 +18,7 @@
#include <utility>
#include <vector>
#include <boost/optional.hpp>
#include <boost/serialization/version.hpp>
namespace geocoder
@ -45,11 +46,30 @@ public:
DECLARE_EXCEPTION(Exception, RootException);
DECLARE_EXCEPTION(OpenException, Exception);
// A Layer contains all entries matched by a subquery of consecutive tokens.
struct Layer
// A Candidate contains a matched entry together with the total certainty of all matched tokens.
struct Candidate
{
Type m_type = Type::Count;
std::vector<Index::DocId> m_entries;
Index::DocId m_entry;
double m_totalCertainty;
};
// A Layer holds the type and the candidates matched by a subquery of consecutive tokens.
class Layer
{
public:
  Layer() = default;
  Layer(Type type);

  Type GetType() const noexcept
  {
    return m_type;
  }

  // Candidates in the order established by SetCandidates().
  std::vector<Candidate> const & GetCandidatesByCertainty() const noexcept
  {
    return m_candidatesByCertainty;
  }

  // Replaces the stored candidates with |candidates|, consuming the argument.
  void SetCandidates(std::vector<Candidate> && candidates);

private:
  // Type::Count is the value a default-constructed layer gets.
  Type m_type = Type::Count;
  std::vector<Candidate> m_candidatesByCertainty;
};
// This class is very similar to the one we use in search/.
@ -158,13 +178,16 @@ private:
Layer & curLayer) const;
void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery,
Layer & curLayer) const;
void AddResults(Context & ctx, std::vector<Index::DocId> const & entries) const;
void AddResults(Context & ctx, std::vector<Candidate> const & candidates) const;
bool InCityState(Hierarchy::Entry const & entry) const;
// Returns whether any of the paths through |layers| can be extended
// by appending |e|.
bool HasParent(std::vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const;
// Finds the max certainty among the candidates of the last layer that are parents of |e|.
// 0 - |layers| is empty, i.e. |e| is the first candidate.
// none - none of the candidates is a parent of |e|.
boost::optional<double> FindMaxCertaintyInParentCandidates(
std::vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const;
bool IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member,
Tokens const & subquery) const;
bool HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry const & member) const;

View file

@ -24,9 +24,9 @@ using Id = base::GeoObjectId;
double const kCertaintyEps = 1e-4;
string const kRegionsData = R"#(
C00000000004B279 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-80.1142033187951, 21.55511095]}, "properties": {"locales": {"default": {"name": "Cuba", "address": {"country": "Cuba"}}}, "rank": 2}}
C0000000001C4CA7 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.7260117405499, 21.74300205]}, "properties": {"locales": {"default": {"name": "Ciego de Ávila", "address": {"region": "Ciego de Ávila", "country": "Cuba"}}}, "rank": 4}}
C00000000059D6B5 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.9263054493181, 22.08185765]}, "properties": {"locales": {"default": {"name": "Florencia", "address": {"subregion": "Florencia", "region": "Ciego de Ávila", "country": "Cuba"}}}, "rank": 6}}
C00000000004B279 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-80.1142033187951, 21.55511095]}, "properties": {"kind": "country", "locales": {"default": {"name": "Cuba", "address": {"country": "Cuba"}}}, "rank": 2}}
C0000000001C4CA7 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.7260117405499, 21.74300205]}, "properties": {"kind": "province", "locales": {"default": {"name": "Ciego de Ávila", "address": {"region": "Ciego de Ávila", "country": "Cuba"}}}, "rank": 4}}
C00000000059D6B5 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.9263054493181, 22.08185765]}, "properties": {"kind": "district", "locales": {"default": {"name": "Florencia", "address": {"subregion": "Florencia", "region": "Ciego de Ávila", "country": "Cuba"}}}, "rank": 6}}
)#";
} // namespace
@ -44,7 +44,7 @@ void TestGeocoder(Geocoder & geocoder, string const & query, vector<Result> && e
TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0,
(query, actual[i].m_certainty));
TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, (query));
TEST(base::AlmostEqualAbs(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps),
TEST_NEAR(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps,
(query, actual[i].m_certainty, expected[i].m_certainty));
}
}
@ -59,8 +59,8 @@ UNIT_TEST(Geocoder_Smoke)
base::GeoObjectId const cubaId(0xc00000000004b279);
TestGeocoder(geocoder, "florencia", {{florenciaId, 1.0}});
TestGeocoder(geocoder, "cuba florencia", {{florenciaId, 1.0}, {cubaId, 0.714286}});
TestGeocoder(geocoder, "florencia somewhere in cuba", {{cubaId, 0.714286}, {florenciaId, 1.0}});
TestGeocoder(geocoder, "cuba florencia", {{florenciaId, 1.0}, {cubaId, 0.713776}});
TestGeocoder(geocoder, "florencia somewhere in cuba", {{cubaId, 0.713776}, {florenciaId, 1.0}});
}
UNIT_TEST(Geocoder_Hierarchy)
@ -88,31 +88,31 @@ UNIT_TEST(Geocoder_Hierarchy)
UNIT_TEST(Geocoder_EnglishNames)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица Новый Арбат"}}, "en": {"address": {"locality": "Moscow", "street": "New Arbat Avenue"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}}}
11 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица Новый Арбат"}}, "en": {"address": {"locality": "Moscow", "street": "New Arbat Avenue"}}}}}
)#";
Geocoder geocoder;
ScopedFile const regionsJsonFile("regions.jsonl", kData);
geocoder.LoadFromJsonl(regionsJsonFile.GetFullPath());
TestGeocoder(geocoder, "Moscow, New Arbat", {{Id{0x11}, 1.0}, {Id{0x10}, 0.5555}});
TestGeocoder(geocoder, "Moscow, New Arbat", {{Id{0x11}, 1.0}, {Id{0x10}, 0.558011}});
}
UNIT_TEST(Geocoder_OnlyBuildings)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Some Locality"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Some Locality"}}}}}
21 {"properties": {"locales": {"default": {"address": {"street": "Good", "locality": "Some Locality"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}}}}
21 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Good", "locality": "Some Locality"}}}}}
22 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}}}}
31 {"properties": {"locales": {"default": {"address": {"street": "Bad", "locality": "Some Locality"}}}}}
32 {"properties": {"locales": {"default": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}}}}
31 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Bad", "locality": "Some Locality"}}}}}
32 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}}}}
40 {"properties": {"locales": {"default": {"address": {"street": "MaybeNumbered", "locality": "Some Locality"}}}}}
41 {"properties": {"locales": {"default": {"address": {"street": "MaybeNumbered-3", "locality": "Some Locality"}}}}}
42 {"properties": {"locales": {"default": {"address": {"building": "3", "street": "MaybeNumbered", "locality": "Some Locality"}}}}}
40 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "MaybeNumbered", "locality": "Some Locality"}}}}}
41 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "MaybeNumbered-3", "locality": "Some Locality"}}}}}
42 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "3", "street": "MaybeNumbered", "locality": "Some Locality"}}}}}
)#";
Geocoder geocoder;
@ -126,8 +126,8 @@ UNIT_TEST(Geocoder_OnlyBuildings)
base::GeoObjectId const building10(0x32);
TestGeocoder(geocoder, "some locality", {{localityId, 1.0}});
TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.833333}});
TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.833333}});
TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.834711}});
TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.834711}});
TestGeocoder(geocoder, "some locality good 5", {{building5, 1.0}});
TestGeocoder(geocoder, "some locality bad 10", {{building10, 1.0}});
@ -142,20 +142,20 @@ UNIT_TEST(Geocoder_OnlyBuildings)
base::GeoObjectId const numberedStreet(0x41);
base::GeoObjectId const houseOnANonNumberedStreet(0x42);
TestGeocoder(geocoder, "some locality maybenumbered 3",
{{numberedStreet, 1.0}, {houseOnANonNumberedStreet, 0.864286}});
{{numberedStreet, 1.0}, {houseOnANonNumberedStreet, 0.865248}});
}
UNIT_TEST(Geocoder_MismatchedLocality)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Moscow"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Paris"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Moscow"}}}}}
11 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Paris"}}}}}
21 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Moscow"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Moscow"}}}}}
21 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Krymskaya", "locality": "Moscow"}}}}}
22 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Moscow"}}}}}
31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Paris"}}}}}
32 {"properties": {"locales": {"default": {"address": {"building": "3", "street": "Krymskaya", "locality": "Paris"}}}}}
31 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Krymskaya", "locality": "Paris"}}}}}
32 {"properties": {"kind": "builidng", "locales": {"default": {"address": {"building": "3", "street": "Krymskaya", "locality": "Paris"}}}}}
)#";
Geocoder geocoder;
@ -174,34 +174,34 @@ UNIT_TEST(Geocoder_MismatchedLocality)
UNIT_TEST(Geocoder_MoscowLocalityRank)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"region": "Москва"}}}, "rank": 2}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}, "rank": 4}}
12 {"properties": {"locales": {"default": {"address": {"street": "Ленинский проспект", "locality": "Москва", "region": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}}}
10 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Москва"}}}, "rank": 2}}
11 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}, "rank": 4}}
12 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Ленинский проспект", "locality": "Москва", "region": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}}}
20 {"properties": {"locales": {"default": {"address": {"region": "Тверская Область"}}}, "rank": 2}}
21 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Тверская Область"}}}, "rank": 4}}
22 {"properties": {"locales": {"default": {"address": {"street": "Ленинский проспект", "locality": "Москва", "region": "Тверская Область"}}}}}
20 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Тверская Область"}}}, "rank": 2}}
21 {"properties": {"kind": "hamlet", "locales": {"default": {"address": {"locality": "Москва", "region": "Тверская Область"}}}, "rank": 4}}
22 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Ленинский проспект", "locality": "Москва", "region": "Тверская Область"}}}}}
)#";
Geocoder geocoder;
ScopedFile const regionsJsonFile("regions.jsonl", kData);
geocoder.LoadFromJsonl(regionsJsonFile.GetFullPath());
TestGeocoder(geocoder, "Москва", {{Id{0x11}, 1.0}, {Id{0x21}, 0.990099}, {Id{0x10}, 0.792079}});
TestGeocoder(geocoder, "Москва, Ленинский проспект", {{Id{0x12}, 1.0}, {Id{0x22}, 0.994475},
{Id{0x11}, 0.558011}, {Id{0x21}, 0.552486},
{Id{0x10}, 0.441989}});
TestGeocoder(geocoder, "Москва", {{Id{0x11}, 1.0}, {Id{0x21}, 0.207843}, {Id{0x10}, 0.794118}});
TestGeocoder(geocoder, "Москва, Ленинский проспект",
{{Id{0x12}, 1.0}, {Id{0x22}, 0.556044}, {Id{0x11}, 0.56044}, {Id{0x10}, 0.445055},
{Id{0x21}, 0.116484}});
}
// Geocoder_StreetWithNumber* ----------------------------------------------------------------------
UNIT_TEST(Geocoder_StreetWithNumberInCity)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
20 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск"}}}}}
28 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}}}
20 {"properties": {"kind": "town", "locales": {"default": {"address": {"locality": "Краснокамск"}}}}}
28 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}}}
)#";
Geocoder geocoder;
@ -214,8 +214,8 @@ UNIT_TEST(Geocoder_StreetWithNumberInCity)
UNIT_TEST(Geocoder_StreetWithNumberInClassifiedCity)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
)#";
Geocoder geocoder;
@ -228,11 +228,11 @@ UNIT_TEST(Geocoder_StreetWithNumberInClassifiedCity)
UNIT_TEST(Geocoder_StreetWithNumberInAnyCity)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
20 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск"}}}}}
28 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}}}
20 {"properties": {"kind": "town", "locales": {"default": {"address": {"locality": "Краснокамск"}}}}}
28 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}}}
)#";
Geocoder geocoder;
@ -245,8 +245,8 @@ UNIT_TEST(Geocoder_StreetWithNumberInAnyCity)
UNIT_TEST(Geocoder_StreetWithNumberAndWithoutStreetSynonym)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}}
)#";
Geocoder geocoder;
@ -259,8 +259,8 @@ UNIT_TEST(Geocoder_StreetWithNumberAndWithoutStreetSynonym)
UNIT_TEST(Geocoder_UntypedStreetWithNumberAndStreetSynonym)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
13 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "8 Марта"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
13 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "8 Марта"}}}}}
)#";
Geocoder geocoder;
@ -273,10 +273,10 @@ UNIT_TEST(Geocoder_UntypedStreetWithNumberAndStreetSynonym)
UNIT_TEST(Geocoder_StreetWithTwoNumbers)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
12 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "4-я улица 8 Марта"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
12 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "4-я улица 8 Марта"}}}}}
13 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}}}
13 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}}}
)#";
Geocoder geocoder;
@ -289,9 +289,9 @@ UNIT_TEST(Geocoder_StreetWithTwoNumbers)
UNIT_TEST(Geocoder_BuildingOnStreetWithNumber)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
13 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}}}
15 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта", "building": "4"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
13 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}}}
15 {"properties": {"kind": "street", "locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта", "building": "4"}}}}}
)#";
Geocoder geocoder;
@ -305,10 +305,10 @@ UNIT_TEST(Geocoder_BuildingOnStreetWithNumber)
UNIT_TEST(Geocoder_LocalityBuilding)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Zelenograd"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}}
31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}}
32 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Zelenograd"}}}}}
22 {"properties": {"kind": "builiding", "locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}}
31 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}}
32 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}}
)#";
Geocoder geocoder;
@ -323,44 +323,44 @@ UNIT_TEST(Geocoder_LocalityBuilding)
UNIT_TEST(Geocoder_LocalityBuildingRankWithSuburb)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"locales": {"default": {"address": {"suburb": "Арбат", "locality": "Москва"}}}}}
12 {"properties": {"locales": {"default": {"address": {"building": "1", "suburb": "Арбат", "locality": "Москва"}}}}}
13 {"properties": {"locales": {"default": {"address": {"suburb": "район Северный", "locality": "Москва"}}}}}
14 {"properties": {"locales": {"default": {"address": {"building": "1", "suburb": "район Северный", "locality": "Москва"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва"}}}}}
11 {"properties": {"kind": "suburb", "locales": {"default": {"address": {"suburb": "Арбат", "locality": "Москва"}}}}}
12 {"properties": {"kind": "builidng", "locales": {"default": {"address": {"building": "1", "suburb": "Арбат", "locality": "Москва"}}}}}
13 {"properties": {"kind": "suburb", "locales": {"default": {"address": {"suburb": "район Северный", "locality": "Москва"}}}}}
14 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "1", "suburb": "район Северный", "locality": "Москва"}}}}}
)#";
Geocoder geocoder;
ScopedFile const regionsJsonFile("regions.jsonl", kData);
geocoder.LoadFromJsonl(regionsJsonFile.GetFullPath());
TestGeocoder(geocoder, "Москва, Арбат 1", {{Id{0x12}, 1.0}, {Id{0x14}, 0.836066}});
TestGeocoder(geocoder, "Москва, Арбат 1", {{Id{0x12}, 1.0}, {Id{0x14}, 0.830645}});
}
//--------------------------------------------------------------------------------------------------
UNIT_TEST(Geocoder_LocalityAndStreetBuildingsRank)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"locality": "Zelenograd"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}}
31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}}
32 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}}
10 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Zelenograd"}}}}}
22 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}}
31 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}}
32 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}}
)#";
Geocoder geocoder;
ScopedFile const regionsJsonFile("regions.jsonl", kData);
geocoder.LoadFromJsonl(regionsJsonFile.GetFullPath());
TestGeocoder(geocoder, "Zelenograd, Krymskaya 2", {{Id{0x32}, 1.0}, {Id{0x22}, 0.71831}});
TestGeocoder(geocoder, "Zelenograd, Krymskaya 2", {{Id{0x32}, 1.0}, {Id{0x22}, 0.72028}});
}
// Geocoder_Subregion* -----------------------------------------------------------------------------
UNIT_TEST(Geocoder_SubregionInLocality)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"region": "Москва"}}}, "rank": 2}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}}, "rank": 4}}
12 {"properties": {"locales": {"default": {"address": {"subregion": "Северный административный округ", "locality": "Москва", "region": "Москва"}}}, "rank": 3}}
10 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Москва"}}}, "rank": 2}}
11 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}}, "rank": 4}}
12 {"properties": {"kind": "district", "locales": {"default": {"address": {"subregion": "Северный административный округ", "locality": "Москва", "region": "Москва"}}}, "rank": 3}}
)#";
Geocoder geocoder;
@ -369,20 +369,20 @@ UNIT_TEST(Geocoder_SubregionInLocality)
TestGeocoder(geocoder, "Северный административный округ", {{Id{0x12}, 1.0}});
TestGeocoder(geocoder, "Москва, Северный административный округ",
{{Id{0x12}, 1.0}, {Id{0x11}, 0.314642}, {Id{0x10}, 0.249221}});
TestGeocoder(geocoder, "Москва", {{Id{0x11}, 1.0}, {Id{0x10}, 0.792079}});
{{Id{0x12}, 1.0}, {Id{0x11}, 0.316181}, {Id{0x10}, 0.251085}});
TestGeocoder(geocoder, "Москва", {{Id{0x11}, 1.0}, {Id{0x10}, 0.794118}});
}
// Geocoder_NumericalSuburb* ----------------------------------------------------------------------
UNIT_TEST(Geocoder_NumericalSuburbRelevance)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"region": "Metro Manila"}}}}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Caloocan", "region": "Metro Manila"}}}}}
12 {"properties": {"locales": {"default": {"address": {"suburb": "60", "locality": "Caloocan", "region": "Metro Manila"}}}}}
20 {"properties": {"locales": {"default": {"address": {"locality": "Белгород"}}}}}
21 {"properties": {"locales": {"default": {"address": {"street": "Щорса", "locality": "Белгород"}}}}}
22 {"properties": {"locales": {"default": {"address": {"building": "60", "street": "Щорса", "locality": "Белгород"}}}}}
10 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Metro Manila"}}}}}
11 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Caloocan", "region": "Metro Manila"}}}}}
12 {"properties": {"kind": "suburb", "locales": {"default": {"address": {"suburb": "60", "locality": "Caloocan", "region": "Metro Manila"}}}}}
20 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Белгород"}}}}}
21 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Щорса", "locality": "Белгород"}}}}}
22 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "60", "street": "Щорса", "locality": "Белгород"}}}}}
)#";
Geocoder geocoder;
@ -399,11 +399,11 @@ UNIT_TEST(Geocoder_NumericalSuburbRelevance)
UNIT_TEST(Geocoder_Serialization)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"country": "Россия"}}, "en": {"address": {"country": "Russia"}}}, "rank": 1}}
11 {"properties": {"locales": {"default": {"address": {"region": "Москва", "country": "Россия"}}}, "rank": 2}}
12 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Москва", "country": "Россия"}}}, "rank": 4}}
13 {"properties": {"locales": {"default": {"address": {"street": "Арбат", "locality": "Москва", "region": "Москва", "country": "Россия"}}}, "rank": 7}}
15 {"properties": {"locales": {"default": {"address": {"building": "4", "street": "Арбат", "locality": "Москва", "region": "Москва", "country": "Россия"}}}, "rank": 8}}
10 {"properties": {"kind": "country", "locales": {"default": {"address": {"country": "Россия"}}, "en": {"address": {"country": "Russia"}}}, "rank": 1}}
11 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Москва", "country": "Россия"}}}, "rank": 2}}
12 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Москва", "region": "Москва", "country": "Россия"}}}, "rank": 4}}
13 {"properties": {"kind": "street", "locales": {"default": {"address": {"street": "Арбат", "locality": "Москва", "region": "Москва", "country": "Россия"}}}, "rank": 7}}
15 {"properties": {"kind": "building", "locales": {"default": {"address": {"building": "4", "street": "Арбат", "locality": "Москва", "region": "Москва", "country": "Россия"}}}, "rank": 8}}
)#";
Geocoder geocoderFromJsonl;
@ -462,7 +462,7 @@ UNIT_TEST(Geocoder_BigFileConcurrentRead)
<< "{"
<< R"("type": "Feature",)"
<< R"("geometry": {"type": "Point", "coordinates": [0, 0]},)"
<< R"("properties": {"locales": {"default": {)"
<< R"("properties": {"kind": "country", "locales": {"default": {)"
<< R"("name": ")" << i << R"(", "address": {"country": ")" << i << R"("}}}, "rank": 2})"
<< "}\n";
}
@ -473,4 +473,32 @@ UNIT_TEST(Geocoder_BigFileConcurrentRead)
TEST_EQUAL(geocoder.GetHierarchy().GetEntries().size(), kEntryCount, ());
}
//--------------------------------------------------------------------------------------------------
// Two localities share the name "Красноярск": a hamlet (id 0x11) and a city (id 0x21), each in
// its own region. A query by the bare name must put the city first with certainty 1.0 and the
// hamlet after it with a much lower certainty.
UNIT_TEST(Geocoder_CityVsHamletRankTest)
{
  // The raw literal is kept flush-left so that the jsonl payload is exactly what is written to
  // the scoped file.
  string const kHierarchyJsonl = R"#(
10 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Оренбургская область"}}}}}
11 {"properties": {"kind": "hamlet", "locales": {"default": {"address": {"locality": "Красноярск", "region": "Оренбургская область"}}}}}
20 {"properties": {"kind": "state", "locales": {"default": {"address": {"region": "Красноярский край"}}}}}
21 {"properties": {"kind": "city", "locales": {"default": {"address": {"locality": "Красноярск", "region": "Красноярский край"}}}}}
)#";

  Geocoder geocoder;
  ScopedFile const hierarchyFile("regions.jsonl", kHierarchyJsonl);
  auto const & hierarchyPath = hierarchyFile.GetFullPath();
  geocoder.LoadFromJsonl(hierarchyPath);

  TestGeocoder(geocoder, "Красноярск", {{Id{0x21}, 1.0}, {Id{0x11}, 0.2099}});
}
// Kind tests --------------------------------------------------------------------------------------
// Every real Kind (everything strictly between Unknown and Count) must survive a
// ToString -> KindFromString round trip.
UNIT_TEST(Geocoder_KindStringConversion)
{
  // The loop below starts at 1 to step over Unknown, so pin Unknown to 0 first.
  TEST_EQUAL(static_cast<int>(Kind::Unknown), 0, ());

  int const kindsEnd = static_cast<int>(Kind::Count);
  for (int rawKind = 1; rawKind < kindsEnd; ++rawKind)
  {
    Kind const expectedKind = static_cast<Kind>(rawKind);
    Kind const parsedKind = KindFromString(ToString(expectedKind));
    TEST_EQUAL(expectedKind, parsedKind, ());
  }
}
} // namespace geocoder

View file

@ -48,13 +48,17 @@ bool Hierarchy::Entry::DeserializeFromJSONImpl(
if (!DeserializeAddressFromJSON(root, normalizedNameDictionaryBuilder, stats))
return false;
coding::JsonValue const & properties = coding::GetJsonObligatoryField(root, "properties");
coding::JsonValue const & defaultLocale =
coding::GetJsonObligatoryFieldByPath(root, "properties", "locales", "default");
coding::FromJsonObjectOptionalField(defaultLocale, "name", m_name);
coding::GetJsonObligatoryFieldByPath(properties, "locales", "default");
coding::FromJsonObjectOptionalField(defaultLocale, "name", m_name);
if (m_name.empty())
++stats.m_emptyNames;
if (auto const * kind = coding::GetJsonOptionalField(properties, "kind"))
m_kind = KindFromString(kind->GetString());
if (m_type == Type::Count)
{
LOG(LDEBUG, ("No address in an hierarchy entry:", jsonStr));

View file

@ -73,6 +73,7 @@ public:
ar & m_osmId;
ar & m_name;
ar & m_type;
ar & m_kind;
ar & m_normalizedAddress;
}
@ -101,6 +102,7 @@ public:
std::string m_name;
Type m_type = Type::Count;
Kind m_kind{Kind::Unknown};
// The positions of entry address fields in normalized name dictionary, one per Type.
std::array<NameDictionary::Position, static_cast<size_t>(Type::Count)> m_normalizedAddress{};

View file

@ -27,4 +27,71 @@ string DebugPrint(Type type)
{
return ToString(type);
}
// Returns the lowercase name of |kind|, spelled the same way KindFromString() accepts it.
//
// Kind::Unknown is a legitimate runtime value: it is the default of a hierarchy entry's kind
// and the result of KindFromString() for any unrecognized string. It is therefore printed as
// "unknown" instead of being treated as unreachable, so that logging or debug-printing such an
// entry cannot abort. The round trip still holds, because KindFromString("unknown") falls
// back to Kind::Unknown.
//
// Kind::Count is only the number of enumerators and must never be passed here.
char const * ToString(Kind kind)
{
  switch (kind)
  {
  case Kind::Unknown:
    return "unknown";
  case Kind::Country:
    return "country";
  case Kind::State:
    return "state";
  case Kind::Province:
    return "province";
  case Kind::District:
    return "district";
  case Kind::County:
    return "county";
  case Kind::Municipality:
    return "municipality";
  case Kind::City:
    return "city";
  case Kind::Town:
    return "town";
  case Kind::Village:
    return "village";
  case Kind::Hamlet:
    return "hamlet";
  case Kind::IsolatedDwelling:
    return "isolated_dwelling";
  case Kind::Suburb:
    return "suburb";
  case Kind::Quarter:
    return "quarter";
  case Kind::Neighbourhood:
    return "neighbourhood";
  case Kind::Street:
    return "street";
  case Kind::Building:
    return "building";
  case Kind::Count:
    UNREACHABLE();
  }
  // Reached only for values outside the enumerator list (e.g. a corrupted cast).
  UNREACHABLE();
}
// Parses the value of a "kind" property into a Kind.
// The match is exact (case-sensitive, no trimming); any string that is not one of the names
// listed below yields Kind::Unknown.
Kind KindFromString(std::string const & str)
{
  struct NamedKind
  {
    char const * m_name;
    Kind m_kind;
  };

  static NamedKind const kNamedKinds[] = {
      {"country", Kind::Country},
      {"state", Kind::State},
      {"province", Kind::Province},
      {"district", Kind::District},
      {"county", Kind::County},
      {"municipality", Kind::Municipality},
      {"city", Kind::City},
      {"town", Kind::Town},
      {"village", Kind::Village},
      {"hamlet", Kind::Hamlet},
      {"isolated_dwelling", Kind::IsolatedDwelling},
      {"suburb", Kind::Suburb},
      {"quarter", Kind::Quarter},
      {"neighbourhood", Kind::Neighbourhood},
      {"street", Kind::Street},
      {"building", Kind::Building},
  };

  for (auto const & candidate : kNamedKinds)
  {
    if (str == candidate.m_name)
      return candidate.m_kind;
  }

  return Kind::Unknown;
}
} // namespace geocoder

View file

@ -7,7 +7,7 @@
namespace geocoder
{
enum : unsigned int { kIndexFormatVersion = 1 };
enum : unsigned int { kIndexFormatVersion = 2 };
using Tokens = std::vector<std::string>;
@ -29,4 +29,31 @@ enum class Type
std::string ToString(Type type);
std::string DebugPrint(Type type);
// Kind of a hierarchy entry, taken from the optional "kind" property of a jsonl record
// (see KindFromString()).
// NOTE(review): the kind is serialized together with the hierarchy entry, so reordering or
// inserting enumerators presumably requires bumping kIndexFormatVersion - confirm.
enum class Kind
{
// No kind was given or the given string was not recognized. Must stay 0.
Unknown = 0,
Country,
State,
Province,
District,
County,
Municipality,
City,
Town,
Village,
Hamlet,
IsolatedDwelling,
Suburb,
Quarter,
Neighbourhood,
Street,
Building,
// Number of enumerators above; not a real kind. Used as an iteration bound.
Count
};
// Returns the lowercase string name of |kind| (e.g. "city", "isolated_dwelling").
char const * ToString(Kind kind);
// Parses a kind name produced by ToString(Kind). Returns Kind::Unknown when |str| is not
// a recognized kind name.
Kind KindFromString(std::string const & str);
} // namespace geocoder