Merge pull request #2 from mapsme/geocoder.locality-rank-improve

[geocoder] Improve Moscow objects ranks
This commit is contained in:
LaGrunge 2019-09-23 13:16:18 +03:00 committed by GitHub
commit 5fd816257d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 48 additions and 12 deletions

View file

@ -502,21 +502,44 @@ void Geocoder::AddResults(Context & ctx, std::vector<Index::DocId> const & entri
auto const & entry = m_index.GetDoc(docId);
auto entryCertainty = certainty;
if (entry.m_type == Type::Locality)
if (InCityState(entry))
{
auto const localityName = entry.m_normalizedAddress[static_cast<size_t>(Type::Locality)];
if (entry.m_normalizedAddress[static_cast<size_t>(Type::Region)] == localityName)
entryCertainty += GetWeight(Type::Region);
if (entry.m_normalizedAddress[static_cast<size_t>(Type::Subregion)] == localityName)
entryCertainty += GetWeight(Type::Subregion);
constexpr auto kCityStateExtraWeight = 0.05;
ASSERT_LESS(kCityStateExtraWeight, GetWeight(Type::Building),
("kCityStateExtraWeight must be smallest"));
// Prefer city-state (Moscow, Istanbul) to other city types.
entryCertainty += kCityStateExtraWeight;
}
ctx.AddResult(entry.m_osmId, entryCertainty, entry.m_type, tokenIds, allTypes);
}
}
// Returns true when the main normalized name of |entry|'s locality equals the
// main normalized name of its region or of its subregion.
// Entries without a locality field never match.
bool Geocoder::InCityState(Hierarchy::Entry const & entry) const
{
  if (!entry.HasFieldInAddress(Type::Locality))
    return false;

  auto const & dictionary = m_hierarchy.GetNormalizedNameDictionary();
  auto const & localityMainName =
      entry.GetNormalizedMultipleNames(Type::Locality, dictionary).GetMainName();

  // Only address levels that are actually present in the entry are compared.
  for (auto const parentType : {Type::Region, Type::Subregion})
  {
    if (entry.HasFieldInAddress(parentType) &&
        entry.GetNormalizedMultipleNames(parentType, dictionary).GetMainName() == localityMainName)
    {
      return true;
    }
  }

  return false;
}
bool Geocoder::HasParent(vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const
{
CHECK(!layers.empty(), ());

View file

@ -159,6 +159,8 @@ private:
Layer & curLayer) const;
void AddResults(Context & ctx, std::vector<Index::DocId> const & entries) const;
// Returns whether the main name of |entry|'s locality coincides with the
// main name of its region or subregion.
bool InCityState(Hierarchy::Entry const & entry) const;
// Returns whether any of the paths through |layers| can be extended
// by appending |e|.
bool HasParent(std::vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const;

View file

@ -176,17 +176,22 @@ UNIT_TEST(Geocoder_MoscowLocalityRank)
{
string const kData = R"#(
10 {"properties": {"locales": {"default": {"address": {"region": "Москва"}}}, "rank": 2}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}}, "rank": 4}}
11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}, "rank": 4}}
12 {"properties": {"locales": {"default": {"address": {"street": "Ленинский проспект", "locality": "Москва", "region": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}}}
20 {"properties": {"locales": {"default": {"address": {"region": "Тверская Область"}}}, "rank": 2}}
21 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Тверская Область"}}}, "rank": 4}}
22 {"properties": {"locales": {"default": {"address": {"street": "Ленинский проспект", "locality": "Москва", "region": "Тверская Область"}}}}}
)#";
Geocoder geocoder;
ScopedFile const regionsJsonFile("regions.jsonl", kData);
geocoder.LoadFromJsonl(regionsJsonFile.GetFullPath());
TestGeocoder(geocoder, "Москва", {{Id{0x11}, 1.0}, {Id{0x10}, 0.625}, {Id{0x21}, 0.375}});
TestGeocoder(geocoder, "Москва", {{Id{0x10}, 1.0}, {Id{0x11}, 0.61}, {Id{0x21}, 0.6}});
TestGeocoder(geocoder, "Москва, Ленинский проспект", {{Id{0x12}, 1.0}, {Id{0x22}, 0.70922},
{Id{0x10}, 0.70922}, {Id{0x11}, 0.432624},
{Id{0x21}, 0.425532}});
}
// Geocoder_StreetWithNumber* ----------------------------------------------------------------------
@ -330,8 +335,8 @@ UNIT_TEST(Geocoder_SubregionInLocality)
TestGeocoder(geocoder, "Северный административный округ", {{Id{0x12}, 1.0}});
TestGeocoder(geocoder, "Москва, Северный административный округ",
{{Id{0x12}, 1.0}, {Id{0x11}, 0.470588}, {Id{0x10}, 0.294118}});
TestGeocoder(geocoder, "Москва", {{Id{0x11}, 1.0}, {Id{0x10}, 0.625}});
{{Id{0x12}, 1.0}, {Id{0x10}, 0.293255}, {Id{0x11}, 0.178886}});
TestGeocoder(geocoder, "Москва", {{Id{0x10}, 1.0}, {Id{0x11}, 0.61}});
}
// Geocoder_NumericalSuburb* ----------------------------------------------------------------------

View file

@ -158,6 +158,11 @@ MultipleNames const & Hierarchy::Entry::GetNormalizedMultipleNames(
return normalizedNameDictionary.Get(addressField);
}
// Returns true iff the slot of |m_normalizedAddress| that corresponds to |type|
// holds something other than NameDictionary::kUnspecifiedPosition.
bool Hierarchy::Entry::HasFieldInAddress(Type type) const
{
  auto const fieldIndex = static_cast<size_t>(type);
  return m_normalizedAddress[fieldIndex] != NameDictionary::kUnspecifiedPosition;
}
// static
Type Hierarchy::Entry::RankToType(uint8_t rank)
{

View file

@ -89,6 +89,7 @@ public:
MultipleNames & multipleNames,
NameDictionaryBuilder & normalizedNameDictionaryBuilder,
ParsingStats & stats);
// Returns whether the address field for |type| is specified, i.e. its slot in
// the normalized address differs from NameDictionary::kUnspecifiedPosition.
bool HasFieldInAddress(Type type) const;
// See generator::regions::LevelRegion::GetRank().
static Type RankToType(uint8_t rank);