[search] Translated names in results.

This commit is contained in:
Yuri Gorshenin 2017-12-12 12:20:42 +03:00 committed by Vladimir Byko-Ianko
parent 4e50a45aad
commit a4e3446245
14 changed files with 287 additions and 44 deletions

View file

@ -120,6 +120,8 @@ set(
ranking_info.hpp
ranking_utils.cpp
ranking_utils.hpp
region_info_getter.cpp
region_info_getter.hpp
result.cpp
result.hpp
retrieval.cpp

View file

@ -112,6 +112,7 @@ Engine::Engine(Index & index, CategoriesHolder const & categories,
m_threads.emplace_back(&Engine::MainLoop, this, ref(m_contexts[i]));
LoadCitiesBoundaries();
LoadCountriesTree();
}
Engine::~Engine()
@ -138,42 +139,43 @@ weak_ptr<ProcessorHandle> Engine::Search(SearchParams const & params)
void Engine::SetLocale(string const & locale)
{
PostMessage(Message::TYPE_BROADCAST, [this, locale](Processor & processor)
{
processor.SetPreferredLocale(locale);
});
PostMessage(Message::TYPE_BROADCAST,
[locale](Processor & processor) { processor.SetPreferredLocale(locale); });
}
void Engine::ClearCaches()
{
PostMessage(Message::TYPE_BROADCAST, [this](Processor & processor)
{
processor.ClearCaches();
});
PostMessage(Message::TYPE_BROADCAST, [](Processor & processor) { processor.ClearCaches(); });
}
void Engine::LoadCitiesBoundaries()
{
PostMessage(Message::TYPE_BROADCAST,
[this](Processor & processor) { processor.LoadCitiesBoundaries(); });
[](Processor & processor) { processor.LoadCitiesBoundaries(); });
}
void Engine::LoadCountriesTree()
{
PostMessage(Message::TYPE_BROADCAST,
[](Processor & processor) { processor.LoadCountriesTree(); });
}
void Engine::OnBookmarksCreated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & marks)
{
PostMessage(Message::TYPE_BROADCAST,
[this, marks](Processor & processor) { processor.OnBookmarksCreated(marks); });
[marks](Processor & processor) { processor.OnBookmarksCreated(marks); });
}
void Engine::OnBookmarksUpdated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & marks)
{
PostMessage(Message::TYPE_BROADCAST,
[this, marks](Processor & processor) { processor.OnBookmarksUpdated(marks); });
[marks](Processor & processor) { processor.OnBookmarksUpdated(marks); });
}
void Engine::OnBookmarksDeleted(vector<bookmarks::Id> const & marks)
{
PostMessage(Message::TYPE_BROADCAST,
[this, marks](Processor & processor) { processor.OnBookmarksDeleted(marks); });
[marks](Processor & processor) { processor.OnBookmarksDeleted(marks); });
}
void Engine::MainLoop(Context & context)

View file

@ -106,6 +106,9 @@ public:
// Posts request to reload cities boundaries tables.
void LoadCitiesBoundaries();
// Posts request to load countries tree.
void LoadCountriesTree();
void OnBookmarksCreated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
void OnBookmarksUpdated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
void OnBookmarksDeleted(std::vector<bookmarks::Id> const & marks);

View file

@ -112,16 +112,13 @@ RankerResult::RankerResult(double lat, double lon)
m_region.SetParams(string(), MercatorBounds::FromLatLon(lat, lon));
}
string RankerResult::GetRegionName(storage::CountryInfoGetter const & infoGetter,
uint32_t ftype) const
bool RankerResult::GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype,
storage::TCountryId & countryId) const
{
static SkipRegionInfo const checker;
if (checker.IsSkip(ftype))
return string();
storage::CountryInfo info;
m_region.GetRegion(infoGetter, info);
return info.m_name;
return false;
return m_region.GetCountryId(infoGetter, countryId);
}
bool RankerResult::IsEqualCommon(RankerResult const & r) const
@ -153,13 +150,23 @@ uint32_t RankerResult::GetBestType(set<uint32_t> const * pPrefferedTypes) const
}
// RankerResult::RegionInfo ------------------------------------------------------------------------
void RankerResult::RegionInfo::GetRegion(storage::CountryInfoGetter const & infoGetter,
storage::CountryInfo & info) const
bool RankerResult::RegionInfo::GetCountryId(storage::CountryInfoGetter const & infoGetter,
storage::TCountryId & countryId) const
{
if (!m_file.empty())
infoGetter.GetRegionInfo(m_file, info);
else
infoGetter.GetRegionInfo(m_point, info);
if (!m_countryId.empty())
{
countryId = m_countryId;
return true;
}
auto const id = infoGetter.GetRegionCountryId(m_point);
if (id != storage::kInvalidCountryId)
{
countryId = id;
return true;
}
return false;
}
// Functions ---------------------------------------------------------------------------------------

View file

@ -5,6 +5,8 @@
#include "search/ranking_utils.hpp"
#include "search/result.hpp"
#include "storage/index.hpp"
#include "indexer/feature_data.hpp"
#include "std/set.hpp"
@ -86,7 +88,8 @@ public:
double GetDistanceToPivot() const { return m_info.m_distanceToPivot; }
double GetLinearModelRank() const { return m_info.GetLinearModelRank(); }
string GetRegionName(storage::CountryInfoGetter const & infoGetter, uint32_t ftype) const;
bool GetCountryId(storage::CountryInfoGetter const & infoGetter, uint32_t ftype,
storage::TCountryId & countryId) const;
bool IsEqualCommon(RankerResult const & r) const;
@ -97,17 +100,17 @@ private:
struct RegionInfo
{
string m_file;
storage::TCountryId m_countryId;
m2::PointD m_point;
inline void SetParams(string const & file, m2::PointD const & pt)
void SetParams(storage::TCountryId const & countryId, m2::PointD const & point)
{
m_file = file;
m_point = pt;
m_countryId = countryId;
m_point = point;
}
void GetRegion(storage::CountryInfoGetter const & infoGetter,
storage::CountryInfo & info) const;
bool GetCountryId(storage::CountryInfoGetter const & infoGetter,
storage::TCountryId & countryId) const;
};
RegionInfo m_region;

View file

@ -194,7 +194,7 @@ void Processor::SetPreferredLocale(string const & locale)
// Default initialization.
// If you want to reset input language, call SetInputLocale before search.
SetInputLocale(locale);
m_ranker.SetLocalityLanguage(code);
m_ranker.SetLocale(locale);
}
void Processor::SetInputLocale(string const & locale)
@ -316,6 +316,8 @@ void Processor::LoadCitiesBoundaries()
LOG(LWARNING, ("Can't load cities boundaries"));
}
// Delegates the request to load the countries tree to |m_ranker|.
void Processor::LoadCountriesTree()
{
  m_ranker.LoadCountriesTree();
}
void Processor::OnBookmarksCreated(vector<pair<bookmarks::Id, bookmarks::Doc>> const & /* marks */)
{
// TODO(@y): do something useful with marks.

View file

@ -93,6 +93,7 @@ public:
void ClearCaches();
void LoadCitiesBoundaries();
void LoadCountriesTree();
void OnBookmarksCreated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);
void OnBookmarksUpdated(std::vector<std::pair<bookmarks::Id, bookmarks::Doc>> const & marks);

View file

@ -12,12 +12,15 @@
#include "indexer/feature_algo.hpp"
#include "indexer/search_string_utils.hpp"
#include "platform/preferred_languages.hpp"
#include "coding/multilang_utf8_string.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include <algorithm>
#include <memory>
#include <utility>
#include <boost/optional.hpp>
@ -42,7 +45,7 @@ void UpdateNameScores(string const & name, TSlice const & slice, NameScores & be
template <typename TSlice>
void UpdateNameScores(vector<strings::UniString> const & tokens, TSlice const & slice,
NameScores & bestScores)
NameScores & bestScores)
{
bestScores.m_nameScore = max(bestScores.m_nameScore, GetNameScore(tokens, slice));
bestScores.m_errorsMade = ErrorsMade::Min(bestScores.m_errorsMade, GetErrorsMade(tokens, slice));
@ -368,6 +371,7 @@ Ranker::Ranker(Index const & index, CitiesBoundariesTable const & boundariesTabl
, m_categories(categories)
, m_suggests(suggests)
{
SetLocale("default");
}
void Ranker::Init(Params const & params, Geocoder::Params const & geocoderParams)
@ -387,7 +391,6 @@ void Ranker::Finish(bool cancelled)
Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress,
bool needHighlighting) const
{
uint32_t const type = rankerResult.GetBestType(&m_params.m_preferredTypes);
string name = rankerResult.GetName();
string address;
@ -403,7 +406,8 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress,
name = FormatStreetAndHouse(addr);
}
address = rankerResult.GetRegionName(m_infoGetter, type);
address = GetLocalizedRegionInfoForResult(rankerResult);
// Format full address only for suitable results.
if (ftypes::IsAddressObjectChecker::Instance()(rankerResult.GetTypes()))
address = FormatFullAddress(addressGetter.GetAddress(), address);
@ -415,11 +419,13 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress,
{
case RankerResult::Type::TYPE_FEATURE:
case RankerResult::Type::TYPE_BUILDING:
{
auto const type = rankerResult.GetBestType(&m_params.m_preferredTypes);
return Result(r.GetID(), r.GetCenter(), name, address,
m_categories.GetReadableFeatureType(type, m_params.m_currentLocaleCode), type,
r.GetMetadata());
case RankerResult::Type::TYPE_LATLON:
return Result(r.GetCenter(), name, address);
}
case RankerResult::Type::TYPE_LATLON: return Result(r.GetCenter(), name, address);
}
ASSERT(false, ("Bad RankerResult type:", static_cast<size_t>(r.GetResultType())));
};
@ -431,7 +437,7 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress,
{
m_localities.GetLocality(res.GetFeatureCenter(), [&](LocalityItem const & item) {
string city;
if (item.GetSpecifiedOrDefaultName(m_localityLang, city))
if (item.GetSpecifiedOrDefaultName(m_localeCode, city))
res.AppendCity(city);
});
}
@ -523,6 +529,14 @@ void Ranker::UpdateResults(bool lastUpdate)
// Clears the cache kept by |m_localities|.
void Ranker::ClearCaches()
{
  m_localities.ClearCache();
}
// Stores in |m_localeCode| the language index that StringUtf8Multilang
// reports for the normalized |locale|, then forwards the original
// |locale| to |m_regionInfoGetter|.
void Ranker::SetLocale(string const & locale)
{
  auto const normalized = languages::Normalize(locale);
  m_localeCode = StringUtf8Multilang::GetLangIndex(normalized);

  m_regionInfoGetter.SetLocale(locale);
}
// Delegates the request to load the countries tree to |m_regionInfoGetter|.
void Ranker::LoadCountriesTree()
{
  m_regionInfoGetter.LoadCountriesTree();
}
void Ranker::MakeRankerResults(Geocoder::Params const & geocoderParams,
vector<RankerResult> & results)
{
@ -564,8 +578,8 @@ void Ranker::MatchForSuggestions(strings::UniString const & token, int8_t locale
for (auto const & suggest : m_suggests)
{
strings::UniString const & s = suggest.m_name;
if (suggest.m_prefixLength <= token.size()
&& token != s // do not push suggestion if it already equals to token
if (suggest.m_prefixLength <= token.size() &&
token != s // do not push suggestion if it already equals to token
&& suggest.m_locale == locale // push suggestions only for needed language
&& strings::StartsWith(s, token))
{
@ -608,4 +622,15 @@ void Ranker::ProcessSuggestions(vector<RankerResult> & vec) const
++i;
}
}
// Returns the localized full region name for |result|. The country id
// is requested from |result| using its best type (chosen with respect
// to the preferred types from |m_params|); when no country id is
// reported, an empty string is returned.
string Ranker::GetLocalizedRegionInfoForResult(RankerResult const & result) const
{
  storage::TCountryId countryId;
  auto const bestType = result.GetBestType(&m_params.m_preferredTypes);

  if (result.GetCountryId(m_infoGetter, bestType, countryId))
    return m_regionInfoGetter.GetLocalizedFullName(countryId);

  return {};
}
} // namespace search

View file

@ -6,6 +6,7 @@
#include "search/keyword_lang_matcher.hpp"
#include "search/locality_finder.hpp"
#include "search/mode.hpp"
#include "search/region_info_getter.hpp"
#include "search/result.hpp"
#include "search/reverse_geocoder.hpp"
#include "search/search_params.hpp"
@ -107,7 +108,9 @@ public:
void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
void SetLocalityLanguage(int8_t code) { m_localityLang = code; }
void SetLocale(std::string const & locale);
void LoadCountriesTree();
private:
friend class RankerResultMaker;
@ -119,6 +122,8 @@ private:
std::string const & prolog);
void ProcessSuggestions(std::vector<RankerResult> & vec) const;
std::string GetLocalizedRegionInfoForResult(RankerResult const & result) const;
Params m_params;
Geocoder::Params m_geocoderParams;
ReverseGeocoder const m_reverseGeocoder;
@ -126,7 +131,8 @@ private:
KeywordLangMatcher & m_keywordsScorer;
mutable LocalityFinder m_localities;
int8_t m_localityLang = StringUtf8Multilang::kDefaultCode;
int8_t m_localeCode;
RegionInfoGetter m_regionInfoGetter;
Index const & m_index;
storage::CountryInfoGetter const & m_infoGetter;

View file

@ -0,0 +1,98 @@
#include "search/region_info_getter.hpp"
#include "storage/country_decl.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include <cstddef>
#include <vector>
using namespace std;
using namespace storage;
namespace search
{
namespace
{
// Walks from the node found for |id| up to the root of the
// |countries| tree and invokes |fn| with the name of every visited
// node, the root itself excluded. |fn| is never invoked unless
// exactly one node is found for |id|; if that single node is the
// root, |fn| is not invoked either.
template <typename Fn>
void GetPathToRoot(storage::TCountryId const & id, storage::TCountryTree const & countries,
                   Fn && fn)
{
  vector<storage::TCountryTree::Node const *> found;
  countries.Find(id, found);

  bool const isUnique = found.size() == 1;
  if (!isUnique)
    return;

  // Parent() is only taken from nodes that are not the root, because
  // the loop condition is checked before every step.
  for (auto const * node = found.front(); !node->IsRoot(); node = &node->Parent())
    fn(node->Value().Name());
}
} // namespace
void RegionInfoGetter::LoadCountriesTree()
{
storage::TMappingAffiliations affiliations;
storage::LoadCountriesFromFile(COUNTRIES_FILE, m_countries, affiliations);
}
// Replaces |m_nameGetter| with the getter that the platform factory
// creates for the Countries text source and |locale|.
void RegionInfoGetter::SetLocale(string const & locale)
{
  m_nameGetter =
      platform::GetTextByIdFactory(platform::TextSource::Countries, locale);
}
// Builds a localized, human-readable name for the region |id|.
//
// Localized names are collected for every node on the way from |id|
// to the root of |m_countries|. Only the last two collected names
// (the ones nearest to the root) are kept, empty names are dropped
// and the rest is joined with ", ". When nothing remains, the
// localized name of |id| itself is returned if it is not empty;
// otherwise |id| is passed through CountryInfo::FileName2FullName()
// and the result is returned (it may be empty).
string RegionInfoGetter::GetLocalizedFullName(storage::TCountryId const & id) const
{
  size_t const kMaxNumParts = 2;

  // Names are appended in the order in which nodes are visited, i.e.
  // starting from |id| and moving towards the root.
  vector<string> names;
  GetPathToRoot(id, m_countries, [&](storage::TCountryId const & nodeId) {
    names.push_back(GetLocalizedCountryName(nodeId));
  });

  // Trimming happens before empty names are removed, so fewer than
  // |kMaxNumParts| names may survive.
  if (names.size() > kMaxNumParts)
    names.erase(names.begin(), names.end() - kMaxNumParts);

  my::EraseIf(names, [](string const & s) { return s.empty(); });

  if (!names.empty())
    return strings::JoinStrings(names, ", ");

  // No usable names were collected from the tree: try the localized
  // name of |id| itself first.
  auto result = GetLocalizedCountryName(id);
  if (result.empty())
  {
    // Last resort: transform the map name to the full name.
    result = id;
    storage::CountryInfo::FileName2FullName(result);
  }
  return result;
}
// Returns the text that |m_nameGetter| yields for the key
// |id| + " Short" when it is not empty, and the text for the key |id|
// otherwise. Returns an empty string when |m_nameGetter| is not set.
string RegionInfoGetter::GetLocalizedCountryName(storage::TCountryId const & id) const
{
  if (m_nameGetter == nullptr)
    return {};

  auto & lookup = *m_nameGetter;

  auto shortName = lookup(id + " Short");
  if (shortName.empty())
    return lookup(id);

  return shortName;
}
} // namespace search

View file

@ -0,0 +1,26 @@
#pragma once
#include <memory>
#include <string>
#include "storage/country.hpp"
#include "storage/index.hpp"
#include "platform/get_text_by_id.hpp"
namespace search
{
// Produces localized names of regions identified by storage::TCountryId,
// using a countries tree and a text getter selected by locale.
class RegionInfoGetter
{
public:
// Calls storage::LoadCountriesFromFile() with COUNTRIES_FILE and
// |m_countries|.
void LoadCountriesTree();
// Replaces |m_nameGetter| with a getter created for |locale| from the
// Countries text source.
void SetLocale(std::string const & locale);
// Returns at most two non-empty localized names taken from the path
// between |id| and the root of |m_countries|, joined with ", ". Falls
// back to the localized name of |id| and then to the result of
// CountryInfo::FileName2FullName() applied to |id|.
std::string GetLocalizedFullName(storage::TCountryId const & id) const;
// Returns the text for the key |id| + " Short" if it is not empty,
// otherwise the text for the key |id|. Returns an empty string when no
// name getter is set.
std::string GetLocalizedCountryName(storage::TCountryId const & id) const;
private:
// Tree that is searched by GetLocalizedFullName().
storage::TCountryTree m_countries;
// Source of localized texts; null until SetLocale() assigns it.
std::unique_ptr<platform::GetTextById> m_nameGetter;
};
} // namespace search

View file

@ -20,6 +20,7 @@ set(
point_rect_matcher_tests.cpp
query_saver_tests.cpp
ranking_tests.cpp
region_info_getter_tests.cpp
segment_tree_tests.cpp
string_match_test.cpp
)

View file

@ -0,0 +1,65 @@
#include "testing/testing.hpp"
#include "search/region_info_getter.hpp"
using namespace search;
namespace
{
// Test fixture that owns a RegionInfoGetter and forwards calls to it.
class RegionInfoGetterTest
{
public:
// Loads the countries tree and selects the "default" locale, so every
// test starts from the same state.
RegionInfoGetterTest()
{
m_regionInfoGetter.LoadCountriesTree();
SetLocale("default");
}
// Forwards to RegionInfoGetter::SetLocale().
void SetLocale(std::string const & locale) { m_regionInfoGetter.SetLocale(locale); }
// Forwards to RegionInfoGetter::GetLocalizedFullName().
std::string GetLocalizedFullName(storage::TCountryId const & id) const
{
return m_regionInfoGetter.GetLocalizedFullName(id);
}
// Forwards to RegionInfoGetter::GetLocalizedCountryName().
std::string GetLocalizedCountryName(storage::TCountryId const & id) const
{
return m_regionInfoGetter.GetLocalizedCountryName(id);
}
protected:
// Object under test.
RegionInfoGetter m_regionInfoGetter;
};
// Checks GetLocalizedCountryName() for several locales, including an
// unknown one.
UNIT_CLASS_TEST(RegionInfoGetterTest, CountryName)
{
SetLocale("en");
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "Moscow Oblast", ());
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "Moscow", ());
TEST_EQUAL(GetLocalizedCountryName("United States of America"), "USA", ());
SetLocale("ru");
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "Московская область", ());
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "Москва", ());
TEST_EQUAL(GetLocalizedCountryName("United States of America"), "США", ());
TEST_EQUAL(GetLocalizedCountryName("Crimea"), "Крым", ());
// An unknown locale is expected to produce the same names as "en".
SetLocale("broken locale");
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "Moscow Oblast", ());
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "Moscow", ());
TEST_EQUAL(GetLocalizedCountryName("United States of America"), "USA", ());
SetLocale("zh-Hans");
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow Oblast_East"), "莫斯科州", ());
TEST_EQUAL(GetLocalizedCountryName("Russia_Moscow"), "莫斯科", ());
TEST_EQUAL(GetLocalizedCountryName("United States of America"), "美国", ());
}
// Checks GetLocalizedFullName() in the "ru" locale: one id is expected
// to yield two comma-separated names, the other a single name.
UNIT_CLASS_TEST(RegionInfoGetterTest, FullName)
{
SetLocale("ru");
TEST_EQUAL(GetLocalizedFullName("Russia_Moscow Oblast_East"), "Московская область, Россия", ());
TEST_EQUAL(GetLocalizedFullName("Crimea"), "Крым", ());
}
} // namespace

View file

@ -68,6 +68,8 @@ public:
bool HasParent() const { return m_parent != nullptr; }
bool IsRoot() const { return !HasParent(); }
Node const & Parent() const
{
CHECK(HasParent(), ());