Using transliteration for obtaining best feature name.

This commit is contained in:
Daria Volvenkova 2017-03-21 21:38:56 +03:00
parent 61c7d12fbd
commit d13124d482
25 changed files with 135 additions and 77 deletions

View file

@ -9,22 +9,22 @@ namespace
// Languages below were chosen after sorting name:<lang> tags in 2011.
// Note that it's not feasible to increase the number of languages here due to
// our current encoding (6 bits to store a language code).
StringUtf8Multilang::Languages const g_languages = {{ {"default", "Native for each country"},
{"en", "English"}, {"ja", "日本語"}, {"fr", "Français"}, {"ko_rm", "Korean (Romanized)"},
{"ar", "العربية"}, {"de", "Deutsch"}, {"int_name", "International (Latin)"}, {"ru", "Русский"},
{"sv", "Svenska"}, {"zh", "中文"}, {"fi", "Suomi"}, {"be", "Беларуская"}, {"ka", "ქართული"},
{"ko", "한국어"}, {"he", "עברית"}, {"nl", "Nederlands"}, {"ga", "Gaeilge"},
{"ja_rm", "Japanese (Romanized)"}, {"el", "Ελληνικά"}, {"it", "Italiano"}, {"es", "Español"},
{"zh_pinyin", "Chinese (Pinyin)"}, {"th", "ไทย"}, {"cy", "Cymraeg"}, {"sr", "Српски"},
{"uk", "Українська"}, {"ca", "Català"}, {"hu", "Magyar"}, {"hsb", "Hornjoserbšćina"}, {"eu", "Euskara"},
{"fa", "فارسی"}, {"br", "Breton"}, {"pl", "Polski"}, {"hy", "Հայերէն"}, {"kn", "ಕನ್ನಡ"},
{"sl", "Slovenščina"}, {"ro", "Română"}, {"sq", "Shqipe"}, {"am", "አማርኛ"}, {"fy", "Frysk"},
{"cs", "Čeština"}, {"gd", "Gàidhlig"}, {"sk", "Slovenčina"}, {"af", "Afrikaans"},
{"ja_kana", "日本語(カタカナ)"}, {"lb", "Luxembourgish"}, {"pt", "Português"}, {"hr", "Hrvatski"},
{"fur", "Friulian"}, {"vi", "Tiếng Việt"}, {"tr", "Türkçe"}, {"bg", "Български"},
{"eo", "Esperanto"}, {"lt", "Lietuvių"}, {"la", "Latin"}, {"kk", "Қазақ"},
{"gsw", "Schwiizertüütsch"}, {"et", "Eesti"}, {"ku", "Kurdish"}, {"mn", "Mongolian"},
{"mk", "Македонски"}, {"lv", "Latviešu"}, {"hi", "हिन्दी"}
// Each entry is {language code, native language name, English language name}.
// The English name is also passed to Transliterate() to build the ICU transliterator id
// ("<English name>-Latin/BGN"), so it must be the name of the language, not of a country,
// and must not repeat the native spelling.
StringUtf8Multilang::Languages const g_languages = {{ {"default", "Native for each country", "Any"},
{"en", "English", "English"}, {"ja", "日本語", "Japanese"}, {"fr", "Français", "French"}, {"ko_rm", "Korean (Romanized)", "Korean"},
{"ar", "العربية", "Arabic"}, {"de", "Deutsch", "German"}, {"int_name", "International (Latin)", "Latin"}, {"ru", "Русский", "Russian"},
{"sv", "Svenska", "Swedish"}, {"zh", "中文", "Chinese"}, {"fi", "Suomi", "Finnish"}, {"be", "Беларуская", "Belarusian"}, {"ka", "ქართული", "Georgian"},
{"ko", "한국어", "Korean"}, {"he", "עברית", "Hebrew"}, {"nl", "Nederlands", "Dutch"}, {"ga", "Gaeilge", "Irish"},
{"ja_rm", "Japanese (Romanized)", "Japanese"}, {"el", "Ελληνικά", "Greek"}, {"it", "Italiano", "Italian"}, {"es", "Español", "Spanish"},
{"zh_pinyin", "Chinese (Pinyin)", "Chinese"}, {"th", "ไทย", "Thai"}, {"cy", "Cymraeg", "Welsh"}, {"sr", "Српски", "Serbian"},
{"uk", "Українська", "Ukrainian"}, {"ca", "Català", "Catalan"}, {"hu", "Magyar", "Hungarian"}, {"hsb", "Hornjoserbšćina", "Upper Sorbian"}, {"eu", "Euskara", "Basque"},
{"fa", "فارسی", "Farsi"}, {"br", "Breton", "Breton"}, {"pl", "Polski", "Polish"}, {"hy", "Հայերէն", "Armenian"}, {"kn", "ಕನ್ನಡ", "Kannada"},
{"sl", "Slovenščina", "Slovene"}, {"ro", "Română", "Romanian"}, {"sq", "Shqipe", "Albanian"}, {"am", "አማርኛ", "Amharic"}, {"fy", "Frysk", "Frisian"},
{"cs", "Čeština", "Czech"}, {"gd", "Gàidhlig", "Scots Gaelic"}, {"sk", "Slovenčina", "Slovak"}, {"af", "Afrikaans", "Afrikaans"},
{"ja_kana", "日本語(カタカナ)", "Japanese (Katakana)"}, {"lb", "Luxembourgish", "Luxembourgish"}, {"pt", "Português", "Portuguese"}, {"hr", "Hrvatski", "Croatian"},
{"fur", "Friulian", "Friulian"}, {"vi", "Tiếng Việt", "Vietnamese"}, {"tr", "Türkçe", "Turkish"}, {"bg", "Български", "Bulgarian"},
{"eo", "Esperanto", "Esperanto"}, {"lt", "Lietuvių", "Lithuanian"}, {"la", "Latin", "Latin"}, {"kk", "Қазақ", "Kazakh"},
{"gsw", "Schwiizertüütsch", "Swiss German"}, {"et", "Eesti", "Estonian"}, {"ku", "Kurdish", "Kurdish"}, {"mn", "Mongolian", "Mongolian"},
{"mk", "Македонски", "Macedonian"}, {"lv", "Latviešu", "Latvian"}, {"hi", "हिन्दी", "Hindi"}
}};
static_assert(g_languages.size() == StringUtf8Multilang::kMaxSupportedLanguages,
@ -44,6 +44,7 @@ StringUtf8Multilang::Languages const & StringUtf8Multilang::GetSupportedLanguage
ASSERT_EQUAL(g_languages[kInternationalCode].m_code, string("int_name"), ());
return g_languages;
}
// static
int8_t StringUtf8Multilang::GetLangIndex(string const & lang)
{
@ -53,6 +54,7 @@ int8_t StringUtf8Multilang::GetLangIndex(string const & lang)
return kUnsupportedLanguageCode;
}
// static
char const * StringUtf8Multilang::GetLangByCode(int8_t langCode)
{
@ -60,6 +62,7 @@ char const * StringUtf8Multilang::GetLangByCode(int8_t langCode)
return "";
return g_languages[langCode].m_code;
}
// static
char const * StringUtf8Multilang::GetLangNameByCode(int8_t langCode)
{
@ -68,6 +71,14 @@ char const * StringUtf8Multilang::GetLangNameByCode(int8_t langCode)
return g_languages[langCode].m_name;
}
// static
char const * StringUtf8Multilang::GetLangEnNameByCode(int8_t langCode)
{
  // A code is usable only if it indexes an existing entry of g_languages;
  // anything else (negative or past the end of the table) yields an empty string.
  bool const isValidCode =
      langCode >= 0 && langCode < static_cast<int8_t>(g_languages.size());
  return isValidCode ? g_languages[langCode].m_enName : "";
}
size_t StringUtf8Multilang::GetNextIndex(size_t i) const
{
++i;

View file

@ -50,6 +50,8 @@ public:
char const * m_code;
/// Native language name.
char const * m_name;
/// Language name in English.
char const * m_enName;
};
using Languages = array<Lang, kMaxSupportedLanguages>;
@ -61,6 +63,8 @@ public:
static char const * GetLangByCode(int8_t langCode);
/// @returns empty string if langCode is invalid.
static char const * GetLangNameByCode(int8_t langCode);
/// @returns empty string if langCode is invalid.
static char const * GetLangEnNameByCode(int8_t langCode);
inline bool operator== (StringUtf8Multilang const & rhs) const
{

View file

@ -5,7 +5,7 @@ TEMPLATE = app
DEFINES += OGL_TEST_ENABLED GTEST_DONT_DEFINE_TEST COMPILER_TESTS
ROOT_DIR = ../..
DEPENDENCIES = qt_tstfrm indexer platform coding geometry base gmock freetype fribidi expat stats_client stb_image sdf_image
DEPENDENCIES = qt_tstfrm indexer platform coding geometry base gmock freetype fribidi expat stats_client stb_image sdf_image icu
SHADER_COMPILE_ARGS = $$PWD/../shaders shader_index.txt shader_def
include($$ROOT_DIR/common.pri)

View file

@ -4,7 +4,7 @@ CONFIG += console warn_on
CONFIG -= app_bundle
TEMPLATE = app
DEPENDENCIES = drape_frontend drape platform indexer geometry coding base expat stats_client stb_image sdf_image
DEPENDENCIES = drape_frontend drape platform indexer geometry coding base expat stats_client stb_image sdf_image icu
ROOT_DIR = ../..
include($$ROOT_DIR/common.pri)

View file

@ -25,6 +25,7 @@ DEPENDENCIES = \
oauthcpp \
expat \
protobuf \
icu \
include($$ROOT_DIR/common.pri)

View file

@ -8,7 +8,7 @@ TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = generator map routing traffic routing_common storage indexer \
platform geometry coding base minizip succinct protobuf gflags stats_client
platform geometry coding base minizip succinct protobuf gflags stats_client icu
include($$ROOT_DIR/common.pri)

View file

@ -7,7 +7,7 @@ ROOT_DIR = ../..
DEPENDENCIES = generator_tests_support platform_tests_support generator drape_frontend routing \
search storage indexer drape map traffic routing_common platform editor geometry \
coding base freetype expat fribidi jansson protobuf osrm stats_client \
minizip succinct pugixml tess2 gflags oauthcpp stb_image sdf_image
minizip succinct pugixml tess2 gflags oauthcpp stb_image sdf_image icu
include($$ROOT_DIR/common.pri)

View file

@ -4,7 +4,7 @@ ROOT_DIR = ../..
DEPENDENCIES = generator routing traffic routing_common search storage indexer editor platform geometry \
coding base freetype expat fribidi jansson protobuf osrm stats_client \
minizip succinct pugixml tess2 gflags oauthcpp
minizip succinct pugixml tess2 gflags oauthcpp icu
include($$ROOT_DIR/common.pri)
INCLUDEPATH *= $$ROOT_DIR/3party/gflags/src

View file

@ -10,7 +10,7 @@ DEPENDENCIES = \
generator search routing routing_common indexer geometry \
editor platform coding base jansson \
pugixml stats_client opening_hours gflags \
oauthcpp expat protobuf \
oauthcpp expat protobuf icu \
include($$ROOT_DIR/common.pri)

View file

@ -7,7 +7,7 @@ CONFIG -= app_bundle
TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = generator map routing routing_common search storage indexer platform editor geometry coding base \
DEPENDENCIES = generator map routing routing_common search storage icu indexer platform editor geometry coding base \
osrm jansson protobuf succinct stats_client pugixml minizip gflags stats_client
include($$ROOT_DIR/common.pri)

View file

@ -11,6 +11,11 @@
#include "base/base.hpp"
#include "3party/icu/i18n/unicode/translit.h"
#include "3party/icu/i18n/unicode/utrans.h"
#include "3party/icu/common/unicode/utypes.h"
#include "3party/icu/common/unicode/unistr.h"
#include "std/vector.hpp"
namespace
@ -29,6 +34,23 @@ void GetMwmLangName(feature::RegionData const & regionData, StringUtf8Multilang
}
}
// Tries to produce a transliterated name for |src| using the languages reported by |regionData|.
// The languages are visited in the order GetLanguages() returns them; for each language that has
// a name in |src|, the name is passed to Transliterate() together with the language's English name.
// |out| is overwritten by every attempt and the search stops at the first non-empty result.
// If no language has a name in |src|, |out| is left untouched.
void GetTransliteratedName(feature::RegionData const & regionData, StringUtf8Multilang const & src, string & out)
{
  vector<int8_t> langCodes;
  regionData.GetLanguages(langCodes);

  string name;
  for (auto const langCode : langCodes)
  {
    // No name stored for this language: nothing to transliterate.
    if (!src.GetString(langCode, name))
      continue;

    out = Transliterate(name, StringUtf8Multilang::GetLangEnNameByCode(langCode));
    if (!out.empty())
      break;
  }
}
void GetBestName(StringUtf8Multilang const & src, vector<int8_t> const & priorityList, string & out)
{
auto bestIndex = priorityList.size();
@ -63,6 +85,39 @@ void GetBestName(StringUtf8Multilang const & src, vector<int8_t> const & priorit
}
} // namespace
std::string Transliterate(std::string const & str, std::string const & lang)
{
class ICUDataInitializer
{
public:
ICUDataInitializer(std::string const & icuDataDir)
{
u_setDataDirectory(icuDataDir.c_str());
}
};
static ICUDataInitializer icuInitializer("../../../../../../omim/3party/icu/data/in/");
UnicodeString ustr(str.c_str());
UErrorCode status = U_ZERO_ERROR;
const std::string id = lang + "-Latin/BGN";
std::unique_ptr<Transliterator> latinTransliterator(Transliterator::createInstance(id.c_str(), UTRANS_FORWARD, status));
if (latinTransliterator == nullptr)
{
LOG(LWARNING, ("Cannot create transliterator", id));
return "";
}
latinTransliterator->transliterate(ustr);
std::string resultStr;
ustr.toUTF8String(resultStr);
LOG(LDEBUG, ("Transliterated", str, "->", resultStr, "id =", id));
return resultStr;
}
namespace feature
{
@ -212,19 +267,21 @@ void GetPreferredNames(RegionData const & regionData, StringUtf8Multilang const
if (src.IsEmpty())
return;
vector<int8_t> const primaryCodes = {deviceLang,
StrUtf8::kInternationalCode,
StrUtf8::kEnglishCode};
vector<int8_t> secondaryCodes = {StrUtf8::kDefaultCode,
StrUtf8::kInternationalCode};
GetBestName(src, {deviceLang, StrUtf8::kInternationalCode}, primary);
if (primary.empty())
{
GetTransliteratedName(regionData, src, primary);
if (primary.empty())
GetBestName(src, {StrUtf8::kEnglishCode}, primary);
}
vector<int8_t> mwmLangCodes;
regionData.GetLanguages(mwmLangCodes);
vector<int8_t> secondaryCodes = {StrUtf8::kDefaultCode,
StrUtf8::kInternationalCode};
secondaryCodes.insert(secondaryCodes.end(), mwmLangCodes.begin(), mwmLangCodes.end());
secondaryCodes.push_back(StrUtf8::kEnglishCode);
GetBestName(src, primaryCodes, primary);
GetBestName(src, secondaryCodes, secondary);
if (primary.empty())
@ -243,14 +300,27 @@ void GetReadableName(RegionData const & regionData, StringUtf8Multilang const &
vector<int8_t> codes;
// If MWM contains user's language.
if (regionData.HasLanguage(deviceLang))
codes = {deviceLang, StrUtf8::kDefaultCode, StrUtf8::kInternationalCode, StrUtf8::kEnglishCode};
bool const preferDefault = regionData.HasLanguage(deviceLang);
if (preferDefault)
codes = {deviceLang, StrUtf8::kDefaultCode, StrUtf8::kInternationalCode};
else
codes = {deviceLang, StrUtf8::kInternationalCode, StrUtf8::kEnglishCode, StrUtf8::kDefaultCode};
codes = {deviceLang, StrUtf8::kInternationalCode};
GetBestName(src, codes, out);
if (out.empty())
GetMwmLangName(regionData, src, out);
{
GetTransliteratedName(regionData, src, out);
if (out.empty())
{
if (preferDefault)
codes = {StrUtf8::kEnglishCode};
else
codes = {StrUtf8::kEnglishCode, StrUtf8::kDefaultCode};
GetBestName(src, codes, out);
if (out.empty())
GetMwmLangName(regionData, src, out);
}
}
}
} // namespace feature

View file

@ -7,6 +7,8 @@
struct FeatureID;
class StringUtf8Multilang;
std::string Transliterate(std::string const & str, std::string const & lang);
namespace feature
{
class TypesHolder;

View file

@ -3,7 +3,7 @@
TARGET = indexer
TEMPLATE = lib
CONFIG += staticlib warn_on
INCLUDEPATH += ../3party/protobuf/src
INCLUDEPATH += ../3party/protobuf/src ../3party/icu/common ../3party/icu/i18n
ROOT_DIR = ..

View file

@ -6,7 +6,7 @@ TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = generator_tests_support search_tests_support indexer_tests_support \
platform_tests_support generator search routing routing_common indexer storage editor \
platform coding geometry base stats_client jansson tess2 protobuf \
platform coding geometry base stats_client jansson tess2 protobuf icu \
succinct opening_hours pugixml
include($$ROOT_DIR/common.pri)

View file

@ -98,11 +98,6 @@
#include "3party/Alohalytics/src/alohalytics.h"
#include "3party/icu/i18n/unicode/translit.h"
#include "3party/icu/i18n/unicode/utrans.h"
#include "3party/icu/common/unicode/utypes.h"
#include "3party/icu/common/unicode/unistr.h"
#define KMZ_EXTENSION ".kmz"
#define DEFAULT_BOOKMARK_TYPE "placemark-red"
@ -146,29 +141,6 @@ vector<string> kSearchMarks =
"search-booking"
};
// Transliterates |str| to Latin script with the ICU "Any-Latin" transliterator.
// The input is decoded and the output is encoded with ICU's default converter.
// Returns an empty string if the transliterator cannot be created or the result
// cannot be extracted.
string Transliterate(string str)
{
  UnicodeString ustr(str.c_str());
  UErrorCode status = U_ZERO_ERROR;
  unique_ptr<Transliterator> latin_tl(Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, status));
  if (U_FAILURE(status) || latin_tl == nullptr)
    return "";

  latin_tl->transliterate(ustr);

  // First attempt with a fixed-size buffer; extract() returns the length the full result needs.
  int32_t const bufLen = 1024;
  vector<char> outbuf(bufLen);
  status = U_ZERO_ERROR;
  int32_t strLen = ustr.extract(outbuf.data(), bufLen, nullptr, status);
  // Retry when the result did not fit, including the case where it fits exactly
  // but leaves no room for the terminating zero.
  if (status == U_BUFFER_OVERFLOW_ERROR || strLen >= bufLen)
  {
    outbuf.resize(strLen + 1);
    // ICU functions return immediately if the incoming error code already signals a failure,
    // so it has to be reset before retrying.
    status = U_ZERO_ERROR;
    strLen = ustr.extract(outbuf.data(), strLen + 1, nullptr, status);
  }
  if (U_FAILURE(status))
    return "";

  // The buffer is now guaranteed to hold strLen + 1 chars, so this write stays in bounds.
  outbuf[strLen] = '\0';
  return string(outbuf.data());
}
// TODO!
// To adjust GpsTrackFilter was added secret command "?gpstrackaccuracy:xxx;"
// where xxx is a new value for horizontal accuracy.
@ -2149,8 +2121,6 @@ void Framework::SetMapSelectionListeners(TActivateMapSelectionFn const & activat
void Framework::ActivateMapSelection(bool needAnimation, df::SelectionShape::ESelectedObject selectionType,
place_page::Info const & info)
{
//string result = Transliterate("Москва");
//LOG(LWARNING, ("!!!!!!!!!!!!!!!!!! ", result));
ASSERT_NOT_EQUAL(selectionType, df::SelectionShape::OBJECT_EMPTY, ("Empty selections are impossible."));
m_selectedFeature = info.GetID();

View file

@ -6,7 +6,7 @@ CONFIG += staticlib warn_on
ROOT_DIR = ..
INCLUDEPATH *= $$ROOT_DIR/3party/protobuf/src $$ROOT_DIR/3party/freetype/include $$ROOT_DIR/3party/jansson/src $$ROOT_DIR/3party/icu/common $$ROOT_DIR/3party/icu/i18n
INCLUDEPATH *= $$ROOT_DIR/3party/protobuf/src $$ROOT_DIR/3party/freetype/include $$ROOT_DIR/3party/jansson/src
include($$ROOT_DIR/common.pri)

View file

@ -3,7 +3,7 @@
ROOT_DIR = ../..
DEPENDENCIES = openlr routing routing_common search storage indexer editor \
platform geometry coding base protobuf osrm stats_client pugixml jansson succinct gflags
platform geometry coding base protobuf osrm stats_client pugixml jansson succinct gflags icu
include($$ROOT_DIR/common.pri)

View file

@ -5,7 +5,7 @@ TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = routing routing_common search storage indexer editor platform_tests_support platform \
geometry coding base protobuf osrm stats_client pugixml openlr jansson succinct
geometry coding base protobuf osrm stats_client pugixml openlr jansson succinct icu
include($$ROOT_DIR/common.pri)

View file

@ -7,7 +7,7 @@ ROOT_DIR = ../..
INCLUDEPATH *= $$ROOT_DIR/3party/jansson/src
DEPENDENCIES = partners_api indexer platform coding geometry base jansson stats_client protobuf
DEPENDENCIES = partners_api indexer platform coding geometry base jansson stats_client protobuf icu
include($$ROOT_DIR/common.pri)

View file

@ -7,7 +7,7 @@ TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = routing routing_common indexer platform_tests_support platform editor geometry coding base \
osrm protobuf succinct jansson stats_client map traffic pugixml stats_client
osrm protobuf succinct jansson stats_client map traffic pugixml stats_client icu
macx-*: LIBS *= "-framework IOKit" "-framework SystemConfiguration"

View file

@ -7,7 +7,7 @@ TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = routing_common indexer platform_tests_support platform editor geometry coding base \
osrm protobuf succinct jansson stats_client map traffic pugixml stats_client
osrm protobuf succinct jansson stats_client map traffic pugixml stats_client icu
macx-*: LIBS *= "-framework IOKit" "-framework SystemConfiguration"

View file

@ -9,7 +9,7 @@ ROOT_DIR = ../..
DEPENDENCIES = generator_tests_support search_tests_support indexer_tests_support generator \
routing routing_common search storage stats_client indexer platform editor geometry coding base \
tess2 protobuf jansson succinct pugixml opening_hours
tess2 protobuf jansson succinct pugixml opening_hours icu
include($$ROOT_DIR/common.pri)

View file

@ -6,7 +6,7 @@ CONFIG -= app_bundle
TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = search indexer platform editor geometry coding base protobuf jansson succinct pugixml stats_client
DEPENDENCIES = search indexer platform editor geometry coding icu base protobuf jansson succinct pugixml stats_client
include($$ROOT_DIR/common.pri)

View file

@ -14,7 +14,7 @@ DEPENDENCIES = generator_tests_support generator
DEPENDENCIES *= drape_frontend map routing traffic routing_common \
search storage indexer drape platform_tests_support platform editor opening_hours geometry \
coding base freetype expat fribidi jansson tess2 protobuf osrm stats_client \
minizip succinct pugixml oauthcpp stb_image sdf_image
minizip succinct pugixml oauthcpp stb_image sdf_image icu
include($$ROOT_DIR/common.pri)

View file

@ -7,7 +7,7 @@ ROOT_DIR = ../..
INCLUDEPATH *= $$ROOT_DIR/3party/jansson/src
DEPENDENCIES = traffic routing_common indexer platform_tests_support platform coding geometry base stats_client protobuf
DEPENDENCIES = traffic routing_common indexer platform_tests_support platform coding geometry base stats_client protobuf icu
include($$ROOT_DIR/common.pri)