From d571623f99a2abed39e323065b31c169255a3bc7 Mon Sep 17 00:00:00 2001 From: vng Date: Thu, 17 Nov 2011 20:33:13 +0300 Subject: [PATCH] Add native languages support to mwm header. --- coding/multilang_utf8_string.cpp | 15 +- data/languages.txt | 351 ++++++++++++------------------- generator/osm2type.cpp | 38 +++- indexer/data_header.cpp | 37 +++- indexer/data_header.hpp | 1 + platform/languages.cpp | 2 + 6 files changed, 215 insertions(+), 229 deletions(-) diff --git a/coding/multilang_utf8_string.cpp b/coding/multilang_utf8_string.cpp index 06a7b7ea99..c4549597ff 100644 --- a/coding/multilang_utf8_string.cpp +++ b/coding/multilang_utf8_string.cpp @@ -2,14 +2,13 @@ #include "../defines.hpp" -static char const * gLangs[] = { "default", - "en", "ja", "fr", "ko_rm", "ar", "de", "ru", "sv", "zh", "fi", - "ko", "ka", "he", "be", "nl", "ga", "ja_rm", "el", "it", "es", - "th", "zh_pinyin", "ca", "cy", "hu", "hsb", "sr", "fa", "eu", "pl", - "br", "uk", "sl", "ro", "sq", "am", "fy", "gd", "cs", "sk", - "af", "hr", "hy", "tr", "kn", "pt", "lt", "lb", "bg", "eo", - "kk", "la", "et", "vi", "mn", "mk", "lv", "fur", "gsw", "ja_kana", - "is", "hi", "ku" }; +static char const * gLangs[] = { + "default", + "en", "ja", "fr", "ko_rm", "ar", "de", "int_name", "ru", "sv", "zh", "fi", "be", "ka", "ko", + "he", "nl", "ga", "ja_rm", "el", "it", "es", "zh_pinyin", "th", "cy", "sr", "uk", "ca", "hu", + "hsb", "eu", "fa", "br", "pl", "hy", "kn", "sl", "ro", "sq", "am", "fy", "cs", "gd", "sk", + "af", "ja_kana", "lb", "pt", "hr", "fur", "vi", "tr", "bg", "eo", "lt", "la", "kk", "gsw", + "et", "ku", "mn", "mk", "lv", "hi" }; int8_t StringUtf8Multilang::GetLangIndex(string const & lang) { diff --git a/data/languages.txt b/data/languages.txt index b458f26af8..a5de9d6087 100644 --- a/data/languages.txt +++ b/data/languages.txt @@ -2,293 +2,222 @@ default|Native for each country en|English ja|日本語 fr|Français -ko_rm|한국어 (Rm) +ko_rm|한국어(Rm) ar|العربية de|Deutsch +int_name|International (Latin) ru|Русский sv|Svenska zh|中文 fi|Suomi -ko|한국어 -ka|ქართული -he|עברית be|Беларуская +ka|ქართული +ko|한국어 +he|עברית nl|Nederlands ga|Gaeilge -ja_rm|日本語 (Rm) +ja_rm|日本語(Rm) el|Ελληνικά it|Italiano es|Español +zh_pinyin|中文(拼音) th|ไทย -zh_pinyin|中文 (Pinyin) -ca|Català cy|Cymraeg +sr|Српски +uk|Українська +ca|Català hu|Magyar hsb|Upper Sorbian -sr|Српски -fa|فارسی eu|Euskara -pl|Polski +fa|فارسی br|Breton -uk|Українська +pl|Polski +hy|Հայերէն +kn|ಕನ್ನಡ sl|Slovenščina ro|Română sq|Shqipe am|አማርኛ fy|Western Frisian -gd|Scottish Gaelic cs|Čeština -sk|Slovenský +gd|Scottish Gaelic +sk|Slovenčina af|Afrikaans -hr|Hrvatski -hy|Հայերէն -tr|Türkçe -kn|ಕನ್ನಡ -pt|Português -lt|Lietuvių +ja_kana|日本語(カタカナ) lb|Luxembourgish +pt|Português +hr|Hrvatski +fur|Friulian +vi|Tiếng Việt +tr|Türkçe bg|Български eo|Esperanto -kk|Қазақ +lt|Lietuvių la|Latin +kk|Қазақ +gsw|Schwiizertüütsch et|Eesti -vi|Tiếng Việt +ku|Kurdish mn|Mongolian mk|Македонски lv|Latviešu -fur|Friulian -gsw|Swiss German -ja_kana|日本語 (カタカナ) -is|Íslenska hi|हिन्दी -ku|Kurdish -no|Norsk gl|Galego +no|Norsk +is|Íslenska +cv|Chuvash +mdf|Moksha +myv|Erzya da|Dansk -gv|Gaelg -ur|اردو -az|Azərbaycanca -dsb|Lower Sorbian -zh_py|中文 (巴拉圭) -oc|Occitan -tg|Tajik ast|Asturian -ta|தமிழ் -tt|Tatar -id|Bahasa Indonesia -tl|Tagalog +az|Azərbaycanca +gv|Gaelg ba|Bashkir +scn|Sicilian +dsb|Lower Sorbian +ur|اردو +oc|Occitan +tt|Tatar +zh_py|中文(巴拉圭) +tg|Tajik +ta|தமிழ் +nds|Low German +id|Bahasa Indonesia +ml|മലയാളം mr|मराठी -my|Burmese +tl|Tagalog +se|Northern Sami +my|ဗမာ +lo_rm|Lao (Rm) uz|Ўзбек haw|ʻŌlelo Hawaiʻi ky|Kirghiz -se|Northern Sami +mt|Malti +rm|Rumantsch bn|বাংলা li|Limburgish -nn|Nynorsk -mt|Malti -ht|Haitian -rm|Rhaeto-Romance -wa|Walloon te|తెలుగు -bo|Tibetan -ml|മലയാളം -nds|Low German -sw|Kiswahili -lo|Lao -lo_rm|Lao (Rm) -kw|Kernewek +nn|Nynorsk +ht|Haitian +wa|Walloon +ne|नेपाली +os|Ossetic +zh-py|中文(巴拉圭) +bo|པོད་སྐད་ ug|Uighur +sw|Kiswahili +kw|Kernewek +lo|Lao +sr_lat|Српски (Lat) yi|Yiddish jv|Javanese -zh_pyt|中文 (Pyt) -bm|Bambara -pa|ਪੰਜਾਬੀ -ak|Akan -bs|Bosnian -os|Ossetic -grc|Ancient Greek -scn|Sicilian -qu|Quechua -km|ភាសាខ្មែរ +zh_pyt|中文(Pyt) +bm|Bamanakan an|Aragonese +pa|ਪੰਜਾਬੀ +bs|Bosanski +ak|Akan +kv|Komi +grc|Ancient Greek +ms|Bahasa Melayu +qu|Quechua csb|Kashubian arc|Aramaic -ne|नेपाली -sh|Srpsko-Hrvatski -ms|Bahasa Melayu +km|ភាសាខ្មែរ +sh|Srpskohrvatski nv|Navajo -cv|Chuvash -kv|Komi -ab|Abkhazian +zh-min-nan|中文(Min、Nan) lad|Ladino -ee|Ewe -ln|Lingala +ab|Abkhazian +ce|Chechen +ee|Eʋegbe sa|Sanskrit +so|Soomaali +ln|Lingala +crh|Crimean Turkish io|Ido ps|پښتو wo|Wolof +sme|Northern Sami nah|Nahuatl sah|Yakut -ce|Chechen vo|Volapük ang|Old English -crh|Crimean Turkish -el_latin|Ελληνικά (Latin) +be-x-old|Беларуская (X=Old) ie|Interlingue -bua|Buriat -ia|Interlingua -pam|Pampanga -ceb|Cebuano -fo|Føroyskt -new|Newari -war|Waray -so|Soomaali -dv|Divehi -gu|ગુજરાતી -nb|Norsk Bokmål -syc|Classical Syriac -mg|Malagasy -ch|Chamorro -iu|Inuktitut -tk|Turkmen +el_latin|Ελληνικά (Latin) udm|Udmurt -ilo|Iloko +ia|Interlingua +bua|Buriat +pam|Pampanga +bat-smg|Baltic Language (Smg) +war|Waray +fo|Føroyskt +ceb|Cebuano +new|Newari mi|Maori -sco|Scots +ksh|Colognian +syc|Classical Syriac +gu|ગુજરાતી frr|Northern Frisian -yo|Yoruba -gn|Guarani -lu|Luba-Katanga +dv|Divehi +nb|Norsk Bokmål sc|Sardinian +mg|Malagasy +tk|Turkmen +ilo|Iloko +sco|Scots +zh-yue|中文(Yue) +si|සිංහල +fiu-vro|Finno-Ugrian Language (Vro) +iu|Inuktitut +yo|Èdè Yorùbá +ch|Chamorro +gn|Guarani ale|Aleut jbo|Lojban -as|অসমীয়া -na|Nauru -ti|ትግርኛ -tpi|Tok Pisin -ltz|Luxembourgish -si|සිංහල -sr_lat|Српски (Lat) -kg|Kongo -ks|Kashmiri +kab|Taqbaylit nap|Neapolitan -tet|Tetum -cu|Church Slavic -dz|Dzongkha -ik|Inupiaq -kab|Kabyle -sm|Samoan -su|Sundanese -ja_furigana|日本語 (Furigana) -xal|Kalmyk -ace|Achinese -kl|Kalaallisut -lat|Latin -pap|Papiamento -chm|Mari +tpi|Tok Pisin co|Corsican -mdf|Moksha +na|Nauru +as|অসমীয়া +dz|Dzongkha +kg|Kongo +ti|ትግርኛ +es-ar|Español (Argentina) +nds-nl|Low German (Netherlands) +tet|Tetum +ks|Kashmiri +zh-classical|中文(Classical) +cu|Church Slavic +su|Sundanese +ik|Inupiaq +sm|Samoan +ace|Achinese +pap|Papiamento +xal|Kalmyk +ja_furigana|日本語(Furigana) +kl|Kalaallisut +ja-rm|日本語(Rm) +roa-rup|Romance Language (Rup) +av|Avaric +chm|Mari +lat|Latin +srn|Sranan Tongo tn|Tswana ber|Berber -srn|Sranan Tongo xh|Xhosa -zu|Zulu -chr|Cherokee -kr|Kanuri +zu|Isizulu +chr|ᏣᎳᎩ roa|Romance Language -av|Avaric -en_rm|English (Rm) -ha|Haoussa -krl|Karelian sma|Southern Sami +kr|Kanuri +krl|Karelian +en_rm|English (Rm) +cr|Cree +ha|Hausa ts|Tsonga za|Zhuang fry|Western Frisian mus|Creek -cr|Cree -lg|Ganda -sd|Sindhi -smi|Sami Language -ss|Swati -gez|Geez -to|Tonga -el_en|Ελληνικά (En) -eng|English -inh|Ingush -mwl|Mirandese -myv|Erzya -or|ଓଡ଼ିଆ -orm|Oromoo -rw|Kinyarwanda -sn|Shona -st|Southern Sotho -ty|Tahitian -ady|Adyghe -aus|Australian Language -che|Chechen -en_old|English (Old) -kaa|Kara-Kalpak -kbd|Kabardian -ko_rr|한국어 (Rr) -krc|Karachay-Balkar -kum|Kumyk -lez|Lezghian -lit|Lietuvių -pag|Pangasinan -pt_br|Português (Brasil) -rn|Rundi -rup|Aromanian -sms|Skolt Sami -ve|Venda -zh_|中文 -aa|Afar -ava|Avaric -bat|Baltic Language -bi|Bislama -bis|Bislama -bug|Buginese -cat|Català -cop|Coptic -cs_ascii|Čeština (Ascii) -de_old|Deutsch (Old) -en_pinyin|English (Pinyin) -en_rome|English (Rome) -en_xx|English (Xx) -fil|Filipino -fin|Suomi -fj|Fijian -gle|Gaeilge -ja_ra|日本語 (Ra) -ja_reading|日本語 (Reading) -khi|Khoisan Language -kor|한국어 -lua|Luba-Lulua -mis|Miscellaneous Language -mo|Moldavian -niu|Niuean -non|Old Norse -om_old|Oromoo (Old) -oss|Ossetic -ota|Ottoman Turkish -pol|Polski -rap|Rapanui -rar|Rarotongan -run|Rundi -rus|Русский -smn|Inari Sami -snk|Soninke -srp|Српски -ssa|Nilo-Saharan Language -swe|Svenska -tmh|Tamashek -tyv|Tuvinian -uig|Uighur -wen|Sorbian Language -yid|Yiddish -zh_hk|中文 (中華人民共和國香港特別行政區) -zh_pi|中文 (Pi) -zh_piny|中文 (Piny) -zh_tw|中文 (臺灣) -zha|Zhuang diff --git a/generator/osm2type.cpp b/generator/osm2type.cpp index af506e927e..abf43e8e70 100644 --- a/generator/osm2type.cpp +++ b/generator/osm2type.cpp @@ -507,6 +507,8 @@ namespace ftype { class do_find_name { + set m_savedNames; + size_t & m_count; FeatureParams & m_params; bool m_tunnel; @@ -525,6 +527,35 @@ namespace ftype { m_params.layer = feature::LAYER_TRANSPARENT_TUNNEL; } + bool GetLangByKey(string const & k, string & lang) + { + strings::SimpleTokenizer token(k, "\t :"); + if (!token) + return false; + + // this is an international (latin) name + if (*token == "int_name") + lang = "int_name"; + else + { + if (*token == "name") + { + ++token; + lang = (token ? *token : "default"); + + // replace dummy arabian tag with correct tag + if (lang == "ar1") + lang = "ar"; + } + } + + if (lang.empty()) + return false; + + // avoid duplicating names + return m_savedNames.insert(lang).second; + } + bool operator() (string const & k, string const & v) { ++m_count; @@ -532,12 +563,9 @@ namespace ftype { if (v.empty()) return false; // get name with language suffix - strings::SimpleTokenizer token(k, "\t :"); - if (token && *token == "name") + string lang; + if (GetLangByKey(k, lang)) { - ++token; - string lang = (token ? *token : "default"); - // Unicode Compatibility Decomposition, // followed by Canonical Composition (NFKC). // Needed for better search matching diff --git a/indexer/data_header.cpp b/indexer/data_header.cpp index 581743f9ee..fadef04e4e 100644 --- a/indexer/data_header.cpp +++ b/indexer/data_header.cpp @@ -52,6 +52,34 @@ namespace feature } } + namespace + { + template + void SaveBytes(TSink & sink, TCont const & cont) + { + STATIC_ASSERT(sizeof(typename TCont::value_type) == 1); + + uint32_t const count = cont.size(); + WriteVarUint(sink, count); + if (count > 0) + sink.Write(&cont[0], count); + } + + template + void LoadBytes(TSource & src, TCont & cont) + { + STATIC_ASSERT(sizeof(typename TCont::value_type) == 1); + ASSERT ( cont.empty(), () ); + + uint32_t const count = ReadVarUint(src); + if (count > 0) + { + cont.resize(count); + src.Read(&cont[0], count); + } + } + } + void DataHeader::Save(FileWriter & w) const { m_codingParams.Save(w); @@ -59,8 +87,8 @@ namespace feature WriteVarInt(w, m_bounds.first); WriteVarInt(w, m_bounds.second); - WriteVarUint(w, m_scales.size()); - w.Write(m_scales.data(), m_scales.size()); + SaveBytes(w, m_scales); + SaveBytes(w, m_langs); WriteVarInt(w, static_cast(m_type)); } @@ -73,9 +101,8 @@ namespace feature m_bounds.first = ReadVarInt(src); m_bounds.second = ReadVarInt(src); - uint32_t const count = ReadVarUint(src); - m_scales.resize(count); - src.Read(m_scales.data(), count); + LoadBytes(src, m_scales); + LoadBytes(src, m_langs); m_type = static_cast(ReadVarInt(src)); diff --git a/indexer/data_header.hpp b/indexer/data_header.hpp index b3d945b697..e6e162f388 100644 --- a/indexer/data_header.hpp +++ b/indexer/data_header.hpp @@ -24,6 +24,7 @@ namespace feature pair m_bounds; buffer_vector m_scales; + buffer_vector m_langs; public: diff --git a/platform/languages.cpp b/platform/languages.cpp index 8df6b3f153..c8731580a2 100644 --- a/platform/languages.cpp +++ b/platform/languages.cpp @@ -41,6 +41,7 @@ namespace languages size_t const size = langCodes.size(); CHECK_EQUAL(size, MAX_SUPPORTED_LANGUAGES, ()); CHECK_EQUAL(size, static_cast(size), ()); + for (int8_t i = 0; i < static_cast(size); ++i) { int8_t const index = StringUtf8Multilang::GetLangIndex(langCodes[i]); @@ -50,6 +51,7 @@ namespace languages { ASSERT(false, ("Invalid language code")); } + CHECK_GREATER_OR_EQUAL(gDefaultPriorities[i], 0, ("Unsupported language", langCodes[i])); } }