Merge pull request #11161 from LaGrunge/json-locale-format

[generator]Json locale format
This commit is contained in:
Sergey Yershov 2019-07-04 19:00:28 +03:00 committed by GitHub
commit d855ef21ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 205 additions and 98 deletions

View file

@ -76,6 +76,21 @@ json_t const * GetJSONObligatoryField(json_t const * root, char const * field);
json_t * GetJSONOptionalField(json_t * root, std::string const & field);
json_t * GetJSONOptionalField(json_t * root, char const * field);
json_t const * GetJSONOptionalField(json_t const * root, char const * field);
// Terminal overload of the path lookup: with a single path component the call is
// just GetJSONObligatoryField(|root|, |path|).
template <class First>
inline json_t const * GetJSONObligatoryFieldByPath(json_t const * root, First && path)
{
  json_t const * field = GetJSONObligatoryField(root, std::forward<First>(path));
  return field;
}
// Recursive overload of the path lookup: resolves |path| relative to |root|, then resolves
// the remaining |paths| relative to the node that was just found.
template <class First, class... Paths>
inline json_t const * GetJSONObligatoryFieldByPath(json_t const * root, First && path,
                                                   Paths &&... paths)
{
  return GetJSONObligatoryFieldByPath(
      GetJSONObligatoryFieldByPath(root, std::forward<First>(path)),
      std::forward<Paths>(paths)...);
}
bool JSONIsNull(json_t const * root);
} // namespace base

View file

@ -10,6 +10,7 @@
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <utility>

View file

@ -150,6 +150,7 @@ public:
// Const and mutable access to the feature parameters held in m_params.
FeatureParams const & GetParams() const { return m_params; }
FeatureParams & GetParams() { return m_params; }
// Name lookup by language code; |lang| defaults to StringUtf8Multilang::kDefaultCode.
std::string GetName(int8_t lang = StringUtf8Multilang::kDefaultCode) const;
// Exposes the whole multilingual name object stored in m_params.name.
StringUtf8Multilang const & GetMultilangName() const { return m_params.name; }
// Returns the rank stored in m_params.rank.
uint8_t GetRank() const { return m_params.rank; }
bool FormatFullAddress(std::string & res) const;
// Forwards to m_params.GetAddressData().
AddressData const & GetAddressData() const { return m_params.GetAddressData(); }
@ -175,7 +176,8 @@ public:
bool PreSerializeAndRemoveUselessNamesForIntermediate();
void SerializeForIntermediate(Buffer & data) const;
void SerializeBorderForIntermediate(serial::GeometryCodingParams const & params, Buffer & data) const;
void SerializeBorderForIntermediate(serial::GeometryCodingParams const & params,
Buffer & data) const;
void DeserializeFromIntermediate(Buffer & data);
bool PreSerializeAndRemoveUselessNamesForMwm(SupportingData const & data);
@ -201,13 +203,13 @@ public:
void SetCoastCell(int64_t iCell) { m_coastCell = iCell; }
bool IsCoastCell() const { return (m_coastCell != -1); }
protected:
protected:
template <class ToDo>
class ToDoWrapper
{
public:
ToDoWrapper(ToDo && toDo) : m_toDo(std::forward<ToDo>(toDo)) {}
bool operator() (m2::PointD const & p) { return m_toDo(p); }
bool operator()(m2::PointD const & p) { return m_toDo(p); }
void EndRegion() {}
private:
@ -218,9 +220,9 @@ public:
// - point in point-feature
// - origin point of text [future] in line-feature
// - origin point of text or symbol in area-feature
m2::PointD m_center; // Check HEADER_HAS_POINT
m2::PointD m_center; // Check HEADER_HAS_POINT
// List of geometry polygons.
Geometry m_polygons; // Check HEADER_IS_AREA
Geometry m_polygons; // Check HEADER_IS_AREA
m2::RectD m_limitRect;
std::vector<base::GeoObjectId> m_osmIds;
FeatureParams m_params;
@ -264,7 +266,7 @@ void ForEachFromDatRawFormat(std::string const & filename, ToDo && toDo)
/// Parallel process features in .dat file.
template <class ToDo>
void ForEachParallelFromDatRawFormat(size_t threadsCount, std::string const & filename,
ToDo && toDo)
ToDo && toDo)
{
CHECK_GREATER_OR_EQUAL(threadsCount, 1, ());
if (threadsCount == 1)

View file

@ -15,8 +15,7 @@ using namespace generator;
// Transliteration tests ---------------------------------------------------------------------------
using Translations = std::vector<std::pair<std::string, std::string>>;
bool TestTransliteration(Translations const & translations,
std::string const & expectedTransliteration,
std::string const & lang)
std::string const & expectedTransliteration, std::string const & lang)
{
StringUtf8Multilang name;
for (auto const & langAndTranslation : translations)
@ -51,8 +50,8 @@ UNIT_TEST(Transliteration)
{"sr", "Мичиген"}, {"ta", "மிச்சிகன்"}, {"th", "รัฐมิชิแกน"}, {"tl", "Misigan"},
{"uk", "Мічиган"}, {"yi", "מישיגן"}, {"zh", "密歇根州"}};
TEST(TestTransliteration(scotlandTranslations, "Shotlandiya", "en"), ());
TEST(TestTransliteration(scotlandTranslations, "Scotland", "en"), ());
TEST(TestTransliteration(michiganTranslations, "Michigan", "en"), ());
TEST(TestTransliteration(scotlandTranslations, "Shotlandiya", "ru"), ());
TEST(TestTransliteration(michiganTranslations, "Michigan", "ru"), ());
TEST(TestTransliteration(scotlandTranslations, "Шотландия", "ru"), ());
TEST(TestTransliteration(michiganTranslations, "Мичиган", "ru"), ());
}

View file

@ -46,8 +46,9 @@ using IndexReader = ReaderPtr<Reader>;
bool HouseHasAddress(JsonValue const & json)
{
auto && properties = base::GetJSONObligatoryField(json, "properties");
auto && address = base::GetJSONObligatoryField(properties, "address");
auto && address =
base::GetJSONObligatoryFieldByPath(json, "properties", "locales", "default", "address");
auto && building = base::GetJSONOptionalField(address, "building");
return building && !base::JSONIsNull(building);
}
@ -100,7 +101,8 @@ base::JSONPtr MakeGeoObjectValueWithoutAddress(FeatureBuilder const & fb, JsonVa
{
auto jsonWithAddress = json.MakeDeepCopyJson();
auto properties = json_object_get(jsonWithAddress.get(), "properties");
ToJSONObject(*properties, "name", fb.GetName());
Localizator localizator(*properties);
localizator.AddLocale("name", Localizator::EasyObjectWithTranslation(fb.GetMultilangName()));
UpdateCoordinates(fb.GetKeyPoint(), jsonWithAddress);
return jsonWithAddress;
}
@ -154,9 +156,8 @@ void FilterAddresslessByCountryAndRepackMwm(std::string const & pathInGeoObjects
if (!regionKeyValue)
return;
auto && properties = base::GetJSONObligatoryField(*regionKeyValue->second, "properties");
auto && address = base::GetJSONObligatoryField(properties, "address");
auto && country = base::GetJSONObligatoryField(address, "country");
auto && country = base::GetJSONObligatoryFieldByPath(
*regionKeyValue->second, "properties", "locales", "default", "address", "country");
auto countryName = FromJSON<std::string>(country);
auto pos = includeCountries.find(countryName);
if (pos != std::string::npos)

View file

@ -60,10 +60,11 @@ Region::Region(PlacePoint const & place)
std::string Region::GetTranslatedOrTransliteratedName(LanguageCode languageCode) const
{
if (m_placeLabel)
return m_placeLabel->GetTranslatedOrTransliteratedName(languageCode);
if (!m_placeLabel)
return RegionWithName::GetTranslatedOrTransliteratedName(languageCode);
return RegionWithName::GetTranslatedOrTransliteratedName(languageCode);
std::string const & name = m_placeLabel->GetTranslatedOrTransliteratedName(languageCode);
return name.empty() ? RegionWithName::GetTranslatedOrTransliteratedName(languageCode) : name;
}
std::string Region::GetName(int8_t lang) const

View file

@ -102,8 +102,9 @@ private:
ToJSONArray(*coordinates, center.m_lat);
ToJSONObject(*geometry, "coordinates", coordinates);
auto address = base::NewJSONObject();
Localizator localizator;
auto properties = base::NewJSONObject();
Localizator localizator(*properties);
boost::optional<std::string> dref;
for (auto const & p : path)
@ -112,29 +113,24 @@ private:
CHECK(region.GetLevel() != regions::PlaceLevel::Unknown, ());
auto const label = GetLabel(region.GetLevel());
CHECK(label, ());
ToJSONObject(*address, label, region.GetName());
localizator.AddLocale(label, region, "address");
if (m_verbose)
{
ToJSONObject(*address, std::string{label} + "_i", DebugPrint(region.GetId()));
ToJSONObject(*address, std::string{label} + "_a", region.GetArea());
ToJSONObject(*address, std::string{label} + "_r", region.GetRank());
localizator.AddVerbose(
[&label, &region](auto & node) {
ToJSONObject(node, std::string{label} + "_i", DebugPrint(region.GetId()));
ToJSONObject(node, std::string{label} + "_a", region.GetArea());
ToJSONObject(node, std::string{label} + "_r", region.GetRank());
},
"address");
}
localizator.AddLocale([&label, &region](std::string const & language) {
return Localizator::LabelAndTranslition{
label,
region.GetTranslatedOrTransliteratedName(StringUtf8Multilang::GetLangIndex(language))};
});
if (!dref && region.GetId() != main.GetId())
dref = KeyValueStorage::SerializeDref(region.GetId().GetEncodedId());
}
auto properties = base::NewJSONObject();
ToJSONObject(*properties, "name", main.GetName());
localizator.AddLocale("name", main);
ToJSONObject(*properties, "rank", main.GetRank());
ToJSONObject(*properties, "address", address);
ToJSONObject(*properties, "locales", localizator.BuildLocales());
if (dref)
ToJSONObject(*properties, "dref", *dref);

View file

@ -1,6 +1,8 @@
#include "generator/streets/streets_builder.hpp"
#include "generator/key_value_storage.hpp"
#include "generator/streets/street_regions_tracing.hpp"
#include "generator/translation.hpp"
#include "indexer/classificator.hpp"
#include "indexer/ftypes_matcher.hpp"
@ -33,9 +35,14 @@ void StreetsBuilder::AssembleStreets(std::string const & pathInStreetsTmpMwm)
void StreetsBuilder::AssembleBindings(std::string const & pathInGeoObjectsTmpMwm)
{
auto const transform = [this](FeatureBuilder & fb, uint64_t /* currPos */) {
auto streetName = fb.GetParams().GetStreet();
std::string streetName = fb.GetParams().GetStreet();
if (!streetName.empty())
AddStreetBinding(std::move(streetName), fb);
{
// TODO maybe (lagrunge): add localizations on street:lang tags
StringUtf8Multilang multilangName;
multilangName.AddString(StringUtf8Multilang::kDefaultCode, streetName);
AddStreetBinding(std::move(streetName), fb, multilangName);
}
};
ForEachParallelFromDatRawFormat(m_threadsCount, pathInGeoObjectsTmpMwm, transform);
}
@ -55,12 +62,12 @@ void StreetsBuilder::SaveRegionStreetsKv(std::ostream & streamStreetsKv, uint64_
for (auto const & street : streets)
{
auto const & bbox = street.second.GetBbox();
auto const & pin = street.second.GetOrChoosePin();
auto const & bbox = street.second.m_geometry.GetBbox();
auto const & pin = street.second.m_geometry.GetOrChoosePin();
auto const id = KeyValueStorage::SerializeDref(pin.m_osmId.GetEncodedId());
auto const & value =
MakeStreetValue(regionId, *regionObject, street.first, bbox, pin.m_position);
MakeStreetValue(regionId, *regionObject, street.second.m_name, bbox, pin.m_position);
streamStreetsKv << id << " " << KeyValueStorage::Serialize(value) << "\n";
}
}
@ -90,9 +97,9 @@ void StreetsBuilder::AddStreetHighway(FeatureBuilder & fb)
for (auto & segment : pathSegments)
{
auto && region = segment.m_region;
auto & street = InsertStreet(region.first, fb.GetName());
auto & street = InsertStreet(region.first, fb.GetName(), fb.GetMultilangName());
auto const osmId = pathSegments.size() == 1 ? fb.GetMostGenericOsmId() : NextOsmSurrogateId();
street.AddHighwayLine(osmId, std::move(segment.m_path));
street.m_geometry.AddHighwayLine(osmId, std::move(segment.m_path));
}
}
@ -104,9 +111,9 @@ void StreetsBuilder::AddStreetArea(FeatureBuilder & fb)
std::lock_guard<std::mutex> lock{m_updateMutex};
auto & street = InsertStreet(region->first, fb.GetName());
auto & street = InsertStreet(region->first, fb.GetName(), fb.GetMultilangName());
auto osmId = fb.GetMostGenericOsmId();
street.AddHighwayArea(osmId, fb.GetOuterGeometry());
street.m_geometry.AddHighwayArea(osmId, fb.GetOuterGeometry());
}
void StreetsBuilder::AddStreetPoint(FeatureBuilder & fb)
@ -118,11 +125,12 @@ void StreetsBuilder::AddStreetPoint(FeatureBuilder & fb)
std::lock_guard<std::mutex> lock{m_updateMutex};
auto osmId = fb.GetMostGenericOsmId();
auto & street = InsertStreet(region->first, fb.GetName());
street.SetPin({fb.GetKeyPoint(), osmId});
auto & street = InsertStreet(region->first, fb.GetName(), fb.GetMultilangName());
street.m_geometry.SetPin({fb.GetKeyPoint(), osmId});
}
void StreetsBuilder::AddStreetBinding(std::string && streetName, FeatureBuilder & fb)
void StreetsBuilder::AddStreetBinding(std::string && streetName, FeatureBuilder & fb,
StringUtf8Multilang const & multiLangName)
{
auto const region = FindStreetRegionOwner(fb.GetKeyPoint());
if (!region)
@ -130,16 +138,16 @@ void StreetsBuilder::AddStreetBinding(std::string && streetName, FeatureBuilder
std::lock_guard<std::mutex> lock{m_updateMutex};
auto & street = InsertStreet(region->first, std::move(streetName));
street.AddBinding(NextOsmSurrogateId(), fb.GetKeyPoint());
auto & street = InsertStreet(region->first, std::move(streetName), multiLangName);
street.m_geometry.AddBinding(NextOsmSurrogateId(), fb.GetKeyPoint());
}
boost::optional<KeyValue> StreetsBuilder::FindStreetRegionOwner(m2::PointD const & point,
bool needLocality)
{
auto const isStreetAdministrator = [needLocality](KeyValue const & region) {
auto const && properties = base::GetJSONObligatoryField(*region.second, "properties");
auto const && address = base::GetJSONObligatoryField(properties, "address");
auto && address = base::GetJSONObligatoryFieldByPath(*region.second, "properties", "locales",
"default", "address");
if (base::GetJSONOptionalField(address, "suburb"))
return false;
@ -155,26 +163,45 @@ boost::optional<KeyValue> StreetsBuilder::FindStreetRegionOwner(m2::PointD const
return m_regionInfoGetter.FindDeepest(point, isStreetAdministrator);
}
StreetGeometry & StreetsBuilder::InsertStreet(uint64_t regionId, std::string && streetName)
// Builds a new multilingual name containing the entries of |first| followed by the entries
// of |second|, each added through AddString in that order.
// NOTE(review): how AddString treats a language code that is already present is not visible
// here - confirm which input wins when both carry the same language.
StringUtf8Multilang MergeNames(const StringUtf8Multilang & first,
                               const StringUtf8Multilang & second)
{
  StringUtf8Multilang merged;
  auto const appendToMerged = [&merged](int8_t langCode, std::string const & translation) {
    merged.AddString(langCode, translation);
  };

  first.ForEach(appendToMerged);
  second.ForEach(appendToMerged);
  return merged;
}
StreetsBuilder::Street & StreetsBuilder::InsertStreet(uint64_t regionId, std::string && streetName,
StringUtf8Multilang const & multilangName)
{
auto & regionStreets = m_regions[regionId];
return regionStreets[std::move(streetName)];
StreetsBuilder::Street & street = regionStreets[std::move(streetName)];
street.m_name = MergeNames(multilangName, street.m_name);
return street;
}
base::JSONPtr StreetsBuilder::MakeStreetValue(uint64_t regionId, JsonValue const & regionObject,
std::string const & streetName,
StringUtf8Multilang const & streetName,
m2::RectD const & bbox, m2::PointD const & pinPoint)
{
auto streetObject = base::NewJSONObject();
auto && regionLocales = base::GetJSONObligatoryFieldByPath(regionObject, "properties", "locales");
auto const && regionProperties = base::GetJSONObligatoryField(regionObject, "properties");
auto const && regionAddress = base::GetJSONObligatoryField(regionProperties, "address");
auto address = base::JSONPtr{json_deep_copy(const_cast<json_t *>(regionAddress))};
ToJSONObject(*address, "street", streetName);
auto locales = base::JSONPtr{json_deep_copy(const_cast<json_t *>(regionLocales))};
auto properties = base::NewJSONObject();
ToJSONObject(*properties, "address", std::move(address));
ToJSONObject(*properties, "name", streetName);
ToJSONObject(*properties, "locales", std::move(locales));
Localizator localizator(*properties);
auto const & localizee = Localizator::EasyObjectWithTranslation(streetName);
localizator.AddLocale("name", localizee);
localizator.AddLocale("street", localizee, "address");
ToJSONObject(*properties, "dref", KeyValueStorage::SerializeDref(regionId));
ToJSONObject(*streetObject, "properties", std::move(properties));

View file

@ -41,7 +41,12 @@ public:
static bool IsStreet(feature::FeatureBuilder const & fb);
private:
using RegionStreets = std::unordered_map<std::string, StreetGeometry>;
// Per-street record: the multilingual name of a street together with its geometry.
struct Street
{
// Multilingual street name.
StringUtf8Multilang m_name;
// Geometry collected for the street.
StreetGeometry m_geometry;
};
using RegionStreets = std::unordered_map<std::string, Street>;
void SaveRegionStreetsKv(std::ostream & streamStreetsKv, uint64_t regionId,
RegionStreets const & streets);
@ -50,12 +55,14 @@ private:
void AddStreetHighway(feature::FeatureBuilder & fb);
void AddStreetArea(feature::FeatureBuilder & fb);
void AddStreetPoint(feature::FeatureBuilder & fb);
void AddStreetBinding(std::string && streetName, feature::FeatureBuilder & fb);
void AddStreetBinding(std::string && streetName, feature::FeatureBuilder & fb,
StringUtf8Multilang const & multiLangName);
boost::optional<KeyValue> FindStreetRegionOwner(m2::PointD const & point,
bool needLocality = false);
StreetGeometry & InsertStreet(uint64_t regionId, std::string && streetName);
Street & InsertStreet(uint64_t regionId, std::string && streetName,
StringUtf8Multilang const & multilangName);
base::JSONPtr MakeStreetValue(uint64_t regionId, JsonValue const & regionObject,
std::string const & streetName, m2::RectD const & bbox,
const StringUtf8Multilang & streetName, m2::RectD const & bbox,
m2::PointD const & pinPoint);
base::GeoObjectId NextOsmSurrogateId();

View file

@ -13,8 +13,8 @@ namespace
using Languages = std::vector<std::string>;
const std::unordered_map<generator::LanguageCode, Languages> kPreferredLanguagesForTransliterate = {
{StringUtf8Multilang::GetLangIndex("ru"), {"en" /*English*/, "ru" /*Русский*/}},
{StringUtf8Multilang::GetLangIndex("en"), {"en" /*English*/, "ru" /*Русский*/}}};
{StringUtf8Multilang::GetLangIndex("ru"), {"ru", "uk", "be"}},
{StringUtf8Multilang::GetLangIndex("en"), {"en", "da", "es", "fr"}}};
Languages kLocalelanguages = {"en", "ru"};
} // namespace
@ -25,14 +25,27 @@ std::string GetTranslatedOrTransliteratedName(StringUtf8Multilang const & name,
LanguageCode languageCode)
{
std::string s = GetName(name, languageCode);
if (!s.empty() && strings::IsASCIIString(s))
if (!s.empty())
return s;
if (languageCode != StringUtf8Multilang::kEnglishCode)
return std::string();
s = GetName(name, StringUtf8Multilang::kInternationalCode);
if (!s.empty() && strings::IsASCIIString(s))
return s;
s = GetName(name, StringUtf8Multilang::kDefaultCode);
if (!s.empty() && strings::IsASCIIString(s))
return s;
auto const fn = [&s](int8_t code, std::string const & name) {
if (strings::IsASCIIString(name))
{
s = name;
return base::ControlFlow::Break;
}
if (code != StringUtf8Multilang::kDefaultCode &&
Transliteration::Instance().Transliterate(name, code, s) && strings::IsASCIIString(s))
{

View file

@ -19,58 +19,103 @@ inline std::string GetName(StringUtf8Multilang const & name, LanguageCode lang)
}
/// This function will take the following steps:
/// 1. Return the |languageCode| name if it exists and is ASCII.
/// 2. Try to get International name
/// 3. Return transliteration trying to use kPreferredLanguagesForTransliterate
/// 1. Return the |languageCode| name if it exists.
/// 1.1 The following steps apply only to the English locale.
/// 2. Try to get international name.
/// 3. Try to check if default name is ASCII and return it if succeeds.
/// 4. Return transliteration trying to use kPreferredLanguagesForTransliterate
/// first, then any, if it succeeds.
/// 3. Otherwise, return empty string.
/// 5. Otherwise, return empty string.
std::string GetTranslatedOrTransliteratedName(StringUtf8Multilang const & name,
LanguageCode languageCode);
class Localizator
{
public:
struct LabelAndTranslition
class EasyObjectWithTranslation
{
std::string m_label;
std::string m_translation;
public:
explicit EasyObjectWithTranslation(StringUtf8Multilang const & name) : m_name(name) {}
// Delegates to the free function generator::GetTranslatedOrTransliteratedName, applied to the
// stored name and the requested |languageCode|.
std::string GetTranslatedOrTransliteratedName(LanguageCode languageCode) const
{
return ::generator::GetTranslatedOrTransliteratedName(m_name, languageCode);
}
// Delegates to the free function generator::GetName for the stored name; |languageCode|
// defaults to StringUtf8Multilang::kDefaultCode.
std::string GetName(LanguageCode languageCode = StringUtf8Multilang::kDefaultCode) const
{
return ::generator::GetName(m_name, languageCode);
}
private:
StringUtf8Multilang const m_name;
};
template <class Fn>
void AddLocale(Fn && translator)
explicit Localizator(json_t & node) : m_node(GetOrCreateNode("locales", node)) {}
template <class Object>
void AddLocale(std::string const & label, Object const & objectWithName,
std::string const & level = std::string())
{
AddLocale(DefaultLocaleName(), level, objectWithName.GetName(), label);
auto const & languages = LocaleLanguages();
for (auto const & language : languages)
for (std::string const & language : languages)
{
m_localesWithLanguages.emplace_back(LocaleWithLanguage{base::NewJSONObject(), language});
std::string label;
std::string translation;
LabelAndTranslition labelAndTranslation{translator(language)};
ToJSONObject(*m_localesWithLanguages.back().m_locale, labelAndTranslation.m_label,
labelAndTranslation.m_translation);
std::string const & translation = objectWithName.GetTranslatedOrTransliteratedName(
StringUtf8Multilang::GetLangIndex(language));
if (translation.empty())
continue;
AddLocale(language, level, translation, label);
}
}
base::JSONPtr BuildLocales()
template <class Verboser>
void AddVerbose(Verboser && verboser, std::string const & level)
{
auto locales = base::NewJSONObject();
for (auto & localeWithLanguage : m_localesWithLanguages)
ToJSONObject(*locales, localeWithLanguage.m_language, localeWithLanguage.m_locale);
m_localesWithLanguages.clear();
return locales;
json_t & locale = GetOrCreateNode(DefaultLocaleName(), m_node);
json_t & node = GetOrCreateNode(level, locale);
verboser(node);
}
private:
struct LocaleWithLanguage
void AddLocale(std::string const & language, std::string const & level, std::string const & name,
std::string const & label)
{
base::JSONPtr m_locale;
std::string m_language;
};
using LocalesWithLanguages = std::vector<LocaleWithLanguage>;
json_t & locale = GetOrCreateNode(language, m_node);
if (!level.empty())
{
json_t & levelNode = GetOrCreateNode(level, locale);
ToJSONObject(levelNode, label, name);
}
else
{
ToJSONObject(locale, label, name);
}
}
// Key of the locale node that holds untranslated (default) names.
// The string is a function-local static, so every call returns a reference to the same object.
static std::string const & DefaultLocaleName()
{
  static std::string const kName{"default"};
  return kName;
}
// Returns the child of |root| stored under |nodeName|. When that field is missing or holds
// JSON null, a fresh empty object is attached to |root| under |nodeName| and returned instead.
static json_t & GetOrCreateNode(std::string const & nodeName, json_t & root)
{
  json_t * existing = base::GetJSONOptionalField(&root, nodeName);
  if (existing && !base::JSONIsNull(existing))
    return *existing;

  json_t * created = json_object();
  ToJSONObject(root, nodeName, *created);
  return *created;
}
std::vector<std::string> const & LocaleLanguages() const;
LocalesWithLanguages m_localesWithLanguages;
json_t & m_node;
};
} // namespace generator
} // namespace generator