From d5daf9d6927cf3f57202c15b844a754851c4145d Mon Sep 17 00:00:00 2001 From: vng Date: Wed, 19 Feb 2014 15:35:55 +0300 Subject: [PATCH] [generator] Better house name and house number accumulating. --- coding/multilang_utf8_string.cpp | 25 ++++++++++ coding/multilang_utf8_string.hpp | 4 ++ generator/feature_builder.cpp | 2 +- generator/osm2type.cpp | 23 +-------- indexer/feature_data.cpp | 79 +++++++++++++++++++++++++++--- indexer/feature_data.hpp | 12 ++--- indexer/old/feature_loader_101.cpp | 2 +- 7 files changed, 112 insertions(+), 35 deletions(-) diff --git a/coding/multilang_utf8_string.cpp b/coding/multilang_utf8_string.cpp index 84353d0cae..f71298261a 100644 --- a/coding/multilang_utf8_string.cpp +++ b/coding/multilang_utf8_string.cpp @@ -84,6 +84,7 @@ bool StringUtf8Multilang::GetString(int8_t lang, string & utf8s) const namespace { + struct Printer { string & m_out; @@ -94,8 +95,32 @@ struct Printer return true; } }; + +struct Finder +{ + string const & m_s; + int8_t m_res; + Finder(string const & s) : m_s(s), m_res(-1) {} + bool operator()(int8_t code, string const & name) + { + if (name == m_s) + { + m_res = code; + return false; + } + return true; + } +}; + } // namespace +int8_t StringUtf8Multilang::FindString(string const & utf8s) const +{ + Finder finder(utf8s); + ForEachRef(finder); + return finder.m_res; +} + string DebugPrint(StringUtf8Multilang const & s) { string out; diff --git a/coding/multilang_utf8_string.hpp b/coding/multilang_utf8_string.hpp index 938b7fd1cf..67d059126a 100644 --- a/coding/multilang_utf8_string.hpp +++ b/coding/multilang_utf8_string.hpp @@ -36,6 +36,8 @@ class StringUtf8Multilang public: static int8_t const UNSUPPORTED_LANGUAGE_CODE = -1; + static int8_t const DEFAULT_CODE = 0; + /// @return UNSUPPORTED_LANGUAGE_CODE if language is not recognized static int8_t GetLangIndex(string const & lang); /// @return empty string if langCode is invalid @@ -81,6 +83,8 @@ public: return false; } + int8_t FindString(string const & utf8s) const; + template void Write(TSink & sink) const { utils::WriteString(sink, m_s); diff --git a/generator/feature_builder.cpp b/generator/feature_builder.cpp index 441fbda5d1..494f13f7ed 100644 --- a/generator/feature_builder.cpp +++ b/generator/feature_builder.cpp @@ -179,7 +179,7 @@ bool FeatureBuilder1::PreSerialize() // Store ref's in name field (used in "highway-motorway_junction"). if (m_Params.name.IsEmpty() && !m_Params.ref.empty()) - m_Params.name.AddString(0, m_Params.ref); + m_Params.name.AddString(StringUtf8Multilang::DEFAULT_CODE, m_Params.ref); m_Params.ref.clear(); break; diff --git a/generator/osm2type.cpp b/generator/osm2type.cpp index d6364674b3..e1469c4bca 100644 --- a/generator/osm2type.cpp +++ b/generator/osm2type.cpp @@ -49,25 +49,6 @@ namespace ftype ); } - template class tags_wrapper - { - typedef typename ToDo::result_type res_t; - - string const & m_key; - ToDo & m_toDo; - res_t & m_res; - - public: - tags_wrapper(string const & key, ToDo & toDo, res_t & res) - : m_key(key), m_toDo(toDo), m_res(res) {} - - void operator() (string const & v) - { - if (!m_res) - m_res = m_toDo(m_key, v); - } - }; - template typename ToDo::result_type for_each_tag(XMLElement * p, ToDo toDo) { @@ -88,7 +69,7 @@ namespace ftype if (get_mark_value(k, v) == -1) continue; - strings::Tokenize(v, ";", tags_wrapper(k, toDo, res)); + res = toDo(k, v); if (res) return res; } } @@ -174,7 +155,7 @@ namespace ftype // Needed for better search matching QByteArray const normBytes = QString::fromUtf8( v.c_str()).normalized(QString::NormalizationForm_KC).toUtf8(); - m_params.name.AddString(lang, normBytes.constData()); + m_params.AddName(lang, normBytes.constData()); } // get layer diff --git a/indexer/feature_data.cpp b/indexer/feature_data.cpp index 0eeb4945b2..4acb1b697d 100644 --- a/indexer/feature_data.cpp +++ b/indexer/feature_data.cpp @@ -1,4 +1,5 @@ #include "feature_data.hpp" +#include "feature_impl.hpp" #include "classificator.hpp" #include "feature.hpp" @@ -108,13 +109,60 @@ string FeatureParamsBase::DebugString() const (!ref.empty() ? " Ref:" + ref : "") + " "); } -void FeatureParamsBase::AddHouseName(string const & s) +namespace { - house.Set(house.IsEmpty() ? s : house.Get() + ", " + s); + +// Most used dummy values are taken from +// http://taginfo.openstreetmap.org/keys/addr%3Ahousename#values +bool IsDummyName(string const & s) +{ + return (s.empty() || + s == "Bloc" || s == "bloc" || + s == "жилой дом" || + s == "Edificio" || s == "edificio"); } -void FeatureParamsBase::AddHouseNumber(string const & ss) +struct IsBadChar { + bool operator() (char c) const { return (c == '\n'); } +}; + +} + +bool FeatureParams::AddName(string const & lang, string const & s) +{ + if (IsDummyName(s)) + return false; + + name.AddString(lang, s); + return true; +} + +bool FeatureParams::AddHouseName(string const & s) +{ + if (IsDummyName(s) || name.FindString(s) != StringUtf8Multilang::UNSUPPORTED_LANGUAGE_CODE) + return false; + + // Most names are house numbers by statistics. + if (house.IsEmpty() && AddHouseNumber(s)) + return true; + + // Add as a default name if we don't have it yet. + string dummy; + if (!name.GetString(StringUtf8Multilang::DEFAULT_CODE, dummy)) + { + name.AddString(StringUtf8Multilang::DEFAULT_CODE, s); + return true; + } + + return false; +} + +bool FeatureParams::AddHouseNumber(string const & ss) +{ + if (!feature::IsHouseNumber(ss)) + return false; + // Remove trailing zero's from house numbers. // It's important for debug checks of serialized-deserialized feature. string s(ss); @@ -122,14 +170,33 @@ void FeatureParamsBase::AddHouseNumber(string const & ss) if (strings::to_uint64(s, n)) s = strings::to_string(n); - house.Set(house.IsEmpty() ? s : s + ", " + house.Get()); + house.Set(s); + return true; +} + +void FeatureParams::AddStreetAddress(string const & s) +{ + m_street = s; + + // Erase bad chars (\n) because we write addresses to txt file. + m_street.erase(remove_if(m_street.begin(), m_street.end(), IsBadChar()), m_street.end()); + + // Osm likes to put house numbers into addr:street field. + size_t i = m_street.find_last_of("\t "); + if (i != string::npos) + { + ++i; + uint64_t n; + if (strings::to_uint64(m_street.substr(i), n)) + m_street.erase(i); + } } bool FeatureParams::FormatFullAddress(m2::PointD const & pt, string & res) const { - if (!m_streetAddress.empty() && !house.IsEmpty()) + if (!m_street.empty() && !house.IsEmpty()) { - res = m_streetAddress + "|" + house.Get() + "|" + res = m_street + "|" + house.Get() + "|" + strings::to_string(MercatorBounds::YToLat(pt.y)) + "|" + strings::to_string(MercatorBounds::XToLon(pt.x)) + '\n'; return true; diff --git a/indexer/feature_data.hpp b/indexer/feature_data.hpp index 97ebf03311..eaec4371b1 100644 --- a/indexer/feature_data.hpp +++ b/indexer/feature_data.hpp @@ -125,9 +125,6 @@ struct FeatureParamsBase string DebugString() const; - void AddHouseName(string const & s); - void AddHouseNumber(string const & s); - template void Write(TSink & sink, uint8_t header) const { @@ -194,7 +191,7 @@ class FeatureParams : public FeatureParamsBase uint8_t m_geomType; /// We use it now only for search unit tests - string m_streetAddress; + string m_street; public: typedef vector types_t; @@ -202,9 +199,12 @@ public: FeatureParams() : m_geomType(0xFF) {} + bool AddName(string const & lang, string const & s); + bool AddHouseName(string const & s); + bool AddHouseNumber(string const & s); /// @name Used in storing full street address only. //@{ - inline void AddStreetAddress(string const & s) { m_streetAddress = s; } + void AddStreetAddress(string const & s); bool FormatFullAddress(m2::PointD const & pt, string & res) const; //@} @@ -215,7 +215,7 @@ public: BaseT::operator=(rhs); m_Types = rhs.m_Types; - m_streetAddress = rhs.m_streetAddress; + m_street = rhs.m_street; } inline bool IsValid() const { return !m_Types.empty(); } diff --git a/indexer/old/feature_loader_101.cpp b/indexer/old/feature_loader_101.cpp index c183548410..d1c585e0ff 100644 --- a/indexer/old/feature_loader_101.cpp +++ b/indexer/old/feature_loader_101.cpp @@ -168,7 +168,7 @@ void LoaderImpl::ParseCommon() string name; name.resize(ReadVarUint(source) + 1); source.Read(&name[0], name.size()); - m_pF->m_Params.name.AddString(0, name); + m_pF->m_Params.name.AddString(StringUtf8Multilang::DEFAULT_CODE, name); } if (h & HEADER_HAS_POINT)