From ae4b26638d4987944eb1581c81e08ca9763179af Mon Sep 17 00:00:00 2001
From: Ilya Zverev
Date: Tue, 27 Oct 2015 16:58:38 +0300
Subject: [PATCH] [metadata] Add GetWikiTitle and GetWikiURL methods

---
 generator/generator_tests/metadata_test.cpp |  4 ++
 indexer/feature_meta.cpp                    | 68 +++++++++++++++++++
 indexer/feature_meta.hpp                    |  3 +
 indexer/indexer.pro                         |  1 +
 4 files changed, 76 insertions(+)

diff --git a/generator/generator_tests/metadata_test.cpp b/generator/generator_tests/metadata_test.cpp
index fd2056fb9f..0b9ce77b38 100644
--- a/generator/generator_tests/metadata_test.cpp
+++ b/generator/generator_tests/metadata_test.cpp
@@ -137,10 +137,14 @@ UNIT_TEST(Metadata_ValidateAndFormat_wikipedia)
 
   p("wikipedia", "ru:Лана Вуд");
   TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "ru:" + lanaWoodUrlEncoded, ("ru:"));
+  TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "ru:Лана Вуд", ("ru:"));
+  TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded, ("ru:"));
   params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA);
 
   p("wikipedia", "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded);
   TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "ru:" + lanaWoodUrlEncoded, ("https:"));
+  TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "ru:Лана Вуд", ("https:"));
+  TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded, ("https:"));
   params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA);
 
   p("wikipedia", "Test");
diff --git a/indexer/feature_meta.cpp b/indexer/feature_meta.cpp
index 5b86dee054..02d21f8287 100644
--- a/indexer/feature_meta.cpp
+++ b/indexer/feature_meta.cpp
@@ -1 +1,69 @@
 #include "indexer/feature_meta.hpp"
+
+namespace feature
+{
+/// Builds the Wikipedia article URL from the stored FMD_WIKIPEDIA value.
+/// The value is split at the first ':'; the part before it becomes the
+/// subdomain and the part after it is appended to "/wiki/" unchanged.
+/// @return The URL, or an empty string when the value contains no ':'.
+string Metadata::GetWikiURL() const
+{
+  string const value = Get(FMD_WIKIPEDIA);
+  string::size_type const colon = value.find(':');
+  if (colon == string::npos)
+    return string();
+  return "https://" + value.substr(0, colon) + ".wikipedia.org/wiki/" + value.substr(colon + 1);
+}
+
+/// Converts one hexadecimal digit (0-9, a-f, A-F) to its numeric value.
+/// @return The value in [0, 15], or -1 when ch is not a hexadecimal digit.
+char hex_to_dec(char ch)
+{
+  if (ch >= '0' && ch <= '9')
+    return ch - '0';
+  if (ch >= 'a' && ch <= 'f')
+    return ch - 'a' + 10;
+  if (ch >= 'A' && ch <= 'F')
+    return ch - 'A' + 10;
+  return -1;
+}
+
+/// Decodes a percent-encoded string: every "%XX" with two hexadecimal digits
+/// becomes the byte 0xXX and every '_' becomes a space.
+///
+/// Note from RFC1630: "Sequences which start with a percent sign but are not
+/// followed by two hexadecimal characters (0-9, A-F) are reserved for future
+/// extension". Such sequences are copied to the result unchanged.
+string UriDecode(string const & sSrc)
+{
+  string::size_type const size = sSrc.size();
+  string result;
+  result.reserve(size);
+
+  for (string::size_type i = 0; i < size; ++i)
+  {
+    char const ch = sSrc[i];
+    // A '%' can only start an escape when two more characters follow it.
+    if (ch == '%' && i + 2 < size)
+    {
+      char const hi = hex_to_dec(sSrc[i + 1]);
+      char const lo = hex_to_dec(sSrc[i + 2]);
+      if (hi != -1 && lo != -1)
+      {
+        result.push_back(static_cast<char>((hi << 4) | lo));
+        i += 2;
+        continue;
+      }
+    }
+    result.push_back(ch == '_' ? ' ' : ch);
+  }
+
+  return result;
+}
+
+/// Returns the stored FMD_WIKIPEDIA value decoded with UriDecode.
+string Metadata::GetWikiTitle() const
+{
+  return UriDecode(Get(FMD_WIKIPEDIA));
+}
+} // namespace feature
diff --git a/indexer/feature_meta.hpp b/indexer/feature_meta.hpp
index 13f447e0b9..601dfd5fda 100644
--- a/indexer/feature_meta.hpp
+++ b/indexer/feature_meta.hpp
@@ -76,6 +76,9 @@ namespace feature
     inline bool Empty() const { return m_metadata.empty(); }
     inline size_t Size() const { return m_metadata.size(); }
 
+    string GetWikiURL() const;
+    string GetWikiTitle() const;
+
     template <class ArchiveT> void SerializeToMWM(ArchiveT & ar) const
     {
       for (auto const & e : m_metadata)
diff --git a/indexer/indexer.pro b/indexer/indexer.pro
index 3c65434ddc..f41ec5bf01 100644
--- a/indexer/indexer.pro
+++ b/indexer/indexer.pro
@@ -28,6 +28,7 @@ SOURCES += \
     feature_impl.cpp \
     feature_loader.cpp \
     feature_loader_base.cpp \
+    feature_meta.cpp \
     feature_utils.cpp \
     feature_visibility.cpp \
     features_offsets_table.cpp \