diff --git a/android/jni/com/mapswithme/maps/Framework.cpp b/android/jni/com/mapswithme/maps/Framework.cpp index f0213d5f79..58e9dbff60 100644 --- a/android/jni/com/mapswithme/maps/Framework.cpp +++ b/android/jni/com/mapswithme/maps/Framework.cpp @@ -739,15 +739,21 @@ namespace android // Fills mapobject's metadata from UserMark void Framework::InjectMetadata(JNIEnv * env, jclass const clazz, jobject const mapObject, UserMark const * userMark) { - feature::Metadata metadata; + using feature::Metadata; + + Metadata metadata; frm()->FindClosestPOIMetadata(userMark->GetOrg(), metadata); static jmethodID const addId = env->GetMethodID(clazz, "addMetadata", "(ILjava/lang/String;)V"); ASSERT ( addId, () ); - for (feature::Metadata::EType t : metadata.GetPresentTypes()) + for (auto const t : metadata.GetPresentTypes()) { - jstring metaString = jni::ToJavaString(env, metadata.Get(t)); + // TODO: It is not a good idea to pass raw strings to UI. Calling separate getters should be a better way. + // Upcoming change: how to pass opening hours (parsed) into Editor's UI? How to get edited changes back? + jstring metaString = t == Metadata::FMD_WIKIPEDIA ? + jni::ToJavaString(env, metadata.GetWikiURL()) : + jni::ToJavaString(env, metadata.Get(t)); env->CallVoidMethod(mapObject, addId, t, metaString); // TODO use unique_ptrs for autoallocation of local refs env->DeleteLocalRef(metaString); @@ -775,7 +781,9 @@ namespace { pair NativeMetadataToJavaMetadata(JNIEnv * env, feature::Metadata const & metadata) { - vector const metaTypes = metadata.GetPresentTypes(); + using feature::Metadata; + + vector const metaTypes = metadata.GetPresentTypes(); // FIXME arrays, allocated through NewArray should be deleted manually in the method. 
// refactor that to delete refs locally or pass arrays from outside context const jintArray j_metaTypes = env->NewIntArray(metadata.Size()); @@ -784,8 +792,12 @@ pair NativeMetadataToJavaMetadata(JNIEnv * env, feature for (size_t i = 0; i < metaTypes.size(); i++) { - arr[i] = metaTypes[i]; - jstring metaString = jni::ToJavaString(env, metadata.Get(metaTypes[i])); + auto const type = metaTypes[i]; + arr[i] = type; + // TODO: Refactor code to use separate getters for each metadata. + jstring metaString = type == Metadata::FMD_WIKIPEDIA ? + jni::ToJavaString(env, metadata.GetWikiURL()) : + jni::ToJavaString(env, metadata.Get(type)); env->SetObjectArrayElement(j_metaValues, i, metaString); env->DeleteLocalRef(metaString); } diff --git a/android/res/layout/place_page_wiki.xml b/android/res/layout/place_page_wiki.xml index 1e355175e4..22cbba06d2 100644 --- a/android/res/layout/place_page_wiki.xml +++ b/android/res/layout/place_page_wiki.xml @@ -28,5 +28,5 @@ android:layout_height="wrap_content" android:textColor="@color/text_place_page_blue" android:textSize="@dimen/text_size_body_1" - tools:text="https://en.wikipedia.org/"/> + android:text="@string/read_in_wikipedia"/> diff --git a/android/src/com/mapswithme/maps/bookmarks/data/Metadata.java b/android/src/com/mapswithme/maps/bookmarks/data/Metadata.java index 6bbbf35b5d..99892617ae 100644 --- a/android/src/com/mapswithme/maps/bookmarks/data/Metadata.java +++ b/android/src/com/mapswithme/maps/bookmarks/data/Metadata.java @@ -26,6 +26,7 @@ public class Metadata implements Parcelable FMD_TURN_LANES_BACKWARD(13), FMD_EMAIL(14), FMD_POSTCODE(15), + // TODO: It is hacked in jni and returns full Wikipedia url. Should use separate getter instead. 
FMD_WIKIPEDIA(16), FMD_MAXSPEED(17), FMD_FLATS(18); diff --git a/android/src/com/mapswithme/maps/widget/placepage/PlacePageView.java b/android/src/com/mapswithme/maps/widget/placepage/PlacePageView.java index ae8a3c1470..bb3411f3c8 100644 --- a/android/src/com/mapswithme/maps/widget/placepage/PlacePageView.java +++ b/android/src/com/mapswithme/maps/widget/placepage/PlacePageView.java @@ -63,8 +63,6 @@ import com.mapswithme.util.sharing.ShareOption; import com.mapswithme.util.statistics.AlohaHelper; import com.mapswithme.util.statistics.Statistics; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; import java.util.ArrayList; import java.util.List; @@ -427,13 +425,7 @@ public class PlacePageView extends RelativeLayout implements View.OnClickListene refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_EMAIL), mEmail, mTvEmail); refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_OPERATOR), mOperator, mTvOperator); refreshMetadataOrHide(translateCuisine(mMapObject.getMetadata(Metadata.MetadataType.FMD_CUISINE)), mCuisine, mTvCuisine); - try - { - final String wikipedia = mMapObject.getMetadata(Metadata.MetadataType.FMD_WIKIPEDIA); - refreshMetadataOrHide(TextUtils.isEmpty(wikipedia) ? 
null : URLDecoder.decode(wikipedia, "UTF-8"), mWiki, mTvWiki); - } catch (UnsupportedEncodingException e) - { - } + refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_WIKIPEDIA), mWiki, mTvWiki); refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_INTERNET), mWifi, null); refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_FLATS), mEntrance, mTvEntrance); // TODO throw away parsing hack when data will be parsed correctly in core @@ -698,9 +690,8 @@ public class PlacePageView extends RelativeLayout implements View.OnClickListene followUrl(mTvWebsite.getText().toString()); break; case R.id.ll__place_wiki: - final String[] wikiParts = mTvWiki.getText().toString().split(":"); - if (wikiParts.length == 2) - followUrl("https://" + wikiParts[0] + ".wikipedia.org/wiki/" + wikiParts[1]); + // TODO: Refactor and use separate getters for Wiki and all other PP meta info too. + followUrl(mMapObject.getMetadata(Metadata.MetadataType.FMD_WIKIPEDIA)); break; case R.id.tv__bookmark_group: saveBookmarkNameIfUpdated(null); diff --git a/generator/generator.pro b/generator/generator.pro index 254d7df794..a5f12ece16 100644 --- a/generator/generator.pro +++ b/generator/generator.pro @@ -23,6 +23,7 @@ SOURCES += \ feature_generator.cpp \ feature_merger.cpp \ feature_sorter.cpp \ + osm2meta.cpp \ osm2type.cpp \ osm_element.cpp \ osm_id.cpp \ @@ -50,7 +51,6 @@ HEADERS += \ generate_info.hpp \ osm2meta.hpp \ osm2type.hpp \ - osm2meta.hpp \ osm_element.hpp \ osm_id.hpp \ osm_o5m_source.hpp \ diff --git a/generator/generator_tests/metadata_test.cpp b/generator/generator_tests/metadata_test.cpp index 364505b6c7..1666752024 100644 --- a/generator/generator_tests/metadata_test.cpp +++ b/generator/generator_tests/metadata_test.cpp @@ -131,44 +131,59 @@ UNIT_TEST(Metadata_ValidateAndFormat_ele) UNIT_TEST(Metadata_ValidateAndFormat_wikipedia) { + using feature::Metadata; + + char const * kWikiKey = "wikipedia"; + FeatureParams params; 
MetadataTagProcessor p(params); - string const lanaWoodUrlEncoded = "%D0%9B%D0%B0%D0%BD%D0%B0_%D0%92%D1%83%D0%B4"; - p("wikipedia", "ru:Лана Вуд"); - TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "ru:" + lanaWoodUrlEncoded, ("ru:")); - TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "ru:Лана Вуд", ("ru:")); - TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded, ("ru:")); - params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA); + p(kWikiKey, "en:Bad %20Data"); + TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "en:Bad %20Data", ()); + TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://en.m.wikipedia.org/wiki/Bad_%2520Data", ()); + params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA); - p("wikipedia", "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded); - TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "ru:" + lanaWoodUrlEncoded, ("https:")); - TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "ru:Лана Вуд", ("https:")); - TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded, ("https:")); - params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA); + p(kWikiKey, "ru:Тест_with % sign"); + TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "ru:Тест with % sign", ()); + TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.m.wikipedia.org/wiki/Тест_with_%25_sign", ()); + params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA); - p("wikipedia", "Test"); - TEST(params.GetMetadata().Empty(), ("Test")); + p(kWikiKey, "https://be-tarask.wikipedia.org/wiki/Вялікае_Княства_Літоўскае"); + TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "be-tarask:Вялікае Княства Літоўскае", ()); + TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://be-tarask.m.wikipedia.org/wiki/Вялікае_Княства_Літоўскае", ()); + params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA); - p("wikipedia", 
"https://en.wikipedia.org/wiki/"); - TEST(params.GetMetadata().Empty(), ("Null wiki")); + // Final link points to https and mobile version. + p(kWikiKey, "http://en.wikipedia.org/wiki/A"); + TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "en:A", ()); + TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://en.m.wikipedia.org/wiki/A", ()); + params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA); - p("wikipedia", "http://.wikipedia.org/wiki/Whatever"); - TEST(params.GetMetadata().Empty(), ("Null lang", params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA))); + p(kWikiKey, "invalid_input_without_language_and_colon"); + TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA))); - // We ignore incorrect prefixes - p("wikipedia", "ht.tps://en.wikipedia.org/wiki/Whuh"); - TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "en:Whuh", ("ht.tp:")); - params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA); + p(kWikiKey, "https://en.wikipedia.org/wiki/"); + TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA))); - p("wikipedia", "http://ru.google.com/wiki/wutlol"); - TEST(params.GetMetadata().Empty(), ("Google")); + p(kWikiKey, "http://wikipedia.org/wiki/Article"); + TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA))); - // URL Decoding Test - string const badWikiTitle = "%%A"; - p("wikipedia", "https://bad.wikipedia.org/wiki/" + badWikiTitle); - TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "bad:" + badWikiTitle, ("bad title")); - params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA); + p(kWikiKey, "http://somesite.org"); + TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA))); + + p(kWikiKey, "http://www.spamsitewithaslash.com/"); + TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA))); + + p(kWikiKey, "http://.wikipedia.org/wiki/Article"); + 
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA))); + + // Ignore incorrect prefixes. + p(kWikiKey, "ht.tps://en.wikipedia.org/wiki/Whuh"); + TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "en:Whuh", ()); + params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA); + + p(kWikiKey, "http://ru.google.com/wiki/wutlol"); + TEST(params.GetMetadata().Empty(), ("Not a wikipedia site.")); } UNIT_TEST(Metadata_ReadWrite_Intermediate) diff --git a/generator/osm2meta.cpp b/generator/osm2meta.cpp index c6afb392f2..985c20a5c3 100644 --- a/generator/osm2meta.cpp +++ b/generator/osm2meta.cpp @@ -1 +1,51 @@ #include "generator/osm2meta.hpp" + +#include "coding/url_encode.hpp" + +#include "base/logging.hpp" +#include "base/string_utils.hpp" + +string MetadataTagProcessor::ValidateAndFormat_wikipedia(string v) const +{ + strings::Trim(v); + // Normalize by converting full URL to "lang:title" if necessary + // (some OSMers do not follow standard for wikipedia tag). + static string const base = ".wikipedia.org/wiki/"; + auto const baseIndex = v.find(base); + if (baseIndex != string::npos) + { + auto const baseSize = base.size(); + // Do not allow urls without article name after /wiki/. + if (v.size() > baseIndex + baseSize) + { + auto const slashIndex = v.rfind('/', baseIndex); + if (slashIndex != string::npos && slashIndex + 1 != baseIndex) + { + // Normalize article title according to OSM standards. + string title = UrlDecode(v.substr(baseIndex + baseSize)); + replace(title.begin(), title.end(), '_', ' '); + return v.substr(slashIndex + 1, baseIndex - slashIndex - 1) + ":" + title; + } + } + LOG(LWARNING, ("Invalid Wikipedia tag value:", v)); + return string(); + } + // Standard case: "lang:Article Name With Spaces". + // Language and article are at least 2 chars each. 
+ auto const colonIndex = v.find(':'); + if (colonIndex == string::npos || colonIndex < 2 || colonIndex + 2 > v.size()) + { + LOG(LWARNING, ("Invalid Wikipedia tag value:", v)); + return string(); + } + // Check if it's not a random/invalid link. + if (v.find('/') != string::npos) + { + LOG(LWARNING, ("Invalid Wikipedia tag value:", v)); + return string(); + } + // Normalize to OSM standards. + string normalized(v); + replace(normalized.begin() + colonIndex, normalized.end(), '_', ' '); + return normalized; +} diff --git a/generator/osm2meta.hpp b/generator/osm2meta.hpp index 4b30411912..79efd40fb9 100644 --- a/generator/osm2meta.hpp +++ b/generator/osm2meta.hpp @@ -213,51 +213,5 @@ protected: { return v; } - - // Special URL encoding for wikipedia: - // Replaces special characters with %HH codes - // And spaces with underscores. - string WikiUrlEncode(string const & value) const - { - ostringstream escaped; - escaped.fill('0'); - escaped << hex; - - for (auto const & c : value) - { - if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') - escaped << c; - else if (c == ' ') - escaped << '_'; - else - escaped << '%' << std::uppercase << setw(2) << static_cast(static_cast(c)); - } - - return escaped.str(); - } - - string ValidateAndFormat_wikipedia(string const & v) const - { - // Find prefix before ':', shortest case: "lg:aa". - string::size_type i = v.find(':'); - if (i == string::npos || i < 2 || i + 2 > v.length()) - return string(); - - // URL encode lang:title (lang is at most 3 chars), so URL can be reconstructed faster. - if (i <= 3 || v.substr(0, i) == "be-x-old") - return v.substr(0, i + 1) + WikiUrlEncode(v.substr(i + 1)); - - static string::size_type const minUrlPartLength = string("//be.wikipedia.org/wiki/AB").length(); - if (v[i+1] == '/' && i + minUrlPartLength < v.length()) - { - // Convert URL to "lang:title". 
- i += 3; // skip "://" - string::size_type const j = v.find('.', i + 1); - static string const wikiUrlPart = ".wikipedia.org/wiki/"; - if (j != string::npos && v.substr(j, wikiUrlPart.length()) == wikiUrlPart) - return v.substr(i, j - i) + ":" + v.substr(j + wikiUrlPart.length()); - } - return string(); - } - + string ValidateAndFormat_wikipedia(string v) const; }; diff --git a/indexer/feature_meta.cpp b/indexer/feature_meta.cpp index f709aa0391..cc41944241 100644 --- a/indexer/feature_meta.cpp +++ b/indexer/feature_meta.cpp @@ -1,72 +1,33 @@ #include "indexer/feature_meta.hpp" -namespace -{ -char HexToDec(char ch) -{ - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'a') - ch -= 'a' - 'A'; - if (ch >= 'A' && ch <= 'F') - return ch - 'A' + 10; - return -1; -} - -string UriDecode(string const & sSrc) -{ - // This code is based on - // http://www.codeguru.com/cpp/cpp/string/conversions/article.php/c12759 - // - // Note from RFC1630: "Sequences which start with a percent - // sign but are not followed by two hexadecimal characters - // (0-9, A-F) are reserved for future extension" - - string result(sSrc.length(), 0); - auto itResult = result.begin(); - - for (auto it = sSrc.begin(); it != sSrc.end(); ++it) - { - if (*it == '%') - { - if (distance(it, sSrc.end()) > 2) - { - char dec1 = HexToDec(*(it + 1)); - char dec2 = HexToDec(*(it + 2)); - if (-1 != dec1 && -1 != dec2) - { - *itResult++ = (dec1 << 4) + dec2; - it += 2; - continue; - } - } - } - - if (*it == '_') - *itResult++ = ' '; - else - *itResult++ = *it; - } - - result.resize(distance(result.begin(), itResult)); - return result; -} -} // namespace +#include "std/algorithm.hpp" namespace feature { + string Metadata::GetWikiURL() const { - string value = this->Get(FMD_WIKIPEDIA); - string::size_type i = value.find(':'); - if (i == string::npos) - return string(); - return "https://" + value.substr(0, i) + ".wikipedia.org/wiki/" + value.substr(i + 1); + string v = this->Get(FMD_WIKIPEDIA); + if 
(v.empty()) + return v; + + auto const colon = v.find(':'); + if (colon == string::npos) + return v; + + // Spaces and % sign should be replaced in urls. + replace(v.begin() + colon, v.end(), ' ', '_'); + string::size_type percent, pos = colon; + string const escapedPercent("%25"); + while ((percent = v.find('%', pos)) != string::npos) + { + v.replace(percent, 1, escapedPercent); + pos = percent + escapedPercent.size(); + } + // Trying to avoid redirects by constructing the right link. + // TODO: Wikipedia article could be opened in a user's language, but needs + // generator-level support to check for available article languages first. + return "https://" + v.substr(0, colon) + ".m.wikipedia.org/wiki/" + v.substr(colon + 1); } -string Metadata::GetWikiTitle() const -{ - string value = this->Get(FMD_WIKIPEDIA); - return UriDecode(value); -} } // namespace feature diff --git a/indexer/feature_meta.hpp b/indexer/feature_meta.hpp index cc588048d7..363f899ded 100644 --- a/indexer/feature_meta.hpp +++ b/indexer/feature_meta.hpp @@ -54,7 +54,7 @@ namespace feature string Get(EType type) const { - auto it = m_metadata.find(type); + auto const it = m_metadata.find(type); return (it == m_metadata.end()) ? string() : it->second; } @@ -78,7 +78,6 @@ namespace feature inline size_t Size() const { return m_metadata.size(); } string GetWikiURL() const; - string GetWikiTitle() const; template void SerializeToMWM(ArchiveT & ar) const {