Fixed Wikipedia links: MWM storage and display on Android.

The previous solution was a really bad engineering decision (more details in https://github.com/mapsme/omim/pull/421).
This commit is contained in:
Alex Zolotarev 2015-11-05 23:21:41 -08:00
parent 7685f5327b
commit 30d1880a6f
10 changed files with 142 additions and 159 deletions
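
For orientation before the diff: the generator now normalizes the OSM wikipedia tag into a plain "lang:Article Title" string, the MWM stores that string as-is, and the mobile Wikipedia URL is built only when the user taps the link. Below is a minimal, self-contained sketch of that round trip (a simplified illustration, not code from this commit; the real GetWikiURL() additionally escapes '%'):

#include <algorithm>
#include <cassert>
#include <string>

// Generator side: mirrors the "lang:title" branch of ValidateAndFormat_wikipedia() in osm2meta.cpp.
std::string NormalizeWikiTag(std::string v)
{
  auto const colon = v.find(':');
  if (colon == std::string::npos || colon < 2 || colon + 2 > v.size() || v.find('/') != std::string::npos)
    return {};
  std::replace(v.begin() + colon, v.end(), '_', ' ');
  return v;
}

// Display side: mirrors Metadata::GetWikiURL() in feature_meta.cpp ('%' escaping omitted for brevity).
std::string BuildWikiUrl(std::string v)
{
  auto const colon = v.find(':');
  if (v.empty() || colon == std::string::npos)
    return v;
  std::replace(v.begin() + colon, v.end(), ' ', '_');
  return "https://" + v.substr(0, colon) + ".m.wikipedia.org/wiki/" + v.substr(colon + 1);
}

int main()
{
  std::string const stored = NormalizeWikiTag("en:Article_Name");  // what the MWM now stores
  assert(stored == "en:Article Name");
  assert(BuildWikiUrl(stored) == "https://en.m.wikipedia.org/wiki/Article_Name");
  return 0;
}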

View file

@ -739,15 +739,21 @@ namespace android
// Fills mapobject's metadata from UserMark
void Framework::InjectMetadata(JNIEnv * env, jclass const clazz, jobject const mapObject, UserMark const * userMark)
{
feature::Metadata metadata;
using feature::Metadata;
Metadata metadata;
frm()->FindClosestPOIMetadata(userMark->GetOrg(), metadata);
static jmethodID const addId = env->GetMethodID(clazz, "addMetadata", "(ILjava/lang/String;)V");
ASSERT ( addId, () );
for (feature::Metadata::EType t : metadata.GetPresentTypes())
for (auto const t : metadata.GetPresentTypes())
{
jstring metaString = jni::ToJavaString(env, metadata.Get(t));
// TODO: It is not a good idea to pass raw strings to the UI. Calling separate getters would be a better way.
// Upcoming change: how do we pass (parsed) opening hours into the Editor's UI? How do we get the edited changes back?
jstring metaString = t == Metadata::FMD_WIKIPEDIA ?
jni::ToJavaString(env, metadata.GetWikiURL()) :
jni::ToJavaString(env, metadata.Get(t));
env->CallVoidMethod(mapObject, addId, t, metaString);
// TODO: Use unique_ptrs for automatic deallocation of local refs.
env->DeleteLocalRef(metaString);
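
The TODO above suggests unique_ptr-like handling of JNI local references. A minimal sketch of that idea, assuming a hypothetical ScopedLocalRef helper (this class does not exist in the repository):

#include <jni.h>

// Hypothetical RAII guard: releases the JNI local reference when it leaves scope,
// so DeleteLocalRef cannot be forgotten on early returns.
class ScopedLocalRef
{
public:
  ScopedLocalRef(JNIEnv * env, jobject ref) : m_env(env), m_ref(ref) {}
  ~ScopedLocalRef() { if (m_ref) m_env->DeleteLocalRef(m_ref); }
  ScopedLocalRef(ScopedLocalRef const &) = delete;
  ScopedLocalRef & operator=(ScopedLocalRef const &) = delete;
  jobject get() const { return m_ref; }

private:
  JNIEnv * m_env;
  jobject m_ref;
};

// Inside the loop above, usage could then look like:
//   ScopedLocalRef metaString(env, jni::ToJavaString(env, metadata.Get(t)));
//   env->CallVoidMethod(mapObject, addId, t, metaString.get());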
@ -775,7 +781,9 @@ namespace
{
pair<jintArray, jobjectArray> NativeMetadataToJavaMetadata(JNIEnv * env, feature::Metadata const & metadata)
{
vector<feature::Metadata::EType> const metaTypes = metadata.GetPresentTypes();
using feature::Metadata;
vector<Metadata::EType> const metaTypes = metadata.GetPresentTypes();
// FIXME: Arrays allocated through New<Type>Array should be deleted manually in this method.
// Refactor to delete refs locally or to pass arrays in from the outside context.
const jintArray j_metaTypes = env->NewIntArray(metadata.Size());
@ -784,8 +792,12 @@ pair<jintArray, jobjectArray> NativeMetadataToJavaMetadata(JNIEnv * env, feature
for (size_t i = 0; i < metaTypes.size(); i++)
{
arr[i] = metaTypes[i];
jstring metaString = jni::ToJavaString(env, metadata.Get(metaTypes[i]));
auto const type = metaTypes[i];
arr[i] = type;
// TODO: Refactor the code to use a separate getter for each metadata type.
jstring metaString = type == Metadata::FMD_WIKIPEDIA ?
jni::ToJavaString(env, metadata.GetWikiURL()) :
jni::ToJavaString(env, metadata.Get(type));
env->SetObjectArrayElement(j_metaValues, i, metaString);
env->DeleteLocalRef(metaString);
}
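
One possible shape for the FIXME above is to keep the allocation and the release of the two arrays in a single scope on the caller's side. A hedged sketch (the wrapper function name is illustrative, not an existing symbol):

// Hypothetical caller: the pair returned by NativeMetadataToJavaMetadata is
// released as soon as the Java object has been filled from it.
void FillJavaMetadata(JNIEnv * env, jobject mapObject, feature::Metadata const & metadata)
{
  auto const meta = NativeMetadataToJavaMetadata(env, metadata);
  // ... pass meta.first (type ids) and meta.second (values) to the Java side ...
  env->DeleteLocalRef(meta.first);
  env->DeleteLocalRef(meta.second);
}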

View file

@ -28,5 +28,5 @@
android:layout_height="wrap_content"
android:textColor="@color/text_place_page_blue"
android:textSize="@dimen/text_size_body_1"
tools:text="https://en.wikipedia.org/"/>
android:text="@string/read_in_wikipedia"/>
</LinearLayout>

View file

@ -26,6 +26,7 @@ public class Metadata implements Parcelable
FMD_TURN_LANES_BACKWARD(13),
FMD_EMAIL(14),
FMD_POSTCODE(15),
// TODO: This is hacked in JNI and returns the full Wikipedia URL. A separate getter should be used instead.
FMD_WIKIPEDIA(16),
FMD_MAXSPEED(17),
FMD_FLATS(18);

View file

@ -63,8 +63,6 @@ import com.mapswithme.util.sharing.ShareOption;
import com.mapswithme.util.statistics.AlohaHelper;
import com.mapswithme.util.statistics.Statistics;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
@ -427,13 +425,7 @@ public class PlacePageView extends RelativeLayout implements View.OnClickListene
refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_EMAIL), mEmail, mTvEmail);
refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_OPERATOR), mOperator, mTvOperator);
refreshMetadataOrHide(translateCuisine(mMapObject.getMetadata(Metadata.MetadataType.FMD_CUISINE)), mCuisine, mTvCuisine);
try
{
final String wikipedia = mMapObject.getMetadata(Metadata.MetadataType.FMD_WIKIPEDIA);
refreshMetadataOrHide(TextUtils.isEmpty(wikipedia) ? null : URLDecoder.decode(wikipedia, "UTF-8"), mWiki, mTvWiki);
} catch (UnsupportedEncodingException e)
{
}
refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_WIKIPEDIA), mWiki, mTvWiki);
refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_INTERNET), mWifi, null);
refreshMetadataOrHide(mMapObject.getMetadata(Metadata.MetadataType.FMD_FLATS), mEntrance, mTvEntrance);
// TODO: Throw away this parsing hack when the data is parsed correctly in the core.
@ -698,9 +690,8 @@ public class PlacePageView extends RelativeLayout implements View.OnClickListene
followUrl(mTvWebsite.getText().toString());
break;
case R.id.ll__place_wiki:
final String[] wikiParts = mTvWiki.getText().toString().split(":");
if (wikiParts.length == 2)
followUrl("https://" + wikiParts[0] + ".wikipedia.org/wiki/" + wikiParts[1]);
// TODO: Refactor and use separate getters for Wiki and all other PP meta info too.
followUrl(mMapObject.getMetadata(Metadata.MetadataType.FMD_WIKIPEDIA));
break;
case R.id.tv__bookmark_group:
saveBookmarkNameIfUpdated(null);

View file

@ -23,6 +23,7 @@ SOURCES += \
feature_generator.cpp \
feature_merger.cpp \
feature_sorter.cpp \
osm2meta.cpp \
osm2type.cpp \
osm_element.cpp \
osm_id.cpp \
@ -50,7 +51,6 @@ HEADERS += \
generate_info.hpp \
osm2meta.hpp \
osm2type.hpp \
osm2meta.hpp \
osm_element.hpp \
osm_id.hpp \
osm_o5m_source.hpp \

View file

@ -131,44 +131,59 @@ UNIT_TEST(Metadata_ValidateAndFormat_ele)
UNIT_TEST(Metadata_ValidateAndFormat_wikipedia)
{
using feature::Metadata;
char const * kWikiKey = "wikipedia";
FeatureParams params;
MetadataTagProcessor p(params);
string const lanaWoodUrlEncoded = "%D0%9B%D0%B0%D0%BD%D0%B0_%D0%92%D1%83%D0%B4";
p("wikipedia", "ru:Лана Вуд");
TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "ru:" + lanaWoodUrlEncoded, ("ru:"));
TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "ru:Лана Вуд", ("ru:"));
TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded, ("ru:"));
params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA);
p(kWikiKey, "en:Bad %20Data");
TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "en:Bad %20Data", ());
TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://en.m.wikipedia.org/wiki/Bad_%2520Data", ());
params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA);
p("wikipedia", "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded);
TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "ru:" + lanaWoodUrlEncoded, ("https:"));
TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "ru:Лана Вуд", ("https:"));
TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.wikipedia.org/wiki/" + lanaWoodUrlEncoded, ("https:"));
params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA);
p(kWikiKey, "ru:Тест_with % sign");
TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "ru:Тест with % sign", ());
TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://ru.m.wikipedia.org/wiki/Тест_with_%25_sign", ());
params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA);
p("wikipedia", "Test");
TEST(params.GetMetadata().Empty(), ("Test"));
p(kWikiKey, "https://be-tarask.wikipedia.org/wiki/Вялікае_Княстваітоўскае");
TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "be-tarask:Вялікае Княства Літоўскае", ());
TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://be-tarask.m.wikipedia.org/wiki/Вялікае_Княстваітоўскае", ());
params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA);
p("wikipedia", "https://en.wikipedia.org/wiki/");
TEST(params.GetMetadata().Empty(), ("Null wiki"));
// Final link points to https and mobile version.
p(kWikiKey, "http://en.wikipedia.org/wiki/A");
TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "en:A", ());
TEST_EQUAL(params.GetMetadata().GetWikiURL(), "https://en.m.wikipedia.org/wiki/A", ());
params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA);
p("wikipedia", "http://.wikipedia.org/wiki/Whatever");
TEST(params.GetMetadata().Empty(), ("Null lang", params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA)));
p(kWikiKey, "invalid_input_without_language_and_colon");
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA)));
// We ignore incorrect prefixes
p("wikipedia", "ht.tps://en.wikipedia.org/wiki/Whuh");
TEST_EQUAL(params.GetMetadata().Get(feature::Metadata::FMD_WIKIPEDIA), "en:Whuh", ("ht.tp:"));
params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA);
p(kWikiKey, "https://en.wikipedia.org/wiki/");
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA)));
p("wikipedia", "http://ru.google.com/wiki/wutlol");
TEST(params.GetMetadata().Empty(), ("Google"));
p(kWikiKey, "http://wikipedia.org/wiki/Article");
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA)));
// URL Decoding Test
string const badWikiTitle = "%%A";
p("wikipedia", "https://bad.wikipedia.org/wiki/" + badWikiTitle);
TEST_EQUAL(params.GetMetadata().GetWikiTitle(), "bad:" + badWikiTitle, ("bad title"));
params.GetMetadata().Drop(feature::Metadata::FMD_WIKIPEDIA);
p(kWikiKey, "http://somesite.org");
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA)));
p(kWikiKey, "http://www.spamsitewithaslash.com/");
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA)));
p(kWikiKey, "http://.wikipedia.org/wiki/Article");
TEST(params.GetMetadata().Empty(), (params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA)));
// Ignore incorrect prefixes.
p(kWikiKey, "ht.tps://en.wikipedia.org/wiki/Whuh");
TEST_EQUAL(params.GetMetadata().Get(Metadata::FMD_WIKIPEDIA), "en:Whuh", ());
params.GetMetadata().Drop(Metadata::FMD_WIKIPEDIA);
p(kWikiKey, "http://ru.google.com/wiki/wutlol");
TEST(params.GetMetadata().Empty(), ("Not a wikipedia site."));
}
UNIT_TEST(Metadata_ReadWrite_Intermediate)

View file

@ -1 +1,51 @@
#include "generator/osm2meta.hpp"
#include "coding/url_encode.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
string MetadataTagProcessor::ValidateAndFormat_wikipedia(string v) const
{
strings::Trim(v);
// Normalize by converting full URL to "lang:title" if necessary
// (some OSMers do not follow the standard for the wikipedia tag).
static string const base = ".wikipedia.org/wiki/";
auto const baseIndex = v.find(base);
if (baseIndex != string::npos)
{
auto const baseSize = base.size();
// Do not allow URLs without an article name after /wiki/.
if (v.size() > baseIndex + baseSize)
{
auto const slashIndex = v.rfind('/', baseIndex);
if (slashIndex != string::npos && slashIndex + 1 != baseIndex)
{
// Normalize article title according to OSM standards.
string title = UrlDecode(v.substr(baseIndex + baseSize));
replace(title.begin(), title.end(), '_', ' ');
return v.substr(slashIndex + 1, baseIndex - slashIndex - 1) + ":" + title;
}
}
LOG(LWARNING, ("Invalid Wikipedia tag value:", v));
return string();
}
// Standard case: "lang:Article Name With Spaces".
// Language and article are at least 2 chars each.
auto const colonIndex = v.find(':');
if (colonIndex == string::npos || colonIndex < 2 || colonIndex + 2 > v.size())
{
LOG(LWARNING, ("Invalid Wikipedia tag value:", v));
return string();
}
// Reject random/invalid links which still contain a slash.
if (v.find('/') != string::npos)
{
LOG(LWARNING, ("Invalid Wikipedia tag value:", v));
return string();
}
// Normalize to OSM standards.
string normalized(v);
replace(normalized.begin() + colonIndex, normalized.end(), '_', ' ');
return normalized;
}

View file

@ -213,51 +213,5 @@ protected:
{
return v;
}
// Special URL encoding for wikipedia:
// Replaces special characters with %HH codes
// And spaces with underscores.
string WikiUrlEncode(string const & value) const
{
ostringstream escaped;
escaped.fill('0');
escaped << hex;
for (auto const & c : value)
{
if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~')
escaped << c;
else if (c == ' ')
escaped << '_';
else
escaped << '%' << std::uppercase << setw(2) << static_cast<int>(static_cast<unsigned char>(c));
}
return escaped.str();
}
string ValidateAndFormat_wikipedia(string const & v) const
{
// Find prefix before ':', shortest case: "lg:aa".
string::size_type i = v.find(':');
if (i == string::npos || i < 2 || i + 2 > v.length())
return string();
// URL encode lang:title (lang is at most 3 chars), so URL can be reconstructed faster.
if (i <= 3 || v.substr(0, i) == "be-x-old")
return v.substr(0, i + 1) + WikiUrlEncode(v.substr(i + 1));
static string::size_type const minUrlPartLength = string("//be.wikipedia.org/wiki/AB").length();
if (v[i+1] == '/' && i + minUrlPartLength < v.length())
{
// Convert URL to "lang:title".
i += 3; // skip "://"
string::size_type const j = v.find('.', i + 1);
static string const wikiUrlPart = ".wikipedia.org/wiki/";
if (j != string::npos && v.substr(j, wikiUrlPart.length()) == wikiUrlPart)
return v.substr(i, j - i) + ":" + v.substr(j + wikiUrlPart.length());
}
return string();
}
string ValidateAndFormat_wikipedia(string v) const;
};

View file

@ -1,72 +1,33 @@
#include "indexer/feature_meta.hpp"
namespace
{
char HexToDec(char ch)
{
if (ch >= '0' && ch <= '9')
return ch - '0';
if (ch >= 'a')
ch -= 'a' - 'A';
if (ch >= 'A' && ch <= 'F')
return ch - 'A' + 10;
return -1;
}
string UriDecode(string const & sSrc)
{
// This code is based on
// http://www.codeguru.com/cpp/cpp/string/conversions/article.php/c12759
//
// Note from RFC1630: "Sequences which start with a percent
// sign but are not followed by two hexadecimal characters
// (0-9, A-F) are reserved for future extension"
string result(sSrc.length(), 0);
auto itResult = result.begin();
for (auto it = sSrc.begin(); it != sSrc.end(); ++it)
{
if (*it == '%')
{
if (distance(it, sSrc.end()) > 2)
{
char dec1 = HexToDec(*(it + 1));
char dec2 = HexToDec(*(it + 2));
if (-1 != dec1 && -1 != dec2)
{
*itResult++ = (dec1 << 4) + dec2;
it += 2;
continue;
}
}
}
if (*it == '_')
*itResult++ = ' ';
else
*itResult++ = *it;
}
result.resize(distance(result.begin(), itResult));
return result;
}
} // namespace
#include "std/algorithm.hpp"
namespace feature
{
string Metadata::GetWikiURL() const
{
string value = this->Get(FMD_WIKIPEDIA);
string::size_type i = value.find(':');
if (i == string::npos)
return string();
return "https://" + value.substr(0, i) + ".wikipedia.org/wiki/" + value.substr(i + 1);
string v = this->Get(FMD_WIKIPEDIA);
if (v.empty())
return v;
auto const colon = v.find(':');
if (colon == string::npos)
return v;
// Spaces and the % sign must be escaped in URLs.
replace(v.begin() + colon, v.end(), ' ', '_');
string::size_type percent, pos = colon;
string const escapedPercent("%25");
while ((percent = v.find('%', pos)) != string::npos)
{
v.replace(percent, 1, escapedPercent);
pos = percent + escapedPercent.size();
}
// Trying to avoid redirects by constructing the right link.
// TODO: The Wikipedia article could be opened in the user's language, but that needs
// generator-level support to check which article languages are available first.
return "https://" + v.substr(0, colon) + ".m.wikipedia.org/wiki/" + v.substr(colon + 1);
}
string Metadata::GetWikiTitle() const
{
string value = this->Get(FMD_WIKIPEDIA);
return UriDecode(value);
}
} // namespace feature
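
A quick standalone check of the escaping rules above, reproducing the logic of GetWikiURL() in a free function (an illustration, not the class method itself) with a value taken from the editor test earlier in this commit:

#include <algorithm>
#include <cassert>
#include <string>

int main()
{
  std::string v = "ru:Тест with % sign";
  auto const colon = v.find(':');
  // Replace spaces after the language prefix and escape '%' as "%25".
  std::replace(v.begin() + colon, v.end(), ' ', '_');
  for (auto pos = v.find('%', colon); pos != std::string::npos; pos = v.find('%', pos + 3))
    v.replace(pos, 1, "%25");
  std::string const url = "https://" + v.substr(0, colon) + ".m.wikipedia.org/wiki/" + v.substr(colon + 1);
  assert(url == "https://ru.m.wikipedia.org/wiki/Тест_with_%25_sign");
  return 0;
}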

View file

@ -54,7 +54,7 @@ namespace feature
string Get(EType type) const
{
auto it = m_metadata.find(type);
auto const it = m_metadata.find(type);
return (it == m_metadata.end()) ? string() : it->second;
}
@ -78,7 +78,6 @@ namespace feature
inline size_t Size() const { return m_metadata.size(); }
string GetWikiURL() const;
string GetWikiTitle() const;
template <class ArchiveT> void SerializeToMWM(ArchiveT & ar) const
{