Fixed non-working Wikimedia links with ? character in a title #8618
3 changed files with 27 additions and 19 deletions
|
@ -154,8 +154,8 @@ UNIT_TEST(Metadata_ValidateAndFormat_wikipedia)
|
|||
char const * url;
|
||||
};
|
||||
constexpr Test tests[] = {
|
||||
{"en:Bad %20Data", "en:Bad %20Data", "https://en." WIKIHOST "/wiki/Bad_%2520Data"},
|
||||
{"ru:Это тест_со знаками %, ? (и скобками)", "ru:Это тест со знаками %, ? (и скобками)", "https://ru." WIKIHOST "/wiki/Это_тест_со_знаками_%25,_%3F_(и_скобками)"},
|
||||
{"en:Bad %20Data", "en:Bad %20Data", "https://en." WIKIHOST "/wiki/Bad_%20Data"},
|
||||
{"ru:Это тест_со знаками %, ? (и скобками)", "ru:Это тест со знаками %, ? (и скобками)", "https://ru." WIKIHOST "/wiki/Это_тест_со_знаками_%,_%3F_(и_скобками)"},
|
||||
{"https://be-tarask.wikipedia.org/wiki/Вялікае_Княства_Літоўскае", "be-tarask:Вялікае Княства Літоўскае", "https://be-tarask." WIKIHOST "/wiki/Вялікае_Княства_Літоўскае"},
|
||||
// Final link points to https and mobile version.
|
||||
{"http://en.wikipedia.org/wiki/A#id", "en:A#id", "https://en." WIKIHOST "/wiki/A#id"},
|
||||
|
@ -212,6 +212,8 @@ UNIT_TEST(Metadata_ValidateAndFormat_wikimedia_commons)
|
|||
p(kWikiKey, "Category:Bosphorus");
|
||||
TEST_EQUAL(md.Get(Metadata::FMD_WIKIMEDIA_COMMONS), "Category:Bosphorus", ());
|
||||
|
||||
TEST_EQUAL(md.ToWikimediaCommonsURL("Category:What If? (Bonn)"), "https://commons.wikimedia.org/wiki/Category:What_If%3F_(Bonn)", ());
|
||||
|
||||
md.Drop(Metadata::FMD_WIKIMEDIA_COMMONS);
|
||||
p(kWikiKey, "incorrect_wikimedia_content");
|
||||
TEST(md.Get(Metadata::FMD_WIKIMEDIA_COMMONS).empty(), ());
|
||||
|
|
|
@ -23,21 +23,7 @@ string Metadata::ToWikiURL(std::string v)
|
|||
if (colon == string::npos)
|
||||
return v;
|
||||
|
||||
// Spaces, % and ? characters should be corrected to form a valid URL's path.
|
||||
// Standard percent encoding also encodes other characters like (), which lead to an unnecessary HTTP redirection.
|
||||
for (auto i = colon; i < v.size(); ++i)
|
||||
{
|
||||
auto & c = v[i];
|
||||
if (c == ' ')
|
||||
c = '_';
|
||||
else if (c == '%')
|
||||
v.insert(i + 1, "25"); // % => %25
|
||||
else if (c == '?')
|
||||
{
|
||||
c = '%';
|
||||
v.insert(i + 1, "3F"); // ? => %3F
|
||||
}
|
||||
}
|
||||
v = EncodeWikiURL(v, colon);
|
||||
|
||||
// Trying to avoid redirects by constructing the right link.
|
||||
// TODO: Wikipedia article could be opened in a user's language, but need
|
||||
|
@ -50,14 +36,33 @@ std::string Metadata::GetWikiURL() const
|
|||
return ToWikiURL(string(Get(FMD_WIKIPEDIA)));
|
||||
}
|
||||
|
||||
string Metadata::ToWikimediaCommonsURL(std::string const & v)
|
||||
std::string Metadata::ToWikimediaCommonsURL(std::string v)
|
||||
{
|
||||
if (v.empty())
|
||||
return v;
|
||||
|
||||
v = EncodeWikiURL(v, 0);
|
||||
|
||||
return "https://commons.wikimedia.org/wiki/" + v;
|
||||
}
|
||||
|
||||
std::string Metadata::EncodeWikiURL(std::string v, int startIndex) {
|
||||
![]() If this code is similar/the same as If this code is similar/the same as `Metadata::ToWikiURL`, please move it into a reusable static function (or into the anonymous namespace), and call it from both these methods.
![]() These code fragments are similar except '%' encoding for Wikipedia title:
It was added here: I have a feeling it's not needed for Wikipedia links too but I don't wanna test what happens if we remove it. If '%' encoding is really required for Wikipedia links, I don't think moving space and '?' encoding into a separate function worth it. These code fragments are similar except '%' encoding for Wikipedia title:
```c++
else if (c == '%')
v.insert(i + 1, "25"); // % => %25
```
It was added here:
https://git.omaps.dev/organicmaps/organicmaps/commit/30d1880a6fe5f15add03a59cfa04411bdfda1d36#diff-2965b683212a4216733d4b452d86780e807d95340d8577ff497ddc090306f13bR24
I have a feeling it's not needed for Wikipedia links too but I don't wanna test what happens if we remove it.
If '%' encoding is really required for Wikipedia links, I don't think moving space and '?' encoding into a separate function worth it.
![]() I'm not following the logic.
I'm not following the logic.
1. Why percent escaping is not needed?
2. Why the same code should not be reused but instead copy-pasted?
|
||||
// Spaces and ? characters should be corrected to form a valid URL's path.
|
||||
// Standard percent encoding also encodes other characters like (), which lead to an unnecessary HTTP redirection.
|
||||
for (auto i = startIndex; i < v.size(); ++i)
|
||||
{
|
||||
auto & c = v[i];
|
||||
if (c == ' ')
|
||||
c = '_';
|
||||
else if (c == '?')
|
||||
{
|
||||
c = '%';
|
||||
v.insert(++i, "3F"); // ? => %3F
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
// static
|
||||
bool Metadata::TypeFromString(string_view k, Metadata::EType & outType)
|
||||
{
|
||||
|
|
|
@ -177,7 +177,8 @@ public:
|
|||
|
||||
static std::string ToWikiURL(std::string v);
|
||||
std::string GetWikiURL() const;
|
||||
static std::string ToWikimediaCommonsURL(std::string const & v);
|
||||
static std::string ToWikimediaCommonsURL(std::string v);
|
||||
static std::string EncodeWikiURL(std::string v, int startIndex);
|
||||
|
||||
void ClearPOIAttribs();
|
||||
};
|
||||
|
|
Reference in a new issue
nit: