Fixed non-working Wikimedia links with ? character in a title #8618

Open
alnzrv wants to merge 13 commits from alnzrv/wikimedia-fix into master
3 changed files with 27 additions and 19 deletions

View file

@ -154,8 +154,8 @@ UNIT_TEST(Metadata_ValidateAndFormat_wikipedia)
char const * url;
};
constexpr Test tests[] = {
{"en:Bad %20Data", "en:Bad %20Data", "https://en." WIKIHOST "/wiki/Bad_%2520Data"},
{"ru:Это тест_со знаками %, ? (и скобками)", "ru:Это тест со знаками %, ? (и скобками)", "https://ru." WIKIHOST "/wiki/Этоест_со_знаками_%25,_%3F_(и_скобками)"},
{"en:Bad %20Data", "en:Bad %20Data", "https://en." WIKIHOST "/wiki/Bad_%20Data"},
{"ru:Это тест_со знаками %, ? (и скобками)", "ru:Это тест со знаками %, ? (и скобками)", "https://ru." WIKIHOST "/wiki/Этоест_со_знаками_%,_%3F_(и_скобками)"},
{"https://be-tarask.wikipedia.org/wiki/Вялікае_Княстваітоўскае", "be-tarask:Вялікае Княства Літоўскае", "https://be-tarask." WIKIHOST "/wiki/Вялікае_Княстваітоўскае"},
// Final link points to https and mobile version.
{"http://en.wikipedia.org/wiki/A#id", "en:A#id", "https://en." WIKIHOST "/wiki/A#id"},
@ -212,6 +212,8 @@ UNIT_TEST(Metadata_ValidateAndFormat_wikimedia_commons)
p(kWikiKey, "Category:Bosphorus");
TEST_EQUAL(md.Get(Metadata::FMD_WIKIMEDIA_COMMONS), "Category:Bosphorus", ());
TEST_EQUAL(md.ToWikimediaCommonsURL("Category:What If? (Bonn)"), "https://commons.wikimedia.org/wiki/Category:What_If%3F_(Bonn)", ());
md.Drop(Metadata::FMD_WIKIMEDIA_COMMONS);
p(kWikiKey, "incorrect_wikimedia_content");
TEST(md.Get(Metadata::FMD_WIKIMEDIA_COMMONS).empty(), ());

View file

@ -23,21 +23,7 @@ string Metadata::ToWikiURL(std::string v)
if (colon == string::npos)
return v;
// Spaces, % and ? characters should be corrected to form a valid URL's path.
// Standard percent encoding also encodes other characters like (), which lead to an unnecessary HTTP redirection.
for (auto i = colon; i < v.size(); ++i)
{
auto & c = v[i];
if (c == ' ')
c = '_';
else if (c == '%')
v.insert(i + 1, "25"); // % => %25
else if (c == '?')
{
c = '%';
v.insert(i + 1, "3F"); // ? => %3F
}
}
v = EncodeWikiURL(v, colon);
// Trying to avoid redirects by constructing the right link.
// TODO: Wikipedia article could be opened in a user's language, but need
@ -50,14 +36,33 @@ std::string Metadata::GetWikiURL() const
return ToWikiURL(string(Get(FMD_WIKIPEDIA)));
}
string Metadata::ToWikimediaCommonsURL(std::string const & v)
std::string Metadata::ToWikimediaCommonsURL(std::string v)
{
if (v.empty())
return v;
v = EncodeWikiURL(v, 0);
return "https://commons.wikimedia.org/wiki/" + v;
}
std::string Metadata::EncodeWikiURL(std::string v, int startIndex) {
biodranik commented 2024-07-02 19:25:53 +00:00 (Migrated from github.com)
Review

nit:

      v.insert(++i, "3F");  // ? => %3F
nit: ```suggestion v.insert(++i, "3F"); // ? => %3F ```
biodranik commented 2024-07-02 19:26:00 +00:00 (Migrated from github.com)
Review
std::string Metadata::ToWikimediaCommonsURL(std::string v)
```suggestion std::string Metadata::ToWikimediaCommonsURL(std::string v) ```
biodranik commented 2024-07-04 20:57:20 +00:00 (Migrated from github.com)
Review

If this code is similar/the same as Metadata::ToWikiURL, please move it into a reusable static function (or into the anonymous namespace), and call it from both these methods.

If this code is similar/the same as `Metadata::ToWikiURL`, please move it into a reusable static function (or into the anonymous namespace), and call it from both these methods.
alnzrv commented 2024-07-04 21:45:31 +00:00 (Migrated from github.com)
Review

These code fragments are similar except '%' encoding for Wikipedia title:

else if (c == '%')
  v.insert(i + 1, "25");  // % => %25

It was added here:
30d1880a6f (diff-2965b683212a4216733d4b452d86780e807d95340d8577ff497ddc090306f13bR24)

I have a feeling it's not needed for Wikipedia links too but I don't wanna test what happens if we remove it.

If '%' encoding is really required for Wikipedia links, I don't think moving space and '?' encoding into a separate function worth it.

These code fragments are similar except '%' encoding for Wikipedia title: ```c++ else if (c == '%') v.insert(i + 1, "25"); // % => %25 ``` It was added here: https://git.omaps.dev/organicmaps/organicmaps/commit/30d1880a6fe5f15add03a59cfa04411bdfda1d36#diff-2965b683212a4216733d4b452d86780e807d95340d8577ff497ddc090306f13bR24 I have a feeling it's not needed for Wikipedia links too but I don't wanna test what happens if we remove it. If '%' encoding is really required for Wikipedia links, I don't think moving space and '?' encoding into a separate function worth it.
biodranik commented 2024-08-05 04:58:03 +00:00 (Migrated from github.com)
Review

I'm not following the logic.

  1. Why percent escaping is not needed?
  2. Why the same code should not be reused but instead copy-pasted?
I'm not following the logic. 1. Why percent escaping is not needed? 2. Why the same code should not be reused but instead copy-pasted?
// Spaces and ? characters should be corrected to form a valid URL's path.
// Standard percent encoding also encodes other characters like (), which lead to an unnecessary HTTP redirection.
for (auto i = startIndex; i < v.size(); ++i)
{
auto & c = v[i];
if (c == ' ')
c = '_';
else if (c == '?')
{
c = '%';
v.insert(++i, "3F"); // ? => %3F
}
}
return v;
}
// static
bool Metadata::TypeFromString(string_view k, Metadata::EType & outType)
{

View file

@ -177,7 +177,8 @@ public:
static std::string ToWikiURL(std::string v);
std::string GetWikiURL() const;
static std::string ToWikimediaCommonsURL(std::string const & v);
static std::string ToWikimediaCommonsURL(std::string v);
static std::string EncodeWikiURL(std::string v, int startIndex);
void ClearPOIAttribs();
};