diff --git a/coding/coding_tests/url_tests.cpp b/coding/coding_tests/url_tests.cpp index 4afded9446..787c9c6b59 100644 --- a/coding/coding_tests/url_tests.cpp +++ b/coding/coding_tests/url_tests.cpp @@ -16,13 +16,14 @@ using namespace url; class TestUrl { public: - explicit TestUrl(string const & url) : m_url(url) {} + explicit TestUrl(string && url) : m_url(std::move(url)) {} - TestUrl & Scheme(string const & scheme) { m_scheme = scheme; return *this; } - TestUrl & Path(string const & path) { m_path = path; return *this; } - TestUrl & KV(string const & key, string const & value) + TestUrl & Scheme(string && scheme) { m_scheme = std::move(scheme); return *this; } + TestUrl & Host(string && host) { m_host = std::move(host); return *this; } + TestUrl & Path(string && path) { m_path = std::move(path); return *this; } + TestUrl & KV(string && key, string && value) { - m_keyValuePairs.push(make_pair(key, value)); + m_keyValuePairs.emplace(std::move(key), std::move(value)); return *this; } @@ -30,6 +31,7 @@ public: { Url url(m_url); TEST_EQUAL(url.GetScheme(), m_scheme, ()); + TEST_EQUAL(url.GetHost(), m_host, ()); TEST_EQUAL(url.GetPath(), m_path, ()); TEST(!m_scheme.empty() || !url.IsValid(), ("Scheme is empty if and only if url is invalid!")); url.ForEachParam(bind(&TestUrl::AddTestValue, this, placeholders::_1)); @@ -44,9 +46,7 @@ private: m_keyValuePairs.pop(); } - string m_url; - string m_scheme; - string m_path; + string m_url, m_scheme, m_host, m_path; queue> m_keyValuePairs; }; @@ -99,10 +99,25 @@ UNIT_TEST(Url_Decode) TEST_EQUAL(UrlDecode(enc4), orig4, ()); } -UNIT_TEST(UrlScheme_Valid) +UNIT_TEST(Url_Valid) { - Url url("mapswithme://map?ll=10.3,12.3223&n=Hello%20World"); - TEST_EQUAL(url.GetScheme(), "mapswithme", ()); + TestUrl("mapswithme://map?ll=10.3,12.3223&n=Hello%20World") + .Scheme("mapswithme") + .Host("map") + .KV("ll", "10.3,12.3223") + .KV("n", "Hello World"); + + TestUrl("om:M&M/path?q=q&w=w") + .Scheme("om") + .Host("M&M") + .Path("path") + .KV("q", "q") + .KV("w", "w"); + + TestUrl("http://www.sandwichparlour.com.au/") + .Scheme("http") + .Host("www.sandwichparlour.com.au") + .Path(""); } UNIT_TEST(UrlScheme_NoColon) @@ -110,52 +125,42 @@ UNIT_TEST(UrlScheme_NoColon) TEST_EQUAL(Url("mapswithme:").GetScheme(), "mapswithme", ()); } -UNIT_TEST(UrlScheme_Valid2) -{ - TestUrl("mapswithme://map?ll=10.3,12.3223&n=Hello%20World") - .Scheme("mapswithme") - .Path("map") - .KV("ll", "10.3,12.3223") - .KV("n", "Hello World"); -} - UNIT_TEST(UrlScheme_Comprehensive) { TestUrl(""); - TestUrl("scheme:").Scheme("scheme"); - TestUrl("scheme:/").Scheme("scheme"); - TestUrl("scheme://").Scheme("scheme"); + TestUrl("scheme:").Scheme("scheme").Host("").Path(""); + TestUrl("scheme:/").Scheme("scheme").Host("").Path(""); + TestUrl("scheme://").Scheme("scheme").Host("").Path(""); TestUrl("sometext"); TestUrl(":noscheme"); TestUrl("://noscheme?"); - TestUrl("mwm://?").Scheme("mwm"); - TestUrl("http://path/to/something").Scheme("http").Path("path/to/something"); - TestUrl("http://path?").Scheme("http").Path("path"); - TestUrl("maps://path?&&key=&").Scheme("maps").Path("path").KV("key", ""); - TestUrl("mapswithme://map?ll=1.2,3.4&z=15").Scheme("mapswithme").Path("map") + TestUrl("mwm://?").Scheme("mwm").Host("").Path(""); + TestUrl("http://host/path/to/something").Scheme("http").Host("host").Path("path/to/something"); + TestUrl("http://host?").Scheme("http").Host("host").Path(""); + TestUrl("maps://host?&&key=&").Scheme("maps").Host("host").KV("key", ""); + TestUrl("mapswithme://map?ll=1.2,3.4&z=15").Scheme("mapswithme").Host("map").Path("") .KV("ll", "1.2,3.4").KV("z", "15"); - TestUrl("nopathnovalues://?key1&key2=val2").Scheme("nopathnovalues").Path("") + TestUrl("nopathnovalues://?key1&key2=val2").Scheme("nopathnovalues").Host("").Path("") .KV("key1", "").KV("key2", "val2"); - TestUrl("s://?key1&key2").Scheme("s").Path("").KV("key1", "").KV("key2", ""); - TestUrl("g://p?key1=val1&key2=").Scheme("g").Path("p").KV("key1", "val1").KV("key2", ""); - TestUrl("g://p?=val1&key2=").Scheme("g").Path("p").KV("", "val1").KV("key2", ""); - TestUrl("g://?k&key2").Scheme("g").KV("k", "").KV("key2", ""); - TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30").Scheme("m") + TestUrl("s://?key1&key2").Scheme("s").Host("").Path("").KV("key1", "").KV("key2", ""); + TestUrl("g://h/p?key1=val1&key2=").Scheme("g").Host("h").Path("p").KV("key1", "val1").KV("key2", ""); + TestUrl("g://h?=val1&key2=").Scheme("g").Host("h").Path("").KV("", "val1").KV("key2", ""); + TestUrl("g://?k&key2").Scheme("g").Host("").Path("").KV("k", "").KV("key2", ""); + TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30").Scheme("m").Host("").Path("") .KV("&Amp&=&Amp&", "").KV("name", "1 0"); TestUrl("s://?key1=value1&key1=value2&key1=value3&key2&key2&key3=value1&key3&key3=value2") - .Scheme("s") + .Scheme("s").Host("").Path("") .KV("key1", "value1").KV("key1", "value2").KV("key1", "value3") .KV("key2", "").KV("key2", "") .KV("key3", "value1").KV("key3", "").KV("key3", "value2"); } -UNIT_TEST(UrlApi_Smoke) +UNIT_TEST(Url_2Gis) { url::Url url("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15.232"); TEST_EQUAL(url.GetScheme(), "https", ()); - TEST_EQUAL(url.GetPath(), "2gis.ru/moscow/firm/4504127908589159", ()); - TEST_EQUAL(url.GetWebDomain(), "2gis.ru", ()); - TEST_EQUAL(url.GetWebPath(), "moscow/firm/4504127908589159", ()); + TEST_EQUAL(url.GetHost(), "2gis.ru", ()); + TEST_EQUAL(url.GetPath(), "moscow/firm/4504127908589159", ()); TEST(url.GetLastParam(), ()); TEST(url.GetParamValue("m"), ()); diff --git a/coding/url.cpp b/coding/url.cpp index 9a99a2ee15..20f9713331 100644 --- a/coding/url.cpp +++ b/coding/url.cpp @@ -21,7 +21,7 @@ Url::Url(std::string const & url) { if (!Parse(url)) { - ASSERT(m_scheme.empty() && m_path.empty() && !IsValid(), ()); + ASSERT(m_scheme.empty() && m_host.empty() && m_path.empty() && !IsValid(), ()); } } @@ -34,37 +34,48 @@ Url Url::FromString(std::string const & url) bool Url::Parse(std::string const & url) { // Get url scheme. - size_t pathStart = url.find(':'); - if (pathStart == string::npos || pathStart == 0) + size_t start = url.find(':'); + if (start == string::npos || start == 0) return false; - m_scheme.assign(url, 0, pathStart); + m_scheme = url.substr(0, start); // Skip slashes. - while (++pathStart < url.size() && url[pathStart] == '/') - { - } + start = url.find_first_not_of('/', start + 1); + if (start == std::string::npos) + return true; - // Find query starting point for (key, value) parsing. - size_t queryStart = url.find('?', pathStart); - size_t pathLength; - if (queryStart == string::npos) + // Get host. + size_t end = url.find_first_of("?/", start); + if (end == string::npos) { - queryStart = url.size(); - pathLength = queryStart - pathStart; + m_host = url.substr(start); + return true; } else + m_host = url.substr(start, end - start); + + // Get path. + if (url[end] == '/') { - pathLength = queryStart - pathStart; - ++queryStart; + // Skip slashes. + start = url.find_first_not_of('/', end); + if (start == std::string::npos) + return true; + + end = url.find('?', start); + if (end == string::npos) + { + m_path = url.substr(start); + return true; + } + else + m_path = url.substr(start, end - start); } - // Get path (url without query). - m_path.assign(url, pathStart, pathLength); - // Parse query for keys and values. - for (size_t start = queryStart; start < url.size();) + for (start = end + 1; start < url.size();) { - size_t end = url.find('&', start); + end = url.find('&', start); if (end == string::npos) end = url.size(); @@ -94,23 +105,6 @@ bool Url::Parse(std::string const & url) return true; } -string Url::GetWebDomain() const -{ - auto const found = m_path.find('/'); - if (found != string::npos) - return m_path.substr(0, found); - return m_path; -} - -string Url::GetWebPath() const -{ - // Return everything after the domain name. - auto const found = m_path.find('/'); - if (found != string::npos && m_path.size() > found + 1) - return m_path.substr(found + 1); - return {}; -} - string Make(string const & baseUrl, Params const & params) { ostringstream os; diff --git a/coding/url.hpp b/coding/url.hpp index fe2e236880..6a9ee95fcf 100644 --- a/coding/url.hpp +++ b/coding/url.hpp @@ -22,18 +22,20 @@ std::string DebugPrint(Param const & param); using Params = std::vector; -// Url in format: 'scheme://path?key1=value1&key2&key3=&key4=value4' +// Url in format: 'scheme://host/path?key1=value1&key2&key3=&key4=value4' class Url { public: explicit Url(std::string const & url); static Url FromString(std::string const & url); - std::string const & GetScheme() const { return m_scheme; } - std::string const & GetPath() const { return m_path; } - std::string GetWebDomain() const; - std::string GetWebPath() const; bool IsValid() const { return !m_scheme.empty(); } + + std::string const & GetScheme() const { return m_scheme; } + std::string const & GetHost() const { return m_host; } + std::string const & GetPath() const { return m_path; } + std::string GetHostAndPath() const { return m_host + m_path; } + template void ForEachParam(FnT && fn) const { for (auto const & p : m_params) @@ -52,8 +54,7 @@ public: private: bool Parse(std::string const & url); - std::string m_scheme; - std::string m_path; + std::string m_scheme, m_host, m_path; std::vector m_params; }; diff --git a/ge0/geo_url_parser.cpp b/ge0/geo_url_parser.cpp index 7ce4f45d1e..8a87445110 100644 --- a/ge0/geo_url_parser.cpp +++ b/ge0/geo_url_parser.cpp @@ -38,9 +38,9 @@ bool MatchLatLonZoom(const string & s, const regex & re, size_t lati, size_t lon return true; } -bool EqualWebDomain(url::Url const & url, char const * domain) +bool MatchHost(url::Url const & url, char const * host) { - return url.GetWebDomain().find(domain) != std::string::npos; + return url.GetHost().find(host) != std::string::npos; } } // namespace @@ -95,7 +95,7 @@ void LatLonParser::operator()(url::Param const & param) return; VERIFY(strings::to_double(token.substr(n, token.size() - n), lon), ()); - if (EqualWebDomain(*m_url, "2gis") || EqualWebDomain(*m_url, "yandex")) + if (MatchHost(*m_url, "2gis") || MatchHost(*m_url, "yandex")) std::swap(lat, lon); if (m_info->SetLat(lat) && m_info->SetLon(lon)) @@ -161,7 +161,7 @@ bool DoubleGISParser::Parse(url::Url const & url, GeoURLInfo & info) const return true; // Parse /$lon,$lat/zoom/$zoom from path next - return MatchLatLonZoom(url.GetPath(), m_pathRe, 2, 1, 3, info); + return MatchLatLonZoom(url.GetHostAndPath(), m_pathRe, 2, 1, 3, info); } OpenStreetMapParser::OpenStreetMapParser() @@ -171,7 +171,7 @@ OpenStreetMapParser::OpenStreetMapParser() bool OpenStreetMapParser::Parse(url::Url const & url, GeoURLInfo & info) const { - if (MatchLatLonZoom(url.GetPath(), m_regex, 2, 3, 1, info)) + if (MatchLatLonZoom(url.GetHostAndPath(), m_regex, 2, 3, 1, info)) return true; // Check if "#map=" fragment is attached to the last param in Url @@ -231,12 +231,12 @@ GeoURLInfo UnifiedParser::Parse(string const & s) if (url.GetScheme() == "https" || url.GetScheme() == "http") { - if (EqualWebDomain(url, "2gis")) + if (MatchHost(url, "2gis")) { if (m_dgParser.Parse(url, res)) return res; } - else if (EqualWebDomain(url, "openstreetmap")) + else if (MatchHost(url, "openstreetmap")) { if (m_osmParser.Parse(url, res)) return res; @@ -244,7 +244,7 @@ GeoURLInfo UnifiedParser::Parse(string const & s) } m_llParser.Reset(url, res); - m_llParser({{}, url.GetPath()}); + m_llParser({{}, url.GetHostAndPath()}); url.ForEachParam(m_llParser); if (!m_llParser.IsValid()) diff --git a/indexer/validate_and_format_contacts.cpp b/indexer/validate_and_format_contacts.cpp index 8fde4311f6..219634aebb 100644 --- a/indexer/validate_and_format_contacts.cpp +++ b/indexer/validate_and_format_contacts.cpp @@ -34,13 +34,13 @@ string ValidateAndFormat_facebook(string const & facebookPage) return {}; url::Url const url = url::Url::FromString(facebookPage); - string const domain = strings::MakeLowerCase(url.GetWebDomain()); + string const domain = strings::MakeLowerCase(url.GetHost()); // Check Facebook domain name. if (strings::EndsWith(domain, "facebook.com") || strings::EndsWith(domain, "fb.com") || strings::EndsWith(domain, "fb.me") || strings::EndsWith(domain, "facebook.de") || strings::EndsWith(domain, "facebook.fr")) { - auto webPath = url.GetWebPath(); + auto webPath = url.GetPath(); // Strip last '/' symbol webPath.erase(webPath.find_last_not_of('/') + 1); return webPath; @@ -65,11 +65,11 @@ string ValidateAndFormat_instagram(string const & instagramPage) return {}; url::Url const url = url::Url::FromString(instagramPage); - string const domain = strings::MakeLowerCase(url.GetWebDomain()); + string const domain = strings::MakeLowerCase(url.GetHost()); // Check Instagram domain name. if (domain == "instagram.com" || strings::EndsWith(domain, ".instagram.com")) { - auto webPath = url.GetWebPath(); + auto webPath = url.GetPath(); // Strip last '/' symbol. webPath.erase(webPath.find_last_not_of('/') + 1); return webPath; @@ -94,11 +94,11 @@ string ValidateAndFormat_twitter(string const & twitterPage) return {}; url::Url const url = url::Url::FromString(twitterPage); - string const domain = strings::MakeLowerCase(url.GetWebDomain()); + string const domain = strings::MakeLowerCase(url.GetHost()); // Check Twitter domain name. if (domain == "twitter.com" || strings::EndsWith(domain, ".twitter.com")) { - auto webPath = url.GetWebPath(); + auto webPath = url.GetPath(); // Strip last '/' symbol and first '@' symbol webPath.erase(webPath.find_last_not_of('/') + 1); @@ -135,12 +135,12 @@ string ValidateAndFormat_vk(string const & vkPage) return {}; url::Url const url = url::Url::FromString(vkPage); - string const domain = strings::MakeLowerCase(url.GetWebDomain()); + string const domain = strings::MakeLowerCase(url.GetHost()); // Check VK domain name. if (domain == "vk.com" || strings::EndsWith(domain, ".vk.com") || domain == "vkontakte.ru" || strings::EndsWith(domain, ".vkontakte.ru")) { - auto webPath = url.GetWebPath(); + auto webPath = url.GetPath(); // Strip last '/' symbol. webPath.erase(webPath.find_last_not_of('/') + 1); return webPath; @@ -183,17 +183,17 @@ string ValidateAndFormat_contactLine(string const & linePage) // URL schema documentation: https://developers.line.biz/en/docs/messaging-api/using-line-url-scheme/ url::Url const url = url::Url::FromString(linePage); - string const domain = strings::MakeLowerCase(url.GetWebDomain()); + string const domain = strings::MakeLowerCase(url.GetHost()); // Check Line domain name. if (domain == "page.line.me") { // Parse https://page.line.me/{LINE ID} - string lineId = url.GetWebPath(); + string lineId = url.GetPath(); return stripAtSymbol(lineId); } else if (domain == "line.me" || strings::EndsWith(domain, ".line.me")) { - auto webPath = url.GetWebPath(); + auto webPath = url.GetPath(); if (strings::StartsWith(webPath, "R/ti/p/")) { // Parse https://line.me/R/ti/p/{LINE ID} @@ -242,7 +242,7 @@ bool ValidateFacebookPage(string const & page) if (!EditableMapObject::ValidateWebsite(page)) return false; - string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetWebDomain()); + string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetHost()); return (strings::StartsWith(domain, "facebook.") || strings::StartsWith(domain, "fb.") || domain.find(".facebook.") != string::npos || domain.find(".fb.") != string::npos); } @@ -259,7 +259,7 @@ bool ValidateInstagramPage(string const & page) if (!EditableMapObject::ValidateWebsite(page)) return false; - string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetWebDomain()); + string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetHost()); return domain == "instagram.com" || strings::EndsWith(domain, ".instagram.com"); } @@ -271,7 +271,7 @@ bool ValidateTwitterPage(string const & page) if (!EditableMapObject::ValidateWebsite(page)) return regex_match(page, s_twitterRegex); // Rules are defined here: https://stackoverflow.com/q/11361044 - string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetWebDomain()); + string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetHost()); return domain == "twitter.com" || strings::EndsWith(domain, ".twitter.com"); } @@ -304,7 +304,7 @@ bool ValidateVkPage(string const & page) if (!EditableMapObject::ValidateWebsite(page)) return false; - string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetWebDomain()); + string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetHost()); return domain == "vk.com" || strings::EndsWith(domain, ".vk.com") || domain == "vkontakte.ru" || strings::EndsWith(domain, ".vkontakte.ru"); } @@ -327,7 +327,7 @@ bool ValidateLinePage(string const & page) if (!EditableMapObject::ValidateWebsite(page)) return false; - string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetWebDomain()); + string const domain = strings::MakeLowerCase(url::Url::FromString(page).GetHost()); // Check Line domain name. return (domain == "line.me" || strings::EndsWith(domain, ".line.me")); } diff --git a/map/mwm_url.cpp b/map/mwm_url.cpp index 582ea513f8..69cc721c63 100644 --- a/map/mwm_url.cpp +++ b/map/mwm_url.cpp @@ -33,7 +33,7 @@ struct CampaignDescription } bool IsValid() const { return !m_from.empty() && !m_type.empty() && !m_name.empty(); } - + string m_from; string m_type; string m_name; @@ -85,7 +85,7 @@ ParsedMapApi::UrlType GetUrlType(url::Url const & url) if (std::find(kAvailableSchemes.begin(), kAvailableSchemes.end(), url.GetScheme()) == kAvailableSchemes.end()) return ParsedMapApi::UrlType::Incorrect; - auto const path = url.GetPath(); + auto const path = url.GetHost(); if (path == "map") return ParsedMapApi::UrlType::Map; if (path == "route") @@ -198,7 +198,7 @@ bool ParsedMapApi::Parse(url::Url const & url, UrlType type) }); if (request.m_query.empty()) return false; - + m_request = request; return true; }