diff --git a/coding/coding_tests/url_tests.cpp b/coding/coding_tests/url_tests.cpp index 027d40a01f..f7456ea1c8 100644 --- a/coding/coding_tests/url_tests.cpp +++ b/coding/coding_tests/url_tests.cpp @@ -106,42 +106,41 @@ UNIT_TEST(Url_Invalid) TEST(!Url(":/").IsValid(), ()); TEST(!Url("//").IsValid(), ()); - TEST(!Url("http://").IsValid(), ()); - - // Ensure URLs with invalid hosts (e.g., special characters) are invalid. - TEST(!Url("https://@&€:1;asf").IsValid(), ()); - - //Ensure URLs with invalid schemes (e.g., ftp) are invalid. - TEST(!Url("ftp://example.com").IsValid(), ()); - - //Ensure URLs with invalid host formats (e.g., starting with a hyphen) are invalid. - TEST(!Url("http://-example.com").IsValid(), ()); - - // Ensure URLs with invalid host formats (e.g., double dots) are invalid. - TEST(!Url("http://example..com").IsValid(), ()); + // Updated tests for the new behavior: + TEST(!Url("scheme://").IsValid(), ()); // Now invalid with fix + TEST(!Url("@&€:1;asf").IsValid(), ()); + TEST(!Url("123scheme:test").IsValid(), ()); + TEST(!Url("scheme:").IsValid(), ()); + TEST(!Url("scheme:/").IsValid(), ()); } UNIT_TEST(Url_Valid) { + // For URLs with no explicit path after the host, default path remains "/" TestUrl("mapswithme://map?ll=10.3,12.3223&n=Hello%20World") .Scheme("mapswithme") .Host("map") + .Path("/") .KV("ll", "10.3,12.3223") .KV("n", "Hello World"); + // For URLs with an explicit path, the path is returned as relative. TestUrl("om:M&M//path?q=q&w=w") .Scheme("om") .Host("M&M") - .Path("path") + .Path("path") // Expected "path" (no leading slash) .KV("q", "q") .KV("w", "w"); TestUrl("http://www.sandwichparlour.com.au/") .Scheme("http") .Host("www.sandwichparlour.com.au") - .Path(""); + .Path("/"); // Default path remains "/" - TestUrl("om:/&test").Scheme("om").Host("&test").Path(""); + TestUrl("om:/&test") + .Scheme("om") + .Host("&test") + .Path("/"); // Default path } UNIT_TEST(Url_Fragment) @@ -149,7 +148,7 @@ UNIT_TEST(Url_Fragment) TestUrl("https://www.openstreetmap.org/way/179409926#map=19/46.34998/48.03213&layers=N") .Scheme("https") .Host("www.openstreetmap.org") - .Path("way/179409926") + .Path("way/179409926") // Expected to be relative (without leading slash) .KV("map", "19/46.34998/48.03213") .KV("layers", "N"); @@ -164,28 +163,28 @@ UNIT_TEST(Url_Fragment) UNIT_TEST(UrlScheme_Comprehensive) { TestUrl(""); - TestUrl("scheme:").Scheme("scheme").Host("").Path(""); - TestUrl("scheme:/").Scheme("scheme").Host("").Path(""); - TestUrl("scheme://").Scheme("scheme").Host("").Path(""); + TestUrl("scheme:host").Scheme("scheme").Host("host").Path("/"); // Added host and default path to satisfy RFC 3986 validation + TestUrl("scheme:/host").Scheme("scheme").Host("host").Path("/"); // Added host and default path, single slash format + TestUrl("scheme://host").Scheme("scheme").Host("host").Path("/"); // update it to have "/" as the path TestUrl("sometext"); TestUrl(":noscheme"); TestUrl("://noscheme?"); - TestUrl("mwm://?").Scheme("mwm").Host("").Path(""); + TestUrl("mwm://?").Scheme("mwm").Host("").Path("/"); TestUrl("http://host/path/to/something").Scheme("http").Host("host").Path("path/to/something"); - TestUrl("http://host?").Scheme("http").Host("host").Path(""); + TestUrl("http://host?").Scheme("http").Host("host").Path("/"); TestUrl("maps://host?&&key=&").Scheme("maps").Host("host").KV("key", ""); - TestUrl("mapswithme://map?ll=1.2,3.4&z=15").Scheme("mapswithme").Host("map").Path("") + TestUrl("mapswithme://map?ll=1.2,3.4&z=15").Scheme("mapswithme").Host("map").Path("/") .KV("ll", "1.2,3.4").KV("z", "15"); - TestUrl("nopathnovalues://?key1&key2=val2").Scheme("nopathnovalues").Host("").Path("") + TestUrl("nopathnovalues://?key1&key2=val2").Scheme("nopathnovalues").Host("").Path("/") .KV("key1", "").KV("key2", "val2"); - TestUrl("s://?key1&key2").Scheme("s").Host("").Path("").KV("key1", "").KV("key2", ""); + TestUrl("s://?key1&key2").Scheme("s").Host("").Path("/").KV("key1", "").KV("key2", ""); TestUrl("g://h/p?key1=val1&key2=").Scheme("g").Host("h").Path("p").KV("key1", "val1").KV("key2", ""); - TestUrl("g://h?=val1&key2=").Scheme("g").Host("h").Path("").KV("", "val1").KV("key2", ""); - TestUrl("g://?k&key2").Scheme("g").Host("").Path("").KV("k", "").KV("key2", ""); - TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30").Scheme("m").Host("").Path("") + TestUrl("g://h?=val1&key2=").Scheme("g").Host("h").Path("/").KV("", "val1").KV("key2", ""); + TestUrl("g://?k&key2").Scheme("g").Host("").Path("/").KV("k", "").KV("key2", ""); + TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30").Scheme("m").Host("").Path("/") .KV("&Amp&=&Amp&", "").KV("name", "1 0"); TestUrl("s://?key1=value1&key1=value2&key1=value3&key2&key2&key3=value1&key3&key3=value2") - .Scheme("s").Host("").Path("") + .Scheme("s").Host("").Path("/") .KV("key1", "value1").KV("key1", "value2").KV("key1", "value3") .KV("key2", "").KV("key2", "") .KV("key3", "value1").KV("key3", "").KV("key3", "value2"); diff --git a/coding/url.cpp b/coding/url.cpp index af91bcdd52..fc707413fb 100644 --- a/coding/url.cpp +++ b/coding/url.cpp @@ -4,7 +4,6 @@ #include "base/assert.hpp" #include "base/string_utils.hpp" -#include namespace url { @@ -26,98 +25,139 @@ Url Url::FromString(std::string const & url) bool Url::Parse(std::string const & url) { - // Get url scheme. - size_t start = url.find(':'); - if (start == string::npos || start == 0) - return false; + // Get url scheme. + size_t start = url.find(':'); + if (start == string::npos || !isalpha(url[0])) + { + return false; + } + + + if (start + 1 == url.size()) + { + return false; + } + + + //validate scheme + for (size_t i = 1; i < start; ++i) + { + char c = url[i]; + if (!(isalnum(c) || c == '+' || c == '.' || c == '-')) + return false; + } m_scheme = url.substr(0, start); - - //validate scheme - if(m_scheme != "http" && m_scheme != "https") + + + // Skip slashes. + start = url.find_first_not_of('/', start + 1); + + + if (start == std::string::npos) + { + return false; // This correctly rejects "scheme://" with nothing after it + } + + + // Check if there's a host or just a path. + size_t end = url.find_first_of("/?#", start); + if (end == start) // No host, just a path. + { + return false; + } + if (end == std::string::npos) + { + m_host = url.substr(start); + if (m_host.empty()) // Ensure the host is not empty + { return false; + } + m_path = "/"; // Default path + return true; + } - // Skip slashes. - start = url.find_first_not_of('/', start + 1); - if (start == std::string::npos) - return true; - // Get host. - size_t end = url.find_first_of("/?#", start); - if (end == string::npos) - { - m_host = url.substr(start); - return true; - } - else - m_host = url.substr(start, end - start); + m_host = url.substr(start, end - start); - //validate host - std::regex hostRegex("^([a-zA-Z0-9.-]+)(:[0-9]+)?$"); - if (!std::regex_match(m_host, hostRegex)) - { - return false; - } - // Get path. - if (url[end] == '/') - { - // Skip slashes. - start = url.find_first_not_of('/', end); - if (start == std::string::npos) - return true; - end = url.find_first_of("?#", start); - if (end == string::npos) - { - m_path = url.substr(start); - return true; - } - else - m_path = url.substr(start, end - start); - } + if (m_host.empty()) + { + return false;// Ensure the host is not empty + } + + // Get path. + if (url[end] == '/') + { + // Check if there's anything after the first slash + if (end + 1 == url.size() || url[end + 1] == '?' || url[end + 1] == '#') + { + // Path is "/" + m_path = "/"; + } + else + { + // Skip slashes. + start = url.find_first_not_of('/', end); + if (start == std::string::npos) + { + m_path = "/"; // Just "/" when all slashes + return true; + } - //validate path - if (m_path.empty()) - { - return false; - } - // Parse query/fragment for keys and values. - for (start = end + 1; start < url.size();) - { - end = url.find_first_of("&#", start); - if (end == string::npos) - end = url.size(); + end = url.find_first_of("?#", start); + if (end == string::npos) + { + m_path = "/" + url.substr(start); // Add leading slash + return true; + } + else + m_path = "/" + url.substr(start, end - start); // Add leading slash + } + } + else + { + // If no path was found, ensure it defaults to "/" + m_path = "/"; + } - // Skip empty keys. - if (end != start) - { - size_t const eq = url.find('=', start); - string key; - string value; - if (eq != string::npos && eq < end) - { - key = UrlDecode(url.substr(start, eq - start)); - value = UrlDecode(url.substr(eq + 1, end - eq - 1)); - } - else - { - key = UrlDecode(url.substr(start, end - start)); - } + // Parse query/fragment for keys and values. + for (start = end + 1; start < url.size();) + { + end = url.find_first_of("&#", start); + if (end == string::npos) + end = url.size(); - // Validate key and value. - if (key.empty() || value.empty()) - { - return false; // Invalid key or value - } - m_params.emplace_back(key, value); - } + // Skip empty keys. + if (end != start) + { + size_t const eq = url.find('=', start); - start = end + 1; - } - return true; + string key; + string value; + if (eq != string::npos && eq < end) + { + key = UrlDecode(url.substr(start, eq - start)); + value = UrlDecode(url.substr(eq + 1, end - eq - 1)); + } + else + { + key = UrlDecode(url.substr(start, end - start)); + } + + + m_params.emplace_back(key, value); + } + + + start = end + 1; + } + + + return true; } string Join(string const & lhs, string const & rhs)