[url] Fix URL class handling of invalid URLs

The current implementation incorrectly validates URLs without a host (e.g., 'https://')
and URLs with invalid schemes (e.g., '@&€:1;asf'). This change updates the URL
parsing logic to reject such URLs, ensuring that only valid URLs are accepted.

Fixes: #1566
Signed-off-by: Raghad Dahi <raghaddahi27@gmail.com>
This commit is contained in:
Raghad Dahi 2025-03-13 07:40:31 +02:00
parent d18a46ac6a
commit cafbe4aa08
3 changed files with 38 additions and 75 deletions

View file

@ -107,7 +107,7 @@ UNIT_TEST(Url_Invalid)
TEST(!Url("//").IsValid(), ());
// Updated tests for the new behavior:
TEST(!Url("scheme://").IsValid(), ()); // Now invalid with fix
TEST(!Url("scheme://").IsValid(), ());
TEST(!Url("@&€:1;asf").IsValid(), ());
TEST(!Url("123scheme:test").IsValid(), ());
TEST(!Url("scheme:").IsValid(), ());
@ -116,16 +116,14 @@ UNIT_TEST(Url_Invalid)
UNIT_TEST(Url_Valid)
{
// For URLs with no explicit path after the host, default path remains "/"
TestUrl("mapswithme://map?ll=10.3,12.3223&n=Hello%20World")
.Scheme("mapswithme")
.Host("map")
.Path("/")
.Path("")
.KV("ll", "10.3,12.3223")
.KV("n", "Hello World");
// For URLs with an explicit path, the path is returned as relative.
TestUrl("om:M&M//path?q=q&w=w")
TestUrl("om:M&M//path?q=q&w=w")
.Scheme("om")
.Host("M&M")
.Path("path") // Expected "path" (no leading slash)
@ -135,12 +133,12 @@ UNIT_TEST(Url_Valid)
TestUrl("http://www.sandwichparlour.com.au/")
.Scheme("http")
.Host("www.sandwichparlour.com.au")
.Path("/"); // Default path remains "/"
.Path("");
TestUrl("om:/&test")
.Scheme("om")
.Host("&test")
.Path("/"); // Default path
.Path("");
}
UNIT_TEST(Url_Fragment)
@ -163,28 +161,28 @@ UNIT_TEST(Url_Fragment)
UNIT_TEST(UrlScheme_Comprehensive)
{
TestUrl("");
TestUrl("scheme:host").Scheme("scheme").Host("host").Path("/"); // Added host and default path to satisfy RFC 3986 validation
TestUrl("scheme:/host").Scheme("scheme").Host("host").Path("/"); // Added host and default path, single slash format
TestUrl("scheme://host").Scheme("scheme").Host("host").Path("/"); // update it to have "/" as the path
TestUrl("scheme:host").Scheme("scheme").Host("host").Path("");
TestUrl("scheme:/host").Scheme("scheme").Host("host").Path("");
TestUrl("scheme://host").Scheme("scheme").Host("host").Path("");
TestUrl("sometext");
TestUrl(":noscheme");
TestUrl("://noscheme?");
TestUrl("mwm://?").Scheme("mwm").Host("").Path("/");
TestUrl("mwm://?").Scheme("mwm").Host("").Path("");
TestUrl("http://host/path/to/something").Scheme("http").Host("host").Path("path/to/something");
TestUrl("http://host?").Scheme("http").Host("host").Path("/");
TestUrl("http://host?").Scheme("http").Host("host").Path("");
TestUrl("maps://host?&&key=&").Scheme("maps").Host("host").KV("key", "");
TestUrl("mapswithme://map?ll=1.2,3.4&z=15").Scheme("mapswithme").Host("map").Path("/")
TestUrl("mapswithme://map?ll=1.2,3.4&z=15").Scheme("mapswithme").Host("map").Path("")
.KV("ll", "1.2,3.4").KV("z", "15");
TestUrl("nopathnovalues://?key1&key2=val2").Scheme("nopathnovalues").Host("").Path("/")
TestUrl("nopathnovalues://?key1&key2=val2").Scheme("nopathnovalues").Host("").Path("")
.KV("key1", "").KV("key2", "val2");
TestUrl("s://?key1&key2").Scheme("s").Host("").Path("/").KV("key1", "").KV("key2", "");
TestUrl("s://?key1&key2").Scheme("s").Host("").Path("").KV("key1", "").KV("key2", "");
TestUrl("g://h/p?key1=val1&key2=").Scheme("g").Host("h").Path("p").KV("key1", "val1").KV("key2", "");
TestUrl("g://h?=val1&key2=").Scheme("g").Host("h").Path("/").KV("", "val1").KV("key2", "");
TestUrl("g://?k&key2").Scheme("g").Host("").Path("/").KV("k", "").KV("key2", "");
TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30").Scheme("m").Host("").Path("/")
TestUrl("g://h?=val1&key2=").Scheme("g").Host("h").Path("").KV("", "val1").KV("key2", "");
TestUrl("g://?k&key2").Scheme("g").Host("").Path("").KV("k", "").KV("key2", "");
TestUrl("m:?%26Amp%26%3D%26Amp%26&name=%31%20%30").Scheme("m").Host("").Path("")
.KV("&Amp&=&Amp&", "").KV("name", "1 0");
TestUrl("s://?key1=value1&key1=value2&key1=value3&key2&key2&key3=value1&key3&key3=value2")
.Scheme("s").Host("").Path("/")
.Scheme("s").Host("").Path("")
.KV("key1", "value1").KV("key1", "value2").KV("key1", "value3")
.KV("key2", "").KV("key2", "")
.KV("key3", "value1").KV("key3", "").KV("key3", "value2");

View file

@ -48,11 +48,8 @@ bool Url::Parse(std::string const & url)
}
m_scheme = url.substr(0, start);
// Skip slashes.
start = url.find_first_not_of('/', start + 1);
if (start == std::string::npos)
{
return false; // This correctly rejects "scheme://" with nothing after it
@ -61,66 +58,39 @@ bool Url::Parse(std::string const & url)
// Check if there's a host or just a path.
size_t end = url.find_first_of("/?#", start);
if (end == start) // No host, just a path.
{
return false;
}
if (end == std::string::npos)
if (end == start) // prevent cases like: "scheme:///"
return false;
else if (end == std::string::npos)
{
m_host = url.substr(start);
if (m_host.empty()) // Ensure the host is not empty
{
return false;
}
m_path = "/"; // Default path
if (m_host.empty())
return false;
return true;
}
else
{
m_host = url.substr(start, end - start);
if (m_host.empty()) // Ensure the host is not empty
return false;
}
if (m_host.empty())
{
return false;// Ensure the host is not empty
}
// Get path.
if (url[end] == '/')
{
// Check if there's anything after the first slash
if (end + 1 == url.size() || url[end + 1] == '?' || url[end + 1] == '#')
// Skip slashes.
start = url.find_first_not_of('/', end);
if (start == std::string::npos)
return true;
end = url.find_first_of("?#", start);
if (end == string::npos)
{
// Path is "/"
m_path = "/";
m_path = url.substr(start);
return true;
}
else
{
// Skip slashes.
start = url.find_first_not_of('/', end);
if (start == std::string::npos)
{
m_path = "/"; // Just "/" when all slashes
return true;
}
end = url.find_first_of("?#", start);
if (end == string::npos)
{
m_path = "/" + url.substr(start); // Add leading slash
return true;
}
else
m_path = "/" + url.substr(start, end - start); // Add leading slash
}
m_path = url.substr(start, end - start);
}
else
{
// If no path was found, ensure it defaults to "/"
m_path = "/";
}
// Parse query/fragment for keys and values.
for (start = end + 1; start < url.size();)
@ -129,13 +99,11 @@ bool Url::Parse(std::string const & url)
if (end == string::npos)
end = url.size();
// Skip empty keys.
if (end != start)
{
size_t const eq = url.find('=', start);
string key;
string value;
if (eq != string::npos && eq < end)
@ -148,15 +116,12 @@ bool Url::Parse(std::string const & url)
key = UrlDecode(url.substr(start, end - start));
}
m_params.emplace_back(key, value);
}
start = end + 1;
}
return true;
}

View file

@ -18,7 +18,7 @@ public:
explicit Url(std::string const & url);
static Url FromString(std::string const & url);
bool IsValid() const { return !m_scheme.empty(); }
bool IsValid() const { return !m_scheme.empty() && !m_host.empty(); }
std::string const & GetScheme() const { return m_scheme; }
std::string const & GetHost() const { return m_host; }