From a0a86a2a1dc99b86d45f29ab320bbda25f15dc82 Mon Sep 17 00:00:00 2001 From: Viktor Govako Date: Sat, 22 Jan 2022 16:30:21 +0300 Subject: [PATCH] Geo URL parsing refactoring with geo::UnifiedParser. Signed-off-by: Viktor Govako --- base/string_utils.cpp | 22 +- coding/coding_tests/url_tests.cpp | 13 + coding/url.cpp | 6 - coding/url.hpp | 18 +- ge0/ge0_tests/geo_url_tests.cpp | 66 +++-- ge0/geo_url_parser.cpp | 415 ++++++++++++------------------ ge0/geo_url_parser.hpp | 67 ++++- map/framework.cpp | 3 +- search/processor.cpp | 6 +- search/processor.hpp | 2 +- 10 files changed, 315 insertions(+), 303 deletions(-) diff --git a/base/string_utils.cpp b/base/string_utils.cpp index 11ead2fd28..0f76f9612b 100644 --- a/base/string_utils.cpp +++ b/base/string_utils.cpp @@ -191,21 +191,19 @@ void NormalizeDigits(UniString & us) } } -namespace +void AsciiToLower(std::string & s) { -char ascii_to_lower(char in) -{ - char const diff = 'z' - 'Z'; - static_assert(diff == 'a' - 'A', ""); - static_assert(diff > 0, ""); + std::transform(s.begin(), s.end(), s.begin(), [](char in) + { + char constexpr diff = 'z' - 'Z'; + static_assert(diff == 'a' - 'A', ""); + static_assert(diff > 0, ""); - if (in >= 'A' && in <= 'Z') - return (in + diff); - return in; + if (in >= 'A' && in <= 'Z') + return char(in + diff); + return in; + }); } -} // namespace - -void AsciiToLower(std::string & s) { transform(s.begin(), s.end(), s.begin(), &ascii_to_lower); } std::string & TrimLeft(std::string & s) { diff --git a/coding/coding_tests/url_tests.cpp b/coding/coding_tests/url_tests.cpp index a0bf1afd62..4afded9446 100644 --- a/coding/coding_tests/url_tests.cpp +++ b/coding/coding_tests/url_tests.cpp @@ -148,4 +148,17 @@ UNIT_TEST(UrlScheme_Comprehensive) .KV("key2", "").KV("key2", "") .KV("key3", "value1").KV("key3", "").KV("key3", "value2"); } + +UNIT_TEST(UrlApi_Smoke) +{ + url::Url url("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15.232"); + TEST_EQUAL(url.GetScheme(), "https", ()); + TEST_EQUAL(url.GetPath(), "2gis.ru/moscow/firm/4504127908589159", ()); + TEST_EQUAL(url.GetWebDomain(), "2gis.ru", ()); + TEST_EQUAL(url.GetWebPath(), "moscow/firm/4504127908589159", ()); + + TEST(url.GetLastParam(), ()); + TEST(url.GetParamValue("m"), ()); +} + } // namespace url_tests diff --git a/coding/url.cpp b/coding/url.cpp index 7f59120695..9a99a2ee15 100644 --- a/coding/url.cpp +++ b/coding/url.cpp @@ -111,12 +111,6 @@ string Url::GetWebPath() const return {}; } -void Url::ForEachParam(Callback const & callback) const -{ - for (auto const & param : m_params) - callback(param); -} - string Make(string const & baseUrl, Params const & params) { ostringstream os; diff --git a/coding/url.hpp b/coding/url.hpp index d9691efc00..fe2e236880 100644 --- a/coding/url.hpp +++ b/coding/url.hpp @@ -26,8 +26,6 @@ using Params = std::vector; class Url { public: - using Callback = std::function; - explicit Url(std::string const & url); static Url FromString(std::string const & url); @@ -36,8 +34,20 @@ public: std::string GetWebDomain() const; std::string GetWebPath() const; bool IsValid() const { return !m_scheme.empty(); } - void ForEachParam(Callback const & callback) const; - const std::vector & Params() const { return m_params; } + template void ForEachParam(FnT && fn) const + { + for (auto const & p : m_params) + fn(p); + } + + Param const * GetLastParam() const { return m_params.empty() ? nullptr : &m_params.back(); } + std::string const * GetParamValue(std::string const & name) const + { + for (auto const & p : m_params) + if (p.m_name == name) + return &p.m_value; + return nullptr; + } private: bool Parse(std::string const & url); diff --git a/ge0/ge0_tests/geo_url_tests.cpp b/ge0/ge0_tests/geo_url_tests.cpp index 1ad31e5316..87189cf80a 100644 --- a/ge0/ge0_tests/geo_url_tests.cpp +++ b/ge0/ge0_tests/geo_url_tests.cpp @@ -11,38 +11,37 @@ using namespace geo; UNIT_TEST(GeoURL_Smoke) { - GeoURLInfo info; + UnifiedParser parser; - info.Parse("geo:53.666,27.666"); + GeoURLInfo info = parser.Parse("geo:53.666,27.666"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 53.666, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 27.666, kEps, ()); - info.Parse("geo://point/?lon=27.666&lat=53.666&zoom=10"); + info = parser.Parse("geo://point/?lon=27.666&lat=53.666&zoom=10"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 53.666, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 27.666, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 10.0, kEps, ()); - info.Parse("geo:53.666"); + info = parser.Parse("geo:53.666"); TEST(!info.IsValid(), ()); - info.Parse("mapswithme:123.33,32.22/showmethemagic"); + info = parser.Parse("mapswithme:123.33,32.22/showmethemagic"); TEST(!info.IsValid(), ()); - info.Parse("mapswithme:32.22, 123.33/showmethemagic"); + info = parser.Parse("mapswithme:32.22, 123.33/showmethemagic"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 32.22, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 123.33, kEps, ()); - info.Parse("model: iphone 7,1"); + info = parser.Parse("model: iphone 7,1"); TEST(!info.IsValid(), ()); } UNIT_TEST(GeoURL_Instagram) { - GeoURLInfo info; - info.Parse("geo:0,0?z=14&q=54.683486138,25.289361259 (Forto%20dvaras)"); + GeoURLInfo info = UnifiedParser().Parse("geo:0,0?z=14&q=54.683486138,25.289361259 (Forto%20dvaras)"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 54.683486138, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 25.289361259, kEps, ()); @@ -51,28 +50,28 @@ UNIT_TEST(GeoURL_Instagram) UNIT_TEST(GeoURL_GoogleMaps) { - GeoURLInfo info; + UnifiedParser parser; - info.Parse("https://maps.google.com/maps?z=16&q=Mezza9%401.3067198,103.83282"); + GeoURLInfo info = parser.Parse("https://maps.google.com/maps?z=16&q=Mezza9%401.3067198,103.83282"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 1.3067198, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 103.83282, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 16.0, kEps, ()); - info.Parse("https://maps.google.com/maps?z=16&q=House+of+Seafood+%40+180%401.356706,103.87591"); + info = parser.Parse("https://maps.google.com/maps?z=16&q=House+of+Seafood+%40+180%401.356706,103.87591"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 1.356706, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 103.87591, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 16.0, kEps, ()); - info.Parse("https://www.google.com/maps/place/Falafel+M.+Sahyoun/@33.8904447,35.5044618,16z"); + info = parser.Parse("https://www.google.com/maps/place/Falafel+M.+Sahyoun/@33.8904447,35.5044618,16z"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 33.8904447, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 35.5044618, kEps, ()); // Sic: zoom is not parsed //TEST_ALMOST_EQUAL_ABS(info.m_zoom, 16.0, kEps, ()); - info.Parse("https://www.google.com/maps?q=55.751809,37.6130029"); + info = parser.Parse("https://www.google.com/maps?q=55.751809,37.6130029"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 55.751809, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 37.6130029, kEps, ()); @@ -80,27 +79,27 @@ UNIT_TEST(GeoURL_GoogleMaps) UNIT_TEST(GeoURL_2GIS) { - GeoURLInfo info; + UnifiedParser parser; - info.Parse("https://2gis.ru/moscow/firm/4504127908589159/center/37.6186,55.7601/zoom/15.9764"); + GeoURLInfo info = parser.Parse("https://2gis.ru/moscow/firm/4504127908589159/center/37.6186,55.7601/zoom/15.9764"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 55.7601, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 37.6186, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 15.9764, kEps, ()); - info.Parse("https://2gis.ru/moscow/firm/4504127908589159/center/37,55/zoom/15"); + info = parser.Parse("https://2gis.ru/moscow/firm/4504127908589159/center/37,55/zoom/15"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 55.0, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 37.0, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 15.0, kEps, ()); - info.Parse("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15.232"); + info = parser.Parse("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15.232"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 55.760069, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 37.618632, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 15.232, kEps, ()); - info.Parse("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15"); + info = parser.Parse("https://2gis.ru/moscow/firm/4504127908589159?m=37.618632%2C55.760069%2F15"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 55.760069, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 37.618632, kEps, ()); @@ -109,25 +108,36 @@ UNIT_TEST(GeoURL_2GIS) UNIT_TEST(GeoURL_OpenStreetMap) { - GeoURLInfo info; + UnifiedParser parser; - info.Parse("https://www.openstreetmap.org/#map=16/33.89041/35.50664"); + GeoURLInfo info = parser.Parse("https://www.openstreetmap.org/#map=16/33.89041/35.50664"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 33.89041, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 35.50664, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 16.0, kEps, ()); - info.Parse("https://www.openstreetmap.org/search?query=Falafel%20Sahyoun#map=16/33.89041/35.50664"); + info = parser.Parse("https://www.openstreetmap.org/search?query=Falafel%20Sahyoun#map=16/33.89041/35.50664"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 33.89041, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 35.50664, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 16.0, kEps, ()); + + info = parser.Parse("https://openstreetmap.ru/#map=19/53.90323/27.55806"); + TEST(info.IsValid(), ()); + TEST_ALMOST_EQUAL_ABS(info.m_lat, 53.90323, kEps, ()); + TEST_ALMOST_EQUAL_ABS(info.m_lon, 27.55806, kEps, ()); + TEST_ALMOST_EQUAL_ABS(info.m_zoom, 17.0, kEps, ()); + + info = parser.Parse("https://www.openstreetmap.org/way/45394171#map=10/34.67379/33.04422"); + TEST(info.IsValid(), ()); + TEST_ALMOST_EQUAL_ABS(info.m_lat, 34.67379, kEps, ()); + TEST_ALMOST_EQUAL_ABS(info.m_lon, 33.04422, kEps, ()); + TEST_ALMOST_EQUAL_ABS(info.m_zoom, 10.0, kEps, ()); } UNIT_TEST(GeoURL_CaseInsensitive) { - GeoURLInfo info; - info.Parse("geo:52.23405,21.01547?Z=10"); + GeoURLInfo info = UnifiedParser().Parse("geo:52.23405,21.01547?Z=10"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 52.23405, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 21.01547, kEps, ()); @@ -136,21 +146,21 @@ UNIT_TEST(GeoURL_CaseInsensitive) UNIT_TEST(GeoURL_BadZoom) { - GeoURLInfo info; + UnifiedParser parser; - info.Parse("geo:52.23405,21.01547?Z=19"); + GeoURLInfo info = parser.Parse("geo:52.23405,21.01547?Z=19"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 52.23405, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 21.01547, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 17.0, kEps, ()); - info.Parse("geo:52.23405,21.01547?Z=nineteen"); + info = parser.Parse("geo:52.23405,21.01547?Z=nineteen"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 52.23405, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 21.01547, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_zoom, 17.0, kEps, ()); - info.Parse("geo:52.23405,21.01547?Z=-1"); + info = parser.Parse("geo:52.23405,21.01547?Z=-1"); TEST(info.IsValid(), ()); TEST_ALMOST_EQUAL_ABS(info.m_lat, 52.23405, kEps, ()); TEST_ALMOST_EQUAL_ABS(info.m_lon, 21.01547, kEps, ()); diff --git a/ge0/geo_url_parser.cpp b/ge0/geo_url_parser.cpp index fa12928beb..7ce4f45d1e 100644 --- a/ge0/geo_url_parser.cpp +++ b/ge0/geo_url_parser.cpp @@ -2,14 +2,9 @@ #include "geometry/mercator.hpp" -#include "coding/url.hpp" - #include "base/assert.hpp" #include "base/string_utils.hpp" -#include - - namespace geo { using namespace std; @@ -25,157 +20,6 @@ double const kInvalidCoord = -1000.0; // maximal zoom levels. double const kMaxZoom = 17.0; -class LatLonParser -{ -public: - LatLonParser(url::Url const & url, GeoURLInfo & info) - : m_info(info) - , m_url(url) - , m_regexp("-?\\d+\\.{1}\\d*, *-?\\d+\\.{1}\\d*") - , m_latPriority(-1) - , m_lonPriority(-1) - { - } - - url::Url const & GetUrl() const { return m_url; } - - bool IsValid() const - { - return m_latPriority == m_lonPriority && m_latPriority != -1; - } - - void operator()(url::Param const & param) - { - auto name = param.m_name; - strings::AsciiToLower(name); - if (name == "z" || name == "zoom") - { - double x; - if (strings::to_double(param.m_value, x)) - m_info.SetZoom(x); - return; - } - - int const priority = GetCoordinatesPriority(name); - if (priority == -1 || priority < m_latPriority || priority < m_lonPriority) - return; - - if (priority != kXYPriority && priority != kLatLonPriority) - { - strings::ForEachMatched(param.m_value, m_regexp, AssignCoordinates(*this, priority)); - return; - } - - double x; - if (strings::to_double(param.m_value, x)) - { - if (name == "lat" || name == "y") - { - if (!m_info.SetLat(x)) - return; - m_latPriority = priority; - } - else - { - ASSERT(name == "lon" || name == "x", (param.m_name, name)); - if (!m_info.SetLon(x)) - return; - m_lonPriority = priority; - } - } - } - -private: - class AssignCoordinates - { - public: - AssignCoordinates(LatLonParser & parser, int priority) : m_parser(parser), m_priority(priority) - { - } - - void operator()(string const & token) const - { - double lat; - double lon; - - string::size_type n = token.find(','); - if (n == string::npos) - return; - VERIFY(strings::to_double(token.substr(0, n), lat), ()); - - n = token.find_first_not_of(", ", n); - if (n == string::npos) - return; - VERIFY(strings::to_double(token.substr(n, token.size() - n), lon), ()); - - SwapIfNeeded(lat, lon); - - if (m_parser.m_info.SetLat(lat) && m_parser.m_info.SetLon(lon)) - { - m_parser.m_latPriority = m_priority; - m_parser.m_lonPriority = m_priority; - } - } - - void SwapIfNeeded(double & lat, double & lon) const - { - vector const kSwappingProviders = {"2gis", "yandex"}; - for (auto const & s : kSwappingProviders) - { - if (m_parser.GetUrl().GetPath().find(s) != string::npos) - { - swap(lat, lon); - break; - } - } - } - - private: - LatLonParser & m_parser; - int m_priority; - }; - - // Usually (lat, lon), but some providers use (lon, lat). - inline static int const kLLPriority = 5; - // We do not try to guess the projection and do not interpret (x, y) - // as Mercator coordinates in URLs. We simply use (y, x) for (lat, lon). - inline static int const kXYPriority = 6; - inline static int const kLatLonPriority = 7; - - // Priority for accepting coordinates if we have many choices. - // -1 - not initialized - // 0 - coordinates in path; - // x - priority for query type (greater is better) - int GetCoordinatesPriority(string const & token) - { - if (token.empty()) - return 0; - if (token == "q" || token == "m") - return 1; - if (token == "saddr" || token == "daddr") - return 2; - if (token == "sll") - return 3; - if (token.find("point") != string::npos) - return 4; - if (token == "ll") - return kLLPriority; - if (token == "x" || token == "y") - return kXYPriority; - if (token == "lat" || token == "lon") - return kLatLonPriority; - - return -1; - } - - GeoURLInfo & m_info; - url::Url const & m_url; - regex m_regexp; - int m_latPriority; - int m_lonPriority; -}; // class LatLongParser - - bool MatchLatLonZoom(const string & s, const regex & re, size_t lati, size_t loni, size_t zoomi, GeoURLInfo & info) { std::smatch m; @@ -194,102 +38,150 @@ bool MatchLatLonZoom(const string & s, const regex & re, size_t lati, size_t lon return true; } -class DoubleGISParser +bool EqualWebDomain(url::Url const & url, char const * domain) { -public: - DoubleGISParser() - : m_pathRe("/(\\d+\\.?\\d*),(\\d+\\.?\\d*)/zoom/(\\d+\\.?\\d*)"), - m_paramRe("(\\d+\\.?\\d*),(\\d+\\.?\\d*)/(\\d+\\.?\\d*)") - { - } + return url.GetWebDomain().find(domain) != std::string::npos; +} +} // namespace - bool Parse(url::Url const & url, GeoURLInfo & info) - { - // Try m=$lon,$lat/$zoom first - for (auto const & param : url.Params()) - { - if (param.m_name == "m") - { - if (MatchLatLonZoom(param.m_value, m_paramRe, 2, 1, 3, info)) - return true; - break; - } - } - - // Parse /$lon,$lat/zoom/$zoom from path next - if (MatchLatLonZoom(url.GetPath(), m_pathRe, 2, 1, 3, info)) - return true; - - return false; - } - -private: - regex m_pathRe; - regex m_paramRe; -}; // Class DoubleGISParser - -class OpenStreetMapParser -{ -public: - OpenStreetMapParser() - : m_regex("#map=(\\d+\\.?\\d*)/(\\d+\\.\\d+)/(\\d+\\.\\d+)") - { - } - - bool Parse(url::Url const & url, GeoURLInfo & info) - { - if (MatchLatLonZoom(url.GetPath(), m_regex, 2, 3, 1, info)) - return true; - // Check if "#map=" fragment is attached to the last param in Url - if (!url.Params().empty() && MatchLatLonZoom(url.Params().back().m_value, m_regex, 2, 3, 1, info)) - return true; - return false; - } - -private: - regex m_regex; -}; // Class OpenStreetMapParser - -} // namespace - - -GeoURLInfo::GeoURLInfo() +LatLonParser::LatLonParser() +: m_info(nullptr), m_url(nullptr) +, m_regexp("-?\\d+\\.{1}\\d*, *-?\\d+\\.{1}\\d*") { } -void GeoURLInfo::Parse(string const & s) +void LatLonParser::Reset(url::Url const & url, GeoURLInfo & info) +{ + m_info = &info; + m_url = &url; + m_latPriority = m_lonPriority = -1; +} + +bool LatLonParser::IsValid() const +{ + return m_latPriority == m_lonPriority && m_latPriority != -1; +} + +void LatLonParser::operator()(url::Param const & param) +{ + auto name = param.m_name; + strings::AsciiToLower(name); + if (name == "z" || name == "zoom") + { + double x; + if (strings::to_double(param.m_value, x)) + m_info->SetZoom(x); + return; + } + + int const priority = GetCoordinatesPriority(name); + if (priority == -1 || priority < m_latPriority || priority < m_lonPriority) + return; + + if (priority != kXYPriority && priority != kLatLonPriority) + { + strings::ForEachMatched(param.m_value, m_regexp, [this, priority](string const & token) + { + double lat; + double lon; + + size_t n = token.find(','); + if (n == string::npos) + return; + VERIFY(strings::to_double(token.substr(0, n), lat), ()); + + n = token.find_first_not_of(", ", n); + if (n == string::npos) + return; + VERIFY(strings::to_double(token.substr(n, token.size() - n), lon), ()); + + if (EqualWebDomain(*m_url, "2gis") || EqualWebDomain(*m_url, "yandex")) + std::swap(lat, lon); + + if (m_info->SetLat(lat) && m_info->SetLon(lon)) + { + m_latPriority = priority; + m_lonPriority = priority; + } + }); + + return; + } + + double x; + if (strings::to_double(param.m_value, x)) + { + if (name == "lat" || name == "y") + { + if (m_info->SetLat(x)) + m_latPriority = priority; + } + else + { + ASSERT(name == "lon" || name == "x", (param.m_name, name)); + if (m_info->SetLon(x)) + m_lonPriority = priority; + } + } +} + +int LatLonParser::GetCoordinatesPriority(string const & token) +{ + if (token.empty()) + return 0; + if (token == "q" || token == "m") + return 1; + if (token == "saddr" || token == "daddr") + return 2; + if (token == "sll") + return 3; + if (token.find("point") != string::npos) + return 4; + if (token == "ll") + return kLLPriority; + if (token == "x" || token == "y") + return kXYPriority; + if (token == "lat" || token == "lon") + return kLatLonPriority; + + return -1; +} + +DoubleGISParser::DoubleGISParser() +: m_pathRe("/(\\d+\\.?\\d*),(\\d+\\.?\\d*)/zoom/(\\d+\\.?\\d*)") +, m_paramRe("(\\d+\\.?\\d*),(\\d+\\.?\\d*)/(\\d+\\.?\\d*)") +{ +} + +bool DoubleGISParser::Parse(url::Url const & url, GeoURLInfo & info) const +{ + // Try m=$lon,$lat/$zoom first + auto const * value = url.GetParamValue("m"); + if (value && MatchLatLonZoom(*value, m_paramRe, 2, 1, 3, info)) + return true; + + // Parse /$lon,$lat/zoom/$zoom from path next + return MatchLatLonZoom(url.GetPath(), m_pathRe, 2, 1, 3, info); +} + +OpenStreetMapParser::OpenStreetMapParser() +: m_regex("#map=(\\d+\\.?\\d*)/(\\d+\\.\\d+)/(\\d+\\.\\d+)") +{ +} + +bool OpenStreetMapParser::Parse(url::Url const & url, GeoURLInfo & info) const +{ + if (MatchLatLonZoom(url.GetPath(), m_regex, 2, 3, 1, info)) + return true; + + // Check if "#map=" fragment is attached to the last param in Url + auto const * last = url.GetLastParam(); + return (last && MatchLatLonZoom(last->m_value, m_regex, 2, 3, 1, info)); +} + +GeoURLInfo::GeoURLInfo() { Reset(); - - url::Url url(s); - if (!url.IsValid()) - return; - - if (url.GetScheme() == "https" || url.GetScheme() == "http") - { - if (url.GetWebDomain().find("2gis") != string::npos) - { - DoubleGISParser parser; - if (parser.Parse(url, *this)) - return; - } - else if (url.GetWebDomain().find("openstreetmap.org") != string::npos) - { - OpenStreetMapParser parser; - if (parser.Parse(url, *this)) - return; - } - } - - LatLonParser parser(url, *this); - parser(url::Param(string(), url.GetPath())); - url.ForEachParam(ref(parser)); - - if (!parser.IsValid()) - { - Reset(); - return; - } } bool GeoURLInfo::IsValid() const @@ -328,4 +220,37 @@ bool GeoURLInfo::SetLon(double x) } return false; } + +GeoURLInfo UnifiedParser::Parse(string const & s) +{ + GeoURLInfo res; + + url::Url url(s); + if (!url.IsValid()) + return res; + + if (url.GetScheme() == "https" || url.GetScheme() == "http") + { + if (EqualWebDomain(url, "2gis")) + { + if (m_dgParser.Parse(url, res)) + return res; + } + else if (EqualWebDomain(url, "openstreetmap")) + { + if (m_osmParser.Parse(url, res)) + return res; + } + } + + m_llParser.Reset(url, res); + m_llParser({{}, url.GetPath()}); + url.ForEachParam(m_llParser); + + if (!m_llParser.IsValid()) + res.Reset(); + + return res; +} + } // namespace geo diff --git a/ge0/geo_url_parser.hpp b/ge0/geo_url_parser.hpp index a7eaffce01..50ccebcd05 100644 --- a/ge0/geo_url_parser.hpp +++ b/ge0/geo_url_parser.hpp @@ -1,5 +1,8 @@ #pragma once +#include "coding/url.hpp" + +#include #include namespace geo @@ -10,8 +13,6 @@ class GeoURLInfo public: GeoURLInfo(); - void Parse(std::string const & s); - bool IsValid() const; void Reset(); @@ -24,4 +25,66 @@ public: double m_zoom; }; +class DoubleGISParser +{ +public: + DoubleGISParser(); + bool Parse(url::Url const & url, GeoURLInfo & info) const; + +private: + std::regex m_pathRe; + std::regex m_paramRe; +}; + +class OpenStreetMapParser +{ +public: + OpenStreetMapParser(); + bool Parse(url::Url const & url, GeoURLInfo & info) const; + +private: + std::regex m_regex; +}; + +class LatLonParser +{ +public: + LatLonParser(); + void Reset(url::Url const & url, GeoURLInfo & info); + + bool IsValid() const; + void operator()(url::Param const & param); + +private: + // Usually (lat, lon), but some providers use (lon, lat). + static int constexpr kLLPriority = 5; + // We do not try to guess the projection and do not interpret (x, y) + // as Mercator coordinates in URLs. We simply use (y, x) for (lat, lon). + static int constexpr kXYPriority = 6; + static int constexpr kLatLonPriority = 7; + + // Priority for accepting coordinates if we have many choices. + // -1 - not initialized + // 0 - coordinates in path; + // x - priority for query type (greater is better) + static int GetCoordinatesPriority(std::string const & token); + + GeoURLInfo * m_info; + url::Url const * m_url; + std::regex m_regexp; + int m_latPriority; + int m_lonPriority; +}; + +class UnifiedParser +{ +public: + GeoURLInfo Parse(std::string const & url); + +private: + DoubleGISParser m_dgParser; + OpenStreetMapParser m_osmParser; + LatLonParser m_llParser; +}; + } // namespace geo diff --git a/map/framework.cpp b/map/framework.cpp index a71d9be9ad..ae15316dd7 100644 --- a/map/framework.cpp +++ b/map/framework.cpp @@ -1788,8 +1788,7 @@ bool Framework::ShowMapForURL(string const & url) } else // Actually, we can parse any geo url scheme with correct coordinates. { - geo::GeoURLInfo info; - info.Parse(url); + geo::GeoURLInfo const info = geo::UnifiedParser().Parse(url); if (info.IsValid()) { point = mercator::FromLatLon(info.m_lat, info.m_lon); diff --git a/search/processor.cpp b/search/processor.cpp index e5ced1f623..9cb756f963 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -691,9 +691,9 @@ void Processor::SearchCoordinates() if (parser.Parse(token, r)) results.emplace_back(r.m_lat, r.m_lon); - m_geoUrlParser.Parse(token); - if (m_geoUrlParser.IsValid()) - results.emplace_back(m_geoUrlParser.m_lat, m_geoUrlParser.m_lon); + geo::GeoURLInfo const info = m_geoUrlParser.Parse(token); + if (info.IsValid()) + results.emplace_back(info.m_lat, info.m_lon); } base::SortUnique(results); diff --git a/search/processor.hpp b/search/processor.hpp index 8758afa9d3..564bfbc314 100644 --- a/search/processor.hpp +++ b/search/processor.hpp @@ -185,6 +185,6 @@ protected: bookmarks::Processor m_bookmarksProcessor; - geo::GeoURLInfo m_geoUrlParser; + geo::UnifiedParser m_geoUrlParser; }; } // namespace search