diff --git a/generator/osm2meta.cpp b/generator/osm2meta.cpp
index 216b379c70..e92a93a85d 100644
--- a/generator/osm2meta.cpp
+++ b/generator/osm2meta.cpp
@@ -534,6 +534,8 @@ void MetadataTagProcessor::operator()(std::string const & k, std::string const &
   case Metadata::FMD_CONTACT_TWITTER: valid = osm::ValidateAndFormat_twitter(v); break;
   case Metadata::FMD_CONTACT_VK: valid = osm::ValidateAndFormat_vk(v); break;
   case Metadata::FMD_CONTACT_LINE: valid = osm::ValidateAndFormat_contactLine(v); break;
+  case Metadata::FMD_CONTACT_FEDIVERSE: valid = osm::ValidateAndFormat_fediverse(v); break;
+  case Metadata::FMD_CONTACT_BLUESKY: valid = osm::ValidateAndFormat_bluesky(v); break;
   case Metadata::FMD_INTERNET: valid = ValidateAndFormat_internet(v); break;
   case Metadata::FMD_ELE: valid = ValidateAndFormat_ele(v); break;
   case Metadata::FMD_DESTINATION: valid = ValidateAndFormat_destination(v); break;
diff --git a/indexer/feature_meta.cpp b/indexer/feature_meta.cpp
index 983492cffe..c8057befea 100644
--- a/indexer/feature_meta.cpp
+++ b/indexer/feature_meta.cpp
@@ -95,6 +95,10 @@ bool Metadata::TypeFromString(string_view k, Metadata::EType & outType)
     outType = Metadata::FMD_CONTACT_VK;
   else if (k == "contact:line")
     outType = Metadata::FMD_CONTACT_LINE;
+  else if (k == "contact:mastodon")
+    outType = Metadata::FMD_CONTACT_FEDIVERSE;
+  else if (k == "contact:bluesky")
+    outType = Metadata::FMD_CONTACT_BLUESKY;
   else if (k == "internet_access" || k == "wifi")
     outType = Metadata::FMD_INTERNET;
   else if (k == "ele")
@@ -264,6 +268,8 @@ string ToString(Metadata::EType type)
   case Metadata::FMD_CONTACT_TWITTER: return "contact:twitter";
   case Metadata::FMD_CONTACT_VK: return "contact:vk";
   case Metadata::FMD_CONTACT_LINE: return "contact:line";
+  case Metadata::FMD_CONTACT_FEDIVERSE: return "contact:mastodon";
+  case Metadata::FMD_CONTACT_BLUESKY: return "contact:bluesky";
   case Metadata::FMD_DESTINATION: return "destination";
   case Metadata::FMD_DESTINATION_REF: return "destination:ref";
   case Metadata::FMD_JUNCTION_REF: return "junction:ref";
diff --git a/indexer/feature_meta.hpp b/indexer/feature_meta.hpp
index 17312b69a2..ad024ebd0e 100644
--- a/indexer/feature_meta.hpp
+++ b/indexer/feature_meta.hpp
@@ -155,6 +155,8 @@ public:
     FMD_SELF_SERVICE = 47,
     FMD_OUTDOOR_SEATING = 48,
     FMD_NETWORK = 49,
+    FMD_CONTACT_FEDIVERSE = 50,
+    FMD_CONTACT_BLUESKY = 51,
     FMD_COUNT
   };
 
diff --git a/indexer/indexer_tests/validate_and_format_contacts_test.cpp b/indexer/indexer_tests/validate_and_format_contacts_test.cpp
index 54ffd1a030..2896c52d85 100644
--- a/indexer/indexer_tests/validate_and_format_contacts_test.cpp
+++ b/indexer/indexer_tests/validate_and_format_contacts_test.cpp
@@ -108,6 +108,40 @@ UNIT_TEST(EditableMapObject_ValidateAndFormat_contactLine)
   TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.com/ti/p/invalid-domain"), "", ());
 }
 
+UNIT_TEST(EditableMapObject_ValidateAndFormat_fediverse)
+{
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("https://fosstodon.org/@organicmaps"), "organicmaps@fosstodon.org", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("https://fosstodon.org/users/organicmaps"), "organicmaps@fosstodon.org", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("http://fosstodon.org/users/organicmaps"), "organicmaps@fosstodon.org", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("fosstodon.org/users/organicmaps"), "organicmaps@fosstodon.org", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("organicmaps@fosstodon.org"), "organicmaps@fosstodon.org", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("@organicmaps@fosstodon.org"), "organicmaps@fosstodon.org", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("@organicmaps@fosstodon.org.uk"), "organicmaps@fosstodon.org.uk", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("pub.mastodon.org.uk/@organicmaps"), "organicmaps@pub.mastodon.org.uk", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("pub.mastodon.org.uk/users/@organicmaps"), "organicmaps@pub.mastodon.org.uk", ());
+
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("organicmaps@fosstodon@mastodon.org"), "", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("orga$nicmaps@mastodon.social"), "", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("pub.mastodon.org.uk/organicmaps"), "", ());
+  TEST_EQUAL(osm::ValidateAndFormat_fediverse("pub.mastodon.org.uk/users/"), "", ());
+}
+
+UNIT_TEST(EditableMapObject_ValidateAndFormat_bluesky)
+{
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("organicmaps.bsky.social"), "organicmaps.bsky.social", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("@organicmaps.bsky.social"), "organicmaps.bsky.social", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmaps.bsky.social"), "organicmaps.bsky.social", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/@organicmaps.bsky.social"), "organicmaps.bsky.social", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("http://bsky.app/profile/organicmaps.bsky.social"), "organicmaps.bsky.social", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("bsky.app/profile/organicmaps.bsky.social"), "organicmaps.bsky.social", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmaps.bsky.social/"), "organicmaps.bsky.social", ());
+
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmap$.bsky.social"), "", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmaps.bsky.social$"), "", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/pineapple/organicmaps.bsky.social"), "", ());
+  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app.example.com/profile/organicmaps.bsky.social"), "", ());
+}
+
 UNIT_TEST(EditableMapObject_ValidateFacebookPage)
 {
   TEST(osm::ValidateFacebookPage(""), ());
@@ -262,6 +296,40 @@ UNIT_TEST(EditableMapObject_ValidateLinePage)
   TEST(!osm::ValidateLinePage("https://line.com/ti/p/invalid-domain"), ());
 }
 
+UNIT_TEST(EditableMapObject_ValidateFediversePage)
+{
+  TEST(osm::ValidateFediversePage("https://fosstodon.org/@organicmaps"), ());
+  TEST(osm::ValidateFediversePage("https://fosstodon.org/users/organicmaps"), ());
+  TEST(osm::ValidateFediversePage("http://fosstodon.org/users/organicmaps"), ());
+  TEST(osm::ValidateFediversePage("fosstodon.org/users/organicmaps"), ());
+  TEST(osm::ValidateFediversePage("organicmaps@fosstodon.org"), ());
+  TEST(osm::ValidateFediversePage("@organicmaps@fosstodon.org"), ());
+  TEST(osm::ValidateFediversePage("@organicmaps@fosstodon.org.uk"), ());
+  TEST(osm::ValidateFediversePage("pub.mastodon.org.uk/@organicmaps"), ());
+  TEST(osm::ValidateFediversePage("pub.mastodon.org.uk/users/@organicmaps"), ());
+
+  TEST(!osm::ValidateFediversePage("organicmaps@fosstodon@mastodon.org"), ());
+  TEST(!osm::ValidateFediversePage("orga$nicmaps@mastodon.social"), ());
+  TEST(!osm::ValidateFediversePage("pub.mastodon.org.uk/organicmaps"), ());
+  TEST(!osm::ValidateFediversePage("pub.mastodon.org.uk/users/"), ());
+}
+
+UNIT_TEST(EditableMapObject_ValidateBlueskyPage)
+{
+  TEST(osm::ValidateBlueskyPage("organicmaps.bsky.social"), ());
+  TEST(osm::ValidateBlueskyPage("@organicmaps.bsky.social"), ());
+  TEST(osm::ValidateBlueskyPage("https://bsky.app/profile/organicmaps.bsky.social"), ());
+  TEST(osm::ValidateBlueskyPage("https://bsky.app/profile/@organicmaps.bsky.social"), ());
+  TEST(osm::ValidateBlueskyPage("http://bsky.app/profile/organicmaps.bsky.social"), ());
+  TEST(osm::ValidateBlueskyPage("bsky.app/profile/organicmaps.bsky.social"), ());
+  TEST(osm::ValidateBlueskyPage("https://bsky.app/profile/organicmaps.bsky.social/"), ());
+
+  TEST(!osm::ValidateBlueskyPage("https://bsky.app/profile/organicmap$.bsky.social"), ());
+  TEST(!osm::ValidateBlueskyPage("https://bsky.app/profile/organicmaps.bsky.social$"), ());
+  TEST(!osm::ValidateBlueskyPage("https://bsky.app/pineapple/organicmaps.bsky.social"), ());
+  TEST(!osm::ValidateBlueskyPage("https://bsky.app.example.com/profile/organicmaps.bsky.social"), ());
+}
+
 UNIT_TEST(EditableMapObject_socialContactToURL)
 {
   TEST_EQUAL(osm::socialContactToURL(osm::MapObject::MetadataID::FMD_CONTACT_INSTAGRAM, "some_page_name"), "https://instagram.com/some_page_name", ());
diff --git a/indexer/validate_and_format_contacts.cpp b/indexer/validate_and_format_contacts.cpp
index 3904ae5803..7defbe2740 100644
--- a/indexer/validate_and_format_contacts.cpp
+++ b/indexer/validate_and_format_contacts.cpp
@@ -16,12 +16,16 @@ static auto const s_twitterRegex = regex(R"(^@?[A-Za-z0-9_]{1,15}$)");
 static auto const s_badVkRegex = regex(R"(^\d\d\d.+$)");
 static auto const s_goodVkRegex = regex(R"(^[A-Za-z0-9_.]{5,32}$)");
 static auto const s_lineRegex = regex(R"(^[a-z0-9-_.]{4,20}$)");
+static auto const s_fediverseRegex = regex(R"(^@?[a-zA-Z0-9_]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$)");
+static auto const s_blueskyRegex = regex(R"(^@?[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+$)");
 
 constexpr string_view kFacebook{"contact:facebook"};
 constexpr string_view kInstagram{"contact:instagram"};
 constexpr string_view kTwitter{"contact:twitter"};
 constexpr string_view kVk{"contact:vk"};
 constexpr string_view kLine{"contact:line"};
+constexpr string_view kFediverse{"contact:mastodon"};
+constexpr string_view kBluesky{"contact:bluesky"};
 
 constexpr string_view kProfilePhp{"profile.php"};
 
@@ -41,6 +45,7 @@ constexpr string_view kDotVkontakteRu{".vkontakte.ru"};
 constexpr string_view kLineMe{"line.me"};
 constexpr string_view kPageLineMe{"page.line.me"};
 constexpr string_view kDotLineMe{".line.me"};
+constexpr string_view kBskyApp{"bsky.app"};
 
 // URLs constants
 constexpr string_view kUrlFacebook{"https://facebook.com/"};
@@ -48,6 +53,7 @@ constexpr string_view kUrlInstagram{"https://instagram.com/"};
 constexpr string_view kUrlTwitter{"https://twitter.com/"};
 constexpr string_view kUrlVk{"https://vk.com/"};
 constexpr string_view kUrlLine{"https://line.me/R/ti/p/@"};
+constexpr string_view kUrlBluesky{"https://bsky.app/profile/"};
 
 constexpr string_view kHttp{"http://"};
 constexpr string_view kHttps{"https://"};
@@ -65,6 +71,17 @@ bool IsProtocolSpecified(string const & website)
   return 0 != GetProtocolNameLength(website);
 }
 
+string fediverseHandleToUrl(string_view handle)
+{
+  // Convert stored username@domain.name to https://domain.name/@username
+  auto const handleElements = strings::Tokenize(handle, "@");
+  // Anything that does not split into exactly {username, domain} cannot be turned
+  // into a profile URL; indexing it blindly would read out of bounds.
+  if (handleElements.size() != 2)
+    return {};
+  return string{kHttps}.append(handleElements[1]).append("/@").append(handleElements[0]);
+}
+
 // TODO: Current implementation looks only for restricted symbols from ASCII block ignoring
 // unicode. Need to find all restricted *Unicode* symbols
 // from https://www.facebook.com/pages/create page and verify those symbols
@@ -239,16 +256,16 @@ string ValidateAndFormat_vk(string const & vkPage)
   return {};
 }
 
-// Strip '%40' and `@` chars from Line ID start.
-string stripAtSymbol(string const & lineId)
+// Strip '%40' and `@` chars from string start if they exist.
+string stripAtSymbol(string const & inputString)
 {
-  if (lineId.empty())
-    return lineId;
-  if (lineId.front() == '@')
-    return lineId.substr(1);
-  if (lineId.starts_with("%40"))
-    return lineId.substr(3);
-  return lineId;
+  if (inputString.empty())
+    return inputString;
+  if (inputString.front() == '@')
+    return inputString.substr(1);
+  if (inputString.starts_with("%40"))
+    return inputString.substr(3);
+  return inputString;
 }
 
 string ValidateAndFormat_contactLine(string const & linePage)
@@ -318,6 +335,81 @@ string ValidateAndFormat_contactLine(string const & linePage)
   return {};
 }
 
+// Normalises a Fediverse (Mastodon) contact to username@domain.name; returns "" when it cannot be parsed.
+string ValidateAndFormat_fediverse(string const & fediPage)
+{
+  if (fediPage.empty())
+    return {};
+
+  // Parse {@?}{username}@{domain.name} format
+  if (regex_match(fediPage, s_fediverseRegex))
+    return stripAtSymbol(fediPage);
+
+  // If it doesn't match the above format, it can only be an URL format.
+  if (!ValidateWebsite(fediPage))
+    return {};
+
+  // Parse https://{domain.name}{@ || /users/}{username} formats
+  url::Url const parsedUrl = url::Url::FromString(fediPage);
+  string const parsedDomain = strings::MakeLowerCase(parsedUrl.GetHost());
+  string path = parsedUrl.GetPath();
+  path.erase(path.find_last_not_of('/') + 1);  // Strip any trailing '/' symbol
+
+  // Could be /users/ type - check and remove to be left with just username.
+  if (path.starts_with("users/"))  // first slash is already removed by GetPath()
+  {
+    path.erase(0, 6);
+    path = stripAtSymbol(path);  // handle technically wrong but parseable domain/users/@username
+  }
+  // domain.name/@username - username has to start with @
+  else if (path.starts_with("@"))
+    path = stripAtSymbol(path);
+  // unknown/invalid format
+  else
+    return {};
+
+  // Then construct the final username@domain.name format
+  path.append("@").append(parsedDomain);
+  // and make sure it's valid
+  if (regex_match(path, s_fediverseRegex))
+    return path;
+  else
+    return {};
+}
+
+// Normalises a Bluesky contact to a bare handle (user.domain.name); returns "" when it cannot be parsed.
+string ValidateAndFormat_bluesky(string const & bskyPage)
+{
+  if (bskyPage.empty())
+    return {};
+
+  // Try matching {@?}{user/domain.name} format to avoid doing the other stuff
+  if (regex_match(bskyPage, s_blueskyRegex))
+    return stripAtSymbol(bskyPage);
+
+  // If not, it must match the URL format
+  if (!ValidateWebsite(bskyPage))
+    return {};
+
+  // Match https://bsky.app/profile/{username/domain.name}
+  url::Url const pageUrl = url::Url::FromString(bskyPage);
+  // Own a lower-cased copy of the host and compare it exactly: a prefix test would also
+  // accept look-alike hosts such as bsky.app.example.com.
+  string const domain = strings::MakeLowerCase(pageUrl.GetHost());
+  string path = pageUrl.GetPath();
+  if (domain != kBskyApp || !path.starts_with("profile/"))
+    return {};
+
+  path.erase(0, 8);                            // Strip "profile/" part
+  path.erase(path.find_last_not_of('/') + 1);  // Strip last '/' symbol if exists
+
+  // Then make sure it matches {@?}{user/domain.name}
+  if (regex_match(path, s_blueskyRegex))
+    return stripAtSymbol(path);
+
+  return {};
+}
+
 bool ValidateWebsite(string const & site)
 {
   if (site.empty())
@@ -449,9 +541,24 @@ bool ValidateLinePage(string const & page)
   return (domain == kLineMe || domain.ends_with(kDotLineMe));
 }
 
+// Returns true for an empty value or anything ValidateAndFormat_fediverse can normalise.
+bool ValidateFediversePage(string const & page)
+{
+  // Defer to the formatter so validation and normalisation cannot drift apart.
+  return page.empty() || !ValidateAndFormat_fediverse(page).empty();
+}
+
+// Returns true for an empty value or anything ValidateAndFormat_bluesky can normalise.
+bool ValidateBlueskyPage(string const & page)
+{
+  // Defer to the formatter so validation and normalisation cannot drift apart.
+  return page.empty() || !ValidateAndFormat_bluesky(page).empty();
+}
+
 bool isSocialContactTag(string_view tag)
 {
-  return tag == kInstagram || tag == kFacebook || tag == kTwitter || tag == kVk || tag == kLine;
+  return tag == kInstagram || tag == kFacebook || tag == kTwitter || tag == kVk || tag == kLine ||
+         tag == kFediverse || tag == kBluesky;
 }
 
 bool isSocialContactTag(MapObject::MetadataID const metaID)
@@ -460,7 +567,9 @@ bool isSocialContactTag(MapObject::MetadataID const metaID)
          metaID == MapObject::MetadataID::FMD_CONTACT_FACEBOOK ||
          metaID == MapObject::MetadataID::FMD_CONTACT_TWITTER ||
          metaID == MapObject::MetadataID::FMD_CONTACT_VK ||
-         metaID == MapObject::MetadataID::FMD_CONTACT_LINE;
+         metaID == MapObject::MetadataID::FMD_CONTACT_LINE ||
+         metaID == MapObject::MetadataID::FMD_CONTACT_FEDIVERSE ||
+         metaID == MapObject::MetadataID::FMD_CONTACT_BLUESKY;
 }
 
 // Functions ValidateAndFormat_{facebook,instagram,twitter,vk}(...) by default strip domain name
@@ -477,6 +586,10 @@ string socialContactToURL(string_view tag, string_view value)
     return string{kUrlTwitter}.append(value);
   if (tag == kVk)
     return string{kUrlVk}.append(value);
+  if (tag == kFediverse)
+    return fediverseHandleToUrl(value);
+  if (tag == kBluesky)
+    return string{kUrlBluesky}.append(value);
   if (tag == kLine)
   {
     if (value.find('/') == string::npos) // 'value' is a username.
@@ -502,6 +615,10 @@ string socialContactToURL(MapObject::MetadataID metaID, string_view value)
     return string{kUrlTwitter}.append(value);
   case MapObject::MetadataID::FMD_CONTACT_VK:
     return string{kUrlVk}.append(value);
+  case MapObject::MetadataID::FMD_CONTACT_FEDIVERSE:
+    return fediverseHandleToUrl(value);
+  case MapObject::MetadataID::FMD_CONTACT_BLUESKY:
+    return string{kUrlBluesky}.append(value);
   case MapObject::MetadataID::FMD_CONTACT_LINE:
     if (value.find('/') == string::npos) // 'value' is a username.
       return string{kUrlLine}.append(value);
diff --git a/indexer/validate_and_format_contacts.hpp b/indexer/validate_and_format_contacts.hpp
index 7e430c1656..ab337e68b2 100644
--- a/indexer/validate_and_format_contacts.hpp
+++ b/indexer/validate_and_format_contacts.hpp
@@ -12,6 +12,8 @@ std::string ValidateAndFormat_instagram(std::string const & v);
 std::string ValidateAndFormat_twitter(std::string const & v);
 std::string ValidateAndFormat_vk(std::string const & v);
 std::string ValidateAndFormat_contactLine(std::string const & v);
+std::string ValidateAndFormat_fediverse(std::string const & v);
+std::string ValidateAndFormat_bluesky(std::string const & v);
 
 bool ValidateWebsite(std::string const & site);
 bool ValidateFacebookPage(std::string const & v);
@@ -19,6 +21,8 @@ bool ValidateInstagramPage(std::string const & v);
 bool ValidateTwitterPage(std::string const & v);
 bool ValidateVkPage(std::string const & v);
 bool ValidateLinePage(std::string const & v);
+bool ValidateFediversePage(std::string const & v);
+bool ValidateBlueskyPage(std::string const & v);
 
 bool isSocialContactTag(std::string_view tag);
 bool isSocialContactTag(osm::MapObject::MetadataID const metaID);