[generator][core] Add Mastodon and Bluesky

Signed-off-by: Harry Bond <me@hbond.xyz>
This commit is contained in:
Harry Bond 2025-01-28 19:21:51 +00:00
parent bac9073bcb
commit 6557471697
6 changed files with 257 additions and 11 deletions

View file

@ -534,6 +534,8 @@ void MetadataTagProcessor::operator()(std::string const & k, std::string const &
case Metadata::FMD_CONTACT_TWITTER: valid = osm::ValidateAndFormat_twitter(v); break;
case Metadata::FMD_CONTACT_VK: valid = osm::ValidateAndFormat_vk(v); break;
case Metadata::FMD_CONTACT_LINE: valid = osm::ValidateAndFormat_contactLine(v); break;
case Metadata::FMD_CONTACT_FEDIVERSE: valid = osm::ValidateAndFormat_fediverse(v); break;
case Metadata::FMD_CONTACT_BLUESKY: valid = osm::ValidateAndFormat_bluesky(v); break;
case Metadata::FMD_INTERNET: valid = ValidateAndFormat_internet(v); break;
case Metadata::FMD_ELE: valid = ValidateAndFormat_ele(v); break;
case Metadata::FMD_DESTINATION: valid = ValidateAndFormat_destination(v); break;

View file

@ -95,6 +95,10 @@ bool Metadata::TypeFromString(string_view k, Metadata::EType & outType)
outType = Metadata::FMD_CONTACT_VK;
else if (k == "contact:line")
outType = Metadata::FMD_CONTACT_LINE;
else if (k == "contact:mastodon")
outType = Metadata::FMD_CONTACT_FEDIVERSE;
else if (k == "contact:bluesky")
outType = Metadata::FMD_CONTACT_BLUESKY;
else if (k == "internet_access" || k == "wifi")
outType = Metadata::FMD_INTERNET;
else if (k == "ele")
@ -264,6 +268,8 @@ string ToString(Metadata::EType type)
case Metadata::FMD_CONTACT_TWITTER: return "contact:twitter";
case Metadata::FMD_CONTACT_VK: return "contact:vk";
case Metadata::FMD_CONTACT_LINE: return "contact:line";
case Metadata::FMD_CONTACT_FEDIVERSE: return "contact:mastodon";
case Metadata::FMD_CONTACT_BLUESKY: return "contact:bluesky";
case Metadata::FMD_DESTINATION: return "destination";
case Metadata::FMD_DESTINATION_REF: return "destination:ref";
case Metadata::FMD_JUNCTION_REF: return "junction:ref";

View file

@ -155,6 +155,8 @@ public:
FMD_SELF_SERVICE = 47,
FMD_OUTDOOR_SEATING = 48,
FMD_NETWORK = 49,
FMD_CONTACT_FEDIVERSE = 50,
FMD_CONTACT_BLUESKY = 51,
FMD_COUNT
};

View file

@ -108,6 +108,39 @@ UNIT_TEST(EditableMapObject_ValidateAndFormat_contactLine)
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.com/ti/p/invalid-domain"), "", ());
}
UNIT_TEST(EditableMapObject_ValidateAndFormat_fediverse)
{
  // Each row pairs a raw user input with the handle the formatter has to produce.
  // An empty expectation means the input must be rejected.
  struct Sample
  {
    char const * m_input;
    char const * m_expected;
  };

  Sample const samples[] = {
    // Profile URLs, with and without a protocol prefix.
    {"https://fosstodon.org/@organicmaps", "organicmaps@fosstodon.org"},
    {"https://fosstodon.org/users/organicmaps", "organicmaps@fosstodon.org"},
    {"http://fosstodon.org/users/organicmaps", "organicmaps@fosstodon.org"},
    {"fosstodon.org/users/organicmaps", "organicmaps@fosstodon.org"},
    // Plain handles, with and without the leading '@'.
    {"organicmaps@fosstodon.org", "organicmaps@fosstodon.org"},
    {"@organicmaps@fosstodon.org", "organicmaps@fosstodon.org"},
    {"@organicmaps@fosstodon.org.uk", "organicmaps@fosstodon.org.uk"},
    // Multi-level instance domains.
    {"pub.mastodon.org.uk/@organicmaps", "organicmaps@pub.mastodon.org.uk"},
    {"pub.mastodon.org.uk/users/@organicmaps", "organicmaps@pub.mastodon.org.uk"},
    // Malformed inputs.
    {"organicmaps@fosstodon@mastodon.org", ""},
    {"orga$nicmaps@mastodon.social", ""},
    {"pub.mastodon.org.uk/organicmaps", ""},
    {"pub.mastodon.org.uk/users/", ""},
  };

  for (auto const & sample : samples)
    TEST_EQUAL(osm::ValidateAndFormat_fediverse(sample.m_input), sample.m_expected, ());
}
UNIT_TEST(EditableMapObject_ValidateAndFormat_bluesky)
{
  // Bare handles, with or without the leading '@'.
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("organicmaps.bsky.social"), "organicmaps.bsky.social", ());
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("@organicmaps.bsky.social"), "organicmaps.bsky.social", ());

  // Profile URLs, with and without a protocol prefix.
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmaps.bsky.social"), "organicmaps.bsky.social", ());
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/@organicmaps.bsky.social"), "organicmaps.bsky.social", ());
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("http://bsky.app/profile/organicmaps.bsky.social"), "organicmaps.bsky.social", ());
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("bsky.app/profile/organicmaps.bsky.social"), "organicmaps.bsky.social", ());

  // A trailing slash after the handle is stripped.
  // (This case replaces an assertion that repeated the first profile URL check verbatim.)
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmaps.bsky.social/"), "organicmaps.bsky.social", ());

  // Characters outside the handle alphabet and unknown URL paths are rejected.
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmap$.bsky.social"), "", ());
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/profile/organicmaps.bsky.social$"), "", ());
  TEST_EQUAL(osm::ValidateAndFormat_bluesky("https://bsky.app/pineapple/organicmaps.bsky.social"), "", ());
}
UNIT_TEST(EditableMapObject_ValidateFacebookPage)
{
TEST(osm::ValidateFacebookPage(""), ());
@ -262,6 +295,39 @@ UNIT_TEST(EditableMapObject_ValidateLinePage)
TEST(!osm::ValidateLinePage("https://line.com/ti/p/invalid-domain"), ());
}
UNIT_TEST(EditableMapObject_ValidateFediversePage)
{
  // Inputs the validator has to accept: profile URLs and plain handles.
  char const * const accepted[] = {
    "https://fosstodon.org/@organicmaps",
    "https://fosstodon.org/users/organicmaps",
    "http://fosstodon.org/users/organicmaps",
    "fosstodon.org/users/organicmaps",
    "organicmaps@fosstodon.org",
    "@organicmaps@fosstodon.org",
    "@organicmaps@fosstodon.org.uk",
    "pub.mastodon.org.uk/@organicmaps",
    "pub.mastodon.org.uk/users/@organicmaps",
  };

  // Inputs the validator has to refuse.
  char const * const rejected[] = {
    "organicmaps@fosstodon@mastodon.org",
    "orga$nicmaps@mastodon.social",
    "pub.mastodon.org.uk/organicmaps",
    "pub.mastodon.org.uk/users/",
  };

  for (char const * page : accepted)
    TEST(osm::ValidateFediversePage(page), ());

  for (char const * page : rejected)
    TEST(!osm::ValidateFediversePage(page), ());
}
UNIT_TEST(EditableMapObject_ValidateBlueskyPage)
{
  // Bare handles, with or without the leading '@'.
  TEST(osm::ValidateBlueskyPage("organicmaps.bsky.social"), ());
  TEST(osm::ValidateBlueskyPage("@organicmaps.bsky.social"), ());

  // Profile URLs, with and without a protocol prefix.
  TEST(osm::ValidateBlueskyPage("https://bsky.app/profile/organicmaps.bsky.social"), ());
  TEST(osm::ValidateBlueskyPage("https://bsky.app/profile/@organicmaps.bsky.social"), ());
  TEST(osm::ValidateBlueskyPage("http://bsky.app/profile/organicmaps.bsky.social"), ());
  TEST(osm::ValidateBlueskyPage("bsky.app/profile/organicmaps.bsky.social"), ());

  // A trailing slash after the handle is tolerated.
  // (This case replaces an assertion that repeated the first profile URL check verbatim.)
  TEST(osm::ValidateBlueskyPage("https://bsky.app/profile/organicmaps.bsky.social/"), ());

  // Characters outside the handle alphabet and unknown URL paths are rejected.
  TEST(!osm::ValidateBlueskyPage("https://bsky.app/profile/organicmap$.bsky.social"), ());
  TEST(!osm::ValidateBlueskyPage("https://bsky.app/profile/organicmaps.bsky.social$"), ());
  TEST(!osm::ValidateBlueskyPage("https://bsky.app/pineapple/organicmaps.bsky.social"), ());
}
UNIT_TEST(EditableMapObject_socialContactToURL)
{
TEST_EQUAL(osm::socialContactToURL(osm::MapObject::MetadataID::FMD_CONTACT_INSTAGRAM, "some_page_name"), "https://instagram.com/some_page_name", ());

View file

@ -16,12 +16,16 @@ static auto const s_twitterRegex = regex(R"(^@?[A-Za-z0-9_]{1,15}$)");
// Matches a value that starts with three digits followed by at least one more character.
static auto const s_badVkRegex = regex(R"(^\d\d\d.+$)");
// Matches 5 to 32 characters drawn from latin letters, digits, '_' and '.'.
static auto const s_goodVkRegex = regex(R"(^[A-Za-z0-9_.]{5,32}$)");
// Matches 4 to 20 characters drawn from lowercase latin letters, digits, '-', '_' and '.'.
static auto const s_lineRegex = regex(R"(^[a-z0-9-_.]{4,20}$)");
// Matches a handle of the form [@]username@domain.name: an optional leading '@', a username of
// latin letters, digits and '_', then '@' and a domain of latin letters, digits, '.' and '-'
// that ends with a dot followed by at least two latin letters.
static auto const s_fediverseRegex = regex(R"(^@?[a-zA-Z0-9_]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$)");
// Matches a domain-like handle: an optional leading '@', then two or more dot-separated
// labels made of latin letters, digits and '-'.
static auto const s_blueskyRegex = regex(R"(^@?[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+$)");
// Tag names of the supported social contacts.
constexpr string_view kFacebook{"contact:facebook"};
constexpr string_view kInstagram{"contact:instagram"};
constexpr string_view kTwitter{"contact:twitter"};
constexpr string_view kVk{"contact:vk"};
constexpr string_view kLine{"contact:line"};
// Fediverse handles are stored under the Mastodon tag name.
constexpr string_view kFediverse{"contact:mastodon"};
constexpr string_view kBluesky{"contact:bluesky"};
constexpr string_view kProfilePhp{"profile.php"};
@ -41,6 +45,7 @@ constexpr string_view kDotVkontakteRu{".vkontakte.ru"};
constexpr string_view kLineMe{"line.me"};
constexpr string_view kPageLineMe{"page.line.me"};
constexpr string_view kDotLineMe{".line.me"};
constexpr string_view kBskyApp{"bsky.app"};
// URLs constants
constexpr string_view kUrlFacebook{"https://facebook.com/"};
@ -48,6 +53,7 @@ constexpr string_view kUrlInstagram{"https://instagram.com/"};
constexpr string_view kUrlTwitter{"https://twitter.com/"};
constexpr string_view kUrlVk{"https://vk.com/"};
constexpr string_view kUrlLine{"https://line.me/R/ti/p/@"};
constexpr string_view kUrlBluesky{"https://bsky.app/profile/"};
constexpr string_view kHttp{"http://"};
constexpr string_view kHttps{"https://"};
@ -65,6 +71,13 @@ bool IsProtocolSpecified(string const & website)
return 0 != GetProtocolNameLength(website);
}
// Converts a stored "username@domain.name" handle into the profile URL
// "https://domain.name/@username".
// Returns an empty string when |handle| does not split into at least a username and a domain
// around '@', so a malformed value can never index past the end of the token list.
string fediverseHandleToUrl(string_view handle)
{
  vector<string_view> const handleElements = strings::Tokenize(handle, "@");
  if (handleElements.size() < 2)
    return {};

  string_view const username = handleElements[0];
  string_view const domain = handleElements[1];
  return string{kHttps}.append(domain).append("/@").append(username);
}
// TODO: Current implementation looks only for restricted symbols from ASCII block ignoring
// unicode. Need to find all restricted *Unicode* symbols
// from https://www.facebook.com/pages/create page and verify those symbols
@ -239,16 +252,16 @@ string ValidateAndFormat_vk(string const & vkPage)
return {};
}
// Strip '%40' and `@` chars from Line ID start.
string stripAtSymbol(string const & lineId)
// Strip '%40' and `@` chars from string start if they exist.
// Only one leading marker is removed: either a single '@' or a single "%40" prefix.
// An empty string, or a string without such a prefix, is returned unchanged.
string stripAtSymbol(string const & inputString)
{
if (lineId.empty())
return lineId;
if (lineId.front() == '@')
return lineId.substr(1);
if (lineId.starts_with("%40"))
return lineId.substr(3);
return lineId;
if (inputString.empty())
return inputString;
if (inputString.front() == '@')
return inputString.substr(1);
if (inputString.starts_with("%40"))
return inputString.substr(3);
return inputString;
}
string ValidateAndFormat_contactLine(string const & linePage)
@ -318,6 +331,79 @@ string ValidateAndFormat_contactLine(string const & linePage)
return {};
}
// Normalizes a Fediverse (Mastodon) contact to the "username@domain.name" form.
// Accepted inputs:
//  - a handle: [@]username@domain.name
//  - a profile URL: domain.name/@username or domain.name/users/[@]username,
//    optionally followed by trailing slashes.
// Returns an empty string for empty or unrecognized input.
string ValidateAndFormat_fediverse(string const & fediPage)
{
  if (fediPage.empty())
    return {};

  // Parse {@?}{username}@{domain.name} format
  if (regex_match(fediPage, s_fediverseRegex))
    return stripAtSymbol(fediPage);

  // If it doesn't match the above format, it can only be an URL format.
  if (!ValidateWebsite(fediPage))
    return {};

  // Parse https://{domain.name}{@ || /users/}{username} formats
  url::Url const parsedUrl = url::Url::FromString(fediPage);
  string const parsedDomain = strings::MakeLowerCase(parsedUrl.GetHost());
  string path = parsedUrl.GetPath();
  path.erase(path.find_last_not_of('/') + 1);  // Strip any trailing '/' symbol

  // Could be /users/ type - check and remove to be left with just username.
  if (path.starts_with("users/"))  // first slash is already removed by GetPath()
  {
    path.erase(0, 6);
    path = stripAtSymbol(path);  // handle technically wrong but parseable domain/users/@username
  }
  // domain.name/@username - username has to start with @
  else if (path.starts_with("@"))
  {
    path = stripAtSymbol(path);
  }
  // unknown/invalid format
  else
  {
    return {};
  }

  // Then construct the final username@domain.name format and make sure it's valid
  path.append("@").append(parsedDomain);
  if (!regex_match(path, s_fediverseRegex))
    return {};

  // s_fediverseRegex tolerates one leading '@', so a path such as "@@name" still matches here
  // with an '@' left in front. Strip it so the result is always the bare username@domain form.
  return stripAtSymbol(path);
}
// Normalizes a Bluesky contact to its bare handle (a domain-like name without a leading '@').
// Accepted inputs:
//  - a handle: [@]name.domain
//  - a profile URL on bsky.app: bsky.app/profile/[@]name.domain, optionally followed by
//    trailing slashes.
// Returns an empty string for empty or unrecognized input.
string ValidateAndFormat_bluesky(string const & bskyPage)
{
  if (bskyPage.empty())
    return {};

  // Try matching {@?}{user/domain.name} format to avoid doing the other stuff
  if (regex_match(bskyPage, s_blueskyRegex))
    return stripAtSymbol(bskyPage);

  // If not, it must match the URL format
  if (!ValidateWebsite(bskyPage))
    return {};

  // Match https://bsky.app/profile/{username/domain.name}
  url::Url const pageUrl = url::Url::FromString(bskyPage);
  string_view const domain = pageUrl.GetHost();
  string path = pageUrl.GetPath();

  // The host has to be exactly bsky.app: a prefix comparison would also accept unrelated
  // hosts that merely begin with it, e.g. "bsky.app.example.com".
  if (domain != kBskyApp || !path.starts_with("profile/"))
    return {};

  path.erase(0, 8);                            // Strip "profile/" part
  path.erase(path.find_last_not_of('/') + 1);  // Strip last '/' symbol if exists

  // Then make sure it matches {@?}{user/domain.name}
  if (!regex_match(path, s_blueskyRegex))
    return {};

  return stripAtSymbol(path);
}
bool ValidateWebsite(string const & site)
{
if (site.empty())
@ -449,9 +535,79 @@ bool ValidateLinePage(string const & page)
return (domain == kLineMe || domain.ends_with(kDotLineMe));
}
// Checks whether |page| is an acceptable Fediverse contact: an empty value, a
// [@]username@instance.name handle, or a profile URL of the instance.name/@username or
// instance.name/users/[@]username kind.
bool ValidateFediversePage(string const & page)
{
  // Nothing entered, or already a well-formed handle.
  if (page.empty() || regex_match(page, s_fediverseRegex))
    return true;

  // Everything else has to be a URL.
  if (!ValidateWebsite(page))
    return false;

  url::Url const parsed = url::Url::FromString(page);
  string_view const host = parsed.GetHost();
  string userPart = parsed.GetPath();  // GetPath() has already dropped the first slash.

  // Only the "users/..." and "@..." path layouts are recognized.
  bool const isUsersLayout = userPart.starts_with("users/");
  if (!isUsersLayout && !userPart.starts_with("@"))
    return false;

  if (isUsersLayout)
    userPart.erase(0, 6);

  // Drop the '@' in front of the username. In the users/ layout it is technically wrong
  // but still parseable.
  userPart = stripAtSymbol(userPart);

  // Remove any trailing '/' symbols.
  userPart.erase(userPart.find_last_not_of('/') + 1);

  // Rebuild username@domain.name and let the handle pattern decide.
  userPart.append("@").append(host);
  return regex_match(userPart, s_fediverseRegex);
}
// Checks whether |page| is an acceptable Bluesky contact: an empty value, a [@]name.domain
// handle, or a bsky.app/profile/[@]name.domain URL (trailing slashes allowed).
bool ValidateBlueskyPage(string const & page)
{
  // A valid username can be any domain name, so the username rules don't apply.
  if (page.empty())
    return true;

  // Match {@?}{user/domain.name} format
  if (regex_match(page, s_blueskyRegex))
    return true;

  // Has to be an url format now
  if (!ValidateWebsite(page))
    return false;

  // Match https://bsky.app/profile/{username/domain.name}
  url::Url const pageUrl = url::Url::FromString(page);
  string_view const domain = pageUrl.GetHost();
  string path = pageUrl.GetPath();

  // The host has to be exactly bsky.app: a prefix comparison would also accept unrelated
  // hosts that merely begin with it, e.g. "bsky.app.example.com".
  if (domain != kBskyApp || !path.starts_with("profile/"))
    return false;

  path.erase(0, 8);                            // Strip "profile/" part
  path.erase(path.find_last_not_of('/') + 1);  // Strip last '/' symbol if exists

  // Then try to parse the remaining text as a username again
  return regex_match(path, s_blueskyRegex);
}
// Tells whether |tag| equals one of the social contact tag constants
// (kInstagram, kFacebook, kTwitter, kVk, kLine and, in the updated line, kFediverse and kBluesky).
bool isSocialContactTag(string_view tag)
{
return tag == kInstagram || tag == kFacebook || tag == kTwitter || tag == kVk || tag == kLine;
return tag == kInstagram || tag == kFacebook || tag == kTwitter || tag == kVk || tag == kLine || tag == kFediverse || tag == kBluesky;
}
bool isSocialContactTag(MapObject::MetadataID const metaID)
@ -460,7 +616,9 @@ bool isSocialContactTag(MapObject::MetadataID const metaID)
metaID == MapObject::MetadataID::FMD_CONTACT_FACEBOOK ||
metaID == MapObject::MetadataID::FMD_CONTACT_TWITTER ||
metaID == MapObject::MetadataID::FMD_CONTACT_VK ||
metaID == MapObject::MetadataID::FMD_CONTACT_LINE;
metaID == MapObject::MetadataID::FMD_CONTACT_LINE ||
metaID == MapObject::MetadataID::FMD_CONTACT_FEDIVERSE ||
metaID == MapObject::MetadataID ::FMD_CONTACT_BLUESKY;
}
// Functions ValidateAndFormat_{facebook,instagram,twitter,vk}(...) by default strip domain name
@ -477,6 +635,10 @@ string socialContactToURL(string_view tag, string_view value)
return string{kUrlTwitter}.append(value);
if (tag == kVk)
return string{kUrlVk}.append(value);
if (tag == kFediverse)
return fediverseHandleToUrl(value);
if (tag == kBluesky) // In future
return string{kUrlBluesky}.append(value);
if (tag == kLine)
{
if (value.find('/') == string::npos) // 'value' is a username.
@ -502,6 +664,10 @@ string socialContactToURL(MapObject::MetadataID metaID, string_view value)
return string{kUrlTwitter}.append(value);
case MapObject::MetadataID::FMD_CONTACT_VK:
return string{kUrlVk}.append(value);
case MapObject::MetadataID::FMD_CONTACT_FEDIVERSE:
return fediverseHandleToUrl(value);
case MapObject::MetadataID::FMD_CONTACT_BLUESKY:
return string{kUrlBluesky}.append(value);
case MapObject::MetadataID::FMD_CONTACT_LINE:
if (value.find('/') == string::npos) // 'value' is a username.
return string{kUrlLine}.append(value);

View file

@ -12,6 +12,8 @@ std::string ValidateAndFormat_instagram(std::string const & v);
std::string ValidateAndFormat_twitter(std::string const & v);
std::string ValidateAndFormat_vk(std::string const & v);
std::string ValidateAndFormat_contactLine(std::string const & v);
std::string ValidateAndFormat_fediverse(std::string const & v);
std::string ValidateAndFormat_bluesky(std::string const & v);
bool ValidateWebsite(std::string const & site);
bool ValidateFacebookPage(std::string const & v);
@ -19,6 +21,8 @@ bool ValidateInstagramPage(std::string const & v);
bool ValidateTwitterPage(std::string const & v);
bool ValidateVkPage(std::string const & v);
bool ValidateLinePage(std::string const & v);
bool ValidateFediversePage(std::string const & v);
bool ValidateBlueskyPage(std::string const & v);
bool isSocialContactTag(std::string_view tag);
bool isSocialContactTag(osm::MapObject::MetadataID const metaID);