Issue 2787 - Facebook validation fix (#2793)

Signed-off-by: S. Kozyr <s.trump@gmail.com>
This commit is contained in:
Sergiy Kozyr 2022-07-05 11:03:08 +03:00 committed by GitHub
parent cc02d2e4ee
commit 3380f5a375
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 157 additions and 10 deletions

View file

@ -4,6 +4,100 @@
#include <string>
// Checks osm::ValidateAndFormat_facebook: accepted inputs are reduced to the bare
// page name, rejected inputs produce an empty string.
UNIT_TEST(EditableMapObject_ValidateAndFormat_facebook)
{
// Empty input stays empty.
TEST_EQUAL(osm::ValidateAndFormat_facebook(""), "", ());
// facebook.com URLs (with or without scheme, "www." or a locale subdomain) yield the page name.
TEST_EQUAL(osm::ValidateAndFormat_facebook("facebook.com/OpenStreetMap"), "OpenStreetMap", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("www.facebook.com/OpenStreetMap"), "OpenStreetMap", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("http://facebook.com/OpenStreetMap"), "OpenStreetMap", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("https://facebook.com/OpenStreetMap"), "OpenStreetMap", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("http://www.facebook.com/OpenStreetMap"), "OpenStreetMap", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("https://www.facebook.com/OpenStreetMap"), "OpenStreetMap", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("https://en-us.facebook.com/OpenStreetMap"), "OpenStreetMap", ());
// A bare page name is returned as is; a leading '@' is stripped.
TEST_EQUAL(osm::ValidateAndFormat_facebook("some.good.page"), "some.good.page", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("@tree-house-interiors"), "tree-house-interiors", ());
// Rejected: URLs of another site, the 3-character name "osm", and names containing spaces.
TEST_EQUAL(osm::ValidateAndFormat_facebook("instagram.com/openstreetmapus"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("https://instagram.com/openstreetmapus"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("osm"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("@spaces are not welcome here"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_facebook("spaces are not welcome here"), "", ());
}
// Checks osm::ValidateAndFormat_instagram: accepted inputs are reduced to the account
// name (or page path), rejected inputs produce an empty string.
UNIT_TEST(EditableMapObject_ValidateAndFormat_instagram)
{
// Empty input stays empty.
TEST_EQUAL(osm::ValidateAndFormat_instagram(""), "", ());
// instagram.com URLs (with or without scheme, "www." or a locale subdomain) yield the account name.
TEST_EQUAL(osm::ValidateAndFormat_instagram("instagram.com/openstreetmapus"), "openstreetmapus", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("www.instagram.com/openstreetmapus"), "openstreetmapus", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("http://instagram.com/openstreetmapus"), "openstreetmapus", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("https://instagram.com/openstreetmapus"), "openstreetmapus", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("http://www.instagram.com/openstreetmapus"), "openstreetmapus", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("https://www.instagram.com/openstreetmapus"), "openstreetmapus", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("https://en-us.instagram.com/openstreetmapus"), "openstreetmapus", ());
// A leading '@' is stripped from a bare account name.
TEST_EQUAL(osm::ValidateAndFormat_instagram("@open_street_map_us"), "open_street_map_us", ());
// Deep links keep their path; the trailing slash and the query string are dropped.
TEST_EQUAL(osm::ValidateAndFormat_instagram("https://www.instagram.com/explore/locations/358536820/trivium-sport-en-dance/"), "explore/locations/358536820/trivium-sport-en-dance", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("https://www.instagram.com/p/BvkgKZNDbqN/?ghid=UwPchX7B"), "p/BvkgKZNDbqN", ());
// Rejected: URL of another site, a name starting/ending with a dot, and an over-long name.
TEST_EQUAL(osm::ValidateAndFormat_instagram("facebook.com/osm_us"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram(".dots_not_allowed."), "", ());
TEST_EQUAL(osm::ValidateAndFormat_instagram("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), "", ());
}
// Checks osm::ValidateAndFormat_twitter: accepted inputs are reduced to the account
// name, rejected inputs produce an empty string.
UNIT_TEST(EditableMapObject_ValidateAndFormat_twitter)
{
// twitter.com URLs (with or without scheme and "www.") yield the account name.
TEST_EQUAL(osm::ValidateAndFormat_twitter("twitter.com/osm_tech"), "osm_tech", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("www.twitter.com/osm_tech"), "osm_tech", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("http://twitter.com/osm_tech"), "osm_tech", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("https://twitter.com/osm_tech"), "osm_tech", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("http://www.twitter.com/osm_tech"), "osm_tech", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("https://www.twitter.com/osm_tech"), "osm_tech", ());
// A leading '@' is stripped; underscores are kept.
TEST_EQUAL(osm::ValidateAndFormat_twitter("@_osm_tech_"), "_osm_tech_", ());
// Rejected: URL of another site, a name containing dots, and an over-long name.
TEST_EQUAL(osm::ValidateAndFormat_twitter("instagram.com/osm_tech"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("dots.not.allowed"), "", ());
TEST_EQUAL(osm::ValidateAndFormat_twitter("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), "", ());
}
// Checks osm::ValidateAndFormat_vk: accepted inputs are reduced to the page id,
// rejected inputs produce an empty string.
UNIT_TEST(EditableMapObject_ValidateAndFormat_vk)
{
// Both vk.com and vkontakte.ru URLs (with or without scheme and "www.") yield the page id.
TEST_EQUAL(osm::ValidateAndFormat_vk("vk.com/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("vkontakte.ru/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("www.vk.com/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("http://vk.com/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("https://vk.com/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("https://vkontakte.ru/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("http://www.vk.com/id404"), "id404", ());
TEST_EQUAL(osm::ValidateAndFormat_vk("https://www.vk.com/id404"), "id404", ());
// A trailing slash is dropped.
TEST_EQUAL(osm::ValidateAndFormat_vk("https://www.vk.com/id405/"), "id405", ());
// A leading '@' is stripped from a bare page name.
TEST_EQUAL(osm::ValidateAndFormat_vk("@22ab.cdef"), "22ab.cdef", ());
// A URL of another site is rejected.
TEST_EQUAL(osm::ValidateAndFormat_vk("instagram.com/hello_world"), "", ());
}
// Checks osm::ValidateAndFormat_contactLine: known LINE URL shapes are reduced to
// the LINE id, rejected inputs produce an empty string.
UNIT_TEST(EditableMapObject_ValidateAndFormat_contactLine)
{
// line.me/ti/p/<id> and line.me/R/ti/p/<id>: the id is extracted, a leading '@' or "%40" is removed.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("http://line.me/ti/p/mzog4fnz24"), "mzog4fnz24", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/ti/p/xnv0g02rws"), "xnv0g02rws", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/ti/p/@dgxs9r6wad"), "dgxs9r6wad", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/ti/p/%40vne5uwke17"), "vne5uwke17", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("http://line.me/R/ti/p/bfsg1a8x9u"), "bfsg1a8x9u", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/R/ti/p/gdltt7s380"), "gdltt7s380", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/R/ti/p/@sdb2pb3lsg"), "sdb2pb3lsg", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/R/ti/p/%40b30h5mdj11"), "b30h5mdj11", ());
// line.me/R/home/public/{main,profile}?id=<id>: the value of the "id" query parameter is extracted.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("http://line.me/R/home/public/main?id=hmczqsbav5"), "hmczqsbav5", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/R/home/public/main?id=wa1gvx91jb"), "wa1gvx91jb", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("http://line.me/R/home/public/profile?id=5qll5dyqqu"), "5qll5dyqqu", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/R/home/public/profile?id=r90ck7n1rq"), "r90ck7n1rq", ());
// NOTE(review): the next case is an exact duplicate of the previous one - confirm whether
// a different URL was intended.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.me/R/home/public/profile?id=r90ck7n1rq"), "r90ck7n1rq", ());
// page.line.me/<id>: the id is extracted and the query string is dropped.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://page.line.me/fom5198h"), "fom5198h", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://page.line.me/qn58n8g?web=mobile"), "qn58n8g", ());
// A line.me URL of any other shape is kept in full, only the scheme is removed.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://abc.line.me/en/some/page?id=xaladqv"), "abc.line.me/en/some/page?id=xaladqv", ());
// A leading '@' is stripped from a bare id; hyphens are allowed.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("@abcd"), "abcd", ());
TEST_EQUAL(osm::ValidateAndFormat_contactLine("@-hyphen-test-"), "-hyphen-test-", ());
// A URL on a domain other than line.me is rejected.
TEST_EQUAL(osm::ValidateAndFormat_contactLine("https://line.com/ti/p/invalid-domain"), "", ());
}
UNIT_TEST(EditableMapObject_ValidateFacebookPage)
{
TEST(osm::ValidateFacebookPage(""), ());
@ -17,13 +111,32 @@ UNIT_TEST(EditableMapObject_ValidateFacebookPage)
TEST(osm::ValidateFacebookPage("OpenStreetMap"), ());
TEST(osm::ValidateFacebookPage("some.good.page"), ());
TEST(osm::ValidateFacebookPage("Quaama-Volunteer-Bushfire-Brigade-526790054021506"), ());
TEST(osm::ValidateFacebookPage(u8"Páter-Bonifác-Restaurant-Budapest-111001693867133"), ());
TEST(osm::ValidateFacebookPage(u8"MÊGÅ--CÄFË-3141592653589793"), ());
TEST(osm::ValidateFacebookPage(u8"ресторан"), ()); // Cyrillic
TEST(osm::ValidateFacebookPage(u8"საქართველო"), ()); // Georgian
TEST(osm::ValidateFacebookPage(u8"日本語"), ()); // Japanese
TEST(osm::ValidateFacebookPage("@tree-house-interiors"), ());
TEST(osm::ValidateFacebookPage("allow_underscore-1234567890"), ());
TEST(osm::ValidateFacebookPage("alexander.net"), ());
TEST(!osm::ValidateFacebookPage("instagram.com/openstreetmapus"), ());
TEST(!osm::ValidateFacebookPage("https://instagram.com/openstreetmapus"), ());
TEST(!osm::ValidateFacebookPage("osm"), ());
TEST(!osm::ValidateFacebookPage("invalid_username"), ());
TEST(!osm::ValidateFacebookPage("@spaces are not welcome here"), ());
TEST(!osm::ValidateFacebookPage("spaces are not welcome here"), ());
constexpr char kForbiddenFBSymbols[] = " !@^*()~[]{}#$%&;,:+\"'/\\";
for(size_t i=0; i<std::size(kForbiddenFBSymbols)-1; i++)
{
auto test_str = std::string("special-symbol-") + kForbiddenFBSymbols[i] + "-forbidden";
TEST(!osm::ValidateFacebookPage(test_str), (test_str));
}
// Symbols "£€¥" are not allowed, but checking such cases requires Unicode-aware handling, which is not supported currently.
// TODO: find all restricted *Unicode* symbols from the https://www.facebook.com/pages/create page
// and add them to the test.
//TEST(!osm::ValidateFacebookPage(u8"you-shall-not-pass-£€¥"), ());
}
UNIT_TEST(EditableMapObject_ValidateInstagramPage)

View file

@ -10,26 +10,55 @@ using namespace std;
namespace osm {
// Pre-compiled patterns used to validate social-network account names.

// Facebook: optional '@', then at least 5 ASCII letters, digits, dots or hyphens.
static std::regex const s_fbRegex(R"(^@?[a-zA-Z\d.\-]{5,}$)");
// Instagram: optional '@', then 2 to 30 letters, digits, underscores or dots;
// the first and the last symbol must not be a dot.
static std::regex const s_instaRegex(R"(^@?[A-Za-z0-9_][A-Za-z0-9_.]{0,28}[A-Za-z0-9_]$)");
// Twitter: optional '@', then 1 to 15 letters, digits or underscores.
static std::regex const s_twitterRegex(R"(^@?[A-Za-z0-9_]{1,15}$)");
// VK: matches strings that start with three digits followed by at least one more symbol.
static std::regex const s_badVkRegex(R"(^\d\d\d.+$)");
// VK: 5 to 32 letters, digits, underscores or dots.
static std::regex const s_goodVkRegex(R"(^[A-Za-z0-9_.]{5,32}$)");
// LINE: 4 to 20 lowercase letters, digits, hyphens, underscores or dots.
static std::regex const s_lineRegex(R"(^[a-z0-9-_.]{4,20}$)");
// Returns true if |facebookPage|, scanned from |startIndex| to the end, contains a byte
// that is not allowed in a Facebook username or page name.
//
// Within the ASCII range only letters, digits, '-', '.' and '_' are accepted; every other
// ASCII byte, including control characters such as tab, newline, NUL and DEL, is rejected.
// Bytes >= 0x80 (parts of UTF-8 sequences) are always accepted.
// A |startIndex| at or beyond the end of the string scans nothing and returns false.
//
// TODO: Current implementation looks only for restricted symbols from the ASCII block,
// ignoring Unicode. Need to find all restricted *Unicode* symbols
// from the https://www.facebook.com/pages/create page and verify those symbols
// using MakeUniString or utf8cpp.
bool containsInvalidFBSymbol(std::string const & facebookPage, size_t startIndex = 0)
{
  auto const size = facebookPage.size();
  for (auto i = startIndex; i < size; ++i)
  {
    // Compare the unsigned byte value so the result does not depend on whether plain
    // char is signed on the target platform.
    auto const ch = static_cast<unsigned char>(facebookPage[i]);

    // Non-ASCII bytes are not validated yet (see TODO above).
    if (ch >= 0x80)
      continue;

    bool const isAlphaNum = (ch >= '0' && ch <= '9') ||
                            (ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z');
    // Forbid all ASCII symbols except letters, digits, '-', '.', and '_'.
    // This also covers control characters (< 0x20) and DEL (0x7F).
    if (!isAlphaNum && ch != '-' && ch != '.' && ch != '_')
      return true;
  }
  return false;
}
string ValidateAndFormat_facebook(string const & facebookPage)
{
if (facebookPage.empty())
return {};
// Check that facebookPage contains valid username. See rules: https://www.facebook.com/help/105399436216001
if (strings::EndsWith(facebookPage, ".com") || strings::EndsWith(facebookPage, ".net"))
return {};
if (regex_match(facebookPage, s_fbRegex))
if (facebookPage.front() == '@')
{
if (facebookPage.front() == '@')
// Validate facebookPage as username or page name.
if (facebookPage.length() >= 6 && !containsInvalidFBSymbol(facebookPage, 1))
return facebookPage.substr(1);
return facebookPage;
else
return {}; // Invalid symbol in Facebook username of page name.
}
else
{
if (facebookPage.length() >= 5 && !containsInvalidFBSymbol(facebookPage))
return facebookPage;
}
// facebookPage is not a valid username, so it must be a URL.
if (!EditableMapObject::ValidateWebsite(facebookPage))
return {};
@ -230,8 +259,13 @@ bool ValidateFacebookPage(string const & page)
if (page.empty())
return true;
// Rules are defined here: https://www.facebook.com/help/105399436216001
if (regex_match(page, s_fbRegex))
// Check if 'page' contains valid Facebook username or page name.
// * length >= 5
// * no forbidden symbols in the string
// * optional '@' at the start
if (page.front() == '@')
return page.length() >= 6 && !containsInvalidFBSymbol(page, 1);
else if (page.length() >= 5 && !containsInvalidFBSymbol(page))
return true;
if (!EditableMapObject::ValidateWebsite(page))