forked from organicmaps/organicmaps
Added Unicode normalization
@TODO: make the code smaller
This commit is contained in:
parent
015250ae97
commit
7841b70b09
6 changed files with 4607 additions and 16 deletions
|
@ -21,6 +21,7 @@ SOURCES += \
|
|||
path_utils.cpp \
|
||||
condition.cpp \
|
||||
lower_case.cpp \
|
||||
normalize_unicode.cpp \
|
||||
|
||||
HEADERS += \
|
||||
SRC_FIRST.hpp \
|
||||
|
|
|
@ -252,3 +252,14 @@ UNIT_TEST(MakeUniString_Smoke)
|
|||
char const s [] = "Hello!";
|
||||
TEST_EQUAL(strings::UniString(&s[0], &s[0] + ARRAY_SIZE(s) - 1), strings::MakeUniString(s), ());
|
||||
}
|
||||
|
||||
/// Checks the in-place strings::Normalize overload: the five input code points
/// must come out as the eleven code points listed in `expected` ('H' is unchanged).
UNIT_TEST(Normalize)
{
  // Reference sequence the normalized string is compared against.
  strings::UniChar const expected[] = { 0x30, 0x2c, 'H', 0x649, 0x631, 0x6cc, 0x627, 0x644,
                                        0x31, 0x2044, 0x37 };
  strings::UniString want(expected, expected + ARRAY_SIZE(expected));

  // Input includes a code point above 0xffff (0x1f101) next to a plain ASCII letter.
  strings::UniChar const input[] = { 0x1f101, 'H', 0xfef0, 0xfdfc, 0x2150 };
  strings::UniString text(input, input + ARRAY_SIZE(input));

  strings::Normalize(text);
  TEST_EQUAL(text, want, ());
}
|
||||
|
|
|
@ -22,22 +22,22 @@ static uint16_t const smallff[] = {0xff00,0xff01,0xff02,0xff03,0xff04,0xff05,0xf
|
|||
/// @return 0 if char should be replaced with 2 or more chars
|
||||
UniChar LowerUniChar(UniChar c)
|
||||
{
|
||||
switch (c & 0x00ffff00)
|
||||
switch (c & 0xffffff00)
|
||||
{
|
||||
case 0x0000: return small00[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x0100: return small01[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x0200: return small02[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x0300: return small03[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x0400: return small04[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x0500: return small05[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x1000: return small10[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x1e00: return small1e[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x1f00: return small1f[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x2100: return small21[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x2400: return small24[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x2c00: return small2c[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0xa600: return smalla6[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0xa700: return smalla7[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0x0000: return small00[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x0100: return small01[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x0200: return small02[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x0300: return small03[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x0400: return small04[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x0500: return small05[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x1000: return small10[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x1e00: return small1e[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x1f00: return small1f[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x2100: return small21[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x2400: return small24[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x2c00: return small2c[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0xa600: return smalla6[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0xa700: return smalla7[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0xfb00:
|
||||
{
|
||||
if (c >= 0xfb00 && c <= 0xfb06)
|
||||
|
@ -46,7 +46,7 @@ UniChar LowerUniChar(UniChar c)
|
|||
return 0;
|
||||
return c;
|
||||
}
|
||||
case 0xff00: return smallff[static_cast<uint16_t>(c & 0x00ff)];
|
||||
case 0xff00: return smallff[static_cast<uint8_t>(c & 0x00ff)];
|
||||
case 0x10400:
|
||||
{
|
||||
if (c >= 0x10400 && c <= 0x10427)
|
||||
|
|
4569
base/normalize_unicode.cpp
Normal file
4569
base/normalize_unicode.cpp
Normal file
File diff suppressed because one or more lines are too long
|
@ -95,6 +95,13 @@ string MakeLowerCase(string const & s)
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Non-mutating counterpart of Normalize(UniString &).
/// @param s source string; it is copied and never modified.
/// @return the copy after the in-place Normalize overload has been applied to it.
UniString Normalize(UniString const & s)
{
  UniString normalized(s);
  Normalize(normalized);
  return normalized;
}
|
||||
|
||||
bool EqualNoCase(string const & s1, string const & s2)
|
||||
{
|
||||
return MakeLowerCase(s1) == MakeLowerCase(s2);
|
||||
|
|
|
@ -17,6 +17,9 @@ typedef buffer_vector<UniChar, 32> UniString;
|
|||
|
||||
/// Value-returning overload: takes the string by const reference and returns a UniString.
/// NOTE(review): presumably a lowercased copy of s; the implementation is not visible here.
UniString MakeLowerCase(UniString const & s);
|
||||
/// Overload taking a mutable reference and returning nothing.
/// NOTE(review): presumably lowercases s in place; confirm against the implementation.
void MakeLowerCase(UniString & s);
|
||||
/// Copies s, applies the in-place Normalize overload to the copy and returns it.
UniString Normalize(UniString const & s);
|
||||
/// Normalizes s in place.
/// NOTE(review): the exact normalization rules live in normalize_unicode.cpp, which is
/// not visible here; confirm which Unicode normalization form is implemented.
void Normalize(UniString & s);
|
||||
|
||||
/// std::string counterpart taking a mutable reference and returning nothing.
/// NOTE(review): presumably lowercases s in place; confirm against the implementation.
void MakeLowerCase(string & s);
|
||||
/// std::string counterpart returning a string by value.
/// NOTE(review): presumably a lowercased copy of s; confirm against the implementation.
string MakeLowerCase(string const & s);
|
||||
/// @return true when MakeLowerCase(s1) == MakeLowerCase(s2).
bool EqualNoCase(string const & s1, string const & s2);
|
||||
|
|
Loading…
Add table
Reference in a new issue