Added normalization and lower-casing for UTF-8 strings
This commit is contained in:
parent
30aebfdea7
commit
11cc91f3e4
4 changed files with 46 additions and 4 deletions
7
env/env.pro
vendored
7
env/env.pro
vendored
|
@ -31,9 +31,14 @@ SOURCES += \
|
|||
posix.cpp \
|
||||
source_address.cpp \
|
||||
thread_posix.cpp \
|
||||
strings.cpp \
|
||||
|
||||
# utf8proc
|
||||
SOURCES += \
|
||||
../3rdparty/utf8proc/utf8proc.c \
|
||||
|
||||
# unit tests
|
||||
SOURCES += \
|
||||
../3rdparty/googletest/src/gtest-all.cc \
|
||||
../3rdparty/googletest/src/gtest_main.cc \
|
||||
tests/smoke.cpp \
|
||||
tests/env_tests.cpp \
|
||||
|
|
24
env/strings.cpp
vendored
Normal file
24
env/strings.cpp
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
#include "strings.hpp"
|
||||
#include "assert.hpp"
|
||||
|
||||
#include "../std/vector.hpp"
|
||||
|
||||
#include "../3rdparty/utf8proc/utf8proc.h"
|
||||
|
||||
|
||||
namespace str
|
||||
{
|
||||
|
||||
string MakeNormalizeAndLowerUtf8(string const & s)
|
||||
{
|
||||
int const count = static_cast<int>(s.size());
|
||||
vector<int32_t> buffer(count);
|
||||
int sz = utf8proc_decompose(reinterpret_cast<uint8_t const *>(s.c_str()), count, buffer.data(), count,
|
||||
UTF8PROC_CASEFOLD | UTF8PROC_DECOMPOSE | UTF8PROC_STRIPMARK);
|
||||
CHECK(sz >= 0 && sz <= count, ());
|
||||
|
||||
sz = utf8proc_reencode(buffer.data(), sz, 0);
|
||||
return string(reinterpret_cast<char *>(buffer.data()), sz);
|
||||
}
|
||||
|
||||
}
|
2
env/strings.hpp
vendored
2
env/strings.hpp
vendored
|
@ -14,4 +14,6 @@ template <class T> string ToString(T const & t)
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
/// Returns a copy of the UTF-8 string @p s after utf8proc case folding and
/// decomposition, with combining marks stripped
/// (e.g. "Über Karten" -> "uber karten", "Schließen" -> "schliessen").
/// @note The implementation fails a CHECK when utf8proc reports an error for @p s.
string MakeNormalizeAndLowerUtf8(string const & s);
|
||||
|
||||
}
|
||||
|
|
17
env/tests/smoke.cpp → env/tests/env_tests.cpp
vendored
17
env/tests/smoke.cpp → env/tests/env_tests.cpp
vendored
|
@ -3,9 +3,11 @@
|
|||
#include "../file_handle.hpp"
|
||||
#include "../file_system.hpp"
|
||||
#include "../logging.hpp"
|
||||
#include "../strings.hpp"
|
||||
|
||||
#include "../../std/algorithm.hpp"
|
||||
#include "../../std/vector.hpp"
|
||||
#include "../../std/array.hpp"
|
||||
|
||||
|
||||
/// @note Do not edit formatting here (SRC() test):
|
||||
|
@ -18,7 +20,7 @@ namespace
|
|||
}
|
||||
}
|
||||
|
||||
TEST(EnvSmoke, SourceAddress)
|
||||
TEST(Env, SourceAddress)
|
||||
{
|
||||
string s = GetSourceAddress();
|
||||
size_t const beg = s.find_last_of('/');
|
||||
|
@ -28,7 +30,7 @@ TEST(EnvSmoke, SourceAddress)
|
|||
size_t const end = s.find_last_of(',');
|
||||
EXPECT_NE(end, string::npos);
|
||||
string const test = s.substr(0, end);
|
||||
EXPECT_EQ(test, "smoke.cpp, GetSourceAddress");
|
||||
EXPECT_EQ(test, "env_tests.cpp, GetSourceAddress");
|
||||
|
||||
ostringstream ss;
|
||||
ss << test << ", " << (__LINE__ - 17) << ": "; // magic constant
|
||||
|
@ -37,7 +39,7 @@ TEST(EnvSmoke, SourceAddress)
|
|||
//@}
|
||||
|
||||
|
||||
TEST(EnvSmoke, FileHandle)
|
||||
TEST(Env, FileHandle)
|
||||
{
|
||||
typedef file::FileHandle HandleT;
|
||||
|
||||
|
@ -68,3 +70,12 @@ TEST(EnvSmoke, FileHandle)
|
|||
|
||||
EXPECT_TRUE(fs::DeleteFile(name));
|
||||
}
|
||||
|
||||
/// Verifies that str::MakeNormalizeAndLowerUtf8 lower-cases several accented
/// sample strings and removes their diacritics.
TEST(Env, MakeNormalizeAndLowerUtf8)
{
  // Inputs and the outputs expected for them, matched by index.
  char const * source[] = { "Atualização disponível", "Můžeš", "Über Karten", "Schließen" };
  char const * expected[] = { "atualizacao disponivel", "muzes", "uber karten", "schliessen" };

  size_t idx = 0;
  while (idx < ArraySize(source))
  {
    EXPECT_EQ(str::MakeNormalizeAndLowerUtf8(source[idx]), expected[idx]);
    ++idx;
  }
}
|
Reference in a new issue