Added normalization and lower-casing for UTF-8 strings

This commit is contained in:
vng 2013-08-06 12:42:18 +03:00
parent 30aebfdea7
commit 11cc91f3e4
4 changed files with 46 additions and 4 deletions

7
env/env.pro vendored
View file

@ -31,9 +31,14 @@ SOURCES += \
posix.cpp \
source_address.cpp \
thread_posix.cpp \
strings.cpp \
# utf8proc
SOURCES += \
../3rdparty/utf8proc/utf8proc.c \
# unit tests
SOURCES += \
../3rdparty/googletest/src/gtest-all.cc \
../3rdparty/googletest/src/gtest_main.cc \
tests/smoke.cpp \
tests/env_tests.cpp \

24
env/strings.cpp vendored Normal file
View file

@ -0,0 +1,24 @@
#include "strings.hpp"
#include "assert.hpp"
#include "../std/vector.hpp"
#include "../3rdparty/utf8proc/utf8proc.h"
namespace str
{
/// Returns a case-folded, accent-free copy of the UTF-8 string @p s.
///
/// The text is case-folded, decomposed and stripped of combining marks by utf8proc,
/// then re-encoded as UTF-8 (for example "Über Karten" becomes "uber karten").
///
/// @param s UTF-8 encoded text; may be empty.
/// @return The folded text, UTF-8 encoded. An empty input yields an empty string.
/// @note CHECK fails if utf8proc reports an error (negative result), e.g. for invalid UTF-8.
string MakeNormalizeAndLowerUtf8(string const & s)
{
  // Nothing to fold. This also keeps utf8proc_reencode(), which always writes a
  // terminating zero byte, away from a zero-sized buffer.
  if (s.empty())
    return string();

  uint8_t const * const src = reinterpret_cast<uint8_t const *>(s.c_str());
  int const count = static_cast<int>(s.size());
  int const options = UTF8PROC_CASEFOLD | UTF8PROC_DECOMPOSE | UTF8PROC_STRIPMARK;

  // One code point per input byte is the initial guess; the extra element guarantees
  // room for the zero byte that utf8proc_reencode() appends after the encoded text.
  vector<int32_t> buffer(count + 1);
  int sz = utf8proc_decompose(src, count, buffer.data(), count, options);
  CHECK(sz >= 0, ());

  if (sz > count)
  {
    // The guess was too small: utf8proc returned the number of code points it needs
    // and left the buffer contents unspecified, so grow the buffer and decompose again.
    int const required = sz;
    buffer.resize(required + 1);
    sz = utf8proc_decompose(src, count, buffer.data(), required, options);
    CHECK(sz >= 0 && sz <= required, ());
  }

  // Re-encoding happens in place: the int32_t code points are overwritten with UTF-8 bytes.
  sz = utf8proc_reencode(buffer.data(), sz, 0);
  CHECK(sz >= 0, ());
  return string(reinterpret_cast<char *>(buffer.data()), sz);
}
}

2
env/strings.hpp vendored
View file

@ -14,4 +14,6 @@ template <class T> string ToString(T const & t)
return ss.str();
}
/// Returns a copy of the UTF-8 string @p s that is case-folded, decomposed and stripped
/// of combining marks, e.g. "Über Karten" -> "uber karten", "Schließen" -> "schliessen".
string MakeNormalizeAndLowerUtf8(string const & s);
}

View file

@ -3,9 +3,11 @@
#include "../file_handle.hpp"
#include "../file_system.hpp"
#include "../logging.hpp"
#include "../strings.hpp"
#include "../../std/algorithm.hpp"
#include "../../std/vector.hpp"
#include "../../std/array.hpp"
/// @note Do not edit formatting here (SRC() test):
@ -18,7 +20,7 @@ namespace
}
}
TEST(EnvSmoke, SourceAddress)
TEST(Env, SourceAddress)
{
string s = GetSourceAddress();
size_t const beg = s.find_last_of('/');
@ -28,7 +30,7 @@ TEST(EnvSmoke, SourceAddress)
size_t const end = s.find_last_of(',');
EXPECT_NE(end, string::npos);
string const test = s.substr(0, end);
EXPECT_EQ(test, "smoke.cpp, GetSourceAddress");
EXPECT_EQ(test, "env_tests.cpp, GetSourceAddress");
ostringstream ss;
ss << test << ", " << (__LINE__ - 17) << ": "; // magic constant
@ -37,7 +39,7 @@ TEST(EnvSmoke, SourceAddress)
//@}
TEST(EnvSmoke, FileHandle)
TEST(Env, FileHandle)
{
typedef file::FileHandle HandleT;
@ -68,3 +70,12 @@ TEST(EnvSmoke, FileHandle)
EXPECT_TRUE(fs::DeleteFile(name));
}
/// Checks that accented, mixed-case UTF-8 text is folded to plain lower-case text.
TEST(Env, MakeNormalizeAndLowerUtf8)
{
  // Each row is { input, expected folded result }.
  char const * cases[][2] = {
    { "Atualização disponível", "atualizacao disponivel" },
    { "Můžeš", "muzes" },
    { "Über Karten", "uber karten" },
    { "Schließen", "schliessen" }
  };

  size_t const total = ArraySize(cases);
  for (size_t row = 0; row != total; ++row)
  {
    char const * const source = cases[row][0];
    char const * const expected = cases[row][1];
    EXPECT_EQ(str::MakeNormalizeAndLowerUtf8(source), expected);
  }
}