forked from organicmaps/organicmaps
Add string class for storing utf8 strings with language descriptor.
This commit is contained in:
parent
76ddd0f420
commit
c399e6d9fe
5 changed files with 178 additions and 2 deletions
|
@ -21,6 +21,7 @@ SOURCES += \
|
|||
timsort/timsort.cpp \
|
||||
base64.cpp \
|
||||
sha2.cpp \
|
||||
multilang_utf8_string.cpp \
|
||||
|
||||
HEADERS += \
|
||||
internal/xmlparser.h \
|
||||
|
@ -76,4 +77,5 @@ HEADERS += \
|
|||
bit_shift.hpp \
|
||||
base64.hpp \
|
||||
sha2.hpp \
|
||||
value_opt_string.hpp \
|
||||
value_opt_string.hpp \
|
||||
multilang_utf8_string.hpp \
|
||||
|
|
|
@ -34,7 +34,8 @@ SOURCES += ../../testing/testingmain.cpp \
|
|||
bit_shift_test.cpp \
|
||||
base64_test.cpp \
|
||||
sha2_test.cpp \
|
||||
value_opt_string_test.cpp \
|
||||
value_opt_string_test.cpp \
|
||||
multilang_utf8_string_test.cpp \
|
||||
|
||||
HEADERS += \
|
||||
reader_test.hpp \
|
||||
|
|
50
coding/coding_tests/multilang_utf8_string_test.cpp
Normal file
50
coding/coding_tests/multilang_utf8_string_test.cpp
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
|
||||
#include "../multilang_utf8_string.hpp"
|
||||
#include "../strutil.hpp"
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
// One test fixture entry: a language code paired with the text to store for it.
struct lang_string
{
  const char * m_lang;    // language code passed to AddString / GetString
  const wchar_t * m_str;  // wide-character text, converted with ToUtf8 before storing
};
|
||||
|
||||
// Stores every entry of |arr| in a single StringUtf8Multilang and checks that
// each one can be read back, both right after it is added and again once all
// entries are present. Finally checks that an unknown language code is rejected.
void TestMultilangString(lang_string const * arr, size_t count)
{
  StringUtf8Multilang s;

  // Pass 1: add an entry, then immediately read it back.
  size_t i = 0;
  while (i != count)
  {
    string const utf8 = ToUtf8(arr[i].m_str);
    s.AddString(arr[i].m_lang, utf8);

    string comp;
    TEST(s.GetString(arr[i].m_lang, comp), ());
    TEST_EQUAL(utf8, comp, ());

    ++i;
  }

  // Pass 2: with all entries stored, each one must still be retrievable intact.
  i = 0;
  while (i != count)
  {
    string const utf8 = ToUtf8(arr[i].m_str);

    string comp;
    TEST(s.GetString(arr[i].m_lang, comp), ());
    TEST_EQUAL(utf8, comp, ());

    ++i;
  }

  // A language code that is not supported must not yield a string.
  string test;
  TEST(!s.GetString("xxx", test), ());
}
|
||||
}
|
||||
|
||||
// Smoke test: stores one string for each of the "en", "ru" and "be" language
// codes and verifies that every one of them round-trips.
UNIT_TEST(MultilangString_Smoke)
{
  // The container under test is created inside TestMultilangString; no local
  // instance is needed here.
  lang_string arr[] = { {"en", L"abcd"}, {"ru", L"éóõ¸"}, {"be", L"öìîê"} };

  TestMultilangString(arr, ARRAY_SIZE(arr));
}
|
69
coding/multilang_utf8_string.cpp
Normal file
69
coding/multilang_utf8_string.cpp
Normal file
|
@ -0,0 +1,69 @@
|
|||
#include "multilang_utf8_string.hpp"
|
||||
|
||||
|
||||
|
||||
// Maps a language code to its numeric index: "" and "en" -> 0, "ru" -> 1,
// "be" -> 2. Any other code yields -1.
//
// Note: the result is a plain char, so where plain char is unsigned the
// "not found" value is 255; callers must not rely on a sign test to detect it.
char StringUtf8Multilang::GetLangIndex(string const & lang) const
{
  // An empty code shares index 0 with "en".
  if (lang.empty())
    return 0;

  // The position in this table is the language index.
  static char const * const codes[] = { "en", "ru", "be" };
  size_t const count = sizeof(codes) / sizeof(codes[0]);

  for (size_t idx = 0; idx < count; ++idx)
  {
    if (lang == codes[idx])
      return static_cast<char>(idx);
  }

  return -1;
}
|
||||
|
||||
size_t StringUtf8Multilang::GetNextIndex(size_t i) const
|
||||
{
|
||||
++i;
|
||||
size_t const sz = m_s.size();
|
||||
|
||||
while (i < sz && (m_s[i] & 0xC0) != 0x80)
|
||||
{
|
||||
if ((m_s[i] & 0x80) == 0)
|
||||
i += 1;
|
||||
else if ((m_s[i] & 0xC0) == 0xC0)
|
||||
i += 2;
|
||||
else if ((m_s[i] & 0xE0) == 0xE0)
|
||||
i += 3;
|
||||
else if ((m_s[i] & 0xF0) == 0xF0)
|
||||
i += 4;
|
||||
else if ((m_s[i] & 0xF8) == 0xF8)
|
||||
i += 5;
|
||||
else if ((m_s[i] & 0xFC) == 0xFC)
|
||||
i += 6;
|
||||
else if ((m_s[i] & 0xFE) == 0xFE)
|
||||
i += 7;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void StringUtf8Multilang::AddString(char lang, string const & utf8s)
|
||||
{
|
||||
m_s.push_back(lang | 0x80);
|
||||
m_s.insert(m_s.end(), utf8s.begin(), utf8s.end());
|
||||
}
|
||||
|
||||
bool StringUtf8Multilang::GetString(char lang, string & utf8s) const
|
||||
{
|
||||
size_t i = 0;
|
||||
size_t const sz = m_s.size();
|
||||
|
||||
while (i < sz)
|
||||
{
|
||||
size_t const next = GetNextIndex(i);
|
||||
|
||||
if ((m_s[i] & 0x3F) == lang)
|
||||
{
|
||||
++i;
|
||||
utf8s.assign(m_s.c_str() + i, next - i);
|
||||
return true;
|
||||
}
|
||||
|
||||
i = next;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
54
coding/multilang_utf8_string.hpp
Normal file
54
coding/multilang_utf8_string.hpp
Normal file
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#include "varint.hpp"
|
||||
|
||||
#include "../base/assert.hpp"
|
||||
|
||||
#include "../std/string.hpp"
|
||||
|
||||
|
||||
class StringUtf8Multilang
|
||||
{
|
||||
string m_s;
|
||||
|
||||
size_t GetNextIndex(size_t i) const;
|
||||
char GetLangIndex(string const & lang) const;
|
||||
|
||||
public:
|
||||
|
||||
void AddString(char lang, string const & utf8s);
|
||||
void AddString(string const & lang, string const & utf8s)
|
||||
{
|
||||
char const l = GetLangIndex(lang);
|
||||
if (l >= 0)
|
||||
AddString(l, utf8s);
|
||||
}
|
||||
|
||||
bool GetString(char lang, string & utf8s) const;
|
||||
bool GetString(string const & lang, string & utf8s) const
|
||||
{
|
||||
char const l = GetLangIndex(lang);
|
||||
if (l >= 0)
|
||||
return GetString(l, utf8s);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class TSink> void Write(TSink & sink)
|
||||
{
|
||||
CHECK(!m_s.empty(), ());
|
||||
|
||||
size_t const sz = m_s.size();
|
||||
WriteVarUint(sink, static_cast<uint32_t>(sz-1));
|
||||
sink.Write(m_s.c_str(), sz);
|
||||
}
|
||||
|
||||
template <class TSource> void Read(TSource & src)
|
||||
{
|
||||
uint32_t const sz = ReadVarUint<uint32_t>(src) + 1;
|
||||
m_s.resize(sz);
|
||||
src.Read(&m_s[0], sz);
|
||||
|
||||
CHECK(!m_s.empty(), ());
|
||||
}
|
||||
};
|
Loading…
Add table
Reference in a new issue