Add string class for storing utf8 strings with language descriptor.

This commit is contained in:
vng 2011-05-07 01:02:16 +03:00 committed by Alex Zolotarev
parent 76ddd0f420
commit c399e6d9fe
5 changed files with 178 additions and 2 deletions

View file

@ -21,6 +21,7 @@ SOURCES += \
timsort/timsort.cpp \
base64.cpp \
sha2.cpp \
multilang_utf8_string.cpp \
HEADERS += \
internal/xmlparser.h \
@ -76,4 +77,5 @@ HEADERS += \
bit_shift.hpp \
base64.hpp \
sha2.hpp \
value_opt_string.hpp \
value_opt_string.hpp \
multilang_utf8_string.hpp \

View file

@ -34,7 +34,8 @@ SOURCES += ../../testing/testingmain.cpp \
bit_shift_test.cpp \
base64_test.cpp \
sha2_test.cpp \
value_opt_string_test.cpp \
value_opt_string_test.cpp \
multilang_utf8_string_test.cpp \
HEADERS += \
reader_test.hpp \

View file

@ -0,0 +1,50 @@
#include "../../testing/testing.hpp"
#include "../multilang_utf8_string.hpp"
#include "../strutil.hpp"
namespace
{
// One test fixture entry: a language code together with the wide-character
// text that the test converts with ToUtf8 and stores under that code.
struct lang_string
{
  const char * m_lang;    // language code handed to AddString / GetString
  const wchar_t * m_str;  // wide text; converted to UTF-8 before storing
};
void TestMultilangString(lang_string const * arr, size_t count)
{
StringUtf8Multilang s;
for (size_t i = 0; i < count; ++i)
{
string const utf8 = ToUtf8(arr[i].m_str);
s.AddString(arr[i].m_lang, utf8);
string comp;
TEST(s.GetString(arr[i].m_lang, comp), ());
TEST_EQUAL(utf8, comp, ());
}
for (size_t i = 0; i < count; ++i)
{
string const utf8 = ToUtf8(arr[i].m_str);
string comp;
TEST(s.GetString(arr[i].m_lang, comp), ());
TEST_EQUAL(utf8, comp, ());
}
string test;
TEST(!s.GetString("xxx", test), ());
}
}
// Smoke test: round-trips one string per supported language code
// ("en", "ru", "be") through StringUtf8Multilang.
UNIT_TEST(MultilangString_Smoke)
{
  // The original also declared a local StringUtf8Multilang here that was
  // never used; TestMultilangString builds its own instance.
  lang_string arr[] = { {"en", L"abcd"}, {"ru", L"éóõ¸"}, {"be", L"öìîê"} };
  TestMultilangString(arr, ARRAY_SIZE(arr));
}

View file

@ -0,0 +1,69 @@
#include "multilang_utf8_string.hpp"
// Maps a language code to its numeric index.
// Returns 0 for "en" (and for an empty code), 1 for "ru", 2 for "be",
// and -1 for any other code.
char StringUtf8Multilang::GetLangIndex(string const & lang) const
{
  // The position of a code in this table is its language index.
  static char const * const kCodes[] = { "en", "ru", "be" };
  size_t const kCount = sizeof(kCodes) / sizeof(kCodes[0]);

  // An empty code gets the same index as "en".
  if (lang.empty())
    return 0;

  for (size_t idx = 0; idx < kCount; ++idx)
  {
    if (lang == kCodes[idx])
      return static_cast<char>(idx);
  }

  return -1;
}
// Given the index `i` of a language marker byte in m_s, returns the index of
// the next marker byte, or m_s.size() if there is none.
//
// A marker byte has the bit pattern 10xxxxxx, which is also the pattern of a
// UTF-8 continuation byte. The scan therefore must not inspect continuation
// bytes: it steps over whole encoded characters using the length announced by
// each lead byte, so the first 10xxxxxx byte found at a character boundary is
// the next marker.
size_t StringUtf8Multilang::GetNextIndex(size_t i) const
{
  ++i;  // step over the marker byte itself
  size_t const sz = m_s.size();

  while (i < sz && (m_s[i] & 0xC0) != 0x80)
  {
    // The prefixes are nested (0xE0 also satisfies the 0xC0 test, and so on),
    // so the longest prefix has to be tested first. Testing 0xC0 first made
    // every multi-byte character count as 2 bytes, which stopped the scan on
    // a continuation byte of any 3+ byte character.
    if ((m_s[i] & 0x80) == 0)
      i += 1;
    else if ((m_s[i] & 0xFE) == 0xFE)
      i += 7;
    else if ((m_s[i] & 0xFC) == 0xFC)
      i += 6;
    else if ((m_s[i] & 0xF8) == 0xF8)
      i += 5;
    else if ((m_s[i] & 0xF0) == 0xF0)
      i += 4;
    else if ((m_s[i] & 0xE0) == 0xE0)
      i += 3;
    else  // 110xxxxx: the only pattern left once 0xxxxxxx and 10xxxxxx are excluded
      i += 2;
  }

  // A truncated trailing sequence could push the index past the end; clamp it
  // so callers can safely use (result - start) as a byte count.
  return i < sz ? i : sz;
}
// Appends one record to m_s: a marker byte (the language index with the top
// bit set) followed by the raw bytes of `utf8s`.
void StringUtf8Multilang::AddString(char lang, string const & utf8s)
{
  char const marker = static_cast<char>(lang | 0x80);
  m_s += marker;
  m_s.append(utf8s.begin(), utf8s.end());
}
// Looks up the string stored under language index `lang`.
// Walks m_s record by record; on the first record whose marker byte carries
// `lang` in its low 6 bits, copies the bytes between that marker and the next
// one into `utf8s` and returns true. Returns false (leaving `utf8s`
// untouched) when no record matches.
bool StringUtf8Multilang::GetString(char lang, string & utf8s) const
{
  size_t const total = m_s.size();

  for (size_t pos = 0, next = 0; pos < total; pos = next)
  {
    next = GetNextIndex(pos);

    if ((m_s[pos] & 0x3F) != lang)
      continue;

    // Payload starts right after the marker byte and ends before the next one.
    size_t const start = pos + 1;
    utf8s.assign(m_s.c_str() + start, next - start);
    return true;
  }

  return false;
}

View file

@ -0,0 +1,54 @@
#pragma once
#include "varint.hpp"
#include "../base/assert.hpp"
#include "../std/string.hpp"
// Holds several UTF-8 strings, each tagged with a language index, packed into
// one byte buffer. Strings are added and looked up either by numeric language
// index or by language code; the whole buffer can be written to a sink and
// read back from a source.
class StringUtf8Multilang
{
  // Packed storage for all (language, string) records.
  string m_s;

  // Index of the next record marker after position i (defined in the .cpp).
  size_t GetNextIndex(size_t i) const;
  // Language index for a language code, or -1 if the code is unknown.
  char GetLangIndex(string const & lang) const;

  // GetLangIndex() reports "unknown" as -1 through plain char. Where char is
  // unsigned that value reads back as 255 and a `>= 0` test always passes, so
  // the result is reinterpreted as signed char before being compared.
  int GetLangIndexOrNegative(string const & lang) const
  {
    return static_cast<signed char>(GetLangIndex(lang));
  }

public:
  // Appends `utf8s` under the numeric language index `lang`.
  void AddString(char lang, string const & utf8s);

  // Appends `utf8s` under the language code `lang`.
  // Does nothing when the code is not recognised by GetLangIndex().
  void AddString(string const & lang, string const & utf8s)
  {
    int const l = GetLangIndexOrNegative(lang);
    if (l >= 0)
      AddString(static_cast<char>(l), utf8s);
  }

  // Copies the string stored under numeric language index `lang` into
  // `utf8s`; returns false when there is none.
  bool GetString(char lang, string & utf8s) const;

  // Same lookup by language code. Returns false when the code is not
  // recognised or no string is stored for it.
  bool GetString(string const & lang, string & utf8s) const
  {
    int const l = GetLangIndexOrNegative(lang);
    if (l >= 0)
      return GetString(static_cast<char>(l), utf8s);
    else
      return false;
  }

  // Serialises the buffer as a varuint holding (size - 1) followed by the raw
  // bytes. The buffer must not be empty (enforced with CHECK).
  template <class TSink> void Write(TSink & sink) const
  {
    CHECK(!m_s.empty(), ());

    size_t const sz = m_s.size();
    WriteVarUint(sink, static_cast<uint32_t>(sz-1));
    sink.Write(m_s.c_str(), sz);
  }

  // Inverse of Write(): reads the varuint, adds 1 to obtain the byte count,
  // then reads that many bytes into the buffer, replacing its contents.
  template <class TSource> void Read(TSource & src)
  {
    uint32_t const sz = ReadVarUint<uint32_t>(src) + 1;
    m_s.resize(sz);
    src.Read(&m_s[0], sz);
    CHECK(!m_s.empty(), ());
  }
};