mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-04 13:05:06 +00:00
* Redefined and renamed types for code units. * Remove -Wsign-conversion from test builds. * find_invalid and is_valid that work with C-style strings. * Lifted the C++11 requirement for some functions that take std::string as an argument. * Support for C++20 u8string Issue #89 * Update test docker image to 4.0.0 * Update Dockerfile to run tests with a recent gcc compiler. * Make some internal helper functions non-template * Add append16 function Support for appending codepoints to existing utf16 encoded strings. See #91 * next16 * Tests and documentation for next16 * Rewrite CMakeLists Drop the existing CMake structure and write the new one from scratch. The root CMakeLists.txt is used for installing the package without building and running tests. Testing is done via a separate CMakeLists.txt in the tests directory. * Remove "samples" directory. The content of that file is already in the documentation. * Update README.md Restructure the reference, add installation instructions, toc, other minor changes
88 lines
2.6 KiB
C++
88 lines
2.6 KiB
C++
#include "../extern/ftest/ftest.h"
|
|
#include "utf8.h"
|
|
#include <string>
|
|
using namespace utf8;
|
|
using namespace std;
|
|
|
|
#if __cplusplus >= 201703L // C++ 17 or later
|
|
|
|
|
|
TEST(CPP17APITests, test_utf16to8)
|
|
{
|
|
u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
|
|
u16string_view utf16stringview(utf16string);
|
|
string u = utf16to8(utf16stringview);
|
|
EXPECT_EQ (u.size(), 10);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_utf8to16)
|
|
{
|
|
string_view utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
|
|
u16string utf16result = utf8to16(utf8_with_surrogates);
|
|
EXPECT_EQ (utf16result.size(), 4);
|
|
EXPECT_EQ (utf16result[2], 0xd834);
|
|
EXPECT_EQ (utf16result[3], 0xdd1e);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_utf32to8)
|
|
{
|
|
u32string utf32string = {0x448, 0x65E5, 0x10346};
|
|
u32string_view utf32stringview(utf32string);
|
|
string utf8result = utf32to8(utf32stringview);
|
|
EXPECT_EQ (utf8result.size(), 9);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_utf8to32)
|
|
{
|
|
string_view twochars = "\xe6\x97\xa5\xd1\x88";
|
|
u32string utf32result = utf8to32(twochars);
|
|
EXPECT_EQ (utf32result.size(), 2);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_find_invalid)
|
|
{
|
|
string_view utf_invalid = "\xe6\x97\xa5\xd1\x88\xfa";
|
|
auto invalid = find_invalid(utf_invalid);
|
|
EXPECT_EQ (invalid, 5);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_is_valid)
|
|
{
|
|
string_view utf_invalid = "\xe6\x97\xa5\xd1\x88\xfa";
|
|
bool bvalid = is_valid(utf_invalid);
|
|
EXPECT_FALSE (bvalid);
|
|
string_view utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
|
|
bvalid = is_valid(utf8_with_surrogates);
|
|
EXPECT_TRUE (bvalid);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_replace_invalid)
|
|
{
|
|
string_view invalid_sequence = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
|
|
string replace_invalid_result = replace_invalid(invalid_sequence, '?');
|
|
bool bvalid = is_valid(replace_invalid_result);
|
|
EXPECT_TRUE (bvalid);
|
|
const string fixed_invalid_sequence = "a????z";
|
|
EXPECT_EQ(fixed_invalid_sequence, replace_invalid_result);
|
|
}
|
|
|
|
TEST(CPP17APITests, test_starts_with_bom)
|
|
{
|
|
string byte_order_mark = {char(0xef), char(0xbb), char(0xbf)};
|
|
string_view byte_order_mark_view(byte_order_mark);
|
|
bool bbom = starts_with_bom(byte_order_mark_view);
|
|
EXPECT_TRUE (bbom);
|
|
string_view threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
|
|
bool no_bbom = starts_with_bom(threechars);
|
|
EXPECT_FALSE (no_bbom);
|
|
}
|
|
|
|
TEST(CPP17APITests, string_class_and_literals)
|
|
{
|
|
const char* twochars = u8"ab";
|
|
EXPECT_TRUE (is_valid(twochars));
|
|
const string two_chars_string(twochars);
|
|
EXPECT_TRUE (is_valid(two_chars_string));
|
|
}
|
|
|
|
#endif // C++ 11 or later
|