mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-04 13:05:06 +00:00
* Redefined and renamed types for code units. * Remove -Wsign-conversion from test builds. * find_invalid and is_valid that work with C-style strings. * Lifted the C++11 requirement for some functions that take std::string as an argument. * Support for C++20 u8string Issue #89 * Update test docker image to 4.0.0 * Update Dockerfile to run tests with a recent gcc compiler. * Make some internal helper functions non-template * Add append16 function Support for appending codepoints to existing utf16 encoded strings. See #91 * next16 * Tests and documentation for next16 * Rewrite CMakeLists Drop the existing CMake structure and write the new one from scratch. The root CMakeLists.txt is used for installing the package without building and running tests. Testing is done via a separate CMakeLists.txt in the tests directory. * Remove "samples" directory. The content of that file is already in the documentation. * Update README.md Restructure the reference, add installation instructions, toc, other minor changes
77 lines
2.3 KiB
C++
77 lines
2.3 KiB
C++
#include "../extern/ftest/ftest.h"
|
|
#define UTF_CPP_CPLUSPLUS 202002L
|
|
#include "utf8.h"
|
|
#include <string>
|
|
using namespace utf8;
|
|
using namespace std;
|
|
|
|
TEST(CPP20APITests, test_utf16tou8)
|
|
{
|
|
u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
|
|
u16string_view utf16stringview{utf16string};
|
|
u8string u = utf16tou8(utf16string);
|
|
EXPECT_EQ (u.size(), 10);
|
|
u = utf16tou8(utf16stringview);
|
|
EXPECT_EQ (u.size(), 10);
|
|
}
|
|
|
|
TEST(CPP20APITests, tes20t_utf8to16)
|
|
{
|
|
u8string utf8_with_surrogates{u8"\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"};
|
|
u16string utf16result = utf8to16(utf8_with_surrogates);
|
|
EXPECT_EQ (utf16result.size(), 4);
|
|
EXPECT_EQ (utf16result[2], 0xd834);
|
|
EXPECT_EQ (utf16result[3], 0xdd1e);
|
|
}
|
|
|
|
TEST(CPP20APITests, test_utf32tou8)
|
|
{
|
|
u32string utf32string = {0x448, 0x65E5, 0x10346};
|
|
u32string_view utf32stringview{utf32string};
|
|
u8string utf8result = utf32tou8(utf32stringview);
|
|
EXPECT_EQ (utf8result.size(), 9);
|
|
}
|
|
|
|
TEST(CPP20APITests, test_utf8to32)
|
|
{
|
|
u8string twochars = u8"\xe6\x97\xa5\xd1\x88";
|
|
u32string utf32result = utf8to32(twochars);
|
|
EXPECT_EQ (utf32result.size(), 2);
|
|
}
|
|
|
|
TEST(CPP20APITests, test_find_invalid)
|
|
{
|
|
u8string utf_invalid = u8"\xe6\x97\xa5\xd1\x88\xfa";
|
|
auto invalid = find_invalid(utf_invalid);
|
|
EXPECT_EQ (invalid, 5);
|
|
}
|
|
|
|
TEST(CPP20APITests, test_is_valid)
|
|
{
|
|
u8string utf_invalid = u8"\xe6\x97\xa5\xd1\x88\xfa";
|
|
bool bvalid = is_valid(utf_invalid);
|
|
EXPECT_FALSE (bvalid);
|
|
u8string utf8_with_surrogates = u8"\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
|
|
bvalid = is_valid(utf8_with_surrogates);
|
|
EXPECT_TRUE (bvalid);
|
|
}
|
|
|
|
TEST(CPP20APITests, test_replace_invalid)
|
|
{
|
|
u8string invalid_sequence = u8"a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
|
|
u8string replace_invalid_result = replace_invalid(invalid_sequence, u8'?');
|
|
bool bvalid = is_valid(replace_invalid_result);
|
|
EXPECT_TRUE (bvalid);
|
|
const u8string fixed_invalid_sequence = u8"a????z";
|
|
EXPECT_EQ(fixed_invalid_sequence, replace_invalid_result);
|
|
}
|
|
|
|
TEST(CPP20APITests, test_starts_with_bom)
|
|
{
|
|
u8string byte_order_mark = u8"\xef\xbb\xbf";
|
|
bool bbom = starts_with_bom(byte_order_mark);
|
|
EXPECT_TRUE (bbom);
|
|
u8string threechars = u8"\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
|
|
bool no_bbom = starts_with_bom(threechars);
|
|
EXPECT_FALSE (no_bbom);
|
|
}
|