From e3e57c811aa4cd12153f920a45ff2157427977da Mon Sep 17 00:00:00 2001
From: nemtrif
Date: Mon, 26 Dec 2022 14:19:04 -0500
Subject: [PATCH] Fix for issue #92: Clang 10 Compiler warnings

Fix warnings when -Wconversion or -Wsign-conversion are enabled.
---
Reviewer notes (ignored by git am; not part of the commit message):
 * This copy of the patch had its line breaks collapsed and every <...>
   sequence stripped. Template parameter lists, static_cast target types,
   CMake generator expressions, indentation and blank context lines were
   reconstructed from the surrounding code. The cast target in the removed
   lines is presumed to be uint8_t and the trailing context "template" lines
   are presumed to declare octet_iterator/output_iterator - verify against
   the repository before applying. The author's e-mail address could not be
   recovered and was not invented.
 * tests/test_checked_api.cpp: the new assertions now inspect `c`, the buffer
   that append('a', c) writes to, instead of the unrelated array `u`.
 * source/utf8/core.h: comment typo "uint_8" corrected to "uint8_t".

 source/utf8/checked.h      | 20 ++--------------
 source/utf8/core.h         | 49 ++++++++++++++++++++++++++++++++++++++
 source/utf8/unchecked.h    | 19 +--------------
 tests/CMakeLists.txt       |  2 +-
 tests/test_checked_api.cpp |  6 +++++
 5 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/source/utf8/checked.h b/source/utf8/checked.h
index 9b0a98d..512dcc2 100644
--- a/source/utf8/checked.h
+++ b/source/utf8/checked.h
@@ -50,6 +50,7 @@ namespace utf8
         uint8_t u8;
     public:
         invalid_utf8 (uint8_t u) : u8(u) {}
+        invalid_utf8 (char c) : u8(static_cast<uint8_t>(c)) {}
         virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; }
         uint8_t utf8_octet() const {return u8;}
     };
@@ -75,24 +76,7 @@ namespace utf8
         if (!utf8::internal::is_code_point_valid(cp))
             throw invalid_code_point(cp);
 
-        if (cp < 0x80)                        // one octet
-            *(result++) = static_cast<uint8_t>(cp);
-        else if (cp < 0x800) {                // two octets
-            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else if (cp < 0x10000) {              // three octets
-            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else {                                // four octets
-            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
-            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        return result;
+        return internal::append(cp, result);
     }
 
     template <typename octet_iterator, typename output_iterator>
diff --git a/source/utf8/core.h b/source/utf8/core.h
index de6199f..34371ee 100644
--- a/source/utf8/core.h
+++ b/source/utf8/core.h
@@ -297,6 +297,55 @@ namespace internal
         return utf8::internal::validate_next(it, end, ignored);
     }
 
+    // Internal implementation of both checked and unchecked append() function
+    // This function will be invoked by the overloads below, as they will know
+    // the octet_type.
+    template <typename octet_iterator, typename octet_type>
+    octet_iterator append(uint32_t cp, octet_iterator result) {
+        if (cp < 0x80)                        // one octet
+            *(result++) = static_cast<octet_type>(cp);
+        else if (cp < 0x800) {                // two octets
+            *(result++) = static_cast<octet_type>((cp >> 6)          | 0xc0);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        else if (cp < 0x10000) {              // three octets
+            *(result++) = static_cast<octet_type>((cp >> 12)         | 0xe0);
+            *(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        else {                                // four octets
+            *(result++) = static_cast<octet_type>((cp >> 18)         | 0xf0);
+            *(result++) = static_cast<octet_type>(((cp >> 12) & 0x3f)| 0x80);
+            *(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        return result;
+    }
+
+    // One of the following overloads will be invoked from the API calls
+
+    // A simple (but dangerous) case: the caller appends byte(s) to a char array
+    inline char* append(uint32_t cp, char* result) {
+        return append<char*, char>(cp, result);
+    }
+
+    // Hopefully, most common case: the caller uses back_inserter
+    // i.e. append(cp, std::back_inserter(str));
+    template<typename container_type>
+    std::back_insert_iterator<container_type> append
+            (uint32_t cp, std::back_insert_iterator<container_type> result) {
+        return append<std::back_insert_iterator<container_type>,
+            typename container_type::value_type>(cp, result);
+    }
+
+    // The caller uses some other kind of output operator - not covered above
+    // Note that in this case we are not able to determine octet_type
+    // so we assume it's uint8_t; that can cause a conversion warning if we are wrong.
+    template <typename octet_iterator>
+    octet_iterator append(uint32_t cp, octet_iterator result) {
+        return append<octet_iterator, uint8_t>(cp, result);
+    }
+
 } // namespace internal
 
     /// The library API - functions intended to be called by the users
diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h
index 0e1b51c..8fe83c9 100644
--- a/source/utf8/unchecked.h
+++ b/source/utf8/unchecked.h
@@ -37,24 +37,7 @@ namespace utf8
         template <typename octet_iterator>
         octet_iterator append(uint32_t cp, octet_iterator result)
         {
-            if (cp < 0x80)                        // one octet
-                *(result++) = static_cast<uint8_t>(cp);
-            else if (cp < 0x800) {                // two octets
-                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else if (cp < 0x10000) {              // three octets
-                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else {                                // four octets
-                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
-                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            return result;
+            return internal::append(cp, result);
         }
 
         template <typename octet_iterator, typename output_iterator>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2662004..12790e2 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(noexceptionstests PRIVATE utf8::cpp)
 
 target_compile_options(${PROJECT_NAME} INTERFACE
     $<$<CXX_COMPILER_ID:MSVC>:/W4>
-    $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic>)
+    $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion>)
 
 target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
 
diff --git a/tests/test_checked_api.cpp b/tests/test_checked_api.cpp
index 984f2be..9682ffe 100644
--- a/tests/test_checked_api.cpp
+++ b/tests/test_checked_api.cpp
@@ -38,6 +38,12 @@ TEST(CheckedAPITests, test_append)
     EXPECT_EQ (u[2], 0x8d);
     EXPECT_EQ (u[3], 0x86);
     EXPECT_EQ (u[4], 0);
+
+    // Ensure no warnings with plain char
+    char c[2] = {0,0};
+    append('a', c);
+    EXPECT_EQ (c[0], 'a');
+    EXPECT_EQ (c[1], 0);
 }
 
 TEST(CheckedAPITests, test_next)