Compare commits

...

11 commits

Author SHA1 Message Date
Nemanja Trifunovic
65701fe007 Compile time checks for size of UTF-16 code units 2025-01-11 11:09:19 -05:00
root
b26a5f718f Release 4.0.6 2024-11-03 13:03:52 -05:00
root
10b7895211 Release 4.0.5 2024-11-03 13:02:26 -05:00
root
77870cc80b Fix Wsign-conversion warnings 2024-11-02 20:38:03 -04:00
root
cd191b6c45 Tests: use add_compile_options() to add warnings to tests 2024-11-02 19:42:46 -04:00
Nemanja Trifunovic
6be08bbea1
Merge pull request #120 from ufleisch/arch-independent
Use ARCH_INDEPENDENT with write_basic_package_version_file
2023-12-29 17:40:20 -05:00
Urs Fleisch
b594c9bfe3 Use ARCH_INDEPENDENT with write_basic_package_version_file
This allows to use a package built on amd64 also on
32-bit Android.
2023-12-29 15:29:47 +01:00
Nemanja Trifunovic
139d233262
Merge pull request #118 from striezel-stash/fix-typos
Fix some typos
2023-12-11 20:27:00 -05:00
Nemanja Trifunovic
9d9067ff42
Merge pull request #119 from striezel-stash/actions-checkout-v4
Update actions/checkout in GitHub Actions to v4
2023-12-11 20:25:59 -05:00
Dirk Stolle
bb2b8f2ac2 Update actions/checkout in GitHub Actions to v4 2023-12-11 03:58:53 +01:00
Dirk Stolle
2725faafbc Fix some typos 2023-12-11 03:55:49 +01:00
6 changed files with 36 additions and 23 deletions

View file

@ -45,7 +45,7 @@ jobs:
c_compiler: cl
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set reusable strings
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.

View file

@ -1,9 +1,9 @@
# This file is deprecated and will be removed in a future release
# Please see the instructions for installation in README.md file
cmake_minimum_required (VERSION 3.5...3.27)
cmake_minimum_required (VERSION 3.14...3.27)
project (utf8cpp
VERSION 4.0.4
VERSION 4.0.6
LANGUAGES CXX
DESCRIPTION "C++ portable library for working with utf-8 encoding")
@ -21,6 +21,7 @@ write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY SameMajorVersion
ARCH_INDEPENDENT
)
install(TARGETS ${PROJECT_NAME}

View file

@ -118,7 +118,7 @@ This is a header-only library and the supported way of deploying it is:
- Copy the content of utfcpp/source file into the directory where you keep include files for your project
The CMakeList.txt file was originally made for testing purposes only, but unfortunatelly over time I accepted contributions that added install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeList.txt in a future release.
The CMakeList.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeList.txt in a future release.
<!-- TOC --><a name="examples-of-use"></a>
## Examples of use
@ -839,7 +839,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
@ -867,7 +867,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="utf8utf32to8"></a>
#### utf8::utf32to8
@ -1083,7 +1083,7 @@ u32string utf32result = utf8to32(twochars);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
@ -1109,7 +1109,7 @@ u32string utf32result = utf8to32(stringview);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
@ -1644,7 +1644,7 @@ class iterator;
<!-- TOC --><a name="member-functions"></a>
##### Member functions
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.
@ -2086,7 +2086,7 @@ class iterator;
<!-- TOC --><a name="member-functions-1"></a>
##### Member functions
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.

View file

@ -43,9 +43,14 @@ DEALINGS IN THE SOFTWARE.
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
#define UTF_CPP_OVERRIDE override
#define UTF_CPP_NOEXCEPT noexcept
#define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
#else // C++ 98/03
#define UTF_CPP_OVERRIDE
#define UTF_CPP_NOEXCEPT throw()
// Simulate static_assert for pre-C++11 compilers.
// The primary template is only instantiated for a false condition; its member
// function then declares an array of negative size, which is ill-formed and
// stops compilation. The <true> specialization is an empty no-op.
// The member function is deliberately NOT named `assert`: <cassert> defines
// `assert` as a function-like macro, so that name would fail to compile in any
// translation unit that includes it.
template <bool Condition> struct StaticAssert {
    static void utf8_static_assert() {
        char static_assert_impl[Condition ? 1 : -1];
        (void)static_assert_impl; // silence unused-variable warnings
    }
};
template <> struct StaticAssert<true> {
    static void utf8_static_assert() {}
};
// Usable at block scope only (it expands to a function-call statement).
// The condition is parenthesized so that expressions containing `>` do not
// terminate the template argument list prematurely.
#define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<(condition)>::utf8_static_assert();
#endif // C++ 11 or later
@ -215,7 +220,7 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (*it) & 0x3f;
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
return UTF8_OK;
}
@ -234,11 +239,11 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (*it) & 0x3f;
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
return UTF8_OK;
}
@ -308,6 +313,10 @@ namespace internal
template <typename word_iterator>
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
{
// Make sure the iterator dereferences a large enough type
typedef typename std::iterator_traits<word_iterator>::value_type word_type;
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
// Check the edge case:
if (it == end)
return NOT_ENOUGH_ROOM;
// Save the original value of it so we can go back in case of failure
@ -327,7 +336,7 @@ namespace internal
else if (is_lead_surrogate(first_word)) {
const utfchar16_t second_word = *it++;
if (is_trail_surrogate(second_word)) {
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
return UTF8_OK;
} else
err = INCOMPLETE_SEQUENCE;
@ -395,6 +404,7 @@ namespace internal
// the word_type.
template <typename word_iterator, typename word_type>
word_iterator append16(utfchar32_t cp, word_iterator result) {
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
if (is_in_bmp(cp))
*(result++) = static_cast<word_type>(cp);
else {

View file

@ -115,15 +115,15 @@ namespace utf8
++it;
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
++it;
cp += (*it) & 0x3f;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
break;
case 4:
++it;
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
++it;
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
++it;
cp += (*it) & 0x3f;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
break;
}
++it;

View file

@ -6,6 +6,14 @@ add_library(${PROJECT_NAME} INTERFACE)
include_directories("${PROJECT_SOURCE_DIR}/../source")
if (MSVC)
# warning level 4
add_compile_options(/W4)
else()
# additional warnings
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
endif()
add_executable(negative negative.cpp)
add_executable(cpp11 test_cpp11.cpp)
add_executable(cpp17 test_cpp17.cpp)
@ -14,12 +22,6 @@ add_executable(apitests apitests.cpp)
add_executable(noexceptionstests noexceptionstests.cpp)
target_compile_options(${PROJECT_NAME} INTERFACE
$<$<CXX_COMPILER_ID:MSVC>:/W4>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Wconversion>)
target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
set_target_properties(negative apitests noexceptionstests