Compare commits

..

13 commits

Author SHA1 Message Date
Nemanja Trifunovic
65701fe007 Compile time checks for size of UTF-16 code units 2025-01-11 11:09:19 -05:00
root
b26a5f718f Release 4.0.6 2024-11-03 13:03:52 -05:00
root
10b7895211 Release 4.0.5 2024-11-03 13:02:26 -05:00
root
77870cc80b Fix Wsign-conversion warnings 2024-11-02 20:38:03 -04:00
root
cd191b6c45 Tests: use add_compile_options() to add warnings to tests 2024-11-02 19:42:46 -04:00
Nemanja Trifunovic
6be08bbea1
Merge pull request #120 from ufleisch/arch-independent
Use ARCH_INDEPENDENT with write_basic_package_version_file
2023-12-29 17:40:20 -05:00
Urs Fleisch
b594c9bfe3 Use ARCH_INDEPENDENT with write_basic_package_version_file
This allows to use a package built on amd64 also on
32-bit Android.
2023-12-29 15:29:47 +01:00
Nemanja Trifunovic
139d233262
Merge pull request #118 from striezel-stash/fix-typos
Fix some typos
2023-12-11 20:27:00 -05:00
Nemanja Trifunovic
9d9067ff42
Merge pull request #119 from striezel-stash/actions-checkout-v4
Update actions/checkout in GitHub Actions to v4
2023-12-11 20:25:59 -05:00
Dirk Stolle
bb2b8f2ac2 Update actions/checkout in GitHub Actions to v4 2023-12-11 03:58:53 +01:00
Dirk Stolle
2725faafbc Fix some typos 2023-12-11 03:55:49 +01:00
nemtrif
aed58281cf Release 4.0.4 2023-12-10 10:42:20 -05:00
nemtrif
4965d378fb Deprecate CMake support
Make clear that CMake is not the supported way of installing the library
2023-12-10 10:41:12 -05:00
6 changed files with 45 additions and 24 deletions

View file

@ -45,7 +45,7 @@ jobs:
c_compiler: cl
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set reusable strings
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.

View file

@ -1,6 +1,9 @@
cmake_minimum_required (VERSION 3.5...3.27)
# This file is deprecated and will be removed in a future release
# Please see the instructions for installation in README.md file
cmake_minimum_required (VERSION 3.14...3.27)
project (utf8cpp
VERSION 4.0.3
VERSION 4.0.6
LANGUAGES CXX
DESCRIPTION "C++ portable library for working with utf-8 encoding")
@ -18,6 +21,7 @@ write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY SameMajorVersion
ARCH_INDEPENDENT
)
install(TARGETS ${PROJECT_NAME}

View file

@ -112,8 +112,13 @@ I came up with a small, C++98 compatible generic library in order to handle UTF-
<!-- TOC --><a name="installation"></a>
## Installation
The recommended way to use the library is to download an official release and copy the content of source directory into location of your project's header files.
If you use CMake for your builds, I still recommend just copying the files into your project, but if you want you can use the CMakeList.txt file included in the project.
This is a header-only library and the supported way of deploying it is:
- Download a release from https://github.com/nemtrif/utfcpp/releases into a temporary directory
- Unzip the release
- Copy the contents of the utfcpp/source directory into the directory where you keep include files for your project
The CMakeLists.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added an install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeLists.txt file in a future release.
<!-- TOC --><a name="examples-of-use"></a>
## Examples of use
@ -834,7 +839,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
@ -862,7 +867,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="utf8utf32to8"></a>
#### utf8::utf32to8
@ -1078,7 +1083,7 @@ u32string utf32result = utf8to32(twochars);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
@ -1104,7 +1109,7 @@ u32string utf32result = utf8to32(stringview);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
@ -1639,7 +1644,7 @@ class iterator;
<!-- TOC --><a name="member-functions"></a>
##### Member functions
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.
@ -2081,7 +2086,7 @@ class iterator;
<!-- TOC --><a name="member-functions-1"></a>
##### Member functions
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.

View file

@ -43,9 +43,14 @@ DEALINGS IN THE SOFTWARE.
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
#define UTF_CPP_OVERRIDE override
#define UTF_CPP_NOEXCEPT noexcept
// Compile-time check. The expansion already ends with a semicolon, so a
// trailing semicolon at the call site merely adds an empty statement.
#define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
#else // C++ 98/03
#define UTF_CPP_OVERRIDE
#define UTF_CPP_NOEXCEPT throw()
// Simulate static_assert for pre-C++11 compilers.
// The primary template is only ever instantiated for a false condition (the
// true case is fully specialized below); calling its member function then
// declares an array of negative size, which is a compile-time error.
// The member function must NOT be called `assert`: <cassert> defines `assert`
// as a function-like macro, and that name followed by '(' would be expanded
// by the preprocessor, breaking both the definition and every use.
template <bool Condition> struct StaticAssert {
    static void utf8_static_assert() {
        char utf8_static_assert_failed[Condition ? 1 : -1];
        (void)utf8_static_assert_failed; // silence unused-variable warnings
    }
};
template <> struct StaticAssert<true> {static void utf8_static_assert() {}};
// The condition is parenthesized so that a '>' inside it is not parsed as the
// end of the template argument list. Like the C++11 variant, the expansion
// ends with a semicolon and must be used at block scope.
#define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<(condition)>::utf8_static_assert();
#endif // C++ 11 or later
@ -215,7 +220,7 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (*it) & 0x3f;
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
return UTF8_OK;
}
@ -234,11 +239,11 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (*it) & 0x3f;
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
return UTF8_OK;
}
@ -308,6 +313,10 @@ namespace internal
template <typename word_iterator>
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
{
// Make sure the iterator dereferences a large enough type
typedef typename std::iterator_traits<word_iterator>::value_type word_type;
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
// Check the edge case:
if (it == end)
return NOT_ENOUGH_ROOM;
// Save the original value of it so we can go back in case of failure
@ -327,7 +336,7 @@ namespace internal
else if (is_lead_surrogate(first_word)) {
const utfchar16_t second_word = *it++;
if (is_trail_surrogate(second_word)) {
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
return UTF8_OK;
} else
err = INCOMPLETE_SEQUENCE;
@ -395,6 +404,7 @@ namespace internal
// the word_type.
template <typename word_iterator, typename word_type>
word_iterator append16(utfchar32_t cp, word_iterator result) {
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
if (is_in_bmp(cp))
*(result++) = static_cast<word_type>(cp);
else {

View file

@ -115,15 +115,15 @@ namespace utf8
++it;
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
++it;
cp += (*it) & 0x3f;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
break;
case 4:
++it;
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
++it;
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
++it;
cp += (*it) & 0x3f;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
break;
}
++it;

View file

@ -6,6 +6,14 @@ add_library(${PROJECT_NAME} INTERFACE)
include_directories("${PROJECT_SOURCE_DIR}/../source")
# Compiler warning flags for the test build, selected by compiler family.
if (MSVC)
# MSVC: warning level 4
add_compile_options(/W4)
else()
# Other compilers: general, extra and pedantic warnings, plus warnings about
# implicit value-changing and sign-changing conversions
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
endif()
add_executable(negative negative.cpp)
add_executable(cpp11 test_cpp11.cpp)
add_executable(cpp17 test_cpp17.cpp)
@ -14,12 +22,6 @@ add_executable(apitests apitests.cpp)
add_executable(noexceptionstests noexceptionstests.cpp)
target_compile_options(${PROJECT_NAME} INTERFACE
$<$<CXX_COMPILER_ID:MSVC>:/W4>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Wconversion>)
target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
set_target_properties(negative apitests noexceptionstests