mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-06 14:05:06 +00:00
Compare commits
13 commits
Author | SHA1 | Date | |
---|---|---|---|
|
65701fe007 | ||
|
b26a5f718f | ||
|
10b7895211 | ||
|
77870cc80b | ||
|
cd191b6c45 | ||
|
6be08bbea1 | ||
|
b594c9bfe3 | ||
|
139d233262 | ||
|
9d9067ff42 | ||
|
bb2b8f2ac2 | ||
|
2725faafbc | ||
|
aed58281cf | ||
|
4965d378fb |
6 changed files with 45 additions and 24 deletions
2
.github/workflows/cmake-multi-platform.yml
vendored
2
.github/workflows/cmake-multi-platform.yml
vendored
|
@ -45,7 +45,7 @@ jobs:
|
|||
c_compiler: cl
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set reusable strings
|
||||
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
cmake_minimum_required (VERSION 3.5...3.27)
|
||||
# This file is deprecated and will be removed in a future release
|
||||
# Please see the installation instructions in the README.md file
|
||||
|
||||
cmake_minimum_required (VERSION 3.14...3.27)
|
||||
project (utf8cpp
|
||||
VERSION 4.0.3
|
||||
VERSION 4.0.6
|
||||
LANGUAGES CXX
|
||||
DESCRIPTION "C++ portable library for working with utf-8 encoding")
|
||||
|
||||
|
@ -18,6 +21,7 @@ write_basic_package_version_file(
|
|||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY SameMajorVersion
|
||||
ARCH_INDEPENDENT
|
||||
)
|
||||
|
||||
install(TARGETS ${PROJECT_NAME}
|
||||
|
|
21
README.md
21
README.md
|
@ -112,8 +112,13 @@ I came up with a small, C++98 compatible generic library in order to handle UTF-
|
|||
<!-- TOC --><a name="installation"></a>
|
||||
## Installation
|
||||
|
||||
The recommended way to use the library is to download an official release and copy the content of source directory into location of your project's header files.
|
||||
If you use CMake for your builds, I still recommend just copying the files into your project, but if you want you can use the CMakeList.txt file included in the project.
|
||||
This is a header-only library and the supported way of deploying it is:
|
||||
- Download a release from https://github.com/nemtrif/utfcpp/releases into a temporary directory
|
||||
- Unzip the release
|
||||
- Copy the contents of the utfcpp/source directory into the directory where you keep include files for your project
|
||||
|
||||
|
||||
The CMakeLists.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added an install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeLists.txt file in a future release.
|
||||
|
||||
<!-- TOC --><a name="examples-of-use"></a>
|
||||
## Examples of use
|
||||
|
@ -834,7 +839,7 @@ assert (utf16result[2] == 0xd834);
|
|||
assert (utf16result[3] == 0xdd1e);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
|
||||
<!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
|
||||
|
@ -862,7 +867,7 @@ assert (utf16result[2] == 0xd834);
|
|||
assert (utf16result[3] == 0xdd1e);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
<!-- TOC --><a name="utf8utf32to8"></a>
|
||||
#### utf8::utf32to8
|
||||
|
@ -1078,7 +1083,7 @@ u32string utf32result = utf8to32(twochars);
|
|||
assert (utf32result.size() == 2);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
|
||||
<!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
|
||||
|
@ -1104,7 +1109,7 @@ u32string utf32result = utf8to32(stringview);
|
|||
assert (utf32result.size() == 2);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
|
||||
<!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
|
||||
|
@ -1639,7 +1644,7 @@ class iterator;
|
|||
<!-- TOC --><a name="member-functions"></a>
|
||||
##### Member functions
|
||||
|
||||
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
|
||||
`explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.
|
||||
|
||||
|
@ -2081,7 +2086,7 @@ class iterator;
|
|||
<!-- TOC --><a name="member-functions-1"></a>
|
||||
##### Member functions
|
||||
|
||||
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
|
||||
`explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.
|
||||
|
||||
|
|
|
@ -43,9 +43,14 @@ DEALINGS IN THE SOFTWARE.
|
|||
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
|
||||
#define UTF_CPP_OVERRIDE override
|
||||
#define UTF_CPP_NOEXCEPT noexcept
|
||||
#define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
|
||||
#else // C++ 98/03
|
||||
#define UTF_CPP_OVERRIDE
|
||||
#define UTF_CPP_NOEXCEPT throw()
|
||||
// Simulate static_assert:
|
||||
template <bool Condition> struct StaticAssert {static void assert() {int static_assert_impl[(Condition ? 1 : -1)];} };
|
||||
template <> struct StaticAssert<true> {static void assert() {}};
|
||||
#define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<condition>::assert();
|
||||
#endif // C++ 11 or later
|
||||
|
||||
|
||||
|
@ -215,7 +220,7 @@ namespace internal
|
|||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (*it) & 0x3f;
|
||||
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
@ -234,11 +239,11 @@ namespace internal
|
|||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
|
||||
code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (*it) & 0x3f;
|
||||
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
@ -308,6 +313,10 @@ namespace internal
|
|||
template <typename word_iterator>
|
||||
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
|
||||
{
|
||||
// Make sure the iterator dereferences a large enough type
|
||||
typedef typename std::iterator_traits<word_iterator>::value_type word_type;
|
||||
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
|
||||
// Check the edge case:
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
// Save the original value of it so we can go back in case of failure
|
||||
|
@ -327,7 +336,7 @@ namespace internal
|
|||
else if (is_lead_surrogate(first_word)) {
|
||||
const utfchar16_t second_word = *it++;
|
||||
if (is_trail_surrogate(second_word)) {
|
||||
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
|
||||
code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
|
||||
return UTF8_OK;
|
||||
} else
|
||||
err = INCOMPLETE_SEQUENCE;
|
||||
|
@ -395,6 +404,7 @@ namespace internal
|
|||
// the word_type.
|
||||
template <typename word_iterator, typename word_type>
|
||||
word_iterator append16(utfchar32_t cp, word_iterator result) {
|
||||
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
|
||||
if (is_in_bmp(cp))
|
||||
*(result++) = static_cast<word_type>(cp);
|
||||
else {
|
||||
|
|
|
@ -115,15 +115,15 @@ namespace utf8
|
|||
++it;
|
||||
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
|
||||
++it;
|
||||
cp += (*it) & 0x3f;
|
||||
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
|
||||
break;
|
||||
case 4:
|
||||
++it;
|
||||
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
|
||||
++it;
|
||||
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
|
||||
cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
|
||||
++it;
|
||||
cp += (*it) & 0x3f;
|
||||
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
|
||||
break;
|
||||
}
|
||||
++it;
|
||||
|
|
|
@ -6,6 +6,14 @@ add_library(${PROJECT_NAME} INTERFACE)
|
|||
|
||||
include_directories("${PROJECT_SOURCE_DIR}/../source")
|
||||
|
||||
if (MSVC)
|
||||
# warning level 4
|
||||
add_compile_options(/W4)
|
||||
else()
|
||||
# additional warnings
|
||||
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
|
||||
endif()
|
||||
|
||||
add_executable(negative negative.cpp)
|
||||
add_executable(cpp11 test_cpp11.cpp)
|
||||
add_executable(cpp17 test_cpp17.cpp)
|
||||
|
@ -14,12 +22,6 @@ add_executable(apitests apitests.cpp)
|
|||
|
||||
add_executable(noexceptionstests noexceptionstests.cpp)
|
||||
|
||||
|
||||
|
||||
target_compile_options(${PROJECT_NAME} INTERFACE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/W4>
|
||||
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Wconversion>)
|
||||
|
||||
target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
|
||||
|
||||
set_target_properties(negative apitests noexceptionstests
|
||||
|
|
Loading…
Add table
Reference in a new issue