mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-05 21:45:07 +00:00
Compare commits
19 commits
Author | SHA1 | Date | |
---|---|---|---|
|
65701fe007 | ||
|
b26a5f718f | ||
|
10b7895211 | ||
|
77870cc80b | ||
|
cd191b6c45 | ||
|
6be08bbea1 | ||
|
b594c9bfe3 | ||
|
139d233262 | ||
|
9d9067ff42 | ||
|
bb2b8f2ac2 | ||
|
2725faafbc | ||
|
aed58281cf | ||
|
4965d378fb | ||
|
c9733bbe49 | ||
|
bc36bafd6b | ||
|
fbc0225345 | ||
|
b199c0d6bb | ||
|
a645095170 | ||
|
fdca3c98ab |
7 changed files with 127 additions and 41 deletions
|
@ -1,13 +0,0 @@
|
|||
version: 2
|
||||
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
- image: nemtrif/utf8cpp:4.0.0
|
||||
steps:
|
||||
- checkout
|
||||
- run: git submodule update --init --recursive --remote
|
||||
- run: mkdir -p tests/build
|
||||
- run: cd tests/build && cmake ..
|
||||
- run: cd tests/build && cmake --build .
|
||||
- run: cd tests/build && ctest -VV
|
75
.github/workflows/cmake-multi-platform.yml
vendored
Normal file
75
.github/workflows/cmake-multi-platform.yml
vendored
Normal file
|
@ -0,0 +1,75 @@
|
|||
# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
|
||||
# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
|
||||
name: CMake on multiple platforms
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
# Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
|
||||
fail-fast: false
|
||||
|
||||
# Set up a matrix to run the following 3 configurations:
|
||||
# 1. <Windows, Release, latest MSVC compiler toolchain on the default runner image, default generator>
|
||||
# 2. <Linux, Release, latest GCC compiler toolchain on the default runner image, default generator>
|
||||
# 3. <Linux, Release, latest Clang compiler toolchain on the default runner image, default generator>
|
||||
#
|
||||
# To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list.
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
build_type: [Release]
|
||||
c_compiler: [gcc, clang, cl]
|
||||
include:
|
||||
- os: windows-latest
|
||||
c_compiler: cl
|
||||
cpp_compiler: cl
|
||||
- os: ubuntu-latest
|
||||
c_compiler: gcc
|
||||
cpp_compiler: g++
|
||||
- os: ubuntu-latest
|
||||
c_compiler: clang
|
||||
cpp_compiler: clang++
|
||||
exclude:
|
||||
- os: windows-latest
|
||||
c_compiler: gcc
|
||||
- os: windows-latest
|
||||
c_compiler: clang
|
||||
- os: ubuntu-latest
|
||||
c_compiler: cl
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set reusable strings
|
||||
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
|
||||
id: strings
|
||||
shell: bash
|
||||
run: |
|
||||
echo "build-output-dir=${{ github.workspace }}/tests/build" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Configure CMake
|
||||
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
|
||||
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
|
||||
run: >
|
||||
cmake -B ${{ steps.strings.outputs.build-output-dir }}
|
||||
-DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
|
||||
-DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
||||
-S ${{ github.workspace }}
|
||||
|
||||
- name: Build
|
||||
# Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
|
||||
run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
|
||||
|
||||
- name: Test
|
||||
working-directory: ${{ steps.strings.outputs.build-output-dir }}
|
||||
# Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
|
||||
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
|
||||
run: ctest -VV --build-config ${{ matrix.build_type }}
|
|
@ -1,6 +1,9 @@
|
|||
cmake_minimum_required (VERSION 3.5...3.27)
|
||||
# This file is deprecated and will be removed in a future release
|
||||
# Please see the instructions for installation in README.md file
|
||||
|
||||
cmake_minimum_required (VERSION 3.14...3.27)
|
||||
project (utf8cpp
|
||||
VERSION 4.0.1
|
||||
VERSION 4.0.6
|
||||
LANGUAGES CXX
|
||||
DESCRIPTION "C++ portable library for working with utf-8 encoding")
|
||||
|
||||
|
@ -10,7 +13,7 @@ include(GNUInstallDirs)
|
|||
|
||||
target_include_directories(utf8cpp INTERFACE
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/source>"
|
||||
$<INSTALL_INTERFACE:include>
|
||||
$<INSTALL_INTERFACE:include/utf8cpp>
|
||||
)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
@ -18,6 +21,7 @@ write_basic_package_version_file(
|
|||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY SameMajorVersion
|
||||
ARCH_INDEPENDENT
|
||||
)
|
||||
|
||||
install(TARGETS ${PROJECT_NAME}
|
||||
|
@ -44,5 +48,6 @@ install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
|
|||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
|
||||
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include)
|
||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION include)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include/utf8cpp)
|
||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION
|
||||
include/utf8cpp)
|
||||
|
|
27
README.md
27
README.md
|
@ -112,8 +112,13 @@ I came up with a small, C++98 compatible generic library in order to handle UTF-
|
|||
<!-- TOC --><a name="installation"></a>
|
||||
## Installation
|
||||
|
||||
The recommended way to use the library is to download an official release and copy the content of source directory into location of your project's header files.
|
||||
If you use CMake for your builds, I still recommend just copying the files into your project, but if you want you can use the CMakeList.txt file included in the project.
|
||||
This is a header-only library and the supported way of deploying it is:
|
||||
- Download a release from https://github.com/nemtrif/utfcpp/releases into a temporary directory
|
||||
- Unzip the release
|
||||
- Copy the content of the utfcpp/source directory into the directory where you keep include files for your project
|
||||
|
||||
|
||||
The CMakeLists.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added an install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeLists.txt file in a future release.
|
||||
|
||||
<!-- TOC --><a name="examples-of-use"></a>
|
||||
## Examples of use
|
||||
|
@ -736,7 +741,8 @@ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_itera
|
|||
|
||||
`octet_iterator`: an input iterator.
|
||||
`u16bit_iterator`: an output iterator.
|
||||
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. < br /> `end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.
|
||||
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
|
||||
`end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
|
||||
`result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.
|
||||
Return value: An iterator pointing to the place after the appended UTF-16 string.
|
||||
|
||||
|
@ -833,7 +839,7 @@ assert (utf16result[2] == 0xd834);
|
|||
assert (utf16result[3] == 0xdd1e);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
|
||||
<!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
|
||||
|
@ -861,7 +867,7 @@ assert (utf16result[2] == 0xd834);
|
|||
assert (utf16result[3] == 0xdd1e);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
<!-- TOC --><a name="utf8utf32to8"></a>
|
||||
#### utf8::utf32to8
|
||||
|
@ -1077,7 +1083,7 @@ u32string utf32result = utf8to32(twochars);
|
|||
assert (utf32result.size() == 2);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
|
||||
<!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
|
||||
|
@ -1103,7 +1109,7 @@ u32string utf32result = utf8to32(stringview);
|
|||
assert (utf32result.size() == 2);
|
||||
```
|
||||
|
||||
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
|
||||
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
|
||||
|
||||
|
||||
<!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
|
||||
|
@ -1638,7 +1644,7 @@ class iterator;
|
|||
<!-- TOC --><a name="member-functions"></a>
|
||||
##### Member functions
|
||||
|
||||
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
|
||||
`explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.
|
||||
|
||||
|
@ -1950,7 +1956,8 @@ template <typename u16bit_iterator, typename octet_iterator>
|
|||
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
|
||||
```
|
||||
|
||||
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. < br /> `end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.
|
||||
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
|
||||
`end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
|
||||
`result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.
|
||||
Return value: An iterator pointing to the place after the appended UTF-16 string.
|
||||
|
||||
|
@ -2079,7 +2086,7 @@ class iterator;
|
|||
<!-- TOC --><a name="member-functions-1"></a>
|
||||
##### Member functions
|
||||
|
||||
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
|
||||
|
||||
`explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.
|
||||
|
||||
|
|
|
@ -43,9 +43,14 @@ DEALINGS IN THE SOFTWARE.
|
|||
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
|
||||
#define UTF_CPP_OVERRIDE override
|
||||
#define UTF_CPP_NOEXCEPT noexcept
|
||||
#define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
|
||||
#else // C++ 98/03
|
||||
#define UTF_CPP_OVERRIDE
|
||||
#define UTF_CPP_NOEXCEPT throw()
|
||||
// Simulate static_assert:
|
||||
template <bool Condition> struct StaticAssert {static void assert() {int static_assert_impl[(Condition ? 1 : -1)];} };
|
||||
template <> struct StaticAssert<true> {static void assert() {}};
|
||||
#define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<condition>::assert();
|
||||
#endif // C++ 11 or later
|
||||
|
||||
|
||||
|
@ -215,7 +220,7 @@ namespace internal
|
|||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (*it) & 0x3f;
|
||||
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
@ -234,11 +239,11 @@ namespace internal
|
|||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
|
||||
code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (*it) & 0x3f;
|
||||
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
@ -308,6 +313,10 @@ namespace internal
|
|||
template <typename word_iterator>
|
||||
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
|
||||
{
|
||||
// Make sure the iterator dereferences a large enough type
|
||||
typedef typename std::iterator_traits<word_iterator>::value_type word_type;
|
||||
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
|
||||
// Check the edge case:
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
// Save the original value of it so we can go back in case of failure
|
||||
|
@ -327,7 +336,7 @@ namespace internal
|
|||
else if (is_lead_surrogate(first_word)) {
|
||||
const utfchar16_t second_word = *it++;
|
||||
if (is_trail_surrogate(second_word)) {
|
||||
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
|
||||
code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
|
||||
return UTF8_OK;
|
||||
} else
|
||||
err = INCOMPLETE_SEQUENCE;
|
||||
|
@ -395,6 +404,7 @@ namespace internal
|
|||
// the word_type.
|
||||
template <typename word_iterator, typename word_type>
|
||||
word_iterator append16(utfchar32_t cp, word_iterator result) {
|
||||
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
|
||||
if (is_in_bmp(cp))
|
||||
*(result++) = static_cast<word_type>(cp);
|
||||
else {
|
||||
|
|
|
@ -115,15 +115,15 @@ namespace utf8
|
|||
++it;
|
||||
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
|
||||
++it;
|
||||
cp += (*it) & 0x3f;
|
||||
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
|
||||
break;
|
||||
case 4:
|
||||
++it;
|
||||
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
|
||||
++it;
|
||||
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
|
||||
cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
|
||||
++it;
|
||||
cp += (*it) & 0x3f;
|
||||
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
|
||||
break;
|
||||
}
|
||||
++it;
|
||||
|
|
|
@ -6,6 +6,14 @@ add_library(${PROJECT_NAME} INTERFACE)
|
|||
|
||||
include_directories("${PROJECT_SOURCE_DIR}/../source")
|
||||
|
||||
if (MSVC)
|
||||
# warning level 4
|
||||
add_compile_options(/W4)
|
||||
else()
|
||||
# additional warnings
|
||||
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
|
||||
endif()
|
||||
|
||||
add_executable(negative negative.cpp)
|
||||
add_executable(cpp11 test_cpp11.cpp)
|
||||
add_executable(cpp17 test_cpp17.cpp)
|
||||
|
@ -14,12 +22,6 @@ add_executable(apitests apitests.cpp)
|
|||
|
||||
add_executable(noexceptionstests noexceptionstests.cpp)
|
||||
|
||||
|
||||
|
||||
target_compile_options(${PROJECT_NAME} INTERFACE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/W4>
|
||||
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Wconversion>)
|
||||
|
||||
target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
|
||||
|
||||
set_target_properties(negative apitests noexceptionstests
|
||||
|
|
Loading…
Add table
Reference in a new issue