Compare commits

..

No commits in common. "master" and "v4.0.1" have entirely different histories.

8 changed files with 40 additions and 130 deletions

13
.circleci/config.yml Normal file
View file

@ -0,0 +1,13 @@
# CircleCI pipeline: configure, build and run the test suite inside a
# prebuilt Docker image.
version: 2
jobs:
  build:
    docker:
      - image: nemtrif/utf8cpp:4.0.0
    steps:
      - checkout
      # --remote moves each submodule to the tip of its tracking branch
      # instead of the commit recorded in this repository.
      - run: git submodule update --init --recursive --remote
      - run: mkdir -p tests/build
      - run: cd tests/build && cmake ..
      - run: cd tests/build && cmake --build .
      - run: cd tests/build && ctest -VV

View file

@ -1,75 +0,0 @@
# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
name: CMake on multiple platforms

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

jobs:
  build:
    runs-on: ${{ matrix.os }}

    strategy:
      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
      fail-fast: false

      # Set up a matrix to run the following 3 configurations:
      # 1. <Windows, Release, latest MSVC compiler toolchain on the default runner image, default generator>
      # 2. <Linux, Release, latest GCC compiler toolchain on the default runner image, default generator>
      # 3. <Linux, Release, latest Clang compiler toolchain on the default runner image, default generator>
      #
      # To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list.
      matrix:
        os: [ubuntu-latest, windows-latest]
        build_type: [Release]
        c_compiler: [gcc, clang, cl]
        include:
          - os: windows-latest
            c_compiler: cl
            cpp_compiler: cl
          - os: ubuntu-latest
            c_compiler: gcc
            cpp_compiler: g++
          - os: ubuntu-latest
            c_compiler: clang
            cpp_compiler: clang++
        exclude:
          - os: windows-latest
            c_compiler: gcc
          - os: windows-latest
            c_compiler: clang
          - os: ubuntu-latest
            c_compiler: cl

    steps:
      - uses: actions/checkout@v4

      - name: Set reusable strings
        # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
        id: strings
        shell: bash
        run: |
          echo "build-output-dir=${{ github.workspace }}/tests/build" >> "$GITHUB_OUTPUT"

      - name: Configure CMake
        # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
        # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
        run: >
          cmake -B ${{ steps.strings.outputs.build-output-dir }}
          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          -S ${{ github.workspace }}

      - name: Build
        # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
        run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}

      - name: Test
        working-directory: ${{ steps.strings.outputs.build-output-dir }}
        # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
        # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
        run: ctest -VV --build-config ${{ matrix.build_type }}

View file

@ -1,9 +1,6 @@
# This file is deprecated and will be removed in a future release
# Please see the instructions for installation in README.md file
cmake_minimum_required (VERSION 3.14...3.27)
cmake_minimum_required (VERSION 3.5...3.27)
project (utf8cpp
VERSION 4.0.6
VERSION 4.0.1
LANGUAGES CXX
DESCRIPTION "C++ portable library for working with utf-8 encoding")
@ -21,7 +18,6 @@ write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY SameMajorVersion
ARCH_INDEPENDENT
)
install(TARGETS ${PROJECT_NAME}
@ -48,6 +44,5 @@ install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include/utf8cpp)
install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION
include/utf8cpp)
install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include)
install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION include)

View file

@ -112,13 +112,8 @@ I came up with a small, C++98 compatible generic library in order to handle UTF-
<!-- TOC --><a name="installation"></a>
## Installation
This is a header-only library and the supported way of deploying it is:
- Download a release from https://github.com/nemtrif/utfcpp/releases into a temporary directory
- Unzip the release
- Copy the content of the utfcpp/source directory into the directory where you keep include files for your project
The CMakeLists.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added an install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeLists.txt in a future release.
The recommended way to use the library is to download an official release and copy the content of source directory into location of your project's header files.
If you use CMake for your builds, I still recommend just copying the files into your project, but if you want you can use the CMakeLists.txt file included in the project.
<!-- TOC --><a name="examples-of-use"></a>
## Examples of use
@ -741,8 +736,7 @@ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_itera
`octet_iterator`: an input iterator.
`u16bit_iterator`: an output iterator.
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
`end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. <br /> `end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
`result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.
Return value: An iterator pointing to the place after the appended UTF-16 string.
@ -839,7 +833,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
@ -867,7 +861,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="utf8utf32to8"></a>
#### utf8::utf32to8
@ -1083,7 +1077,7 @@ u32string utf32result = utf8to32(twochars);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
@ -1109,7 +1103,7 @@ u32string utf32result = utf8to32(stringview);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
@ -1644,7 +1638,7 @@ class iterator;
<!-- TOC --><a name="member-functions"></a>
##### Member functions
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.
@ -1956,8 +1950,7 @@ template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
```
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
`end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. <br /> `end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
`result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.
Return value: An iterator pointing to the place after the appended UTF-16 string.
@ -2086,7 +2079,7 @@ class iterator;
<!-- TOC --><a name="member-functions-1"></a>
##### Member functions
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.

View file

@ -43,14 +43,9 @@ DEALINGS IN THE SOFTWARE.
// Language-level feature macros. UTF_CPP_CPLUSPLUS is expected to be defined
// before this point (its definition is not part of this block).
//   UTF_CPP_OVERRIDE       - `override` where available, otherwise empty.
//   UTF_CPP_NOEXCEPT       - `noexcept` where available, otherwise `throw()`.
//   UTF_CPP_STATIC_ASSERT  - compile-time check; the expansion already ends
//                            in a semicolon.
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
    #define UTF_CPP_OVERRIDE override
    #define UTF_CPP_NOEXCEPT noexcept
    #define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
#else // C++ 98/03
    #define UTF_CPP_OVERRIDE
    #define UTF_CPP_NOEXCEPT throw()
    // Simulate static_assert: instantiating check() for a false condition
    // declares an array of size -1, which fails to compile.
    // The member is deliberately NOT called `assert`: that name is a
    // function-like macro once <cassert> has been included, which would
    // mangle both the declaration and the call.
    template <bool Condition> struct StaticAssert {static void check() {int static_assert_impl[(Condition ? 1 : -1)]; (void)static_assert_impl;} };
    template <> struct StaticAssert<true> {static void check() {}};
    // The condition is parenthesised so that a `>` inside it cannot be taken
    // as the end of the template argument list.
    #define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<(condition)>::check();
#endif // C++ 11 or later
@ -220,7 +215,7 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
code_point += (*it) & 0x3f;
return UTF8_OK;
}
@ -239,11 +234,11 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
code_point += (*it) & 0x3f;
return UTF8_OK;
}
@ -313,10 +308,6 @@ namespace internal
template <typename word_iterator>
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
{
// Make sure the iterator dereferences a large enough type
typedef typename std::iterator_traits<word_iterator>::value_type word_type;
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
// Check the edge case:
if (it == end)
return NOT_ENOUGH_ROOM;
// Save the original value of it so we can go back in case of failure
@ -336,7 +327,7 @@ namespace internal
else if (is_lead_surrogate(first_word)) {
const utfchar16_t second_word = *it++;
if (is_trail_surrogate(second_word)) {
code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
return UTF8_OK;
} else
err = INCOMPLETE_SEQUENCE;
@ -404,7 +395,6 @@ namespace internal
// the word_type.
template <typename word_iterator, typename word_type>
word_iterator append16(utfchar32_t cp, word_iterator result) {
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
if (is_in_bmp(cp))
*(result++) = static_cast<word_type>(cp);
else {

View file

@ -115,15 +115,15 @@ namespace utf8
++it;
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
++it;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
cp += (*it) & 0x3f;
break;
case 4:
++it;
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
++it;
cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
++it;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
cp += (*it) & 0x3f;
break;
}
++it;

View file

@ -6,14 +6,6 @@ add_library(${PROJECT_NAME} INTERFACE)
include_directories("${PROJECT_SOURCE_DIR}/../source")
# Compiler warnings for the test programs. add_compile_options() affects the
# targets created after this call in the current directory.
if(NOT MSVC)
  # GCC/Clang style front ends: broad warning set including conversion checks.
  add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
else()
  # MSVC-style front end: warning level 4.
  add_compile_options(/W4)
endif()
add_executable(negative negative.cpp)
add_executable(cpp11 test_cpp11.cpp)
add_executable(cpp17 test_cpp17.cpp)
@ -22,6 +14,12 @@ add_executable(apitests apitests.cpp)
add_executable(noexceptionstests noexceptionstests.cpp)
# Warning flags attached to the interface library as usage requirements.
# Each generator expression is quoted and uses ';' between flags: an unquoted
# expression containing spaces is split into separate arguments before it is
# evaluated, which breaks the expression.
target_compile_options(${PROJECT_NAME} INTERFACE
    "$<$<CXX_COMPILER_ID:MSVC>:/W4>"
    "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Wpedantic;-Wconversion>")
target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
set_target_properties(negative apitests noexceptionstests

View file

@ -2,7 +2,3 @@
# Pull in @PROJECT_NAME@Targets.cmake from the directory that contains this file.
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
# NOTE(review): check_required_components() is not defined in the visible part
# of this template; presumably it comes from an earlier @PACKAGE_INIT@ line - confirm.
check_required_components("@PROJECT_NAME@")
# Expose utf8::cpp as an alias of utf8cpp::utf8cpp, unless a target with that
# name already exists.
if(NOT TARGET utf8::cpp)
add_library(utf8::cpp ALIAS utf8cpp::utf8cpp)
endif()