2025-04-20 12:08:51 +00:00
8 changed files with 40 additions and 130 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -0,0 +1,13 @@
+version: 2  # CircleCI configuration schema version
+
+jobs:
+  build:
+    docker:
+      - image: nemtrif/utf8cpp:4.0.0  # container image the steps below run in
+    steps:
+      - checkout
+      - run: git submodule update --init --recursive --remote  # NOTE(review): --remote follows each submodule's tracking branch, not the commit recorded here -- confirm intended
+      - run: mkdir -p tests/build  # out-of-source build tree under tests/
+      - run: cd tests/build && cmake ..  # configure using tests/CMakeLists.txt
+      - run: cd tests/build && cmake --build .
+      - run: cd tests/build && ctest -VV  # run the registered tests with extra-verbose output
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@ -1,75 +0,0 @@
-# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
-# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
-name: CMake on multiple platforms
-
-on:
-  push:
-    branches: [ "master" ]
-  pull_request:
-    branches: [ "master" ]
-
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
-      fail-fast: false
-
-      # Set up a matrix to run the following 3 configurations:
-      # 1. <Windows, Release, latest MSVC compiler toolchain on the default runner image, default generator>
-      # 2. <Linux, Release, latest GCC compiler toolchain on the default runner image, default generator>
-      # 3. <Linux, Release, latest Clang compiler toolchain on the default runner image, default generator>
-      #
-      # To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list.
-      matrix:
-        os: [ubuntu-latest, windows-latest]
-        build_type: [Release]
-        c_compiler: [gcc, clang, cl]
-        include:
-          - os: windows-latest
-            c_compiler: cl
-            cpp_compiler: cl
-          - os: ubuntu-latest
-            c_compiler: gcc
-            cpp_compiler: g++
-          - os: ubuntu-latest
-            c_compiler: clang
-            cpp_compiler: clang++
-        exclude:
-          - os: windows-latest
-            c_compiler: gcc
-          - os: windows-latest
-            c_compiler: clang
-          - os: ubuntu-latest
-            c_compiler: cl
-
-    steps:
-    - uses: actions/checkout@v4
-
-    - name: Set reusable strings
-      # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
-      id: strings
-      shell: bash
-      run: |
-        echo "build-output-dir=${{ github.workspace }}/tests/build" >> "$GITHUB_OUTPUT"
-
-    - name: Configure CMake
-      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
-      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: >
-        cmake -B ${{ steps.strings.outputs.build-output-dir }}
-        -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
-        -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
-        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-        -S ${{ github.workspace }}
-
-    - name: Build
-      # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
-      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
-
-    - name: Test
-      working-directory: ${{ steps.strings.outputs.build-output-dir }}
-      # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
-      # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
-      run: ctest -VV --build-config ${{ matrix.build_type }}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,9 +1,6 @@
-# This file is deprecated and will be removed in a future release
-# Please see the instructions for installation in README.md file
-
-cmake_minimum_required (VERSION 3.14...3.27)
+cmake_minimum_required (VERSION 3.5...3.27)
 project (utf8cpp 
-         VERSION 4.0.6
+         VERSION 4.0.1
         LANGUAGES CXX
         DESCRIPTION "C++ portable library for working with utf-8 encoding")

@ -21,7 +18,6 @@ write_basic_package_version_file(
    "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
    VERSION ${PROJECT_VERSION}
    COMPATIBILITY SameMajorVersion
-    ARCH_INDEPENDENT
 )

 install(TARGETS ${PROJECT_NAME}
@ -48,6 +44,5 @@ install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
              "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)

-install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include/utf8cpp)
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION
-        include/utf8cpp)
+install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include)
+install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION include)
--- a/README.md
+++ b/README.md
@ -112,13 +112,8 @@ I came up with a small, C++98 compatible generic library in order to handle UTF-
 <!-- TOC --><a name="installation"></a>
 ## Installation

-This is a header-only library and the supported way of deploying it is:
- Download a release from https://github.com/nemtrif/utfcpp/releases into a temporary directory
- Unzip the release
- Copy the content of utfcpp/source file into the directory where you keep include files for your project
-
-
-The CMakeList.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeList.txt in a future release.
+The recommended way to use the library is to download an official release and copy the contents of the source directory into the location of your project's header files.
+If you use CMake for your builds, I still recommend just copying the files into your project, but if you want, you can use the CMakeLists.txt file included in the project.

 <!-- TOC --><a name="examples-of-use"></a>
 ## Examples of use
@ -741,8 +736,7 @@ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_itera

 `octet_iterator`: an input iterator.  
 `u16bit_iterator`: an output iterator.  
-`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. 
-`end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.  
+`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. <br /> `end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.  
 `result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.  
 Return value: An iterator pointing to the place after the appended UTF-16 string.

@ -839,7 +833,7 @@ assert (utf16result[2] == 0xd834);
 assert (utf16result[3] == 0xdd1e);
 ```

-In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
+In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.


 <!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
@ -867,7 +861,7 @@ assert (utf16result[2] == 0xd834);
 assert (utf16result[3] == 0xdd1e);
 ```

-In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
+In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.

 <!-- TOC --><a name="utf8utf32to8"></a>
 #### utf8::utf32to8
@ -1083,7 +1077,7 @@ u32string utf32result = utf8to32(twochars);
 assert (utf32result.size() == 2);
 ```

-In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
+In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.


 <!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
@ -1109,7 +1103,7 @@ u32string utf32result = utf8to32(stringview);
 assert (utf32result.size() == 2);
 ```

-In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
+In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.


 <!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
@ -1644,7 +1638,7 @@ class iterator;
 <!-- TOC --><a name="member-functions"></a>
 ##### Member functions

-`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
+`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.

 `explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.

@ -1956,8 +1950,7 @@ template <typename u16bit_iterator, typename octet_iterator>
 u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
 ```

-`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
-`end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.  
+`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. <br /> `end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.  
 `result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.  
 Return value: An iterator pointing to the place after the appended UTF-16 string.

@ -2086,7 +2079,7 @@ class iterator;
 <!-- TOC --><a name="member-functions-1"></a>
 ##### Member functions

-`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
+`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.

 `explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.

--- a/source/utf8/core.h
+++ b/source/utf8/core.h
@ -43,14 +43,9 @@ DEALINGS IN THE SOFTWARE.
 #if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
    #define UTF_CPP_OVERRIDE override
    #define UTF_CPP_NOEXCEPT noexcept
-    #define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
 #else // C++ 98/03
    #define UTF_CPP_OVERRIDE
    #define UTF_CPP_NOEXCEPT throw()
-    // Simulate static_assert:
-    template <bool Condition> struct StaticAssert {static void assert() {int static_assert_impl[(Condition ? 1 : -1)];} };
-    template <> struct StaticAssert<true> {static void assert() {}};
-    #define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<condition>::assert();
 #endif // C++ 11 or later


@ -220,7 +215,7 @@ namespace internal

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

-        code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
+        code_point += (*it) & 0x3f;

        return UTF8_OK;
    }
@ -239,11 +234,11 @@ namespace internal

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

-        code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
+        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

-        code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
+        code_point += (*it) & 0x3f;

        return UTF8_OK;
    }
@ -313,10 +308,6 @@ namespace internal
    template <typename word_iterator>
    utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
    {
-        // Make sure the iterator dereferences a large enough type
-        typedef typename std::iterator_traits<word_iterator>::value_type word_type;
-        UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
-        // Check the edge case:
        if (it == end)
            return NOT_ENOUGH_ROOM;
        // Save the original value of it so we can go back in case of failure
@ -336,7 +327,7 @@ namespace internal
            else if (is_lead_surrogate(first_word)) {
                const utfchar16_t second_word = *it++;
                if (is_trail_surrogate(second_word)) {
-                    code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
+                    code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
                    return UTF8_OK;
                } else 
                    err = INCOMPLETE_SEQUENCE; 
@ -404,7 +395,6 @@ namespace internal
    // the word_type.
    template <typename word_iterator, typename word_type>
    word_iterator append16(utfchar32_t cp, word_iterator result) {
-        UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
        if (is_in_bmp(cp))
            *(result++) = static_cast<word_type>(cp);
        else {
--- a/source/utf8/unchecked.h
+++ b/source/utf8/unchecked.h
@ -115,15 +115,15 @@ namespace utf8
                    ++it; 
                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
                    ++it;
-                    cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
+                    cp += (*it) & 0x3f;
                    break;
                case 4:
                    ++it;
                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);                
                    ++it;
-                    cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
+                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
                    ++it;
-                    cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f)); 
+                    cp += (*it) & 0x3f; 
                    break;
            }
            ++it;
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -6,14 +6,6 @@ add_library(${PROJECT_NAME} INTERFACE)

 include_directories("${PROJECT_SOURCE_DIR}/../source")

-if (MSVC)
-    # warning level 4
-    add_compile_options(/W4)
-else()
-    # additional warnings
-    add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
-endif()
-
 add_executable(negative negative.cpp)
 add_executable(cpp11 test_cpp11.cpp)
 add_executable(cpp17 test_cpp17.cpp)
@ -22,6 +14,12 @@ add_executable(apitests apitests.cpp)

 add_executable(noexceptionstests noexceptionstests.cpp)

+# Warning flags attached to the interface target: /W4 for MSVC, GCC/Clang-style flags otherwise.
+# Each generator expression is quoted and its flags are ';'-separated so CMake cannot split it on spaces.
+target_compile_options(${PROJECT_NAME} INTERFACE
+  "$<$<CXX_COMPILER_ID:MSVC>:/W4>"
+  "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Wpedantic;-Wconversion>")
+
 target_compile_options(noexceptionstests PUBLIC -fno-exceptions)

 set_target_properties(negative apitests noexceptionstests
--- a/utf8cppConfig.cmake.in
+++ b/utf8cppConfig.cmake.in
@ -2,7 +2,3 @@

 include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
 check_required_components("@PROJECT_NAME@")
-
-if(NOT TARGET utf8::cpp)
-    add_library(utf8::cpp ALIAS utf8cpp::utf8cpp)
-endif()