Compare commits

..

19 commits

Author SHA1 Message Date
Nemanja Trifunovic
65701fe007 Compile time checks for size of UTF-16 code units 2025-01-11 11:09:19 -05:00
root
b26a5f718f Release 4.0.6 2024-11-03 13:03:52 -05:00
root
10b7895211 Release 4.0.5 2024-11-03 13:02:26 -05:00
root
77870cc80b Fix Wsign-conversion warnings 2024-11-02 20:38:03 -04:00
root
cd191b6c45 Tests: use add_compile_options() to add warnings to tests 2024-11-02 19:42:46 -04:00
Nemanja Trifunovic
6be08bbea1
Merge pull request #120 from ufleisch/arch-independent
Use ARCH_INDEPENDENT with write_basic_package_version_file
2023-12-29 17:40:20 -05:00
Urs Fleisch
b594c9bfe3 Use ARCH_INDEPENDENT with write_basic_package_version_file
This allows to use a package built on amd64 also on
32-bit Android.
2023-12-29 15:29:47 +01:00
Nemanja Trifunovic
139d233262
Merge pull request #118 from striezel-stash/fix-typos
Fix some typos
2023-12-11 20:27:00 -05:00
Nemanja Trifunovic
9d9067ff42
Merge pull request #119 from striezel-stash/actions-checkout-v4
Update actions/checkout in GitHub Actions to v4
2023-12-11 20:25:59 -05:00
Dirk Stolle
bb2b8f2ac2 Update actions/checkout in GitHub Actions to v4 2023-12-11 03:58:53 +01:00
Dirk Stolle
2725faafbc Fix some typos 2023-12-11 03:55:49 +01:00
nemtrif
aed58281cf Release 4.0.4 2023-12-10 10:42:20 -05:00
nemtrif
4965d378fb Deprecate CMake support
Make clear that CMake is not the supported way of installing the library
2023-12-10 10:41:12 -05:00
nemtrif
c9733bbe49 Release 4.0.3 2023-12-03 18:03:19 -05:00
Nemanja Trifunovic
bc36bafd6b
Merge pull request #115 from mhx/mhx/fix-install
fix: revert to pre-4.x install path {prefix}/utf8cpp (see #112)
2023-12-03 17:58:59 -05:00
Marcus Holland-Moritz
fbc0225345 fix: revert to pre-4.x install path {prefix}/utf8cpp (see #112)
This reverts the install path of the headers to the path that was
used before the 4.x release series, unbreaking projects that were
building against the library without using the CMake config files.
2023-11-30 06:52:29 +01:00
Nemanja Trifunovic
b199c0d6bb
A minor fix to README.md 2023-11-24 18:45:51 -05:00
nemtrif
a645095170 Remove CircleCI, as we are using GitHub Workflows now 2023-11-24 18:20:06 -05:00
Nemanja Trifunovic
fdca3c98ab
Create cmake-multi-platform.yml 2023-11-24 18:15:17 -05:00
7 changed files with 127 additions and 41 deletions

View file

@ -1,13 +0,0 @@
version: 2
jobs:
build:
docker:
- image: nemtrif/utf8cpp:4.0.0
steps:
- checkout
- run: git submodule update --init --recursive --remote
- run: mkdir -p tests/build
- run: cd tests/build && cmake ..
- run: cd tests/build && cmake --build .
- run: cd tests/build && ctest -VV

View file

@ -0,0 +1,75 @@
# CI workflow: configure, build and test the CMake project on several OS/compiler combinations.
# Based on the GitHub starter workflow for multi-platform CMake projects; a single-platform variant exists at
# https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
name: CMake on multiple platforms

on:
  push:
    branches:
      - "master"
  pull_request:
    branches:
      - "master"

jobs:
  build:
    runs-on: ${{ matrix.os }}

    strategy:
      # Let the remaining matrix jobs finish when one fails, so every combination reports a result.
      # Consider switching this to true once the workflow is stable.
      fail-fast: false

      # The os x build_type x c_compiler cross product is trimmed by `exclude`, leaving 3 jobs:
      #   1. windows-latest, Release, cl    (default runner image, default generator)
      #   2. ubuntu-latest,  Release, gcc   (default runner image, default generator)
      #   3. ubuntu-latest,  Release, clang (default runner image, default generator)
      #
      # Add entries to build_type (Debug, RelWithDebInfo, etc.) to cover more configurations.
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        build_type:
          - Release
        c_compiler:
          - gcc
          - clang
          - cl
        # Attach the matching C++ compiler to each os/c_compiler pair that is kept.
        include:
          - os: windows-latest
            c_compiler: cl
            cpp_compiler: cl
          - os: ubuntu-latest
            c_compiler: gcc
            cpp_compiler: g++
          - os: ubuntu-latest
            c_compiler: clang
            cpp_compiler: clang++
        # Remove the os/compiler pairs this workflow does not build.
        exclude:
          - os: windows-latest
            c_compiler: gcc
          - os: windows-latest
            c_compiler: clang
          - os: ubuntu-latest
            c_compiler: cl

    steps:
      - uses: actions/checkout@v4

      - name: Set reusable strings
        # Publish the build output directory once as a step output so later steps can reference it.
        id: strings
        shell: bash
        run: |
          echo "build-output-dir=${{ github.workspace }}/tests/build" >> "$GITHUB_OUTPUT"

      - name: Configure CMake
        # Configure into the build directory published above. `CMAKE_BUILD_TYPE` is only required
        # for single-configuration generators such as make.
        # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
        run: >
          cmake -B ${{ steps.strings.outputs.build-output-dir }}
          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          -S ${{ github.workspace }}

      - name: Build
        # --config is needed because the default Windows generator (Visual Studio) is multi-config.
        run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}

      - name: Test
        working-directory: ${{ steps.strings.outputs.build-output-dir }}
        # Run the tests registered by the CMake configuration. --build-config is needed because the
        # default Windows generator (Visual Studio) is multi-config.
        # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
        run: ctest -VV --build-config ${{ matrix.build_type }}

View file

@ -1,6 +1,9 @@
cmake_minimum_required (VERSION 3.5...3.27)
# This file is deprecated and will be removed in a future release
# Please see the instructions for installation in README.md file
cmake_minimum_required (VERSION 3.14...3.27)
project (utf8cpp
VERSION 4.0.1
VERSION 4.0.6
LANGUAGES CXX
DESCRIPTION "C++ portable library for working with utf-8 encoding")
@ -10,7 +13,7 @@ include(GNUInstallDirs)
target_include_directories(utf8cpp INTERFACE
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/source>"
$<INSTALL_INTERFACE:include>
$<INSTALL_INTERFACE:include/utf8cpp>
)
include(CMakePackageConfigHelpers)
@ -18,6 +21,7 @@ write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY SameMajorVersion
ARCH_INDEPENDENT
)
install(TARGETS ${PROJECT_NAME}
@ -44,5 +48,6 @@ install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include)
install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION include)
install(FILES ${PROJECT_SOURCE_DIR}/source/utf8.h DESTINATION include/utf8cpp)
install(DIRECTORY ${PROJECT_SOURCE_DIR}/source/utf8 DESTINATION
include/utf8cpp)

View file

@ -112,8 +112,13 @@ I came up with a small, C++98 compatible generic library in order to handle UTF-
<!-- TOC --><a name="installation"></a>
## Installation
The recommended way to use the library is to download an official release and copy the content of source directory into location of your project's header files.
If you use CMake for your builds, I still recommend just copying the files into your project, but if you want you can use the CMakeList.txt file included in the project.
This is a header-only library and the supported way of deploying it is:
- Download a release from https://github.com/nemtrif/utfcpp/releases into a temporary directory
- Unzip the release
- Copy the content of the utfcpp/source directory into the directory where you keep include files for your project
The CMakeLists.txt file was originally made for testing purposes only, but unfortunately over time I accepted contributions that added an install target. *This is not a supported way of installing the utfcpp library* and I am considering removing the CMakeLists.txt file in a future release.
<!-- TOC --><a name="examples-of-use"></a>
## Examples of use
@ -736,7 +741,8 @@ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_itera
`octet_iterator`: an input iterator.
`u16bit_iterator`: an output iterator.
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. < br /> `end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
`end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
`result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.
Return value: An iterator pointing to the place after the appended UTF-16 string.
@ -833,7 +839,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu16string-utf8to16stdu8string_view-s"></a>
@ -861,7 +867,7 @@ assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="utf8utf32to8"></a>
#### utf8::utf32to8
@ -1077,7 +1083,7 @@ u32string utf32result = utf8to32(twochars);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdu8string_view-s"></a>
@ -1103,7 +1109,7 @@ u32string utf32result = utf8to32(stringview);
assert (utf32result.size() == 2);
```
In case of an invalid UTF-8 seqence, a `utf8::invalid_utf8` exception is thrown.
In case of an invalid UTF-8 sequence, a `utf8::invalid_utf8` exception is thrown.
<!-- TOC --><a name="stdu32string-utf8to32const-stdstring-s"></a>
@ -1638,7 +1644,7 @@ class iterator;
<!-- TOC --><a name="member-functions"></a>
##### Member functions
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end);` a constructor that initializes the underlying octet_iterator with octet_it and sets the range in which the iterator is considered valid.
@ -1950,7 +1956,8 @@ template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
```
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert. < br /> `end`: an iterator pointing to pass-the-end of the UTF-8 encoded string to convert.
`start`: an iterator pointing to the beginning of the UTF-8 encoded string to convert.
`end`: an iterator pointing to past-the-end of the UTF-8 encoded string to convert.
`result`: an output iterator to the place in the UTF-16 string where to append the result of conversion.
Return value: An iterator pointing to the place after the appended UTF-16 string.
@ -2079,7 +2086,7 @@ class iterator;
<!-- TOC --><a name="member-functions-1"></a>
##### Member functions
`iterator();` the deafult constructor; the underlying octet_iterator is constructed with its default constructor.
`iterator();` the default constructor; the underlying octet_iterator is constructed with its default constructor.
`explicit iterator (const octet_iterator& octet_it);` a constructor that initializes the underlying octet_iterator with `octet_it`.

View file

@ -43,9 +43,14 @@ DEALINGS IN THE SOFTWARE.
// Language-version compatibility macros.
//   UTF_CPP_OVERRIDE      - `override` where the language has it, empty otherwise.
//   UTF_CPP_NOEXCEPT      - `noexcept` where the language has it, `throw()` otherwise.
//   UTF_CPP_STATIC_ASSERT - compile-time check of a constant boolean condition; the macro
//                           supplies its own trailing semicolon.
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
    #define UTF_CPP_OVERRIDE override
    #define UTF_CPP_NOEXCEPT noexcept
    #define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
#else // C++ 98/03
    #define UTF_CPP_OVERRIDE
    #define UTF_CPP_NOEXCEPT throw()
    // Simulate static_assert: instantiating the primary template (Condition == false) declares
    // an array of size -1, which is ill-formed; the <true> specialization is a no-op.
    // The member is named `check` rather than `assert` because `assert` is a function-like macro
    // once <cassert> has been included, and a member with that name would be macro-expanded.
    template <bool Condition> struct StaticAssert {
        static void check() {
            int static_assert_impl[(Condition ? 1 : -1)];
            (void)static_assert_impl; // silence unused-variable warnings
        }
    };
    template <> struct StaticAssert<true> {static void check() {}};
    // The condition is parenthesized so an expression containing `>` cannot end the template
    // argument list early.
    #define UTF_CPP_STATIC_ASSERT(condition) StaticAssert<(condition)>::check();
#endif // C++ 11 or later
@ -215,7 +220,7 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (*it) & 0x3f;
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
return UTF8_OK;
}
@ -234,11 +239,11 @@ namespace internal
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
code_point += (*it) & 0x3f;
code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
return UTF8_OK;
}
@ -308,6 +313,10 @@ namespace internal
template <typename word_iterator>
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
{
// Make sure the iterator dereferences a large enough type
typedef typename std::iterator_traits<word_iterator>::value_type word_type;
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
// Check the edge case:
if (it == end)
return NOT_ENOUGH_ROOM;
// Save the original value of it so we can go back in case of failure
@ -327,7 +336,7 @@ namespace internal
else if (is_lead_surrogate(first_word)) {
const utfchar16_t second_word = *it++;
if (is_trail_surrogate(second_word)) {
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
code_point = static_cast<utfchar32_t>(first_word << 10) + second_word + SURROGATE_OFFSET;
return UTF8_OK;
} else
err = INCOMPLETE_SEQUENCE;
@ -395,6 +404,7 @@ namespace internal
// the word_type.
template <typename word_iterator, typename word_type>
word_iterator append16(utfchar32_t cp, word_iterator result) {
UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
if (is_in_bmp(cp))
*(result++) = static_cast<word_type>(cp);
else {

View file

@ -115,15 +115,15 @@ namespace utf8
++it;
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
++it;
cp += (*it) & 0x3f;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
break;
case 4:
++it;
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
++it;
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
++it;
cp += (*it) & 0x3f;
cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
break;
}
++it;

View file

@ -6,6 +6,14 @@ add_library(${PROJECT_NAME} INTERFACE)
include_directories("${PROJECT_SOURCE_DIR}/../source")
if (MSVC)
# warning level 4
add_compile_options(/W4)
else()
# additional warnings
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion)
endif()
add_executable(negative negative.cpp)
add_executable(cpp11 test_cpp11.cpp)
add_executable(cpp17 test_cpp17.cpp)
@ -14,12 +22,6 @@ add_executable(apitests apitests.cpp)
add_executable(noexceptionstests noexceptionstests.cpp)
target_compile_options(${PROJECT_NAME} INTERFACE
$<$<CXX_COMPILER_ID:MSVC>:/W4>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic -Wconversion>)
target_compile_options(noexceptionstests PUBLIC -fno-exceptions)
set_target_properties(negative apitests noexceptionstests