diff --git a/README.md b/README.md index 721b8a4..a2800fb 100644 --- a/README.md +++ b/README.md @@ -169,12 +169,12 @@ In case you want to look into other means of working with UTF-8 strings from C++ #### utf8::append -Available in version 3.0 and later. Requires a C++ 11 compliant compiler. +Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0. Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence to a UTF-8 string. ```cpp -void append(char32_t cp, std::string& s); +void append(utfchar32_t cp, std::string& s); ``` `cp`: a code point to append to the string. @@ -705,7 +705,7 @@ This function is typically used to make sure a UTF-8 string is valid before proc #### utf8::find_invalid -Available in version 3.0 and later. Requires a C++ 11 compliant compiler. +Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0. Detects an invalid sequence within a UTF-8 string. @@ -802,7 +802,7 @@ You may want to use `is_valid` to make sure that a string contains valid UTF-8 t #### utf8::is_valid -Available in version 3.0 and later. Requires a C++ 11 compliant compiler. +Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0. Checks whether a string object contains valid UTF-8 encoded text. @@ -875,12 +875,12 @@ assert (bvalid == false); #### utf8::replace_invalid -Available in version 3.0 and later. Requires a C++ 11 compliant compiler. +Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0. Replaces all invalid UTF-8 sequences within a string with a replacement marker. 
```cpp -std::string replace_invalid(const std::string& s, char32_t replacement); +std::string replace_invalid(const std::string& s, utfchar32_t replacement); std::string replace_invalid(const std::string& s); ``` @@ -963,7 +963,7 @@ assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), #### utf8::starts_with_bom -Available in version 3.0 and later. Requires a C++ 11 compliant compiler. +Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0. Checks whether a string starts with a UTF-8 byte order mark (BOM) diff --git a/source/utf8/checked.h b/source/utf8/checked.h index f71e60a..c9c0bf3 100644 --- a/source/utf8/checked.h +++ b/source/utf8/checked.h @@ -79,6 +79,11 @@ namespace utf8 return internal::append(cp, result); } + inline void append(utfchar32_t cp, std::string& s) + { + append(cp, std::back_inserter(s)); + } + template output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement) { @@ -119,6 +124,20 @@ namespace utf8 return utf8::replace_invalid(start, end, out, replacement_marker); } + inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(const std::string& s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + template utfchar32_t next(octet_iterator& it, octet_iterator end) { diff --git a/source/utf8/core.h b/source/utf8/core.h index ab95453..5f09970 100644 --- a/source/utf8/core.h +++ b/source/utf8/core.h @@ -30,6 +30,7 @@ DEALINGS IN THE SOFTWARE. #include #include +#include // Determine the C++ standard version. // If the user defines UTF_CPP_CPLUSPLUS, use that. 
@@ -377,6 +378,12 @@ namespace internal return find_invalid(str, end); } + inline std::size_t find_invalid(const std::string& s) + { + std::string::const_iterator invalid = find_invalid(s.begin(), s.end()); + return (invalid == s.end()) ? std::string::npos : static_cast(invalid - s.begin()); + } + template inline bool is_valid(octet_iterator start, octet_iterator end) { @@ -388,6 +395,13 @@ namespace internal return (*(utf8::find_invalid(str)) == '\0'); } + inline bool is_valid(const std::string& s) + { + return is_valid(s.begin(), s.end()); + } + + + template inline bool starts_with_bom (octet_iterator it, octet_iterator end) { @@ -396,7 +410,12 @@ namespace internal ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) ); - } + } + + inline bool starts_with_bom(const std::string& s) + { + return starts_with_bom(s.begin(), s.end()); + } } // namespace utf8 #endif // header guard diff --git a/source/utf8/cpp11.h b/source/utf8/cpp11.h index d482f52..fc63e10 100644 --- a/source/utf8/cpp11.h +++ b/source/utf8/cpp11.h @@ -29,16 +29,9 @@ DEALINGS IN THE SOFTWARE. #define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1 #include "checked.h" -#include namespace utf8 { - - inline void append(char32_t cp, std::string& s) - { - append(cp, std::back_inserter(s)); - } - inline std::string utf16to8(const std::u16string& s) { std::string result; @@ -66,37 +59,6 @@ namespace utf8 utf8to32(s.begin(), s.end(), std::back_inserter(result)); return result; } - - inline std::size_t find_invalid(const std::string& s) - { - std::string::const_iterator invalid = find_invalid(s.begin(), s.end()); - return (invalid == s.end()) ? 
std::string::npos : static_cast(invalid - s.begin()); - } - - inline bool is_valid(const std::string& s) - { - return is_valid(s.begin(), s.end()); - } - - inline std::string replace_invalid(const std::string& s, char32_t replacement) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); - return result; - } - - inline std::string replace_invalid(const std::string& s) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline bool starts_with_bom(const std::string& s) - { - return starts_with_bom(s.begin(), s.end()); - } - } // namespace utf8 #endif // header guard diff --git a/source/utf8/cpp17.h b/source/utf8/cpp17.h index f58bc00..6e2fcc2 100644 --- a/source/utf8/cpp17.h +++ b/source/utf8/cpp17.h @@ -28,17 +28,10 @@ DEALINGS IN THE SOFTWARE. #ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9 #define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9 -#include "checked.h" -#include +#include "cpp11.h" namespace utf8 { - - inline void append(char32_t cp, std::string& s) - { - append(cp, std::back_inserter(s)); - } - inline std::string utf16to8(std::u16string_view s) { std::string result; diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h index c63b044..ede8916 100644 --- a/source/utf8/unchecked.h +++ b/source/utf8/unchecked.h @@ -80,6 +80,20 @@ namespace utf8 return utf8::unchecked::replace_invalid(start, end, out, replacement_marker); } + inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(const std::string& s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + template utfchar32_t next(octet_iterator& it) {