Lifted the C++11 requirement for some functions

that take std::string as an argument.
This commit is contained in:
nemtrif 2022-12-28 15:26:33 -05:00
parent d5dc5701ab
commit 2af99eae7a
6 changed files with 61 additions and 54 deletions

View file

@ -169,12 +169,12 @@ In case you want to look into other means of working with UTF-8 strings from C++
#### utf8::append
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence to a UTF-8 string.
```cpp
void append(char32_t cp, std::string& s);
void append(utfchar32_t cp, std::string& s);
```
`cp`: a code point to append to the string.
@ -705,7 +705,7 @@ This function is typically used to make sure a UTF-8 string is valid before proc
#### utf8::find_invalid
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
Detects an invalid sequence within a UTF-8 string.
@ -802,7 +802,7 @@ You may want to use `is_valid` to make sure that a string contains valid UTF-8 t
#### utf8::is_valid
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
Checks whether a string object contains valid UTF-8 encoded text.
@ -875,12 +875,12 @@ assert (bvalid == false);
#### utf8::replace_invalid
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
Replaces all invalid UTF-8 sequences within a string with a replacement marker.
```cpp
std::string replace_invalid(const std::string& s, char32_t replacement);
std::string replace_invalid(const std::string& s, utfchar32_t replacement);
std::string replace_invalid(const std::string& s);
```
@ -963,7 +963,7 @@ assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(),
#### utf8::starts_with_bom
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
Checks whether a string starts with a UTF-8 byte order mark (BOM).

View file

@ -79,6 +79,11 @@ namespace utf8
return internal::append(cp, result);
}
// Appends the UTF-8 encoding of the code point cp to the string s.
// Forwards to the iterator-based append, writing through a back-inserter
// bound to s so the string grows as octets are produced.
inline void append(utfchar32_t cp, std::string& s)
{
    std::back_insert_iterator<std::string> out = std::back_inserter(s);
    append(cp, out);
}
template <typename octet_iterator, typename output_iterator>
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
{
@ -119,6 +124,20 @@ namespace utf8
return utf8::replace_invalid(start, end, out, replacement_marker);
}
// Returns a new string built from s by the iterator-based replace_invalid,
// which is asked to substitute the code point `replacement` for invalid
// UTF-8 sequences. The input string is left untouched.
inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
{
    std::string repaired;
    std::back_insert_iterator<std::string> sink = std::back_inserter(repaired);
    replace_invalid(s.begin(), s.end(), sink, replacement);
    return repaired;
}
// Returns a new string built from s by the three-argument, iterator-based
// replace_invalid, i.e. the overload that supplies its own replacement
// marker. The input string is left untouched.
inline std::string replace_invalid(const std::string& s)
{
    std::string repaired;
    std::back_insert_iterator<std::string> sink = std::back_inserter(repaired);
    replace_invalid(s.begin(), s.end(), sink);
    return repaired;
}
template <typename octet_iterator>
utfchar32_t next(octet_iterator& it, octet_iterator end)
{

View file

@ -30,6 +30,7 @@ DEALINGS IN THE SOFTWARE.
#include <iterator>
#include <cstring>
#include <string>
// Determine the C++ standard version.
// If the user defines UTF_CPP_CPLUSPLUS, use that.
@ -377,6 +378,12 @@ namespace internal
return find_invalid(str, end);
}
// Runs the iterator-based find_invalid over the whole of s and converts its
// result to an index: std::string::npos when the returned iterator is
// s.end(), otherwise the iterator's offset from the start of s.
inline std::size_t find_invalid(const std::string& s)
{
    const std::string::const_iterator first_bad = find_invalid(s.begin(), s.end());
    if (first_bad == s.end())
        return std::string::npos;
    return static_cast<std::size_t>(first_bad - s.begin());
}
template <typename octet_iterator>
inline bool is_valid(octet_iterator start, octet_iterator end)
{
@ -388,6 +395,13 @@ namespace internal
return (*(utf8::find_invalid(str)) == '\0');
}
inline bool is_valid(const std::string& s)
{
return is_valid(s.begin(), s.end());
}
template <typename octet_iterator>
inline bool starts_with_bom (octet_iterator it, octet_iterator end)
{
@ -396,7 +410,12 @@ namespace internal
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
);
}
}
inline bool starts_with_bom(const std::string& s)
{
return starts_with_bom(s.begin(), s.end());
}
} // namespace utf8
#endif // header guard

View file

@ -29,16 +29,9 @@ DEALINGS IN THE SOFTWARE.
#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
#include "checked.h"
#include <string>
namespace utf8
{
inline void append(char32_t cp, std::string& s)
{
append(cp, std::back_inserter(s));
}
inline std::string utf16to8(const std::u16string& s)
{
std::string result;
@ -66,37 +59,6 @@ namespace utf8
utf8to32(s.begin(), s.end(), std::back_inserter(result));
return result;
}
// Runs the iterator-based find_invalid over the whole of s and converts its
// result to an index: std::string::npos when the returned iterator is
// s.end(), otherwise the iterator's offset from the start of s.
inline std::size_t find_invalid(const std::string& s)
{
    const std::string::const_iterator first_bad = find_invalid(s.begin(), s.end());
    if (first_bad == s.end())
        return std::string::npos;
    return static_cast<std::size_t>(first_bad - s.begin());
}
inline bool is_valid(const std::string& s)
{
return is_valid(s.begin(), s.end());
}
// Returns a new string built from s by the iterator-based replace_invalid,
// which is asked to substitute the code point `replacement` for invalid
// UTF-8 sequences. The input string is left untouched.
inline std::string replace_invalid(const std::string& s, char32_t replacement)
{
    std::string repaired;
    std::back_insert_iterator<std::string> sink = std::back_inserter(repaired);
    replace_invalid(s.begin(), s.end(), sink, replacement);
    return repaired;
}
// Returns a new string built from s by the three-argument, iterator-based
// replace_invalid, i.e. the overload that supplies its own replacement
// marker. The input string is left untouched.
inline std::string replace_invalid(const std::string& s)
{
    std::string repaired;
    std::back_insert_iterator<std::string> sink = std::back_inserter(repaired);
    replace_invalid(s.begin(), s.end(), sink);
    return repaired;
}
inline bool starts_with_bom(const std::string& s)
{
return starts_with_bom(s.begin(), s.end());
}
} // namespace utf8
#endif // header guard

View file

@ -28,17 +28,10 @@ DEALINGS IN THE SOFTWARE.
#ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
#define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
#include "checked.h"
#include <string>
#include "cpp11.h"
namespace utf8
{
inline void append(char32_t cp, std::string& s)
{
append(cp, std::back_inserter(s));
}
inline std::string utf16to8(std::u16string_view s)
{
std::string result;

View file

@ -80,6 +80,20 @@ namespace utf8
return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
}
// Returns a new string built from s by the iterator-based replace_invalid,
// which is asked to substitute the code point `replacement` for invalid
// UTF-8 sequences. The input string is left untouched.
inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
{
    std::string repaired;
    std::back_insert_iterator<std::string> sink = std::back_inserter(repaired);
    replace_invalid(s.begin(), s.end(), sink, replacement);
    return repaired;
}
// Returns a new string built from s by the three-argument, iterator-based
// replace_invalid, i.e. the overload that supplies its own replacement
// marker. The input string is left untouched.
inline std::string replace_invalid(const std::string& s)
{
    std::string repaired;
    std::back_insert_iterator<std::string> sink = std::back_inserter(repaired);
    replace_invalid(s.begin(), s.end(), sink);
    return repaired;
}
template <typename octet_iterator>
utfchar32_t next(octet_iterator& it)
{