mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-05 05:25:07 +00:00
Lifted the C++11 requirement for some functions
that take std::string as an argument.
This commit is contained in:
parent
d5dc5701ab
commit
2af99eae7a
6 changed files with 61 additions and 54 deletions
14
README.md
14
README.md
|
@ -169,12 +169,12 @@ In case you want to look into other means of working with UTF-8 strings from C++
|
|||
|
||||
#### utf8::append
|
||||
|
||||
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
|
||||
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
|
||||
|
||||
Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence to a UTF-8 string.
|
||||
|
||||
```cpp
|
||||
void append(char32_t cp, std::string& s);
|
||||
void append(utfchar32_t cp, std::string& s);
|
||||
```
|
||||
|
||||
`cp`: a code point to append to the string.
|
||||
|
@ -705,7 +705,7 @@ This function is typically used to make sure a UTF-8 string is valid before proc
|
|||
|
||||
#### utf8::find_invalid
|
||||
|
||||
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
|
||||
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
|
||||
|
||||
Detects an invalid sequence within a UTF-8 string.
|
||||
|
||||
|
@ -802,7 +802,7 @@ You may want to use `is_valid` to make sure that a string contains valid UTF-8 t
|
|||
|
||||
#### utf8::is_valid
|
||||
|
||||
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
|
||||
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
|
||||
|
||||
Checks whether a string object contains valid UTF-8 encoded text.
|
||||
|
||||
|
@ -875,12 +875,12 @@ assert (bvalid == false);
|
|||
|
||||
#### utf8::replace_invalid
|
||||
|
||||
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
|
||||
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
|
||||
|
||||
Replaces all invalid UTF-8 sequences within a string with a replacement marker.
|
||||
|
||||
```cpp
|
||||
std::string replace_invalid(const std::string& s, char32_t replacement);
|
||||
std::string replace_invalid(const std::string& s, utfchar32_t replacement);
|
||||
std::string replace_invalid(const std::string& s);
|
||||
```
|
||||
|
||||
|
@ -963,7 +963,7 @@ assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(),
|
|||
|
||||
#### utf8::starts_with_bom
|
||||
|
||||
Available in version 3.0 and later. Requires a C++ 11 compliant compiler.
|
||||
Available in version 3.0 and later. Prior to 4.0 it required a C++ 11 compiler; the requirement is lifted with 4.0.
|
||||
|
||||
Checks whether a string starts with a UTF-8 byte order mark (BOM)
|
||||
|
||||
|
|
|
@ -79,6 +79,11 @@ namespace utf8
|
|||
return internal::append(cp, result);
|
||||
}
|
||||
|
||||
// Convenience overload: appends the code point `cp` to the string `s`.
// `cp`: the code point to append.
// `s`:  the string that receives the appended octets (modified in place).
// Forwards to the iterator-based append overload, writing through a
// std::back_inserter so the string grows as octets are emitted.
inline void append(utfchar32_t cp, std::string& s)
|
||||
{
|
||||
append(cp, std::back_inserter(s));
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename output_iterator>
|
||||
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
|
||||
{
|
||||
|
@ -119,6 +124,20 @@ namespace utf8
|
|||
return utf8::replace_invalid(start, end, out, replacement_marker);
|
||||
}
|
||||
|
||||
// Convenience overload: returns a new string built from `s` by the
// iterator-range replace_invalid overload, passing `replacement` as the
// replacement code point.
// `s`:           the input string; taken by const reference and not modified.
// `replacement`: the code point forwarded as the replacement marker.
// Returns the freshly built result string.
inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
|
||||
{
|
||||
std::string result;
|
||||
// Output is appended to `result` through a back_inserter.
replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convenience overload: returns a new string built from `s` by the
// three-argument iterator-range replace_invalid overload. No replacement
// code point is passed here; the choice of marker is left to that overload.
// `s`: the input string; taken by const reference and not modified.
// Returns the freshly built result string.
inline std::string replace_invalid(const std::string& s)
|
||||
{
|
||||
std::string result;
|
||||
// Output is appended to `result` through a back_inserter.
replace_invalid(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utfchar32_t next(octet_iterator& it, octet_iterator end)
|
||||
{
|
||||
|
|
|
@ -30,6 +30,7 @@ DEALINGS IN THE SOFTWARE.
|
|||
|
||||
#include <iterator>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
// Determine the C++ standard version.
|
||||
// If the user defines UTF_CPP_CPLUSPLUS, use that.
|
||||
|
@ -377,6 +378,12 @@ namespace internal
|
|||
return find_invalid(str, end);
|
||||
}
|
||||
|
||||
// Convenience overload: detects an invalid sequence within the string `s`
// and reports its position as an index rather than an iterator.
// `s`: the string to inspect.
// Returns std::string::npos when the iterator-range find_invalid overload
// returns s.end(); otherwise the distance from s.begin() to the iterator
// it returned.
inline std::size_t find_invalid(const std::string& s)
|
||||
{
|
||||
std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
|
||||
// Translate the iterator result into an index (or npos for "end").
return (invalid == s.end()) ? std::string::npos : static_cast<std::size_t>(invalid - s.begin());
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline bool is_valid(octet_iterator start, octet_iterator end)
|
||||
{
|
||||
|
@ -388,6 +395,13 @@ namespace internal
|
|||
return (*(utf8::find_invalid(str)) == '\0');
|
||||
}
|
||||
|
||||
// Convenience overload: checks whether the string object `s` contains valid
// UTF-8 encoded text.
// `s`: the string to check.
// Returns whatever the iterator-range is_valid overload returns for the
// range [s.begin(), s.end()).
inline bool is_valid(const std::string& s)
|
||||
{
|
||||
return is_valid(s.begin(), s.end());
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline bool starts_with_bom (octet_iterator it, octet_iterator end)
|
||||
{
|
||||
|
@ -396,7 +410,12 @@ namespace internal
|
|||
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
|
||||
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: checks whether the string `s` starts with a UTF-8
// byte order mark (BOM).
// `s`: the string to check.
// Returns whatever the iterator-range starts_with_bom overload returns for
// the range [s.begin(), s.end()).
inline bool starts_with_bom(const std::string& s)
|
||||
{
|
||||
return starts_with_bom(s.begin(), s.end());
|
||||
}
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
|
|
@ -29,16 +29,9 @@ DEALINGS IN THE SOFTWARE.
|
|||
#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
|
||||
|
||||
#include "checked.h"
|
||||
#include <string>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
||||
// char32_t-based convenience overload: appends the code point `cp` to `s`
// by forwarding to the iterator-based append with a std::back_inserter.
// NOTE(review): this looks like the pre-change copy of the function that the
// commit removes from this header (the hunk shrinks from 16 to 9 lines) --
// confirm against the repository.
inline void append(char32_t cp, std::string& s)
|
||||
{
|
||||
append(cp, std::back_inserter(s));
|
||||
}
|
||||
|
||||
inline std::string utf16to8(const std::u16string& s)
|
||||
{
|
||||
std::string result;
|
||||
|
@ -66,37 +59,6 @@ namespace utf8
|
|||
utf8to32(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convenience overload: detects an invalid sequence within `s` and reports
// its position as an index. Returns std::string::npos when the iterator-range
// overload returns s.end(), otherwise the offset from s.begin().
// NOTE(review): this looks like the pre-change copy removed by the commit
// (the hunk shrinks from 37 to 6 lines) -- confirm against the repository.
inline std::size_t find_invalid(const std::string& s)
|
||||
{
|
||||
std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
|
||||
// Translate the iterator result into an index (or npos for "end").
return (invalid == s.end()) ? std::string::npos : static_cast<std::size_t>(invalid - s.begin());
|
||||
}
|
||||
|
||||
// Convenience overload: checks whether `s` contains valid UTF-8 encoded text
// by delegating to the iterator-range is_valid overload.
// NOTE(review): this looks like the pre-change copy removed by the commit --
// confirm against the repository.
inline bool is_valid(const std::string& s)
|
||||
{
|
||||
return is_valid(s.begin(), s.end());
|
||||
}
|
||||
|
||||
// char32_t-based convenience overload: returns a new string built from `s`
// by the iterator-range replace_invalid overload, passing `replacement` as
// the replacement code point. `s` itself is not modified.
// NOTE(review): this looks like the pre-change copy removed by the commit --
// confirm against the repository.
inline std::string replace_invalid(const std::string& s, char32_t replacement)
|
||||
{
|
||||
std::string result;
|
||||
// Output is appended to `result` through a back_inserter.
replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convenience overload: returns a new string built from `s` by the
// three-argument iterator-range replace_invalid overload; no replacement
// code point is passed from here. `s` itself is not modified.
// NOTE(review): this looks like the pre-change copy removed by the commit --
// confirm against the repository.
inline std::string replace_invalid(const std::string& s)
|
||||
{
|
||||
std::string result;
|
||||
// Output is appended to `result` through a back_inserter.
replace_invalid(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convenience overload: checks whether `s` starts with a UTF-8 byte order
// mark (BOM) by delegating to the iterator-range starts_with_bom overload.
// NOTE(review): this looks like the pre-change copy removed by the commit --
// confirm against the repository.
inline bool starts_with_bom(const std::string& s)
|
||||
{
|
||||
return starts_with_bom(s.begin(), s.end());
|
||||
}
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
|
|
@ -28,17 +28,10 @@ DEALINGS IN THE SOFTWARE.
|
|||
#ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
|
||||
#define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
|
||||
|
||||
#include "checked.h"
|
||||
#include <string>
|
||||
#include "cpp11.h"
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
||||
// char32_t-based convenience overload: appends the code point `cp` to `s`
// by forwarding to the iterator-based append with a std::back_inserter.
// NOTE(review): this looks like the pre-change copy that the commit removes
// from this header (the hunk shrinks from 17 to 10 lines) -- confirm against
// the repository.
inline void append(char32_t cp, std::string& s)
|
||||
{
|
||||
append(cp, std::back_inserter(s));
|
||||
}
|
||||
|
||||
inline std::string utf16to8(std::u16string_view s)
|
||||
{
|
||||
std::string result;
|
||||
|
|
|
@ -80,6 +80,20 @@ namespace utf8
|
|||
return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
|
||||
}
|
||||
|
||||
// Convenience overload: returns a new string built from `s` by the
// iterator-range replace_invalid overload, passing `replacement` as the
// replacement code point. `s` is taken by const reference and not modified.
// NOTE(review): the preceding context line calls
// utf8::unchecked::replace_invalid, so this presumably sits in the
// `unchecked` namespace -- confirm against the full header.
inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
|
||||
{
|
||||
std::string result;
|
||||
// Output is appended to `result` through a back_inserter.
replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convenience overload: returns a new string built from `s` by the
// three-argument iterator-range replace_invalid overload; no replacement
// code point is passed from here. `s` is taken by const reference and not
// modified.
// NOTE(review): presumably the `unchecked` namespace variant, judging by the
// utf8::unchecked::replace_invalid call in the preceding context -- confirm
// against the full header.
inline std::string replace_invalid(const std::string& s)
|
||||
{
|
||||
std::string result;
|
||||
// Output is appended to `result` through a back_inserter.
replace_invalid(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utfchar32_t next(octet_iterator& it)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue