mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-05 05:25:07 +00:00
Remove deprecated functions
Removed: - previous - is_bom
This commit is contained in:
parent
2b8596149e
commit
00d73e4a02
6 changed files with 6 additions and 165 deletions
90
README.md
90
README.md
|
@ -308,39 +308,6 @@ In case `start` is reached before a UTF-8 lead octet is hit, or if an invalid UT
|
|||
|
||||
In case `start` equals `it`, a `not_enough_room` exception is thrown.
|
||||
|
||||
#### utf8::previous
|
||||
|
||||
Deprecated in version 1.02 and later.
|
||||
|
||||
Given a reference to an iterator pointing to an octet in a UTF-8 sequence, it decreases the iterator until it hits the beginning of the previous UTF-8 encoded code point and returns the 32-bit representation of the code point.
|
||||
|
||||
```cpp
|
||||
template <typename octet_iterator>
|
||||
uint32_t previous(octet_iterator& it, octet_iterator pass_start);
|
||||
```
|
||||
|
||||
`octet_iterator`: a random access iterator.
|
||||
`it`: a reference pointing to an octet within a UTF-8 encoded string. After the function returns, it is decremented to point to the beginning of the previous code point.
|
||||
`pass_start`: an iterator to the point in the sequence where the search for the beginning of a code point is aborted if no result was reached. It is a safety measure to prevent passing the beginning of the string in the search for a UTF-8 lead octet.
|
||||
Return value: the 32 bit representation of the previous code point.
|
||||
|
||||
Example of use:
|
||||
|
||||
```cpp
|
||||
char* twochars = "\xe6\x97\xa5\xd1\x88";
|
||||
unsigned char* w = twochars + 3;
|
||||
int cp = previous (w, twochars - 1);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == twochars);
|
||||
```
|
||||
|
||||
|
||||
`utf8::previous` is deprecated, and `utf8::prior` should be used instead, although existing code can continue using this function. The problem is the parameter `pass_start`, which points to the position just before the beginning of the sequence. Standard containers don't have the concept of "pass start", so the function cannot be used with their iterators.
|
||||
|
||||
`it` will typically point to the beginning of a code point, and `pass_start` will point to the octet just before the beginning of the string to ensure we don't go backwards too far. `it` is decreased until it points to a lead UTF-8 octet, and then the UTF-8 sequence beginning with that octet is decoded to a 32 bit representation and returned.
|
||||
|
||||
In case `pass_start` is reached before a UTF-8 lead octet is hit, or if an invalid UTF-8 sequence is started by the lead octet, an `invalid_utf8` exception is thrown.
|
||||
|
||||
#### utf8::advance
|
||||
Available in version 1.0 and later.
|
||||
|
||||
|
@ -801,7 +768,7 @@ The typical use of this function is to check the first three bytes of a file. If
|
|||
|
||||
#### utf8::starts_with_bom
|
||||
|
||||
Available in version 2.3 and later. Replaces deprecated `is_bom()` function.
|
||||
Available in version 2.3 and later.
|
||||
|
||||
Checks whether an octet sequence starts with a UTF-8 byte order mark (BOM)
|
||||
|
||||
|
@ -825,33 +792,6 @@ assert (bbom == true);
|
|||
|
||||
The typical use of this function is to check the first three bytes of a file. If they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8 encoded text.
|
||||
|
||||
#### utf8::is_bom
|
||||
|
||||
Available in version 1.0 and later. Deprecated in version 2.3\. `starts_with_bom()` should be used instead.
|
||||
|
||||
Checks whether a sequence of three octets is a UTF-8 byte order mark (BOM)
|
||||
|
||||
```cpp
|
||||
template <typename octet_iterator>
|
||||
bool is_bom (octet_iterator it); // Deprecated
|
||||
```
|
||||
|
||||
`octet_iterator`: an input iterator.
|
||||
`it`: beginning of the 3-octet sequence to check
|
||||
Return value: `true` if the sequence is UTF-8 byte order mark; `false` if not.
|
||||
|
||||
Example of use:
|
||||
|
||||
```cpp
|
||||
unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
|
||||
bool bbom = is_bom(byte_order_mark);
|
||||
assert (bbom == true);
|
||||
```
|
||||
|
||||
The typical use of this function is to check the first three bytes of a file. If they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8 encoded text.
|
||||
|
||||
If a sequence is shorter than three bytes, an invalid iterator will be dereferenced. Therefore, this function is deprecated in favor of `starts_with_bom()`, which takes the end of the sequence as an argument.
|
||||
|
||||
### Types From utf8 Namespace
|
||||
|
||||
#### utf8::exception
|
||||
|
@ -1098,34 +1038,6 @@ assert (w == twochars);
|
|||
|
||||
This is a faster but less safe version of `utf8::prior`. It does not check for validity of the supplied UTF-8 sequence and offers no boundary checking.
|
||||
|
||||
#### utf8::unchecked::previous (deprecated, see utf8::unchecked::prior)
|
||||
|
||||
Deprecated in version 1.02 and later.
|
||||
|
||||
Given a reference to an iterator pointing to an octet in a UTF-8 sequence, it decreases the iterator until it hits the beginning of the previous UTF-8 encoded code point and returns the 32-bit representation of the code point.
|
||||
|
||||
```cpp
|
||||
template <typename octet_iterator>
|
||||
uint32_t previous(octet_iterator& it);
|
||||
```
|
||||
|
||||
`it`: a reference pointing to an octet within a UTF-8 encoded string. After the function returns, it is decremented to point to the beginning of the previous code point.
|
||||
Return value: the 32 bit representation of the previous code point.
|
||||
|
||||
Example of use:
|
||||
|
||||
```cpp
|
||||
char* twochars = "\xe6\x97\xa5\xd1\x88";
|
||||
char* w = twochars + 3;
|
||||
int cp = unchecked::previous (w);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == twochars);
|
||||
```
|
||||
|
||||
The reason this function is deprecated is just consistency with the "checked" versions, where `prior` should be used instead of `previous`. In fact, `unchecked::previous` behaves exactly the same as `unchecked::prior`.
|
||||
|
||||
This is a faster but less safe version of `utf8::previous`. It does not check for validity of the supplied UTF-8 sequence and offers no boundary checking.
|
||||
|
||||
#### utf8::unchecked::advance
|
||||
|
||||
Available in version 1.0 and later.
|
||||
|
|
|
@ -174,18 +174,6 @@ namespace utf8
|
|||
return utf8::peek_next(it, end);
|
||||
}
|
||||
|
||||
/// Deprecated in versions that include "prior"; utf8::prior should be used instead.
/// Decrements `it` while utf8::internal::is_trail reports a trail octet, then
/// decodes the code point in [it, original position of it) with utf8::next and
/// returns it. `it` is left at the octet where the backwards search stopped.
/// Throws invalid_utf8 when `it` reaches `pass_start` while still on a trail octet.
/// NOTE(review): `*(--it)` is evaluated before the comparison with `pass_start`,
/// so the octet at `pass_start` itself is read when no lead octet is found.
|
||||
template <typename octet_iterator>
|
||||
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
|
||||
{
|
||||
octet_iterator end = it;
|
||||
while (utf8::internal::is_trail(*(--it)))
|
||||
if (it == pass_start)
|
||||
throw invalid_utf8(*it); // error - no lead byte in the sequence
|
||||
octet_iterator temp = it;
|
||||
return utf8::next(temp, end);
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename distance_type>
|
||||
void advance (octet_iterator& it, distance_type n, octet_iterator end)
|
||||
{
|
||||
|
|
|
@ -313,18 +313,7 @@ namespace internal
|
|||
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
|
||||
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
|
||||
);
|
||||
}
|
||||
|
||||
// Deprecated in release 2.3; starts_with_bom(it, end) should be used instead.
// Compares up to three consecutive octets starting at `it` (each passed through
// utf8::internal::mask8) against bom[0], bom[1] and bom[2]; returns true only if
// all three match. `&&` short-circuits, so later octets are read only after the
// earlier ones matched.
// There is no end-of-sequence parameter: the caller must guarantee that the
// octets being read exist, otherwise an invalid iterator is dereferenced.
|
||||
template <typename octet_iterator>
|
||||
inline bool is_bom (octet_iterator it)
|
||||
{
|
||||
return (
|
||||
(utf8::internal::mask8(*it++)) == bom[0] &&
|
||||
(utf8::internal::mask8(*it++)) == bom[1] &&
|
||||
(utf8::internal::mask8(*it)) == bom[2]
|
||||
);
|
||||
}
|
||||
}
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
|
|
@ -102,13 +102,6 @@ namespace utf8
|
|||
return utf8::unchecked::next(temp);
|
||||
}
|
||||
|
||||
// Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
// Thin wrapper: forwards `it` to utf8::unchecked::prior and returns its result unchanged.
|
||||
template <typename octet_iterator>
|
||||
inline uint32_t previous(octet_iterator& it)
|
||||
{
|
||||
return utf8::unchecked::prior(it);
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename distance_type>
|
||||
void advance (octet_iterator& it, distance_type n)
|
||||
{
|
||||
|
|
|
@ -65,23 +65,6 @@ int main()
|
|||
assert (cp == 0x10346);
|
||||
assert (w == threechars);
|
||||
|
||||
//previous (deprecated)
|
||||
w = twochars + 3;
|
||||
cp = previous (w, twochars - 1);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == twochars);
|
||||
|
||||
w = threechars + 9;
|
||||
cp = previous(w, threechars - 1);
|
||||
assert (cp == 0x0448);
|
||||
assert (w == threechars + 7);
|
||||
cp = previous(w, threechars -1);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == threechars + 4);
|
||||
cp = previous(w, threechars - 1);
|
||||
assert (cp == 0x10346);
|
||||
assert (w == threechars);
|
||||
|
||||
// advance
|
||||
w = twochars;
|
||||
advance (w, 2, twochars + 6);
|
||||
|
@ -145,11 +128,6 @@ int main()
|
|||
assert (bbom == true);
|
||||
bool no_bbom = starts_with_bom(threechars, threechars + sizeof(threechars));
|
||||
assert (no_bbom == false);
|
||||
|
||||
//is_bom
|
||||
bool unsafe_bbom = is_bom(byte_order_mark);
|
||||
assert (unsafe_bbom == true);
|
||||
|
||||
|
||||
//replace_invalid
|
||||
char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
|
||||
|
@ -215,25 +193,6 @@ int main()
|
|||
assert (cp == 0x65e5);
|
||||
assert (cw == twochars);
|
||||
|
||||
|
||||
//previous (calls prior internally)
|
||||
|
||||
w = twochars + 3;
|
||||
cp = unchecked::previous (w);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == twochars);
|
||||
|
||||
w = threechars + 9;
|
||||
cp = unchecked::previous(w);
|
||||
assert (cp == 0x0448);
|
||||
assert (w == threechars + 7);
|
||||
cp = unchecked::previous(w);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == threechars + 4);
|
||||
cp = unchecked::previous(w);
|
||||
assert (cp == 0x10346);
|
||||
assert (w == threechars);
|
||||
|
||||
// advance
|
||||
w = twochars;
|
||||
unchecked::advance (w, 2);
|
||||
|
|
|
@ -76,11 +76,11 @@ int main(int argc, char** argv)
|
|||
cout << "Line " << line_count << ": Error in distance function" << '\n';
|
||||
|
||||
while (it != line_start) {
|
||||
previous(it, line.rend().base());
|
||||
prior(it, line.rend().base());
|
||||
char_count--;
|
||||
}
|
||||
if (char_count != 0)
|
||||
cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
|
||||
cout << "Line " << line_count << ": Error in iterating with prior - wrong number of characters" << '\n';
|
||||
|
||||
// Try utf8::iterator
|
||||
utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
|
||||
|
@ -140,11 +140,11 @@ int main(int argc, char** argv)
|
|||
cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
|
||||
|
||||
while (it != line_start) {
|
||||
unchecked::previous(it);
|
||||
unchecked::prior(it);
|
||||
char_count--;
|
||||
}
|
||||
if (char_count != 0)
|
||||
cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
|
||||
cout << "Line " << line_count << ": Error in iterating with unchecked::prior - wrong number of characters" << '\n';
|
||||
|
||||
// Try utf8::unchecked::iterator
|
||||
utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
|
||||
|
|
Loading…
Add table
Reference in a new issue