mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-2397 add prototype of u_strCompareIter() - compare strings via iterators; add test for it, too - needs discussion, finalizing of semantics, proposal, ...
X-SVN-Rev: 10838
This commit is contained in:
parent
e03434154e
commit
b171e3f8ca
1 changed files with 117 additions and 2 deletions
|
@ -26,6 +26,106 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* ### TODO prototype ------------------------------------------------------- */
|
||||
|
||||
#include "unicode/uiter.h"
|
||||
|
||||
/*
|
||||
* Compare two strings as presented by UCharIterators.
|
||||
* Use code unit or code point order.
|
||||
* When the function returns, it is undefined where the iterators
|
||||
* have stopped.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
|
||||
UChar32 c1, c2;
|
||||
|
||||
/* ### TODO: iterate from current positions or reset to start? reset for now */
|
||||
iter1->move(iter1, 0, UITER_START);
|
||||
iter2->move(iter2, 0, UITER_START);
|
||||
|
||||
/* compare identical prefixes - they do not need to be fixed up */
|
||||
for(;;) {
|
||||
c1=iter1->next(iter1);
|
||||
c2=iter2->next(iter2);
|
||||
if(c1!=c2) {
|
||||
break;
|
||||
}
|
||||
if(c1==-1) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* if both values are in or above the surrogate range, fix them up */
|
||||
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
|
||||
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
|
||||
if(
|
||||
(c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
|
||||
(UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
/* BMP code point - may be surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
|
||||
if(
|
||||
(c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
|
||||
(UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
/* BMP code point - may be surrogate code point - make <d800 */
|
||||
c2-=0x2800;
|
||||
}
|
||||
}
|
||||
|
||||
/* now c1 and c2 are in the requested (code unit or code point) order */
|
||||
return (int32_t)c1-(int32_t)c2;
|
||||
}
|
||||
|
||||
#if 0
/*
 * ### TODO: The prototype above does not leave the iterators _on_ the
 * differing units.
 * Doing so is possible, but costs a few extra indirect function calls for
 * backing up whenever a unit (c1 or c2 respectively) was >=0.
 *
 * Consistently leaving them _behind_ the differing units is not an option:
 * when the end of a string is reached, the current "unit" is the end of the
 * string and the iterator does not move.
 * For example, when comparing "ab" with "abc", both iterators rest _on_ the
 * end of their strings. Calling previous() on each does not move them to
 * where the comparison fails.
 *
 * So the simplest semantics for now is to not define where the iterators
 * end up.
 *
 * The following fragment is part of what needs to be done for backing up.
 */
void fragment {
    /* a surrogate stays >=d800 iff it is part of a surrogate pair */
    if(c1>0xdfff) {
        /* 0xe000<=c1<=0xffff: BMP code point - make <d800 */
        c1-=0x2800;
    } else if(c1>0xdbff) {
        /* remember where we are so that we can return here afterwards */
        int32_t savedIndex=iter1->getIndex(iter1, UITER_CURRENT);

        iter1->previous(iter1); /* ==c1 */
        if(!UTF_IS_LEAD(iter1->previous(iter1))) {
            /* unpaired trail surrogate code point - make <d800 */
            c1-=0x2800;
        }

        /* go back to behind where the difference is */
        iter1->move(iter1, savedIndex, UITER_ZERO);
    } else {
        if(!UTF_IS_TRAIL(iter1->current(iter1))) {
            /* unpaired lead surrogate code point - make <d800 */
            c1-=0x2800;
        }
    }
}
#endif
|
||||
|
||||
/* end prototype ------------------------------------------------------------ */
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* get the sign of an integer */
|
||||
|
@ -433,7 +533,9 @@ static void TestStringFunctions()
|
|||
{ 0xd800, 0xdc02, 0 }, /* U+10002 */
|
||||
{ 0xd84d, 0xdc56, 0 } /* U+23456 */
|
||||
};
|
||||
int32_t len1, len2;
|
||||
|
||||
UCharIterator iter1, iter2; /* ### TODO prototype */
|
||||
int32_t len1, len2, r1, r2;
|
||||
|
||||
for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
|
||||
if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
|
||||
|
@ -460,9 +562,22 @@ static void TestStringFunctions()
|
|||
}
|
||||
|
||||
/* test u_strCompare(FALSE) */
|
||||
if(_SIGN(u_strCompare(strings[i], -1, strings[i+1], -1, FALSE))!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
|
||||
r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
|
||||
r2=u_strcmp(strings[i], strings[i+1]);
|
||||
if(_SIGN(r1)!=_SIGN(r2)) {
|
||||
log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
|
||||
}
|
||||
|
||||
/* test u_strCompareIter() ### TODO prototype */
|
||||
uiter_setString(&iter1, strings[i], len1);
|
||||
uiter_setString(&iter2, strings[i+1], len2);
|
||||
if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
|
||||
log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
|
||||
}
|
||||
r1=u_strCompareIter(&iter1, &iter2, FALSE);
|
||||
if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
|
||||
log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue