ICU-2397 add prototype of u_strCompareIter() - compare strings via iterators; add test for it, too - needs discussion, finalizing of semantics, proposal, ...

X-SVN-Rev: 10838
This commit is contained in:
Markus Scherer 2003-01-10 22:51:45 +00:00
parent e03434154e
commit b171e3f8ca

View file

@ -26,6 +26,106 @@
#include <string.h>
#include <stdlib.h>
/* ### TODO prototype ------------------------------------------------------- */
#include "unicode/uiter.h"
/*
* Compare two strings as presented by UCharIterators.
* Use code unit or code point order.
* When the function returns, it is undefined where the iterators
* have stopped.
*/
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
UChar32 c1, c2;
/* ### TODO: iterate from current positions or reset to start? reset for now */
iter1->move(iter1, 0, UITER_START);
iter2->move(iter2, 0, UITER_START);
/* compare identical prefixes - they do not need to be fixed up */
for(;;) {
c1=iter1->next(iter1);
c2=iter2->next(iter2);
if(c1!=c2) {
break;
}
if(c1==-1) {
return 0;
}
}
/* if both values are in or above the surrogate range, fix them up */
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
if(
(c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
(UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c1-=0x2800;
}
if(
(c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
(UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
) {
/* part of a surrogate pair, leave >=d800 */
} else {
/* BMP code point - may be surrogate code point - make <d800 */
c2-=0x2800;
}
}
/* now c1 and c2 are in the requested (code unit or code point) order */
return (int32_t)c1-(int32_t)c2;
}
#if 0
/*
* ### TODO: The prototype above does not leave the iterators _on_ the different units.
* This is possible but would cost a few extra indirect function calls to back
* up if a unit (c1 or c2 respectively) was >=0.
*
* Consistently leaving them _behind_ the different units is not an option
* because the current "unit" is the end of the string if that is reached,
* and in such a case the iterator does not move.
* For example, when comparing "ab" with "abc", both iterators rest _on_ the end
* of their strings. Calling previous() on each does not move them to where
* the comparison fails.
*
* So the simplest semantics for now is to not define where the iterators end up.
*
* The following fragment is part of what needs to be done for backing up.
*/
/*
 * Sketch only - this is not compilable C (no parameter list, c1 and iter1
 * are not declared here) and it is kept out of the build by the
 * surrounding #if 0.
 *
 * It shows the surrogate fix-up for c1/iter1 written so that iter1 is put
 * back to where it was after looking backwards.
 * NOTE(review): presumably meant to replace the c1 fix-up inside the
 * "c1>=0xd800 && c2>=0xd800 && codePointOrder" branch of u_strCompareIter()
 * and to be mirrored for c2/iter2 - confirm before using it.
 */
void fragment {
/* iff a surrogate is part of a surrogate pair, leave >=d800 */
/* NOTE(review): the range tests below only make sense if c1>=0xd800 was already established */
if(c1<=0xdbff) {
/* lead surrogate range: current() peeks at the next unit without moving the iterator */
if(!UTF_IS_TRAIL(iter1->current(iter1))) {
/* lead surrogate code point - make <d800 */
c1-=0x2800;
}
} else if(c1<=0xdfff) {
/* trail surrogate range: remember the position so that it can be restored below */
int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
iter1->previous(iter1); /* ==c1 */
/* the second previous() returns the unit in front of c1 */
if(!UTF_IS_LEAD(iter1->previous(iter1))) {
/* trail surrogate code point - make <d800 */
c1-=0x2800;
}
/* go back to behind where the difference is */
iter1->move(iter1, index, UITER_ZERO);
} else /* 0xe000<=c1<=0xffff */ {
/* BMP code point - make <d800 */
c1-=0x2800;
}
}
#endif
/* end prototype ------------------------------------------------------------ */
/* number of elements of an array object; the argument must be a real array, not a pointer */
#define LENGTHOF(a) (sizeof(a)/sizeof(*(a)))
/* get the sign of an integer */
@ -433,7 +533,9 @@ static void TestStringFunctions()
{ 0xd800, 0xdc02, 0 }, /* U+10002 */
{ 0xd84d, 0xdc56, 0 } /* U+23456 */
};
int32_t len1, len2;
UCharIterator iter1, iter2; /* ### TODO prototype */
int32_t len1, len2, r1, r2;
for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
@ -460,9 +562,22 @@ static void TestStringFunctions()
}
/* test u_strCompare(FALSE) */
if(_SIGN(u_strCompare(strings[i], -1, strings[i+1], -1, FALSE))!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
r2=u_strcmp(strings[i], strings[i+1]);
if(_SIGN(r1)!=_SIGN(r2)) {
log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
}
/* test u_strCompareIter() ### TODO prototype */
uiter_setString(&iter1, strings[i], len1);
uiter_setString(&iter2, strings[i+1], len2);
if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
}
r1=u_strCompareIter(&iter1, &iter2, FALSE);
if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
}
}
}