diff --git a/icu4c/source/test/cintltst/custrtst.c b/icu4c/source/test/cintltst/custrtst.c
index 12f5ea3a091..2755dfd63c8 100644
--- a/icu4c/source/test/cintltst/custrtst.c
+++ b/icu4c/source/test/cintltst/custrtst.c
@@ -26,6 +26,106 @@
 #include
 #include
 
+/* ### TODO prototype ------------------------------------------------------- */
+
+#include "unicode/uiter.h"
+
+/*
+ * Compare two strings as presented by UCharIterators.
+ * Use code unit or code point order.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
+    UChar32 c1, c2;
+
+    /* ### TODO: iterate from current positions or reset to start? reset for now */
+    iter1->move(iter1, 0, UITER_START);
+    iter2->move(iter2, 0, UITER_START);
+
+    /* compare identical prefixes - they do not need to be fixed up */
+    for(;;) {
+        c1=iter1->next(iter1);
+        c2=iter2->next(iter2);
+        if(c1!=c2) {
+            break;
+        }
+        if(c1==-1) {
+            return 0;
+        }
+    }
+
+    /* if both values are in or above the surrogate range, fix them up */
+    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
+        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
+        if(
+            (c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
+            (UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
+        ) {
+            /* part of a surrogate pair, leave >=d800 */
+        } else {
+            /* BMP code point - may be surrogate code point - make <d800 */
+            c1-=0x2800;
+        }
+
+        if(
+            (c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
+            (UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
+        ) {
+            /* part of a surrogate pair, leave >=d800 */
+        } else {
+            /* BMP code point - may be surrogate code point - make <d800 */
+            c2-=0x2800;
+        }
+    }
+
+    /* now c1 and c2 are in the requested (code unit or code point) order */
+    return (int32_t)c1-(int32_t)c2;
+}
+
+/*
+ * u_strCompareIter() does not leave the iterators _on_ the different units.
+ * This is possible but would cost a few extra indirect function calls to back
+ * up if the last unit (c1 or c2 respectively) was >=0.
+ *
+ * Consistently leaving them _behind_ the different units is not an option
+ * because the current "unit" is the end of the string if that is reached,
+ * and in such a case the iterator does not move.
+ * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
+ * of their strings. Calling previous() on each does not move them to where
+ * the comparison fails.
+ *
+ * So the simplest semantics for now is to not define where the iterators end up.
+ *
+ * The following fragment is part of what needs to be done for backing up.
+ */
+void fragment {
+    /* iff a surrogate is part of a surrogate pair, leave >=d800 */
+    if(c1<=0xdbff) {
+        if(!UTF_IS_TRAIL(iter1->current(iter1))) {
+            /* lead surrogate code point - make <d800 */
+            c1-=0x2800;
+        }
+    } else if(c1<=0xdfff) {
+        int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
+        iter1->previous(iter1); /* ==c1 */
+        if(!UTF_IS_LEAD(iter1->previous(iter1))) {
+            /* trail surrogate code point - make <d800 */
+            c1-=0x2800;
+        }
+        iter1->move(iter1, index, UITER_ZERO);
+    } else /* 0xe000<=c1<=0xffff */ {
+        /* BMP code point - make <d800 */
+        c1-=0x2800;
+    }
+}
 >=0) {
@@ -460,9 +562,22 @@ static void TestStringFunctions()
         }
 
         /* test u_strCompare(FALSE) */
-        if(_SIGN(u_strCompare(strings[i], -1, strings[i+1], -1, FALSE))!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
+        r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
+        r2=u_strcmp(strings[i], strings[i+1]);
+        if(_SIGN(r1)!=_SIGN(r2)) {
             log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
         }
+
+        /* test u_strCompareIter() ### TODO prototype */
+        uiter_setString(&iter1, strings[i], len1);
+        uiter_setString(&iter2, strings[i+1], len2);
+        if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
+            log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
+        }
+        r1=u_strCompareIter(&iter1, &iter2, FALSE);
+        if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
+            log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
+        }
     }
 }