ICU-2397 move u_strCompareIter to ustring.c

X-SVN-Rev: 10856
This commit is contained in:
Markus Scherer 2003-01-14 21:18:52 +00:00
parent 3ed7677387
commit 2cfa2ab7bb
3 changed files with 131 additions and 103 deletions

View file

@ -15,7 +15,9 @@
#ifndef USTRING_H
#define USTRING_H
#include "unicode/utypes.h"
#include "unicode/uiter.h"
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @draft ICU 2.1*/
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
@ -458,6 +460,29 @@ u_strCompare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
UBool codePointOrder);
/**
 * Compare two Unicode strings (binary order)
 * as presented by UCharIterator objects.
 * Works otherwise just like u_strCompare().
 *
 * Both iterators are reset to their start positions.
 * When the function returns, it is undefined where the iterators
 * have stopped.
 *
 * If either iterator is NULL, or if both arguments are the same
 * iterator object, the function returns 0 without iterating.
 *
 * @param iter1 First source string iterator.
 * @param iter2 Second source string iterator.
 * @param codePointOrder Choose between code unit order (FALSE)
 *                       and code point order (TRUE).
 *
 * @return <0 or 0 or >0 as usual for string comparisons
 *
 * @see u_strCompare
 *
 * @draft ICU 2.6
 */
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
#ifndef U_COMPARE_CODE_POINT_ORDER
/* see also unistr.h and unorm.h */
/**

View file

@ -17,6 +17,7 @@
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/uiter.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include "unicode/ucnv.h"
@ -822,10 +823,112 @@ uprv_strCompare(const UChar *s1, int32_t length1,
}
}
/* now c1 and c2 are in UTF-32-compatible order */
/* now c1 and c2 are in the requested (code unit or code point) order */
return (int32_t)c1-(int32_t)c2;
}
/*
 * Binary comparison of two strings that are read through UCharIterators,
 * in either code unit order or code point order.
 *
 * Both iterators are rewound to their start before the comparison begins.
 * Where the iterators are positioned when the function returns is undefined.
 *
 * Returns 0 if an iterator is NULL or if both arguments are the same object,
 * otherwise <0, 0 or >0 as usual for string comparisons.
 */
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    UChar32 unit1, unit2;

    /* nothing to compare: bad arguments, or one iterator passed in twice */
    if(iter1==NULL || iter2==NULL || iter1==iter2) {
        return 0;
    }

    /* always compare from the beginning of each string */
    iter1->move(iter1, 0, UITER_START);
    iter2->move(iter2, 0, UITER_START);

    /* skip the common prefix - equal units never need to be fixed up */
    do {
        unit1=iter1->next(iter1);
        unit2=iter2->next(iter2);
        if(unit1==-1 && unit2==-1) {
            return 0; /* both strings ended at the same time */
        }
    } while(unit1==unit2);

    /*
     * Code unit order, or at least one unit below the surrogate range:
     * the plain difference already has the right sign.
     */
    if(!codePointOrder || unit1<0xd800 || unit2<0xd800) {
        return (int32_t)unit1-(int32_t)unit2;
    }

    /*
     * Code point order with both units in or above the surrogate range:
     * subtract 0x2800 from everything that is not half of a surrogate pair
     * so that BMP code points sort below supplementary ones.
     */
    if(unit1<=0xdbff) {
        /* lead surrogate - paired only if a trail surrogate follows */
        if(!UTF_IS_TRAIL(iter1->current(iter1))) {
            unit1-=0x2800;
        }
    } else if(UTF_IS_TRAIL(unit1)) {
        /* trail surrogate - step back over it, then inspect the unit before it */
        iter1->previous(iter1);
        if(!UTF_IS_LEAD(iter1->previous(iter1))) {
            unit1-=0x2800;
        }
    } else {
        /* BMP code point above the surrogates - make <d800 */
        unit1-=0x2800;
    }

    if(unit2<=0xdbff) {
        /* lead surrogate - paired only if a trail surrogate follows */
        if(!UTF_IS_TRAIL(iter2->current(iter2))) {
            unit2-=0x2800;
        }
    } else if(UTF_IS_TRAIL(unit2)) {
        /* trail surrogate - step back over it, then inspect the unit before it */
        iter2->previous(iter2);
        if(!UTF_IS_LEAD(iter2->previous(iter2))) {
            unit2-=0x2800;
        }
    } else {
        /* BMP code point above the surrogates - make <d800 */
        unit2-=0x2800;
    }

    /* now unit1 and unit2 are in code point order */
    return (int32_t)unit1-(int32_t)unit2;
}
#if 0
/*
 * Design note - this section is compiled out and kept for reference only.
 * "void fragment { ... }" below is a sketch, not a real function definition.
 *
 * u_strCompareIter() does not leave the iterators _on_ the different units.
 * This is possible but would cost a few extra indirect function calls to back
 * up if the last unit (c1 or c2 respectively) was >=0.
 *
 * Consistently leaving them _behind_ the different units is not an option
 * because the current "unit" is the end of the string if that is reached,
 * and in such a case the iterator does not move.
 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
 * of their strings. Calling previous() on each does not move them to where
 * the comparison fails.
 *
 * So the simplest semantics is to not define where the iterators end up.
 *
 * The following fragment is part of what would need to be done for backing up.
 * It shows the fix-up for c1 only; presumably the same steps would be
 * repeated for c2 with iter2.
 */
void fragment {
/* iff a surrogate is part of a surrogate pair, leave >=d800 */
if(c1<=0xdbff) {
/* c1 is treated as a lead surrogate here: look at the unit that follows it */
if(!UTF_IS_TRAIL(iter1->current(iter1))) {
/* lead surrogate code point - make <d800 */
c1-=0x2800;
}
} else if(c1<=0xdfff) {
/* remember the current position so that it can be restored after looking backwards */
int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
iter1->previous(iter1); /* ==c1 */
/* the second previous() returns the unit in front of c1 */
if(!UTF_IS_LEAD(iter1->previous(iter1))) {
/* trail surrogate code point - make <d800 */
c1-=0x2800;
}
/* go back to behind where the difference is */
iter1->move(iter1, index, UITER_ZERO);
} else /* 0xe000<=c1<=0xffff */ {
/* BMP code point - make <d800 */
c1-=0x2800;
}
}
#endif
U_CAPI int32_t U_EXPORT2
u_strCompare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,

View file

@ -26,106 +26,6 @@
#include <string.h>
#include <stdlib.h>
/* ### TODO prototype ------------------------------------------------------- */
#include "unicode/uiter.h"
/*
 * Compare two strings as presented by UCharIterators.
 * Use code unit or code point order.
 *
 * Both iterators are reset to their start positions before comparing.
 * When the function returns, it is undefined where the iterators
 * have stopped.
 *
 * @param iter1 First source string iterator.
 * @param iter2 Second source string iterator.
 * @param codePointOrder Choose between code unit order (FALSE)
 *                       and code point order (TRUE).
 * @return <0 or 0 or >0 as usual for string comparisons;
 *         0 if an iterator is NULL or if both arguments are the same iterator.
 */
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    UChar32 c1, c2;

    /* argument checking - the function pointers below must not be reached with NULL */
    if(iter1==NULL || iter2==NULL) {
        return 0; /* bad arguments */
    }

    /*
     * One iterator passed twice: the string is trivially equal to itself.
     * Without this check each next() call would advance the shared state
     * and neighboring units would be compared against each other.
     */
    if(iter1==iter2) {
        return 0; /* identical iterators */
    }

    /* always compare from the start of both strings */
    iter1->move(iter1, 0, UITER_START);
    iter2->move(iter2, 0, UITER_START);

    /* compare identical prefixes - they do not need to be fixed up */
    for(;;) {
        c1=iter1->next(iter1);
        c2=iter2->next(iter2);
        if(c1!=c2) {
            break;
        }
        if(c1==-1) {
            return 0; /* both strings ended without a difference */
        }
    }

    /* if both values are in or above the surrogate range, fix them up */
    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
        if(
            (c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
            (UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c1-=0x2800;
        }

        if(
            (c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
            (UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c2-=0x2800;
        }
    }

    /* now c1 and c2 are in the requested (code unit or code point) order */
    return (int32_t)c1-(int32_t)c2;
}
#if 0
/*
 * Design note - this section is compiled out and kept for reference only.
 * "void fragment { ... }" below is a sketch, not a real function definition.
 *
 * ### TODO: The prototype above does not leave the iterators _on_ the different units.
 * This is possible but would cost a few extra indirect function calls to back
 * up if a unit (c1 or c2 respectively) was >=0.
 *
 * Consistently leaving them _behind_ the different units is not an option
 * because the current "unit" is the end of the string if that is reached,
 * and in such a case the iterator does not move.
 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
 * of their strings. Calling previous() on each does not move them to where
 * the comparison fails.
 *
 * So the simplest semantics for now is to not define where the iterators end up.
 *
 * The following fragment is part of what needs to be done for backing up.
 * It shows the fix-up for c1 only; presumably the same steps would be
 * repeated for c2 with iter2.
 */
void fragment {
/* iff a surrogate is part of a surrogate pair, leave >=d800 */
if(c1<=0xdbff) {
/* c1 is treated as a lead surrogate here: look at the unit that follows it */
if(!UTF_IS_TRAIL(iter1->current(iter1))) {
/* lead surrogate code point - make <d800 */
c1-=0x2800;
}
} else if(c1<=0xdfff) {
/* remember the current position so that it can be restored after looking backwards */
int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
iter1->previous(iter1); /* ==c1 */
/* the second previous() returns the unit in front of c1 */
if(!UTF_IS_LEAD(iter1->previous(iter1))) {
/* trail surrogate code point - make <d800 */
c1-=0x2800;
}
/* go back to behind where the difference is */
iter1->move(iter1, index, UITER_ZERO);
} else /* 0xe000<=c1<=0xffff */ {
/* BMP code point - make <d800 */
c1-=0x2800;
}
}
#endif
/* end prototype ------------------------------------------------------------ */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
/* get the sign of an integer */
@ -534,7 +434,7 @@ static void TestStringFunctions()
{ 0xd84d, 0xdc56, 0 } /* U+23456 */
};
UCharIterator iter1, iter2; /* ### TODO prototype */
UCharIterator iter1, iter2;
int32_t len1, len2, r1, r2;
for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
@ -568,7 +468,7 @@ static void TestStringFunctions()
log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
}
/* test u_strCompareIter() ### TODO prototype */
/* test u_strCompareIter() */
uiter_setString(&iter1, strings[i], len1);
uiter_setString(&iter2, strings[i+1], len2);
if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {