mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-2397 move u_strCompareIter to ustring.c
X-SVN-Rev: 10856
This commit is contained in:
parent
3ed7677387
commit
2cfa2ab7bb
3 changed files with 131 additions and 103 deletions
|
@ -15,7 +15,9 @@
|
|||
|
||||
#ifndef USTRING_H
|
||||
#define USTRING_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uiter.h"
|
||||
|
||||
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @draft ICU 2.1*/
|
||||
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
|
@ -458,6 +460,29 @@ u_strCompare(const UChar *s1, int32_t length1,
|
|||
const UChar *s2, int32_t length2,
|
||||
UBool codePointOrder);
|
||||
|
||||
/**
|
||||
* Compare two Unicode strings (binary order)
|
||||
* as presented by UCharIterator objects.
|
||||
* Works otherwise just like u_strCompare().
|
||||
*
|
||||
* Both iterators are reset to their start positions.
|
||||
* When the function returns, it is undefined where the iterators
|
||||
* have stopped.
|
||||
*
|
||||
* @param iter1 First source string iterator.
|
||||
 * @param iter2 Second source string iterator.
|
||||
* @param codePointOrder Choose between code unit order (FALSE)
|
||||
* and code point order (TRUE).
|
||||
*
|
||||
* @return <0 or 0 or >0 as usual for string comparisons
|
||||
*
|
||||
* @see u_strCompare
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
|
||||
|
||||
#ifndef U_COMPARE_CODE_POINT_ORDER
|
||||
/* see also unistr.h and unorm.h */
|
||||
/**
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
@ -822,10 +823,112 @@ uprv_strCompare(const UChar *s1, int32_t length1,
|
|||
}
|
||||
}
|
||||
|
||||
/* now c1 and c2 are in UTF-32-compatible order */
|
||||
/* now c1 and c2 are in the requested (code unit or code point) order */
|
||||
return (int32_t)c1-(int32_t)c2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare two strings as presented by UCharIterators.
|
||||
* Use code unit or code point order.
|
||||
* When the function returns, it is undefined where the iterators
|
||||
* have stopped.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
|
||||
UChar32 c1, c2;
|
||||
|
||||
/* argument checking */
|
||||
if(iter1==NULL || iter2==NULL) {
|
||||
return 0; /* bad arguments */
|
||||
}
|
||||
if(iter1==iter2) {
|
||||
return 0; /* identical iterators */
|
||||
}
|
||||
|
||||
/* reset iterators to start? */
|
||||
iter1->move(iter1, 0, UITER_START);
|
||||
iter2->move(iter2, 0, UITER_START);
|
||||
|
||||
/* compare identical prefixes - they do not need to be fixed up */
|
||||
for(;;) {
|
||||
c1=iter1->next(iter1);
|
||||
c2=iter2->next(iter2);
|
||||
if(c1!=c2) {
|
||||
break;
|
||||
}
|
||||
if(c1==-1) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* if both values are in or above the surrogate range, fix them up */
|
||||
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
|
||||
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
|
||||
if(
|
||||
(c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
|
||||
(UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
/* BMP code point - may be surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
|
||||
if(
|
||||
(c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
|
||||
(UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
/* BMP code point - may be surrogate code point - make <d800 */
|
||||
c2-=0x2800;
|
||||
}
|
||||
}
|
||||
|
||||
/* now c1 and c2 are in the requested (code unit or code point) order */
|
||||
return (int32_t)c1-(int32_t)c2;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* u_strCompareIter() does not leave the iterators _on_ the different units.
|
||||
* This is possible but would cost a few extra indirect function calls to back
|
||||
* up if the last unit (c1 or c2 respectively) was >=0.
|
||||
*
|
||||
* Consistently leaving them _behind_ the different units is not an option
|
||||
* because the current "unit" is the end of the string if that is reached,
|
||||
* and in such a case the iterator does not move.
|
||||
* For example, when comparing "ab" with "abc", both iterators rest _on_ the end
|
||||
* of their strings. Calling previous() on each does not move them to where
|
||||
* the comparison fails.
|
||||
*
|
||||
* So the simplest semantics is to not define where the iterators end up.
|
||||
*
|
||||
* The following fragment is part of what would need to be done for backing up.
|
||||
*/
|
||||
void fragment {
|
||||
/* iff a surrogate is part of a surrogate pair, leave >=d800 */
|
||||
if(c1<=0xdbff) {
|
||||
if(!UTF_IS_TRAIL(iter1->current(iter1))) {
|
||||
/* lead surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
} else if(c1<=0xdfff) {
|
||||
int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
|
||||
iter1->previous(iter1); /* ==c1 */
|
||||
if(!UTF_IS_LEAD(iter1->previous(iter1))) {
|
||||
/* trail surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
/* go back to behind where the difference is */
|
||||
iter1->move(iter1, index, UITER_ZERO);
|
||||
} else /* 0xe000<=c1<=0xffff */ {
|
||||
/* BMP code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strCompare(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
|
|
|
@ -26,106 +26,6 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* ### TODO prototype ------------------------------------------------------- */
|
||||
|
||||
#include "unicode/uiter.h"
|
||||
|
||||
/*
|
||||
* Compare two strings as presented by UCharIterators.
|
||||
* Use code unit or code point order.
|
||||
* When the function returns, it is undefined where the iterators
|
||||
* have stopped.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
|
||||
UChar32 c1, c2;
|
||||
|
||||
/* ### TODO: iterate from current positions or reset to start? reset for now */
|
||||
iter1->move(iter1, 0, UITER_START);
|
||||
iter2->move(iter2, 0, UITER_START);
|
||||
|
||||
/* compare identical prefixes - they do not need to be fixed up */
|
||||
for(;;) {
|
||||
c1=iter1->next(iter1);
|
||||
c2=iter2->next(iter2);
|
||||
if(c1!=c2) {
|
||||
break;
|
||||
}
|
||||
if(c1==-1) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* if both values are in or above the surrogate range, fix them up */
|
||||
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
|
||||
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
|
||||
if(
|
||||
(c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
|
||||
(UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
/* BMP code point - may be surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
|
||||
if(
|
||||
(c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
|
||||
(UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
/* BMP code point - may be surrogate code point - make <d800 */
|
||||
c2-=0x2800;
|
||||
}
|
||||
}
|
||||
|
||||
/* now c1 and c2 are in the requested (code unit or code point) order */
|
||||
return (int32_t)c1-(int32_t)c2;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* ### TODO: The prototype above does not leave the iterators _on_ the different units.
|
||||
* This is possible but would cost a few extra indirect function calls to back
|
||||
* up if a unit (c1 or c2 respectively) was >=0.
|
||||
*
|
||||
* Consistently leaving them _behind_ the different units is not an option
|
||||
* because the current "unit" is the end of the string if that is reached,
|
||||
* and in such a case the iterator does not move.
|
||||
* For example, when comparing "ab" with "abc", both iterators rest _on_ the end
|
||||
* of their strings. Calling previous() on each does not move them to where
|
||||
* the comparison fails.
|
||||
*
|
||||
* So the simplest semantics for now is to not define where the iterators end up.
|
||||
*
|
||||
* The following fragment is part of what needs to be done for backing up.
|
||||
*/
|
||||
void fragment {
|
||||
/* iff a surrogate is part of a surrogate pair, leave >=d800 */
|
||||
if(c1<=0xdbff) {
|
||||
if(!UTF_IS_TRAIL(iter1->current(iter1))) {
|
||||
/* lead surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
} else if(c1<=0xdfff) {
|
||||
int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
|
||||
iter1->previous(iter1); /* ==c1 */
|
||||
if(!UTF_IS_LEAD(iter1->previous(iter1))) {
|
||||
/* trail surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
/* go back to behind where the difference is */
|
||||
iter1->move(iter1, index, UITER_ZERO);
|
||||
} else /* 0xe000<=c1<=0xffff */ {
|
||||
/* BMP code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* end prototype ------------------------------------------------------------ */
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* get the sign of an integer */
|
||||
|
@ -534,7 +434,7 @@ static void TestStringFunctions()
|
|||
{ 0xd84d, 0xdc56, 0 } /* U+23456 */
|
||||
};
|
||||
|
||||
UCharIterator iter1, iter2; /* ### TODO prototype */
|
||||
UCharIterator iter1, iter2;
|
||||
int32_t len1, len2, r1, r2;
|
||||
|
||||
for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
|
||||
|
@ -568,7 +468,7 @@ static void TestStringFunctions()
|
|||
log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
|
||||
}
|
||||
|
||||
/* test u_strCompareIter() ### TODO prototype */
|
||||
/* test u_strCompareIter() */
|
||||
uiter_setString(&iter1, strings[i], len1);
|
||||
uiter_setString(&iter2, strings[i+1], len2);
|
||||
if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
|
||||
|
|
Loading…
Add table
Reference in a new issue