mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-17 02:37:25 +00:00
ICU-840 implement case-insensitive string compare
X-SVN-Rev: 3619
This commit is contained in:
parent
0c602137f7
commit
2959043936
2 changed files with 264 additions and 4 deletions
|
@ -199,6 +199,47 @@ u_strncmp(const UChar *ucs1,
|
|||
const UChar *ucs2,
|
||||
int32_t n);
|
||||
|
||||
/**
|
||||
* Compare two strings case-insensitively using full case folding.
|
||||
* This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
|
||||
*
|
||||
* @param s1 A string to compare.
|
||||
* @param s2 A string to compare.
|
||||
* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @return A negative, zero, or positive integer indicating the comparison result.
|
||||
* @draft
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
|
||||
|
||||
/**
|
||||
* Compare two strings case-insensitively using full case folding.
|
||||
* This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), u_strFoldCase(s2, at most n, options)).
|
||||
*
|
||||
* @param s1 A string to compare.
|
||||
* @param s2 A string to compare.
|
||||
* @param n The maximum number of characters in each string to case-fold and then compare.
|
||||
* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @return A negative, zero, or positive integer indicating the comparison result.
|
||||
* @draft
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
|
||||
|
||||
/**
|
||||
* Compare two strings case-insensitively using full case folding.
|
||||
* This is equivalent to u_strcmp(u_strFoldCase(s1, length, options), u_strFoldCase(s2, length, options)).
|
||||
*
|
||||
* @param s1 A string to compare.
|
||||
* @param s2 A string to compare.
|
||||
* @param length The number of characters in each string to case-fold and then compare.
|
||||
* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @return A negative, zero, or positive integer indicating the comparison result.
|
||||
* @draft
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
|
||||
|
||||
/**
|
||||
* Copy a ustring.
|
||||
* Adds a null terminator.
|
||||
|
@ -474,4 +515,32 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
|||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Case-fold the characters in a string.
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'I' in CaseFolding.txt.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer are allowed to overlap.
|
||||
*
|
||||
* @param dest A buffer for the result string. The result will be zero-terminated if
|
||||
* the buffer is large enough.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string
|
||||
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
|
||||
* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string. It may be greater than destCapacity. In that case,
|
||||
* only some of the result was written to the destination buffer.
|
||||
* @draft
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strFoldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -437,13 +437,15 @@ u_strlen(const UChar *s)
|
|||
*/
|
||||
enum {
|
||||
TO_LOWER,
|
||||
TO_UPPER
|
||||
TO_UPPER,
|
||||
FOLD_CASE
|
||||
};
|
||||
|
||||
static int32_t
|
||||
u_strCaseMap(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
uint32_t options,
|
||||
int32_t toWhichCase,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar buffer[300];
|
||||
|
@ -491,9 +493,12 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
|
|||
if(toWhichCase==TO_LOWER) {
|
||||
destLength=u_internalStrToLower(temp, destCapacity, src, srcLength,
|
||||
locale, NULL, NULL, pErrorCode);
|
||||
} else {
|
||||
} else if(toWhichCase==TO_UPPER) {
|
||||
destLength=u_internalStrToUpper(temp, destCapacity, src, srcLength,
|
||||
locale, NULL, NULL, pErrorCode);
|
||||
} else {
|
||||
destLength=u_internalStrFoldCase(temp, destCapacity, src, srcLength,
|
||||
options, NULL, NULL, pErrorCode);
|
||||
}
|
||||
if(temp!=dest) {
|
||||
/* copy the result string to the destination buffer */
|
||||
|
@ -515,7 +520,7 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, TO_LOWER, pErrorCode);
|
||||
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, 0, TO_LOWER, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
|
@ -523,7 +528,193 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, TO_UPPER, pErrorCode);
|
||||
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, 0, TO_UPPER, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strFoldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity, src, srcLength, NULL, options, FOLD_CASE, pErrorCode);
|
||||
}
|
||||
|
||||
/* case-insensitive string comparisons */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
|
||||
UChar t1[32], t2[32]; /* temporary buffers holding case-folded parts of s1 and s2 */
|
||||
UChar32 c;
|
||||
UChar c2;
|
||||
int32_t pos1, pos2, len1, len2, result;
|
||||
|
||||
if(!uprv_haveProperties()) {
|
||||
/* hardcode ASCII strcasecmp() */
|
||||
UChar c1, c2;
|
||||
|
||||
for(;;) {
|
||||
c1=*s1++;
|
||||
if((uint16_t)(c1-0x41)<26) {
|
||||
c1+=0x20;
|
||||
}
|
||||
c2=*s2++;
|
||||
if((uint16_t)(c2-0x41)<26) {
|
||||
c2+=0x20;
|
||||
}
|
||||
result=(int32_t)c1-(int32_t)c2;
|
||||
if(result!=0 || c1==0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pos1=pos2=len1=len2=0;
|
||||
for(;;) {
|
||||
/* make sure that the temporary buffers are not empty */
|
||||
if(pos1>=len1) {
|
||||
c=*s1++;
|
||||
if(c!=0) {
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(c2=*s1)) {
|
||||
c=UTF16_GET_PAIR_VALUE(c, c2);
|
||||
++s1;
|
||||
}
|
||||
len1=u_internalFoldCase(c, t1, options);
|
||||
pos1=0;
|
||||
} else if(pos2>=len2 && *s2==0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if(pos2>=len2) {
|
||||
c=*s2++;
|
||||
if(c!=0) {
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(c2=*s2)) {
|
||||
c=UTF16_GET_PAIR_VALUE(c, c2);
|
||||
++s2;
|
||||
}
|
||||
len2=u_internalFoldCase(c, t2, options);
|
||||
pos2=0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* compare the head code units from both folded strings */
|
||||
result=(int32_t)t1[pos1++]-(int32_t)t2[pos2++];
|
||||
if(result!=0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
u_internalStrcasecmp(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
uint32_t options) {
|
||||
UChar t1[32], t2[32]; /* temporary buffers holding case-folded parts of s1 and s2 */
|
||||
UChar32 c;
|
||||
UChar c2;
|
||||
int32_t pos1, pos2, len1, len2, result;
|
||||
|
||||
if(!uprv_haveProperties()) {
|
||||
/* hardcode ASCII strcasecmp() */
|
||||
UChar c1, c2;
|
||||
|
||||
for(;;) {
|
||||
if(length1<=0) {
|
||||
if(length2<=0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(length2<=0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
c1=*s1++;
|
||||
if((uint16_t)(c1-0x41)<26) {
|
||||
c1+=0x20;
|
||||
}
|
||||
c2=*s2++;
|
||||
if((uint16_t)(c2-0x41)<26) {
|
||||
c2+=0x20;
|
||||
}
|
||||
result=(int32_t)c1-(int32_t)c2;
|
||||
if(result!=0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
--length1;
|
||||
--length2;
|
||||
}
|
||||
}
|
||||
|
||||
pos1=pos2=len1=len2=0;
|
||||
for(;;) {
|
||||
/* make sure that the temporary buffers are not empty */
|
||||
if(pos1>=len1) {
|
||||
if(length1>0) {
|
||||
c=*s1++;
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(c2=*s1)) {
|
||||
c=UTF16_GET_PAIR_VALUE(c, c2);
|
||||
++s1;
|
||||
length1-=2;
|
||||
} else {
|
||||
--length1;
|
||||
}
|
||||
len1=u_internalFoldCase(c, t1, options);
|
||||
pos1=0;
|
||||
} else if(pos2>=len2 && length2<=0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if(pos2>=len2) {
|
||||
if(length2>0) {
|
||||
c=*s2++;
|
||||
if(UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(c2=*s2)) {
|
||||
c=UTF16_GET_PAIR_VALUE(c, c2);
|
||||
++s2;
|
||||
length2-=2;
|
||||
} else {
|
||||
--length2;
|
||||
}
|
||||
len2=u_internalFoldCase(c, t2, options);
|
||||
pos2=0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* compare the head code units from both folded strings */
|
||||
result=(int32_t)t1[pos1++]-(int32_t)t2[pos2++];
|
||||
if(result!=0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
|
||||
return u_internalStrcasecmp(s1, length, s2, length, options);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
|
||||
/*
|
||||
* This is a simple, sub-optimal implementation:
|
||||
* Determine the actual lengths of the strings and call u_internalStrcasecmp().
|
||||
* This saves us from having an additional variant of the above strcasecmp().
|
||||
*/
|
||||
const UChar *s;
|
||||
int32_t length1, length2;
|
||||
|
||||
for(s=s1, length1=0; length1<n && *s!=0; ++s, ++length1) {}
|
||||
for(s=s2, length2=0; length2<n && *s!=0; ++s, ++length2) {}
|
||||
|
||||
return u_internalStrcasecmp(s1, length1, s2, length2, options);
|
||||
}
|
||||
|
||||
/* conversions between char* and UChar* ------------------------------------- */
|
||||
|
|
Loading…
Add table
Reference in a new issue