ICU-840 implement case-insensitive string compare

X-SVN-Rev: 3619
This commit is contained in:
Markus Scherer 2001-02-14 00:58:35 +00:00
parent 0c602137f7
commit 2959043936
2 changed files with 264 additions and 4 deletions

View file

@ -199,6 +199,47 @@ u_strncmp(const UChar *ucs1,
const UChar *ucs2,
int32_t n);
/**
 * Compare two zero-terminated strings case-insensitively using full case folding.
 * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
 * The comparison ends at the first difference between the folded strings or at
 * the terminating zero of either string.
 *
 * @param s1 A zero-terminated string to compare.
 * @param s2 A zero-terminated string to compare.
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return A negative, zero, or positive integer indicating the comparison result.
 * @draft
 */
U_CAPI int32_t U_EXPORT2
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
/**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), u_strFoldCase(s2, at most n, options)).
 * Each string ends at its first zero UChar or after n UChars, whichever comes first.
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param n The maximum number of UChars in each string to case-fold and then compare.
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return A negative, zero, or positive integer indicating the comparison result.
 * @draft
 */
U_CAPI int32_t U_EXPORT2
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
/**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options), u_strFoldCase(s2, length, options)).
 * Both strings are taken to be exactly length UChars long; a zero UChar is
 * not treated as a terminator.
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param length The number of UChars in each string to case-fold and then compare.
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return A negative, zero, or positive integer indicating the comparison result.
 * @draft
 */
U_CAPI int32_t U_EXPORT2
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
/**
* Copy a ustring.
* Adds a null terminator.
@ -474,4 +515,32 @@ u_strToLower(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
/**
 * Case-fold the characters in a string.
 * Case-folding is locale-independent and not context-sensitive,
 * but there is an option for whether to include or exclude mappings for dotted I
 * and dotless i that are marked with 'I' in CaseFolding.txt.
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest A buffer for the result string. The result will be zero-terminated if
 *             the buffer is large enough.
 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
 *                     dest may be NULL and the function will only return the length
 *                     of the result without writing any of the result string.
 * @param src The original string.
 * @param srcLength The length of the original string (number of UChars).
 *                  If -1, then src must be zero-terminated.
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @param pErrorCode Must be a valid pointer to an error code value,
 *                   which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity.
 *         In that case, only some of the result was written to the destination buffer.
 * @draft
 */
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
uint32_t options,
UErrorCode *pErrorCode);
#endif

View file

@ -437,13 +437,15 @@ u_strlen(const UChar *s)
*/
enum {
TO_LOWER,
TO_UPPER
TO_UPPER,
FOLD_CASE
};
static int32_t
u_strCaseMap(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
uint32_t options,
int32_t toWhichCase,
UErrorCode *pErrorCode) {
UChar buffer[300];
@ -491,9 +493,12 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
if(toWhichCase==TO_LOWER) {
destLength=u_internalStrToLower(temp, destCapacity, src, srcLength,
locale, NULL, NULL, pErrorCode);
} else {
} else if(toWhichCase==TO_UPPER) {
destLength=u_internalStrToUpper(temp, destCapacity, src, srcLength,
locale, NULL, NULL, pErrorCode);
} else {
destLength=u_internalStrFoldCase(temp, destCapacity, src, srcLength,
options, NULL, NULL, pErrorCode);
}
if(temp!=dest) {
/* copy the result string to the destination buffer */
@ -515,7 +520,7 @@ u_strToLower(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, TO_LOWER, pErrorCode);
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, 0, TO_LOWER, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -523,7 +528,193 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, TO_UPPER, pErrorCode);
return u_strCaseMap(dest, destCapacity, src, srcLength, locale, 0, TO_UPPER, pErrorCode);
}
/*
 * Public case-folding entry point.
 * Delegates to the shared u_strCaseMap() helper in FOLD_CASE mode; no locale
 * is passed (NULL), and the caller's options are forwarded unchanged.
 * Returns whatever length u_strCaseMap() reports.
 */
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode) {
    int32_t foldedLength;

    foldedLength=u_strCaseMap(dest, destCapacity,
                              src, srcLength,
                              NULL, options, FOLD_CASE,
                              pErrorCode);
    return foldedLength;
}
/* case-insensitive string comparisons */
/*
 * Case-insensitive comparison of two zero-terminated UChar strings.
 *
 * Each string is consumed one code point at a time (a lead surrogate followed
 * by a trail surrogate is combined into one code point). The code point is
 * handed to u_internalFoldCase(), whose output is stored in a small local
 * buffer, and the two buffers are then compared code unit by code unit.
 *
 * If uprv_haveProperties() reports false, only ASCII A-Z is folded.
 *
 * Returns 0 if the folded strings are equal; otherwise the difference of the
 * first mismatching code units, or -1 / +1 if s1 / s2 ends first.
 */
U_CAPI int32_t U_EXPORT2
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
    UChar folded1[32], folded2[32]; /* folded code units of the current code point of s1 / s2 */
    UChar32 cp;
    UChar trail;
    int32_t i1=0, i2=0;             /* read positions in folded1 / folded2 */
    int32_t limit1=0, limit2=0;     /* number of valid code units in folded1 / folded2 */
    int32_t diff;

    if(!uprv_haveProperties()) {
        /* fallback: lowercase ASCII letters only */
        UChar u1, u2;

        do {
            u1=*s1++;
            if(u1>=0x41 && u1<=0x5a) {
                u1+=0x20;
            }
            u2=*s2++;
            if(u2>=0x41 && u2<=0x5a) {
                u2+=0x20;
            }
            diff=(int32_t)u1-(int32_t)u2;
        } while(diff==0 && u1!=0);
        return diff;
    }

    for(;;) {
        /* refill the first buffer when all of its code units have been used */
        if(i1>=limit1) {
            cp=*s1++;
            if(cp==0) {
                /* s1 has ended: equal only if s2 has nothing left either */
                return (i2>=limit2 && *s2==0) ? 0 : -1;
            }
            if(UTF_IS_FIRST_SURROGATE(cp) && UTF_IS_SECOND_SURROGATE(trail=*s1)) {
                cp=UTF16_GET_PAIR_VALUE(cp, trail);
                ++s1;
            }
            limit1=u_internalFoldCase(cp, folded1, options);
            i1=0;
        }

        /* refill the second buffer in the same way */
        if(i2>=limit2) {
            cp=*s2++;
            if(cp==0) {
                /* s2 has ended while s1 still has folded output */
                return 1;
            }
            if(UTF_IS_FIRST_SURROGATE(cp) && UTF_IS_SECOND_SURROGATE(trail=*s2)) {
                cp=UTF16_GET_PAIR_VALUE(cp, trail);
                ++s2;
            }
            limit2=u_internalFoldCase(cp, folded2, options);
            i2=0;
        }

        /* compare the next code unit of each folded string */
        diff=(int32_t)folded1[i1++]-(int32_t)folded2[i2++];
        if(diff!=0) {
            return diff;
        }
    }
}
/*
 * Case-insensitive comparison of two UChar strings given by explicit lengths.
 * A zero UChar is not treated as a terminator; exactly length1 / length2
 * code units are considered (a non-positive length means an empty string).
 *
 * Each string is consumed one code point at a time, the code point is passed
 * to u_internalFoldCase(), and the folded output is compared code unit by
 * code unit. If uprv_haveProperties() reports false, only ASCII A-Z is folded.
 *
 * @param s1      First string.
 * @param length1 Number of UChars in s1.
 * @param s2      Second string.
 * @param length2 Number of UChars in s2.
 * @param options Passed through to u_internalFoldCase().
 * @return 0 if the folded strings are equal; otherwise the difference of the
 *         first mismatching code units, or -1 / +1 if s1 / s2 ends first.
 */
U_CFUNC int32_t
u_internalStrcasecmp(const UChar *s1, int32_t length1,
                     const UChar *s2, int32_t length2,
                     uint32_t options) {
    UChar t1[32], t2[32]; /* temporary buffers holding case-folded parts of s1 and s2 */
    UChar32 c;
    UChar trail;
    int32_t pos1, pos2, len1, len2, result;

    if(!uprv_haveProperties()) {
        /* hardcode ASCII strcasecmp() */
        UChar c1, c2;

        for(;;) {
            if(length1<=0) {
                if(length2<=0) {
                    return 0;
                } else {
                    return -1;
                }
            } else if(length2<=0) {
                return 1;
            }

            c1=*s1++;
            if((uint16_t)(c1-0x41)<26) {
                c1+=0x20;
            }
            c2=*s2++;
            if((uint16_t)(c2-0x41)<26) {
                c2+=0x20;
            }
            result=(int32_t)c1-(int32_t)c2;
            if(result!=0) {
                return result;
            }

            --length1;
            --length2;
        }
    }

    pos1=pos2=len1=len2=0;
    for(;;) {
        /* make sure that the temporary buffers are not empty */
        if(pos1>=len1) {
            if(length1>0) {
                c=*s1++;
                --length1;
                /*
                 * Only look at the next code unit if it is still inside the string:
                 * a lead surrogate in the last position is an unpaired surrogate
                 * and must not be combined with whatever follows the buffer.
                 */
                if(length1>0 && UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(trail=*s1)) {
                    c=UTF16_GET_PAIR_VALUE(c, trail);
                    ++s1;
                    --length1;
                }
                len1=u_internalFoldCase(c, t1, options);
                pos1=0;
            } else if(pos2>=len2 && length2<=0) {
                /* both strings are exhausted at the same time */
                return 0;
            } else {
                /* s1 ended first */
                return -1;
            }
        }
        if(pos2>=len2) {
            if(length2>0) {
                c=*s2++;
                --length2;
                /* same bounds check as for s1 before peeking at a trail surrogate */
                if(length2>0 && UTF_IS_FIRST_SURROGATE(c) && UTF_IS_SECOND_SURROGATE(trail=*s2)) {
                    c=UTF16_GET_PAIR_VALUE(c, trail);
                    ++s2;
                    --length2;
                }
                len2=u_internalFoldCase(c, t2, options);
                pos2=0;
            } else {
                /* s2 ended first */
                return 1;
            }
        }

        /* compare the head code units from both folded strings */
        result=(int32_t)t1[pos1++]-(int32_t)t2[pos2++];
        if(result!=0) {
            return result;
        }
    }
}
/*
 * Case-insensitive comparison of two buffers of the same explicit length.
 * Forwards to u_internalStrcasecmp() with `length` used for both strings.
 */
U_CAPI int32_t U_EXPORT2
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
    int32_t comparison;

    comparison=u_internalStrcasecmp(s1, length,
                                    s2, length,
                                    options);
    return comparison;
}
/*
 * Case-insensitive comparison of at most n UChars of two strings.
 *
 * Deliberately simple rather than optimal: first measure each string
 * (stopping at a zero UChar or after n UChars), then let
 * u_internalStrcasecmp() do the length-based comparison. This avoids
 * maintaining yet another copy of the comparison loop.
 */
U_CAPI int32_t U_EXPORT2
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
    int32_t count1=0, count2=0;

    /* bounded length of s1 */
    while(count1<n && s1[count1]!=0) {
        ++count1;
    }

    /* bounded length of s2 */
    while(count2<n && s2[count2]!=0) {
        ++count2;
    }

    return u_internalStrcasecmp(s1, count1, s2, count2, options);
}
/* conversions between char* and UChar* ------------------------------------- */