diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index a499d000908..54e5ae1c541 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -4188,43 +4188,13 @@ while((start)<(end)) { \ /* */ /****************************************************************************/ -/** - * Merge two sort keys. - * This is useful, for example, to combine sort keys from first and last names - * to sort such pairs. - * Merged sort keys consider on each collation level the first part first entirely, - * then the second one. - * It is possible to merge multiple sort keys by consecutively merging - * another one with the intermediate result. - * - * The length of the merge result is the sum of the lengths of the input sort keys - * minus 1. - * - * @param src1 the first sort key - * @param src1Length the length of the first sort key, including the zero byte at the end; - * can be -1 if the function is to find the length - * @param src2 the second sort key - * @param src2Length the length of the second sort key, including the zero byte at the end; - * can be -1 if the function is to find the length - * @param dest the buffer where the merged sort key is written, - * can be NULL if destCapacity==0 - * @param destCapacity the number of bytes in the dest buffer - * @return the length of the merged sort key, src1Length+src2Length-1; - * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), - * in which cases the contents of dest is undefined - * - * @draft - */ U_CAPI int32_t U_EXPORT2 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity) { - int32_t destLength; - uint8_t b; - /* check arguments */ - if( src1==NULL || src1Length<-2 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || - src2==NULL || src2Length<-2 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || + if( src1==NULL || src1Length<-1 || src1Length==0 
|| (src1Length>0 && src1[src1Length-1]!=0) || + src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || destCapacity<0 || (destCapacity>0 && dest==NULL) ) { /* error, attempt to write a zero byte and return 0 */ @@ -4242,34 +4212,38 @@ ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, src2Length=(int32_t)uprv_strlen((const char *)src2)+1; } - destLength=src1Length+src2Length-1; + int32_t destLength=src1Length+src2Length; if(destLength>destCapacity) { /* the merged sort key does not fit into the destination */ return destLength; } /* merge the sort keys with the same number of levels */ - while(*src1!=0 && *src2!=0) { /* while both have another level */ + uint8_t *p=dest; + for(;;) { /* copy level from src1 not including 00 or 01 */ + uint8_t b; while((b=*src1)>=2) { ++src1; - *dest++=b; + *p++=b; } /* add a 02 merge separator */ - *dest++=2; + *p++=2; /* copy level from src2 not including 00 or 01 */ while((b=*src2)>=2) { ++src2; - *dest++=b; + *p++=b; } /* if both sort keys have another level, then add a 01 level separator and continue */ if(*src1==1 && *src2==1) { ++src1; ++src2; - *dest++=1; + *p++=1; + } else { + break; } } @@ -4283,10 +4257,10 @@ ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, src2=src1; } /* append src2, "the other, unfinished sort key" */ - uprv_strcpy((char *)dest, (const char *)src2); + while((*p++=*src2++)!=0) {} - /* trust that neither sort key contained illegally embedded zero bytes */ - return destLength; + /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */ + return (int32_t)(p-dest); } U_NAMESPACE_BEGIN diff --git a/icu4c/source/i18n/unicode/ucol.h b/icu4c/source/i18n/unicode/ucol.h index cc97a9a91f0..8f98228527c 100644 --- a/icu4c/source/i18n/unicode/ucol.h +++ b/icu4c/source/i18n/unicode/ucol.h @@ -1087,26 +1087,40 @@ ucol_getVersion(const UCollator* coll, UVersionInfo info); U_STABLE void U_EXPORT2 
ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); -/** - * Merge two sort keys. The levels are merged with their corresponding counterparts +/** + * Merges two sort keys. The levels are merged with their corresponding counterparts * (primaries with primaries, secondaries with secondaries etc.). Between the values * from the same level a separator is inserted. - * example (uncompressed): - * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00 + * + * This is useful, for example, for combining sort keys from first and last names + * to sort such pairs. + * It is possible to merge multiple sort keys by consecutively merging + * another one with the intermediate result. + * + * The length of the merge result is the sum of the lengths of the input sort keys. + * + * Example (uncompressed): + *
191B1D 01 050505 01 910505 00
+ * 1F2123 01 050505 01 910505 00
* will be merged as - * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00 - * This allows for concatenating of first and last names for sorting, among other things. - * If the destination buffer is not big enough, the results are undefined. - * If any of source lengths are zero or any of source pointers are NULL/undefined, - * result is of size zero. - * @param src1 pointer to the first sortkey - * @param src1Length length of the first sortkey - * @param src2 pointer to the second sortkey - * @param src2Length length of the second sortkey - * @param dest buffer to hold the result - * @param destCapacity size of the buffer for the result - * @return size of the result. If the buffer is big enough size is always - * src1Length+src2Length-1 + *
191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
+ * + * If the destination buffer is not big enough, then its contents are undefined. + * If any of the source lengths are zero or any of the source pointers are NULL/undefined, + * the result is of size zero. + * + * @param src1 the first sort key + * @param src1Length the length of the first sort key, including the zero byte at the end; + * can be -1 if the function is to find the length + * @param src2 the second sort key + * @param src2Length the length of the second sort key, including the zero byte at the end; + * can be -1 if the function is to find the length + * @param dest the buffer where the merged sort key is written, + * can be NULL if destCapacity==0 + * @param destCapacity the number of bytes in the dest buffer + * @return the length of the merged sort key, src1Length+src2Length; + * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), + * in which cases the contents of dest are undefined * @stable ICU 2.0 */ U_STABLE int32_t U_EXPORT2 diff --git a/icu4c/source/test/cintltst/capitst.c b/icu4c/source/test/cintltst/capitst.c index 3571416d57c..eefd67c1cdd 100644 --- a/icu4c/source/test/cintltst/capitst.c +++ b/icu4c/source/test/cintltst/capitst.c @@ -1948,7 +1948,7 @@ void TestMergeSortKeys(void) { uint32_t reqLen = 0; log_verbose("testing buffer overflow\n"); reqLen = ucol_mergeSortkeys(prefixKey, prefixKeyLen, suffixKey, suffixKeyLen, smallBuf, 3); - if(reqLen != (prefixKeyLen+suffixKeyLen-1)) { + if(reqLen != (prefixKeyLen+suffixKeyLen)) { log_err("Wrong preflight size for merged sortkey\n"); } }