ICU-847 upper and lower bound APIs - working version, not fully complete

X-SVN-Rev: 7469
This commit is contained in:
Vladimir Weinstein 2002-01-21 23:54:34 +00:00
parent 644f35d609
commit 429d200e9d
2 changed files with 165 additions and 0 deletions

View file

@ -4234,6 +4234,119 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
return sortKeySize;
}
/**
 * Get an inclusive lower bound sortkey for a given sortkey and strength.
 *
 * The bound is the leading part of the input key, cut just before the level
 * terminator byte that closes the requested level (or at the 00 that ends the
 * key, whichever is met first), followed by a single terminating 00 byte.
 *
 * @param coll         Collator; only its caseLevel setting is consulted here.
 * @param source       Sort key to derive the bound from.
 * @param sourceLength Not consulted by this implementation; the scan relies on
 *                     the level terminators and the 00 byte inside the key.
 * @param strength     UCOL_PRIMARY through UCOL_QUATERNARY, or UCOL_IDENTICAL.
 * @param result       Buffer receiving the bound; may be NULL to only compute
 *                     the required size.
 * @param resultLength Capacity of result, in bytes.
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                     coll or source is NULL or strength is out of range.
 * @return Number of bytes needed to store the bound, including the terminating
 *         00; 0 if status was already a failure or the arguments are invalid.
 */
U_CAPI int32_t U_EXPORT2
ucol_getLowerBoundSortKey(const UCollator *coll,
                          const uint8_t *source,
                          int32_t sourceLength,
                          UColAttributeValue strength,
                          uint8_t *result,
                          int32_t resultLength,
                          UErrorCode *status) {
    /*
     * The lowerbound is easy. Copy the input key up to the 01 terminating the
     * requested level. Append a 00 and stop.
     */
    // consistency checks
    if(status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(coll == NULL || source == NULL ||
        strength < UCOL_PRIMARY ||
        (strength > UCOL_QUATERNARY && strength != UCOL_IDENTICAL)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // number of 01 to skip
    // NOTE(review): if UCOL_PRIMARY is 0, this starts at -1 for primary strength
    // and the scan below stops after a single byte - confirm the intended
    // strength-to-level mapping.
    int32_t toSkip = strength-1;
    // TODO: need to adjust for UCOL_IDENTICAL
    int32_t sourceIndex = 0;
    if(coll->caseLevel == UCOL_ON) { // There is another level after primary and before the tertiary level
        if(strength > UCOL_SECONDARY) { // We need to have the case level too
            toSkip++;
        }
    }

    // Scan the string until we skip enough of the key OR reach the end of the key.
    // NOTE(review): the index is advanced before the first test, so source[0]
    // is never examined - presumably a key never starts with 00 or 01; verify.
    do {
        sourceIndex++;
        if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
            toSkip--;
        }
    } while (toSkip >= 0 && source[sourceIndex] != 0);

    // The bound occupies sourceIndex key bytes plus the terminating 00, so the
    // buffer must hold sourceIndex+1 bytes. (The previous ">=" test allowed the
    // terminator to be written one byte past the end of the buffer.)
    if(result != NULL && resultLength > sourceIndex) {
        uprv_memcpy(result, source, sourceIndex);
        result[sourceIndex] = 0;
    }
    return sourceIndex+1;
}
/**
 * Get an exclusive upper bound sortkey for a given sortkey and strength.
 *
 * The key is scanned to the end of the requested level exactly as for the lower
 * bound. The scan position is then moved backwards over every byte that is 01
 * (level terminator), FF or 00; the first byte that is none of these is
 * incremented by one and the key is terminated with 00 right after it.
 *
 * @param coll         Collator; only its caseLevel setting is consulted here.
 * @param source       Sort key to derive the bound from.
 * @param sourceLength Not consulted by this implementation; the scan relies on
 *                     the level terminators and the 00 byte inside the key.
 * @param strength     UCOL_PRIMARY through UCOL_QUATERNARY, or UCOL_IDENTICAL.
 * @param result       Buffer receiving the bound; may be NULL to only compute
 *                     the required size.
 * @param resultLength Capacity of result, in bytes.
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                     coll or source is NULL, strength is out of range, or the
 *                     scanned part of the key holds no byte that can be
 *                     incremented.
 * @return Number of bytes needed to store the bound, including the terminating
 *         00; 0 if status was already a failure or an error was detected.
 */
U_CAPI int32_t U_EXPORT2
ucol_getUpperBoundSortKey(const UCollator *coll,
                          const uint8_t *source,
                          int32_t sourceLength,
                          UColAttributeValue strength,
                          uint8_t *result,
                          int32_t resultLength,
                          UErrorCode *status){
    /*
     * The upperbound is only slightly harder. Do the same as the lower bound, but then
     * start backing up through the bytes. If a byte is neither FF nor 01, add one and
     * stop. Otherwise, drop it (the result is terminated right after the incremented
     * byte) and continue to the previous byte.
     */
    // consistency checks
    if(status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(coll == NULL || source == NULL ||
        strength < UCOL_PRIMARY ||
        (strength > UCOL_QUATERNARY && strength != UCOL_IDENTICAL)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // number of 01 to skip
    // NOTE(review): if UCOL_PRIMARY is 0, this starts at -1 for primary strength
    // and the scan below stops after a single byte - confirm the intended
    // strength-to-level mapping.
    int32_t toSkip = strength-1;
    // TODO: need to adjust for UCOL_IDENTICAL
    int32_t sourceIndex = 0;
    if(coll->caseLevel == UCOL_ON) { // There is another level after primary and before the tertiary level
        if(strength > UCOL_SECONDARY) { // We need to have the case level too
            toSkip++;
        }
    }

    // Scan the string until we skip enough of the key OR reach the end of the key.
    // NOTE(review): the index is advanced before the first test, so source[0]
    // is never examined - presumably a key never starts with 00 or 01; verify.
    do {
        sourceIndex++;
        if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
            toSkip--;
        }
    } while (toSkip >= 0 && source[sourceIndex] != 0);

    // Back up to the last byte that can be incremented. The index is kept from
    // going below zero so that we never read in front of the key.
    while(sourceIndex > 0 &&
          (source[sourceIndex] == UCOL_LEVELTERMINATOR ||
           source[sourceIndex] == 0xFF ||
           source[sourceIndex] == 0)) {
        sourceIndex--;
    }
    // Reached the first byte and it still cannot be incremented: there is no
    // upper bound we can build from this key.
    if(source[sourceIndex] == UCOL_LEVELTERMINATOR ||
       source[sourceIndex] == 0xFF ||
       source[sourceIndex] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Bytes 0..sourceIndex are kept (the last one incremented) and a 00 is
    // appended, so sourceIndex+2 bytes are needed. The same size is reported
    // whether or not the buffer could be filled.
    const int32_t needed = sourceIndex+2;
    if(result != NULL && resultLength >= needed) {
        uprv_memcpy(result, source, sourceIndex+1);
        result[sourceIndex]++;      // add one to the last significant byte in the key
        result[sourceIndex+1] = 0;  // and terminate it
    }
    return needed;
}
static
inline void uprv_appendByteToHexString(char *dst, uint8_t val) {
uint32_t len = (uint32_t)uprv_strlen(dst);

View file

@ -470,6 +470,58 @@ ucol_getSortKey(const UCollator *coll,
/**
* Get an inclusive lower bound sortkey for a given sortkey and strength.
* The bound is built from the leading bytes of the source key, cut at the
* level terminator of the requested level, and is terminated by a zero byte.
* Return value is always the number of bytes needed, regardless of
* whether the result buffer was big enough or even valid.
* Sort keys may be compared using <TT>strcmp</TT>.
* @param coll The UCollator containing the collation rules.
* @param source The source sortkey.
* @param sourceLength The length of source, or -1 if null-terminated.
* (If an unmodified sortkey is passed, it is always null terminated).
* @param strength The strength of the lower bound: UCOL_PRIMARY through
* UCOL_QUATERNARY, or UCOL_IDENTICAL.
* @param result A pointer to a buffer to receive the resulting sortkey.
* May be NULL, in which case only the needed size is computed.
* @param resultLength The maximum size of result, in bytes.
* @param status Used for returning error code if something went wrong.
* Set to U_ILLEGAL_ARGUMENT_ERROR if coll or source is NULL or if
* strength is not one of the accepted values.
* @return The size needed to fully store the sort key, including its
* terminating zero byte, or 0 if an error occurred.
* @see ucol_keyHashCode
* @draft ICU 2.1
*/
U_CAPI int32_t U_EXPORT2
ucol_getLowerBoundSortKey(const UCollator *coll,
const uint8_t *source,
int32_t sourceLength,
UColAttributeValue strength,
uint8_t *result,
int32_t resultLength,
UErrorCode *status);
/**
* Get an exclusive upper bound sortkey for a given sortkey and strength.
* The bound is built from the leading bytes of the source key up to the
* requested level; its last byte that is not 01, FF or 00 is incremented
* by one and the key is terminated by a zero byte right after it.
* Return value is always the number of bytes needed, regardless of
* whether the result buffer was big enough or even valid.
* Sort keys may be compared using <TT>strcmp</TT>.
* @param coll The UCollator containing the collation rules.
* @param source The source sortkey.
* @param sourceLength The length of source, or -1 if null-terminated.
* (If an unmodified sortkey is passed, it is always null terminated).
* @param strength The strength of the upper bound: UCOL_PRIMARY through
* UCOL_QUATERNARY, or UCOL_IDENTICAL.
* @param result A pointer to a buffer to receive the resulting sortkey.
* May be NULL, in which case only the needed size is computed.
* @param resultLength The maximum size of result, in bytes.
* @param status Used for returning error code if something went wrong.
* Set to U_ILLEGAL_ARGUMENT_ERROR if coll or source is NULL or if
* strength is not one of the accepted values.
* @return The size needed to fully store the sort key, or 0 if an error
* occurred.
* @see ucol_keyHashCode
* @draft ICU 2.1
*/
U_CAPI int32_t U_EXPORT2
ucol_getUpperBoundSortKey(const UCollator *coll,
const uint8_t *source,
int32_t sourceLength,
UColAttributeValue strength,
uint8_t *result,
int32_t resultLength,
UErrorCode *status);
/**
* Gets the version information for a Collator.
* @param info the version # information, the result will be filled in