ICU-847 upper and lower bound APIs - working version, not fully complete

X-SVN-Rev: 7469
2025-04-14 01:11:02 +00:00 · 2002-01-21 23:54:34 +00:00 · 2002-01-21 23:54:34 +00:00 · 429d200e9d
commit 429d200e9d
parent 644f35d609
2 changed files with 165 additions and 0 deletions
--- a/icu4c/source/i18n/ucol.cpp
+++ b/icu4c/source/i18n/ucol.cpp
@ -4234,6 +4234,119 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
    return sortKeySize;
 }

+/**
+ * Get an inclusive lower bound sortkey for a given sortkey and strength.
+ *
+ * The lower bound is the input key truncated just before the 01 byte that
+ * terminates the requested level, followed by a single 00 terminator. If the
+ * key ends before that level terminator is found, the whole key is used.
+ *
+ * @param coll         Collator; only its caseLevel setting is consulted.
+ * @param source       Sort key to derive the bound from.
+ * @param sourceLength Number of readable bytes in source, or a negative value
+ *                     if source is terminated by a 00 byte. Scanning stops at
+ *                     the first 00 byte in either case.
+ * @param strength     Level of the bound: UCOL_PRIMARY through UCOL_QUATERNARY,
+ *                     or UCOL_IDENTICAL.
+ * @param result       Buffer receiving the bound; may be NULL to only compute
+ *                     the required size.
+ * @param resultLength Capacity of result in bytes.
+ * @param status       Set to U_ILLEGAL_ARGUMENT_ERROR on invalid arguments.
+ *                     Nothing is done if it already indicates a failure.
+ * @return Number of bytes needed for the bound including its 00 terminator,
+ *         whether or not result was written; 0 on error.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getLowerBoundSortKey(const    UCollator    *coll,
+        const uint8_t       *source,
+        int32_t             sourceLength,
+        UColAttributeValue  strength,
+        uint8_t             *result,
+        int32_t             resultLength,
+        UErrorCode          *status) {
+  // consistency checks
+  if(status == NULL || U_FAILURE(*status)) {
+    return 0;
+  }
+  if(coll == NULL || source == NULL ||
+    strength < UCOL_PRIMARY ||
+    (strength > UCOL_QUATERNARY && strength != UCOL_IDENTICAL)) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+  }
+
+  // Number of 01 level terminators to step over before reaching the one that
+  // closes the requested level. Measured from UCOL_PRIMARY so that a primary
+  // bound stops at the first 01 in the key.
+  // TODO: UCOL_IDENTICAL is not mapped to a level count; for it the scan simply
+  // runs until the terminators or the key run out.
+  int32_t toSkip = (int32_t)strength - (int32_t)UCOL_PRIMARY;
+  if(coll->caseLevel == UCOL_ON && strength > UCOL_SECONDARY) {
+    // The case level sits between the secondary and tertiary levels.
+    toSkip++;
+  }
+
+  // Scan from the very first byte until the requested level terminator, the
+  // 00 key terminator, or the end of the readable bytes is reached.
+  int32_t sourceIndex = 0;
+  while(sourceLength < 0 || sourceIndex < sourceLength) {
+    const uint8_t current = source[sourceIndex];
+    if(current == 0) {
+      break;
+    }
+    if(current == UCOL_LEVELTERMINATOR) {
+      if(toSkip == 0) {
+        break;
+      }
+      toSkip--;
+    }
+    sourceIndex++;
+  }
+
+  // sourceIndex payload bytes plus the 00 terminator must fit.
+  if(result != NULL && resultLength > sourceIndex) {
+    uprv_memcpy(result, source, sourceIndex);
+    result[sourceIndex] = 0;
+  }
+
+  return sourceIndex+1;
+}
+
+/**
+ * Get an exclusive upper bound sortkey for a given sortkey and strength.
+ *
+ * The key is first cut just before the 01 byte that terminates the requested
+ * level (or at its end, if that comes first). Trailing bytes equal to 01 or FF
+ * are then dropped, the last remaining byte is incremented by one, and a 00
+ * terminator is appended.
+ *
+ * @param coll         Collator; only its caseLevel setting is consulted.
+ * @param source       Sort key to derive the bound from.
+ * @param sourceLength Number of readable bytes in source, or a negative value
+ *                     if source is terminated by a 00 byte. Scanning stops at
+ *                     the first 00 byte in either case.
+ * @param strength     Level of the bound: UCOL_PRIMARY through UCOL_QUATERNARY,
+ *                     or UCOL_IDENTICAL.
+ * @param result       Buffer receiving the bound; may be NULL to only compute
+ *                     the required size.
+ * @param resultLength Capacity of result in bytes.
+ * @param status       Set to U_ILLEGAL_ARGUMENT_ERROR on invalid arguments, or
+ *                     when the selected part of the key holds no byte that can
+ *                     be incremented. Nothing is done if it already indicates
+ *                     a failure.
+ * @return Number of bytes needed for the bound including its 00 terminator,
+ *         whether or not result was written; 0 on error.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getUpperBoundSortKey(const    UCollator    *coll,
+        const uint8_t       *source,
+        int32_t             sourceLength,
+        UColAttributeValue  strength,
+        uint8_t             *result,
+        int32_t             resultLength,
+        UErrorCode          *status){
+  // consistency checks
+  if(status == NULL || U_FAILURE(*status)) {
+    return 0;
+  }
+  if(coll == NULL || source == NULL ||
+    strength < UCOL_PRIMARY ||
+    (strength > UCOL_QUATERNARY && strength != UCOL_IDENTICAL)) {
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+  }
+
+  // Number of 01 level terminators to step over before reaching the one that
+  // closes the requested level. Measured from UCOL_PRIMARY so that a primary
+  // bound stops at the first 01 in the key.
+  // TODO: UCOL_IDENTICAL is not mapped to a level count; for it the scan simply
+  // runs until the terminators or the key run out.
+  int32_t toSkip = (int32_t)strength - (int32_t)UCOL_PRIMARY;
+  if(coll->caseLevel == UCOL_ON && strength > UCOL_SECONDARY) {
+    // The case level sits between the secondary and tertiary levels.
+    toSkip++;
+  }
+
+  // Scan from the very first byte until the requested level terminator, the
+  // 00 key terminator, or the end of the readable bytes is reached.
+  int32_t sourceIndex = 0;
+  while(sourceLength < 0 || sourceIndex < sourceLength) {
+    const uint8_t current = source[sourceIndex];
+    if(current == 0) {
+      break;
+    }
+    if(current == UCOL_LEVELTERMINATOR) {
+      if(toSkip == 0) {
+        break;
+      }
+      toSkip--;
+    }
+    sourceIndex++;
+  }
+
+  // Back up to the last byte that can be incremented: FF would overflow and
+  // 01 is a level terminator. The index is bounded below so that the scan
+  // never reads in front of the key.
+  int32_t last = sourceIndex - 1;
+  while(last >= 0 &&
+    (source[last] == UCOL_LEVELTERMINATOR || source[last] == 0xFF)) {
+    last--;
+  }
+  if(last < 0) {
+    // Empty prefix, or one made only of 01/FF bytes: no bound can be built.
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+  }
+
+  // Bytes 0..last plus the 00 terminator.
+  const int32_t needed = last + 2;
+  if(result != NULL && resultLength >= needed) {
+    uprv_memcpy(result, source, last+1);
+    result[last]++;       // add one to the last significant byte in the key
+    result[last+1] = 0;   // and terminate it
+  }
+
+  return needed;
+}
+
 static
 inline void uprv_appendByteToHexString(char *dst, uint8_t val) {
  uint32_t len = (uint32_t)uprv_strlen(dst);
--- a/icu4c/source/i18n/unicode/ucol.h
+++ b/icu4c/source/i18n/unicode/ucol.h
@ -470,6 +470,58 @@ ucol_getSortKey(const    UCollator    *coll,



+/**
+ * Get an inclusive lower bound sortkey for a given sortkey and strength.
+ * The return value is always the number of bytes needed, regardless of
+ * whether the result buffer was big enough or even valid.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param coll The UCollator containing the collation rules. Must not be NULL.
+ * @param source The source sortkey. Must not be NULL.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ *                     (If an unmodified sortkey is passed, it is always null-terminated.)
+ * @param strength The strength of the lower bound: UCOL_PRIMARY through
+ *                 UCOL_QUATERNARY, or UCOL_IDENTICAL.
+ * @param result A pointer to a buffer to receive the resulting sortkey.
+ *               May be NULL, in which case nothing is written.
+ * @param resultLength The maximum size of result, in bytes.
+ * @param status Used for returning an error code if something went wrong.
+ *               Set to U_ILLEGAL_ARGUMENT_ERROR if coll or source is NULL
+ *               or strength is not one of the accepted values.
+ * @return The size in bytes needed to fully store the resulting sort key.
+ * @see ucol_keyHashCode
+ * @draft ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2 
+ucol_getLowerBoundSortKey(const    UCollator    *coll,
+        const uint8_t       *source,
+        int32_t             sourceLength,
+        UColAttributeValue  strength,
+        uint8_t             *result,
+        int32_t             resultLength,
+        UErrorCode          *status);
+
+/**
+ * Get an exclusive upper bound sortkey for a given sortkey and strength.
+ * The return value is always the number of bytes needed, regardless of
+ * whether the result buffer was big enough or even valid.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param coll The UCollator containing the collation rules. Must not be NULL.
+ * @param source The source sortkey. Must not be NULL.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ *                     (If an unmodified sortkey is passed, it is always null-terminated.)
+ * @param strength The strength of the upper bound: UCOL_PRIMARY through
+ *                 UCOL_QUATERNARY, or UCOL_IDENTICAL.
+ * @param result A pointer to a buffer to receive the resulting sortkey.
+ *               May be NULL, in which case nothing is written.
+ * @param resultLength The maximum size of result, in bytes.
+ * @param status Used for returning an error code if something went wrong.
+ *               Set to U_ILLEGAL_ARGUMENT_ERROR if coll or source is NULL
+ *               or strength is not one of the accepted values.
+ * @return The size in bytes needed to fully store the resulting sort key.
+ * @see ucol_keyHashCode
+ * @draft ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2 
+ucol_getUpperBoundSortKey(const    UCollator    *coll,
+        const uint8_t       *source,
+        int32_t             sourceLength,
+        UColAttributeValue  strength,
+        uint8_t             *result,
+        int32_t             resultLength,
+        UErrorCode          *status);
+        
 /**
 * Gets the version information for a Collator. 
 * @param info the version # information, the result will be filled in