ICU-1794 add u_strHasMoreChar32Than

X-SVN-Rev: 10002
2025-04-09 15:27:38 +00:00 · 2002-10-09 20:40:05 +00:00 · 2002-10-09 20:40:05 +00:00 · 5b9c9d52df
commit 5b9c9d52df
parent 59ac5307fc
2 changed files with 89 additions and 0 deletions
--- a/icu4c/source/common/unicode/ustring.h
+++ b/icu4c/source/common/unicode/ustring.h
@ -86,6 +86,27 @@ u_strlen(const UChar *s);
 U_CAPI int32_t U_EXPORT2
 u_countChar32(const UChar *s, int32_t length);

+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string. If NULL, the result is FALSE unless number<0.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ *               If less than -1, the result is FALSE unless number<0.
+ * @param number The code point count threshold. If negative, the result is TRUE.
+ * @return Boolean value for whether the string contains more Unicode code points
+ *         than 'number'. Same as (u_countChar32(s, length)>number).
+ * @draft ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
 /**
 * Concatenate two ustrings.  Appends a copy of <TT>src</TT>,
 * including the null terminator, to <TT>dst</TT>. The initial copied
--- a/icu4c/source/common/ustring.c
+++ b/icu4c/source/common/ustring.c
@ -640,6 +640,74 @@ u_countChar32(const UChar *s, int32_t length) {
    return count;
 }

+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
+    /* TRUE if s has more than 'number' code points; scans no further than needed */
+    if(number<0) {
+        return TRUE; /* any string, even NULL or empty, exceeds a negative count */
+    }
+    if(s==NULL || length<-1) {
+        return FALSE; /* invalid input cannot exceed a non-negative count */
+    }
+
+    if(length==-1) {
+        /* s is NUL-terminated */
+        UChar c;
+
+        /* consume one code point per pass; 'number' is how many may still be consumed */
+        for(;;) {
+            if((c=*s++)==0) {
+                return FALSE; /* reached the end without exceeding the threshold */
+            }
+            if(number==0) {
+                return TRUE; /* one more code point than the threshold */
+            }
+            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
+                ++s; /* a lead+trail pair of UChars is a single code point */
+            }
+            --number;
+        }
+    } else {
+        /* length>=0 known */
+        const UChar *limit;
+        int32_t maxSupplementary;
+
+        /* s has at least ceil(length/2) code points (<=2 UChars per cp); avoid length+1 overflow */
+        if((length-length/2)>number) {
+            return TRUE;
+        }
+
+        /* check if s does not even contain enough UChars */
+        maxSupplementary=length-number;
+        if(maxSupplementary<=0) {
+            return FALSE;
+        }
+        /* there are maxSupplementary=length-number more UChars than asked-for code points */
+
+        /*
+         * count code points until they exceed and also check that there are
+         * fewer than maxSupplementary supplementary code points (UChar pairs)
+         */
+        limit=s+length;
+        for(;;) {
+            if(s==limit) {
+                return FALSE;
+            }
+            if(number==0) {
+                return TRUE;
+            }
+            if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
+                ++s;
+                if(--maxSupplementary<=0) {
+                    /* too many pairs - too few code points */
+                    return FALSE;
+                }
+            }
+            --number;
+        }
+    }
+}
+
 U_CAPI UChar * U_EXPORT2
 u_memcpy(UChar *dest, const UChar *src, int32_t count) {
    return (UChar *)uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);