ICU-3100 redo getContractions after a review. Add tests for getContractions and getUnsafeSet

X-SVN-Rev: 15683
2025-04-10 07:39:16 +00:00 · 2004-06-03 19:26:16 +00:00 · 2004-06-03 19:26:16 +00:00 · 4c96e76b8f
commit 4c96e76b8f
parent 220fceaa01
3 changed files with 38 additions and 32 deletions
--- a/icu4c/source/i18n/ucol_sit.cpp
+++ b/icu4c/source/i18n/ucol_sit.cpp
@ -867,45 +867,28 @@ struct contContext {

 static void
 addContraction(const UCollator *coll, USet *contractions, UChar *buffer, int32_t bufLen, 
-               uint32_t CE, int32_t leftIndex, int32_t rightIndex, UErrorCode *status) 
+               uint32_t CE, int32_t rightIndex, UErrorCode *status) 
 {
-    int32_t lI = leftIndex, rI = rightIndex;
+    if(rightIndex == bufLen-1) {
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return;
+    }
    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
    uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
    // we might have a contraction that ends from previous level
-    if(newCE != UCOL_NOT_FOUND) {
-        if(isSpecial(newCE) && getCETag(newCE) == SPEC_PROC_TAG) {
-            addContraction(coll, contractions, buffer, bufLen, newCE, leftIndex, rightIndex, status);
-        } else if(rightIndex > leftIndex) {
-            uset_addString(contractions, buffer+leftIndex, rightIndex - leftIndex + 1);
-        }
+    if(newCE != UCOL_NOT_FOUND && rightIndex > 1) {
+            uset_addString(contractions, buffer, rightIndex + 1);
    }

    UCharOffset++;
    while(*UCharOffset != 0xFFFF) {
-        // depending on how we got here, we want to put chars in
-        // either from left or from right (Japanese tends to start with a contraction,
-        // only to throw you a prefix, just for a spin ;)
        newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
-        if(getCETag(CE) == CONTRACTION_TAG) {
-            if(rI == internalBufferSize-1) {
-                *status = U_INTERNAL_PROGRAM_ERROR;
-                return;
-            }
-            buffer[++rI] = *UCharOffset;
+        buffer[rightIndex] = *UCharOffset;
+        if(isSpecial(newCE) && getCETag(newCE) == CONTRACTION_TAG) {
+            addContraction(coll, contractions, buffer, bufLen, newCE, rightIndex + 1, status);
        } else {
-            if(lI == 0) {
-                *status = U_INTERNAL_PROGRAM_ERROR;
-                return;
-            }
-            buffer[--lI] = *UCharOffset;
+            uset_addString(contractions, buffer, rightIndex + 1);
        }
-        if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag(newCE) == SPEC_PROC_TAG)) {
-            addContraction(coll, contractions, buffer, bufLen, newCE, lI, rI, status);
-        } else {
-            uset_addString(contractions, buffer+lI, rI - lI + 1);
-        }
-        lI = leftIndex; rI = rightIndex;
        UCharOffset++;
    }
 }
@ -919,7 +902,7 @@ _processContractions(const void *context, UChar32 start, UChar32 limit, uint32_t
    USet *removed = ((contContext *)context)->removedContractions;
    const UCollator *coll = ((contContext *)context)->coll;
    UChar contraction[internalBufferSize];
-    if(isSpecial(CE) && (getCETag(CE) == CONTRACTION_TAG || getCETag(CE) == SPEC_PROC_TAG)) {
+    if(isSpecial(CE) && getCETag(CE) == CONTRACTION_TAG) {
        while(start < limit && U_SUCCESS(*status)) {
            // if there are suppressed contractions, we don't 
            // want to add them.
@ -929,9 +912,8 @@ _processContractions(const void *context, UChar32 start, UChar32 limit, uint32_t
            }
            // we start our contraction from middle, since we don't know if it
            // will grow toward right or left
-            int32_t middle = internalBufferSize/2;
-            contraction[middle] = (UChar)start;
-            addContraction(coll, unsafe, contraction, internalBufferSize, CE, middle, middle, status);
+            contraction[0] = (UChar)start;
+            addContraction(coll, unsafe, contraction, internalBufferSize, CE, 1, status);
            start++;
        }
    }
--- a/icu4c/source/test/intltest/apicoll.cpp
+++ b/icu4c/source/test/intltest/apicoll.cpp
@ -2118,6 +2118,24 @@ void CollationAPITest::TestSubclass()
    }
 }

+// Tailors U+0000 after 'a' and checks that "a" then compares less than the NULL character.
+void CollationAPITest::TestNULLCharTailoring()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UChar buf[256] = {0};
+    int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
+    UnicodeString first(0x0061);
+    UnicodeString second(0);
+    RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
+    if(coll == NULL || U_FAILURE(status)) {
+        // Without a usable collator the comparison is meaningless, so stop here.
+        errln("Failed to open collator");
+    } else if(coll->compare(first, second, status) != UCOL_LESS) {
+        errln("a should be less then NULL after tailoring");
+    }
+    delete coll;
+}
+
 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
 {
    if (exec) logln("TestSuite CollationAPITest: ");
@ -2144,6 +2162,7 @@ void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &n
        case 19: name = "TestGetTailoredSet"; if (exec) TestGetTailoredSet(); break;
        case 20: name = "TestUClassID"; if (exec) TestUClassID(); break;
        case 21: name = "TestSubclass"; if (exec) TestSubclass(); break;
+        case 22: name = "TestNULLCharTailoring"; if (exec) TestNULLCharTailoring(); break;
        default: name = ""; break;
    }
 }
--- a/icu4c/source/test/intltest/apicoll.h
+++ b/icu4c/source/test/intltest/apicoll.h
@ -159,6 +159,11 @@ public:
    */
    void TestUClassID();

+    /**
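+    * Test that the NULL character (U+0000) can be tailored: with the rule "&a < '\u0000'", "a" must compare less than U+0000.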
+    */
+    void TestNULLCharTailoring();
+
 private:
    // If this is too small for the test data, just increase it.
    // Just don't make it too large, otherwise the executable will get too big