ICU-2360 UnicodeString pointer+length functions should recognize length=-1 for NUL-terminated input

X-SVN-Rev: 13548
2025-04-13 08:53:20 +00:00 · 2003-10-31 23:08:12 +00:00 · 2003-10-31 23:08:12 +00:00 · 2875ab287f
commit 2875ab287f
parent e5a79dc60c
2 changed files with 56 additions and 3 deletions
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -276,7 +276,7 @@ UnicodeString::UnicodeString(UChar *buff,
    fCapacity = US_STACKBUF_SIZE;
    fArray = fStackBuffer;
    fFlags = kShortString;
-  } else if(buffLength < -1 || buffLength > buffCapacity) {
+  } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    setToBogus();
  } else if(buffLength == -1) {
    // fLength = u_strlen(buff); but do not look beyond buffCapacity
@ -1045,9 +1045,16 @@ UnicodeString::setTo(UChar *buffer,
    return *this;
  }

-  if(buffLength < 0 || buffLength > buffCapacity) {
+  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    setToBogus();
    return *this;
+  } else if(buffLength == -1) {
+    // buffLength = u_strlen(buffer); but do not look beyond buffCapacity
+    const UChar *p = buffer, *limit = buffer + buffCapacity;
+    while(p != limit && *p != 0) {
+      ++p;
+    }
+    buffLength = (int32_t)(p - buffer);
  }

  releaseArray();
@ -1662,9 +1669,12 @@ UnicodeString::doCodepageCreate(const char *codepageData,
                const char *codepage)
 {
  // if there's nothing to convert, do nothing
-  if(codepageData == 0 || dataLength <= 0) {
+  if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
    return;
  }
+  if(dataLength == -1) {
+    dataLength = uprv_strlen(codepageData);
+  }

  UErrorCode status = U_ZERO_ERROR;

--- a/icu4c/source/test/intltest/ustrtest.cpp
+++ b/icu4c/source/test/intltest/ustrtest.cpp
@ -179,6 +179,49 @@ UnicodeStringTest::TestBasicManipulation()
            errln("operator+(UniStr, UniStr) failed");
        }
    }
+
+    {
+        // tests for Jitterbug 2360
+        // verify that APIs with source pointer + length accept length == -1
+        // test mostly the functions that were modified; only a few did not already do this
+        if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
+            errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
+        }
+
+        UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
+        UnicodeString s, t(buffer, -1, LENGTHOF(buffer));
+
+        if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
+            errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
+        }
+        if(t.length()!=u_strlen(buffer)) {
+            errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
+        }
+
+        if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
+            errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
+        }
+
+        buffer[u_strlen(buffer)]=0xe4;
+        UnicodeString u(buffer, -1, LENGTHOF(buffer));
+        if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=LENGTHOF(buffer)) {
+            errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
+        }
+        if(u.length()!=LENGTHOF(buffer)) {
+            errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
+        }
+
+        static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
+        UConverter *cnv;
+        UErrorCode errorCode=U_ZERO_ERROR;
+
+        cnv=ucnv_open("ISO-8859-1", &errorCode);
+        UnicodeString v(cs, -1, cnv, errorCode);
+        ucnv_close(cnv);
+        if(v!=UnicodeString("a\\xe4\\x85").unescape()) {
+            errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
+        }
+    }
 }

 void