diff --git a/icu4c/source/common/unames.cpp b/icu4c/source/common/unames.cpp index 9c230dc93ee..d9f61cac157 100644 --- a/icu4c/source/common/unames.cpp +++ b/icu4c/source/common/unames.cpp @@ -1526,7 +1526,7 @@ u_charFromName(UCharNameChoice nameChoice, uint32_t i; UChar32 cp = 0; char c0; - UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ + static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return error; @@ -1560,39 +1560,45 @@ u_charFromName(UCharNameChoice nameChoice, /* try extended names first */ if (lower[0] == '<') { - if (nameChoice == U_EXTENDED_CHAR_NAME) { + if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') { // Parse a string like "" where HHHH is a hex code point. - if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') { - while (i >= 3 && lower[--i] != '-') {} + uint32_t limit = i; + while (i >= 3 && lower[--i] != '-') {} - if (i >= 2 && lower[i] == '-') { - uint32_t cIdx; + // There should be 1 to 8 hex digits. + int32_t hexLength = limit - (i + 1); + if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) { + uint32_t cIdx; - lower[i] = 0; + lower[i] = 0; - for (++i; lower[i] != '>'; ++i) { - if (lower[i] >= '0' && lower[i] <= '9') { - cp = (cp << 4) + lower[i] - '0'; - } else if (lower[i] >= 'a' && lower[i] <= 'f') { - cp = (cp << 4) + lower[i] - 'a' + 10; - } else { - *pErrorCode = U_ILLEGAL_CHAR_FOUND; - return error; - } + for (++i; i < limit; ++i) { + if (lower[i] >= '0' && lower[i] <= '9') { + cp = (cp << 4) + lower[i] - '0'; + } else if (lower[i] >= 'a' && lower[i] <= 'f') { + cp = (cp << 4) + lower[i] - 'a' + 10; + } else { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; } + // Prevent signed-integer overflow and out-of-range code points. + if (cp > UCHAR_MAX_VALUE) { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; + } + } - /* Now validate the category name. - We could use a binary search, or a trie, if - we really wanted to. */ + /* Now validate the category name. + We could use a binary search, or a trie, if + we really wanted to. */ + uint8_t cat = getCharCat(cp); + for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { - for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { - - if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { - if (getCharCat(cp) == cIdx) { - return cp; - } - break; + if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { + if (cat == cIdx) { + return cp; } + break; } } } diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c index 9e94bcba080..7a778638732 100644 --- a/icu4c/source/test/cintltst/cucdtst.c +++ b/icu4c/source/test/cintltst/cucdtst.c @@ -2005,29 +2005,62 @@ TestCharNames() { static void TestUCharFromNameUnderflow() { // Ticket #10889: Underflow crash when there is no dash. + const char *name=""; UErrorCode errorCode=U_ZERO_ERROR; - UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "", &errorCode); + UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); if(U_SUCCESS(errorCode)) { - log_err("u_charFromName() = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); } // Test related edge cases. + name="<-00a0>"; errorCode=U_ZERO_ERROR; - c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode); + c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); if(U_SUCCESS(errorCode)) { - log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; - c=u_charFromName(U_EXTENDED_CHAR_NAME, "", &errorCode); + name=""; + c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); if(U_SUCCESS(errorCode)) { - log_err("u_charFromName() = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; - c=u_charFromName(U_EXTENDED_CHAR_NAME, "", &errorCode); + name=""; + c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); if(U_SUCCESS(errorCode)) { - log_err("u_charFromName() = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); + } + + // ICU-20292: integer overflow + errorCode=U_ZERO_ERROR; + name=""; + c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + name=""; // too many digits even if only leading 0s + c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + name=">"; + c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode); + if(U_SUCCESS(errorCode)) { + log_err("u_charFromName(%s) = U+%04x but should fail - %s\n", + name, c, u_errorName(errorCode)); } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java index 6f27a15ecc3..76570584eb6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java @@ -1350,6 +1350,12 @@ public final class UCharacterName int startIndex = name.lastIndexOf('-'); if (startIndex >= 0) { // We've got a category. startIndex ++; + + // There should be 1 to 8 hex digits. + int hexLength = endIndex - startIndex; + if (hexLength < 1 || 8 < hexLength) { + return -1; + } int result = -1; try { result = Integer.parseInt( @@ -1359,13 +1365,17 @@ public final class UCharacterName catch (NumberFormatException e) { return -1; } + if (result < 0 || 0x10ffff < result) { + return -1; + } // Now validate the category name. We could use a // binary search, or a trie, if we really wanted to. + int charType = getType(result); String type = name.substring(1, startIndex - 1); int length = TYPE_NAMES_.length; for (int i = 0; i < length; ++ i) { if (type.compareTo(TYPE_NAMES_[i]) == 0) { - if (getType(result) == i) { + if (charType == i) { return result; } break; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java index 02bd4536e97..890d9bebb7f 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java @@ -1232,28 +1232,54 @@ public final class UCharacterTest extends TestFmwk @Test public void TestUCharFromNameUnderflow() { // Ticket #10889: Underflow crash when there is no dash. - int c = UCharacter.getCharFromExtendedName(""); + String name = ""; + int c = UCharacter.getCharFromExtendedName(name); if(c >= 0) { - errln("UCharacter.getCharFromExtendedName() = U+" + hex(c) + + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + " but should fail (-1)"); } // Test related edge cases. - c = UCharacter.getCharFromExtendedName("<-00a0>"); + name = "<-00a0>"; + c = UCharacter.getCharFromExtendedName(name); if(c >= 0) { - errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) + + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + " but should fail (-1)"); } - c = UCharacter.getCharFromExtendedName(""); + name = ""; + c = UCharacter.getCharFromExtendedName(name); if(c >= 0) { - errln("UCharacter.getCharFromExtendedName() = U+" + hex(c) + + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + " but should fail (-1)"); } - c = UCharacter.getCharFromExtendedName(""); + name = ""; + c = UCharacter.getCharFromExtendedName(name); if(c >= 0) { - errln("UCharacter.getCharFromExtendedName() = U+" + hex(c) + + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + + " but should fail (-1)"); + } + + // ICU-20292: integer overflow + name = ""; + c = UCharacter.getCharFromExtendedName(name); + if(c >= 0) { + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + + " but should fail (-1)"); + } + + name = ""; // too many digits even if only leading 0s + c = UCharacter.getCharFromExtendedName(name); + if(c >= 0) { + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + + " but should fail (-1)"); + } + + name = ">"; + c = UCharacter.getCharFromExtendedName(name); + if(c >= 0) { + errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) + " but should fail (-1)"); } }