From 0d2a03dbba2f867c1cce2291a4832e1791cd8b5e Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 10 Apr 2023 13:21:26 -0700 Subject: [PATCH] ICU-22338 getProperty(Value)Name accepts nameChoice above 1 --- icu4c/source/common/propname.cpp | 11 ++- icu4c/source/test/intltest/ucdtest.cpp | 80 +++++++++++++++++ icu4c/source/test/intltest/ucdtest.h | 3 +- .../ibm/icu/dev/test/lang/UCharacterTest.java | 89 +++++++++++++++++++ 4 files changed, 178 insertions(+), 5 deletions(-) diff --git a/icu4c/source/common/propname.cpp b/icu4c/source/common/propname.cpp index ef2dedec437..45062bfbd93 100644 --- a/icu4c/source/common/propname.cpp +++ b/icu4c/source/common/propname.cpp @@ -289,7 +289,10 @@ U_NAMESPACE_END U_CAPI const char* U_EXPORT2 u_getPropertyName(UProperty property, - UPropertyNameChoice nameChoice) { + UPropertyNameChoice nameChoice) UPRV_NO_SANITIZE_UNDEFINED { + // The nameChoice is really an integer with a couple of named constants. + // Unicode allows for names other than short and long ones. + // If present, these will be returned for U_LONG_PROPERTY_NAME + i, where i=1, 2,... U_NAMESPACE_USE return PropNameData::getPropertyName(property, nameChoice); } @@ -304,9 +307,9 @@ U_CAPI const char* U_EXPORT2 u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice) UPRV_NO_SANITIZE_UNDEFINED { - if (nameChoice < 0 || U_PROPERTY_NAME_CHOICE_COUNT <= nameChoice) { - return nullptr; - } + // The nameChoice is really an integer with a couple of named constants. + // Unicode allows for names other than short and long ones. + // If present, these will be returned for U_LONG_PROPERTY_NAME + i, where i=1, 2,... 
U_NAMESPACE_USE return PropNameData::getPropertyValueName(property, value, nameChoice); } diff --git a/icu4c/source/test/intltest/ucdtest.cpp b/icu4c/source/test/intltest/ucdtest.cpp index 64b87ed9d15..4bf752bc6b9 100644 --- a/icu4c/source/test/intltest/ucdtest.cpp +++ b/icu4c/source/test/intltest/ucdtest.cpp @@ -75,6 +75,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, TESTCASE_AUTO(TestBinaryCharacterProperties); TESTCASE_AUTO(TestIntCharacterProperties); #endif + TESTCASE_AUTO(TestPropertyNames); TESTCASE_AUTO_END; } @@ -868,3 +869,82 @@ void UnicodeTest::TestIntCharacterProperties() { } #endif } + +namespace { + +const char *getPropName(UProperty property, int32_t nameChoice) UPRV_NO_SANITIZE_UNDEFINED { + const char *name = u_getPropertyName(property, (UPropertyNameChoice)nameChoice); + return name != nullptr ? name : "null"; +} + +const char *getValueName(UProperty property, int32_t value, int32_t nameChoice) + UPRV_NO_SANITIZE_UNDEFINED { + const char *name = u_getPropertyValueName(property, value, (UPropertyNameChoice)nameChoice); + return name != nullptr ? name : "null"; +} + +} // namespace + +void UnicodeTest::TestPropertyNames() { + IcuTestErrorCode errorCode(*this, "TestPropertyNames()"); + // Test names of certain properties & values. + // The UPropertyNameChoice is really an integer with only a couple of named constants. 
+ UProperty prop = UCHAR_WHITE_SPACE; + constexpr int32_t SHORT = U_SHORT_PROPERTY_NAME; + constexpr int32_t LONG = U_LONG_PROPERTY_NAME; + assertEquals("White_Space: index -1", "null", getPropName(prop, -1)); + assertEquals("White_Space: short", "WSpace", getPropName(prop, SHORT)); + assertEquals("White_Space: long", "White_Space", getPropName(prop, LONG)); + assertEquals("White_Space: index 2", "space", getPropName(prop, 2)); + assertEquals("White_Space: index 3", "null", getPropName(prop, 3)); + + prop = UCHAR_SIMPLE_CASE_FOLDING; + assertEquals("Simple_Case_Folding: index -1", "null", getPropName(prop, -1)); + assertEquals("Simple_Case_Folding: short", "scf", getPropName(prop, SHORT)); + assertEquals("Simple_Case_Folding: long", "Simple_Case_Folding", getPropName(prop, LONG)); + assertEquals("Simple_Case_Folding: index 2", "sfc", getPropName(prop, 2)); + assertEquals("Simple_Case_Folding: index 3", "null", getPropName(prop, 3)); + + prop = UCHAR_CASED; + assertEquals("Cased=Y: index -1", "null", getValueName(prop, 1, -1)); + assertEquals("Cased=Y: short", "Y", getValueName(prop, 1, SHORT)); + assertEquals("Cased=Y: long", "Yes", getValueName(prop, 1, LONG)); + assertEquals("Cased=Y: index 2", "T", getValueName(prop, 1, 2)); + assertEquals("Cased=Y: index 3", "True", getValueName(prop, 1, 3)); + assertEquals("Cased=Y: index 4", "null", getValueName(prop, 1, 4)); + + prop = UCHAR_DECOMPOSITION_TYPE; + int32_t value = U_DT_NOBREAK; + assertEquals("dt=Nb: index -1", "null", getValueName(prop, value, -1)); + assertEquals("dt=Nb: short", "Nb", getValueName(prop, value, SHORT)); + assertEquals("dt=Nb: long", "Nobreak", getValueName(prop, value, LONG)); + assertEquals("dt=Nb: index 2", "nb", getValueName(prop, value, 2)); + assertEquals("dt=Nb: index 3", "null", getValueName(prop, value, 3)); + + // Canonical_Combining_Class: + // The UCD inserts the numeric values in the second field of its PropertyValueAliases.txt lines. 
+ // In ICU, we don't treat these as names, + // they are just the numeric values returned by u_getCombiningClass(). + // We return the real short and long names for the usual choice constants. + prop = UCHAR_CANONICAL_COMBINING_CLASS; + assertEquals("ccc=230: index -1", "null", getValueName(prop, 230, -1)); + assertEquals("ccc=230: short", "A", getValueName(prop, 230, SHORT)); + assertEquals("ccc=230: long", "Above", getValueName(prop, 230, LONG)); + assertEquals("ccc=230: index 2", "null", getValueName(prop, 230, 2)); + + prop = UCHAR_GENERAL_CATEGORY; + value = U_DECIMAL_DIGIT_NUMBER; + assertEquals("gc=Nd: index -1", "null", getValueName(prop, value, -1)); + assertEquals("gc=Nd: short", "Nd", getValueName(prop, value, SHORT)); + assertEquals("gc=Nd: long", "Decimal_Number", getValueName(prop, value, LONG)); + assertEquals("gc=Nd: index 2", "digit", getValueName(prop, value, 2)); + assertEquals("gc=Nd: index 3", "null", getValueName(prop, value, 3)); + + prop = UCHAR_GENERAL_CATEGORY_MASK; + value = U_GC_P_MASK; + assertEquals("gc=P mask: index -1", "null", getValueName(prop, value, -1)); + assertEquals("gc=P mask: short", "P", getValueName(prop, value, SHORT)); + assertEquals("gc=P mask: long", "Punctuation", getValueName(prop, value, LONG)); + assertEquals("gc=P mask: index 2", "punct", getValueName(prop, value, 2)); + assertEquals("gc=P mask: index 3", "null", getValueName(prop, value, 3)); +} diff --git a/icu4c/source/test/intltest/ucdtest.h b/icu4c/source/test/intltest/ucdtest.h index 8a5930643e9..4bbea3912da 100644 --- a/icu4c/source/test/intltest/ucdtest.h +++ b/icu4c/source/test/intltest/ucdtest.h @@ -31,7 +31,7 @@ class UnicodeTest: public IntlTest { public: UnicodeTest(); virtual ~UnicodeTest(); - + void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = nullptr ) override; void TestAdditionalProperties(); @@ -49,6 +49,7 @@ public: void TestInvalidCodePointFolding(); void TestBinaryCharacterProperties(); void 
TestIntCharacterProperties(); + void TestPropertyNames(); private: diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java index 51111c1cf56..4ecd17ba2f8 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java @@ -3894,4 +3894,93 @@ public final class UCharacterTest extends TestFmwk UCharacter.getIntPropertyValue(end, prop), range.getValue()); } } + + private static final String getPropName(int property, int nameChoice) { + try { + return UCharacter.getPropertyName(property, nameChoice); + } catch(IllegalArgumentException e) { + return "null"; + } + } + + private static final String getValueName(int property, int value, int nameChoice) { + try { + return UCharacter.getPropertyValueName(property, value, nameChoice); + } catch(IllegalArgumentException e) { + return "null"; + } + } + + @Test + public void TestPropertyNames() { + // Test names of certain properties & values. + // The UProperty.NameChoice is really an integer with only a couple of named constants. 
+ int prop = UProperty.WHITE_SPACE; + final int SHORT = UProperty.NameChoice.SHORT; + final int LONG = UProperty.NameChoice.LONG; + assertEquals("White_Space: index -1", "null", getPropName(prop, -1)); + assertEquals("White_Space: short", "WSpace", getPropName(prop, SHORT)); + assertEquals("White_Space: long", "White_Space", getPropName(prop, LONG)); + assertEquals("White_Space: index 2", "space", getPropName(prop, 2)); + assertEquals("White_Space: index 3", "null", getPropName(prop, 3)); + + prop = UProperty.SIMPLE_CASE_FOLDING; + assertEquals("Simple_Case_Folding: index -1", "null", getPropName(prop, -1)); + assertEquals("Simple_Case_Folding: short", "scf", getPropName(prop, SHORT)); + assertEquals("Simple_Case_Folding: long", "Simple_Case_Folding", getPropName(prop, LONG)); + assertEquals("Simple_Case_Folding: index 2", "sfc", getPropName(prop, 2)); + assertEquals("Simple_Case_Folding: index 3", "null", getPropName(prop, 3)); + + prop = UProperty.CASED; + assertEquals("Cased=Y: index -1", "null", getValueName(prop, 1, -1)); + assertEquals("Cased=Y: short", "Y", getValueName(prop, 1, SHORT)); + assertEquals("Cased=Y: long", "Yes", getValueName(prop, 1, LONG)); + assertEquals("Cased=Y: index 2", "T", getValueName(prop, 1, 2)); + assertEquals("Cased=Y: index 3", "True", getValueName(prop, 1, 3)); + assertEquals("Cased=Y: index 4", "null", getValueName(prop, 1, 4)); + + prop = UProperty.DECOMPOSITION_TYPE; + int value = UCharacter.DecompositionType.NOBREAK; + assertEquals("dt=Nb: index -1", "null", getValueName(prop, value, -1)); + assertEquals("dt=Nb: short", "Nb", getValueName(prop, value, SHORT)); + assertEquals("dt=Nb: long", "Nobreak", getValueName(prop, value, LONG)); + assertEquals("dt=Nb: index 2", "nb", getValueName(prop, value, 2)); + assertEquals("dt=Nb: index 3", "null", getValueName(prop, value, 3)); + + // Canonical_Combining_Class: + // The UCD inserts the numeric values in the second field of its + // PropertyValueAliases.txt lines. 
+ // In ICU, we don't treat these as names, + // they are just the numeric values returned by u_getCombiningClass(). + // We return the real short and long names for the usual choice constants. + prop = UProperty.CANONICAL_COMBINING_CLASS; + assertEquals("ccc=230: index -1", "null", getValueName(prop, 230, -1)); + assertEquals("ccc=230: short", "A", getValueName(prop, 230, SHORT)); + assertEquals("ccc=230: long", "Above", getValueName(prop, 230, LONG)); + assertEquals("ccc=230: index 2", "null", getValueName(prop, 230, 2)); + + prop = UProperty.GENERAL_CATEGORY; + value = UCharacterCategory.DECIMAL_DIGIT_NUMBER; + assertEquals("gc=Nd: index -1", "null", getValueName(prop, value, -1)); + assertEquals("gc=Nd: short", "Nd", getValueName(prop, value, SHORT)); + assertEquals("gc=Nd: long", "Decimal_Number", getValueName(prop, value, LONG)); + assertEquals("gc=Nd: index 2", "digit", getValueName(prop, value, 2)); + assertEquals("gc=Nd: index 3", "null", getValueName(prop, value, 3)); + + prop = UProperty.GENERAL_CATEGORY_MASK; + final int U_GC_P_MASK = + (1 << UCharacterCategory.DASH_PUNCTUATION) | + (1 << UCharacterCategory.START_PUNCTUATION) | + (1 << UCharacterCategory.END_PUNCTUATION) | + (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) | + (1 << UCharacterCategory.OTHER_PUNCTUATION) | + (1 << UCharacterCategory.INITIAL_PUNCTUATION) | + (1 << UCharacterCategory.FINAL_PUNCTUATION); + value = U_GC_P_MASK; + assertEquals("gc=P mask: index -1", "null", getValueName(prop, value, -1)); + assertEquals("gc=P mask: short", "P", getValueName(prop, value, SHORT)); + assertEquals("gc=P mask: long", "Punctuation", getValueName(prop, value, LONG)); + assertEquals("gc=P mask: index 2", "punct", getValueName(prop, value, 2)); + assertEquals("gc=P mask: index 3", "null", getValueName(prop, value, 3)); + } }