diff --git a/icu4c/source/common/normalizer2.cpp b/icu4c/source/common/normalizer2.cpp index 60047fef73e..527731da6c4 100644 --- a/icu4c/source/common/normalizer2.cpp +++ b/icu4c/source/common/normalizer2.cpp @@ -600,6 +600,31 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) { return &((Normalizer2WithImpl *)norm2)->impl; } +const Normalizer2 * +Normalizer2::getNFCInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFCInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFDInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFDInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFKCInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFKCInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFKDInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFKDInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFKC_CFInstance(errorCode); +} + const Normalizer2 * Normalizer2::getInstance(const char *packageName, const char *name, @@ -682,6 +707,31 @@ U_NAMESPACE_END U_NAMESPACE_USE +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFCInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); +} + +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFDInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); +} + +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); +} + +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFKDInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); +} + +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 
*)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); +} + U_DRAFT const UNormalizer2 * U_EXPORT2 unorm2_getInstance(const char *packageName, const char *name, diff --git a/icu4c/source/common/unicode/normalizer2.h b/icu4c/source/common/unicode/normalizer2.h index 5f208ebe250..a76eec12283 100644 --- a/icu4c/source/common/unicode/normalizer2.h +++ b/icu4c/source/common/unicode/normalizer2.h @@ -83,6 +83,76 @@ public: */ ~Normalizer2(); + /** + * Returns a Normalizer2 instance for Unicode NFC normalization. + * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFCInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFD normalization. + * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFDInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKC normalization. + * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. 
Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFKCInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKD normalization. + * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFKDInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. + * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) 
+ * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFKCCasefoldInstance(UErrorCode &errorCode); + /** * Returns a Normalizer2 instance which uses the specified data file * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) diff --git a/icu4c/source/common/unicode/unorm2.h b/icu4c/source/common/unicode/unorm2.h index 49ae17e80bd..6f002509a58 100644 --- a/icu4c/source/common/unicode/unorm2.h +++ b/icu4c/source/common/unicode/unorm2.h @@ -118,6 +118,76 @@ typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer #if !UCONFIG_NO_NORMALIZATION +/** + * Returns a UNormalizer2 instance for Unicode NFC normalization. + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFCInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFD normalization. + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFDInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKC normalization. 
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKD normalization. + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFKDInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. + * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) 
+ * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); + /** * Returns a UNormalizer2 instance which uses the specified data file * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) diff --git a/icu4c/source/i18n/alphaindex.cpp b/icu4c/source/i18n/alphaindex.cpp index 2a9a16993ec..eb294e64a1a 100644 --- a/icu4c/source/i18n/alphaindex.cpp +++ b/icu4c/source/i18n/alphaindex.cpp @@ -701,7 +701,7 @@ void AlphabeticIndex::staticInit(UErrorCode &status) { EMPTY_STRING = new UnicodeString(); - nfkdNormalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, status); + nfkdNormalizer = Normalizer2::getNFKDInstance(status); if (nfkdNormalizer == NULL) { goto err; } @@ -812,7 +812,7 @@ UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErr ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status); extras.addAll(expansions).removeAll(*TO_TRY); if (extras.size() != 0) { - const Normalizer2 *normalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); + const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status); UnicodeSetIterator extrasIter(extras); while (extrasIter.next()) { const UnicodeString &current = extrasIter.next(); diff --git a/icu4c/source/test/cintltst/cnormtst.c b/icu4c/source/test/cintltst/cnormtst.c index 14c17e569a9..87240406667 100644 --- a/icu4c/source/test/cintltst/cnormtst.c +++ b/icu4c/source/test/cintltst/cnormtst.c @@ -1525,9 +1525,9 @@ TestGetRawDecomposition() { int32_t length; UErrorCode errorCode=U_ZERO_ERROR; - const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode); + const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode); if(U_FAILURE(errorCode)) { - log_err_status(errorCode, "unorm2_getInstance(nfkc) failed: %s\n", u_errorName(errorCode)); + log_err_status(errorCode, 
"unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); return; } /* @@ -1592,9 +1592,9 @@ TestAppendRestoreMiddle() { static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 }; int32_t length; UErrorCode errorCode=U_ZERO_ERROR; - const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode); + const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); if(U_FAILURE(errorCode)) { - log_err_status(errorCode, "unorm2_getInstance(nfc/COMPOSE) failed: %s\n", u_errorName(errorCode)); + log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); return; } /* @@ -1621,4 +1621,74 @@ TestAppendRestoreMiddle() { } } +static void +TestGetEasyToUseInstance() { + static const UChar in[]={ + 0xA0, /* -> 0020 */ + 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */ + }; + UChar out[32]; + int32_t length; + + UErrorCode errorCode=U_ZERO_ERROR; + const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode)); + return; + } + length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); + if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) { + log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n", + (int)length, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + n2=unorm2_getNFDInstance(&errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode)); + return; + } + length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); + if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { + log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n", + (int)length, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + 
n2=unorm2_getNFKCInstance(&errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode)); + return; + } + length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); + if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) { + log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n", + (int)length, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + n2=unorm2_getNFKDInstance(&errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode)); + return; + } + length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); + if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) { /* NFKD output is 0020 0043 0327 0301: four code units, all four compared here */ + log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n", + (int)length, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + n2=unorm2_getNFKCCasefoldInstance(&errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode)); + return; + } + length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode); + if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) { + log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n", + (int)length, u_errorName(errorCode)); + } +} + #endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c index 0e44a09b2c8..b0db2821f13 100644 --- a/icu4c/source/test/cintltst/cucdtst.c +++ b/icu4c/source/test/cintltst/cucdtst.c @@ -1324,8 +1324,8 @@ static void TestUnicodeData() errorCode=U_ZERO_ERROR; #if !UCONFIG_NO_NORMALIZATION - context.nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode); - 
context.nfkc=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode); + context.nfc=unorm2_getNFCInstance(&errorCode); + context.nfkc=unorm2_getNFKCInstance(&errorCode); if(U_FAILURE(errorCode)) { log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode)); return; diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp index eb22d610d05..9353eb57b53 100644 --- a/icu4c/source/test/intltest/itutil.cpp +++ b/icu4c/source/test/intltest/itutil.cpp @@ -352,7 +352,7 @@ void LocalPointerTest::TestLocalXyzPointer() { #endif /* UCONFIG_NO_FORMATTING */ #if !UCONFIG_NO_NORMALIZATION - const UNormalizer2 *nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode); + const UNormalizer2 *nfc=unorm2_getNFCInstance(errorCode); UnicodeSet emptySet; LocalUNormalizer2Pointer fn2(unorm2_openFiltered(nfc, emptySet.toUSet(), errorCode)); if(errorCode.logIfFailureAndReset("unorm2_openFiltered()")) { diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp index 4202f4c676d..cde6dbe88c7 100644 --- a/icu4c/source/test/intltest/tstnorm.cpp +++ b/icu4c/source/test/intltest/tstnorm.cpp @@ -1346,7 +1346,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorC // For each character about which we are unsure, see if it changes when we add // one of the back-combining characters. - const Normalizer2 *norm2=Normalizer2::getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode); + const Normalizer2 *norm2=Normalizer2::getNFCInstance(errorCode); UnicodeString s; iter.reset(*unsure); while(iter.next()) { diff --git a/icu4c/source/test/intltest/ucdtest.cpp b/icu4c/source/test/intltest/ucdtest.cpp index 36f15b1c9b3..c1b5f960c2d 100644 --- a/icu4c/source/test/intltest/ucdtest.cpp +++ b/icu4c/source/test/intltest/ucdtest.cpp @@ -340,7 +340,7 @@ void UnicodeTest::TestConsistency() { * of the set for the first. 
*/ IcuTestErrorCode errorCode(*this, "TestConsistency"); - const Normalizer2 *nfd=Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode); + const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) { dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n",