ICU-8246 add Normalizer2::getNFCInstance(), getNFKDInstance(), ...

X-SVN-Rev: 30994
This commit is contained in:
Markus Scherer 2011-12-01 00:43:35 +00:00
parent 7d66d16531
commit 3a86b119b0
9 changed files with 271 additions and 11 deletions

View file

@ -600,6 +600,31 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
return &((Normalizer2WithImpl *)norm2)->impl;
}
// Public accessor for the NFC instance.
// The request and errorCode are handed straight to Normalizer2Factory;
// whatever the factory returns is passed back to the caller unchanged.
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Normalizer2 *nfc = Normalizer2Factory::getNFCInstance(errorCode);
    return nfc;
}
// Public accessor for the NFD instance.
// The request and errorCode are handed straight to Normalizer2Factory;
// whatever the factory returns is passed back to the caller unchanged.
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Normalizer2 *nfd = Normalizer2Factory::getNFDInstance(errorCode);
    return nfd;
}
// Public accessor for the NFKC instance.
// The request and errorCode are handed straight to Normalizer2Factory;
// whatever the factory returns is passed back to the caller unchanged.
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Normalizer2 *nfkc = Normalizer2Factory::getNFKCInstance(errorCode);
    return nfkc;
}
// Public accessor for the NFKD instance.
// The request and errorCode are handed straight to Normalizer2Factory;
// whatever the factory returns is passed back to the caller unchanged.
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Normalizer2 *nfkd = Normalizer2Factory::getNFKDInstance(errorCode);
    return nfkd;
}
// Public accessor for the NFKC_Casefold instance.
// Note the naming difference: the public API spells it "NFKCCasefold",
// while the factory function it delegates to is getNFKC_CFInstance().
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Normalizer2 *nfkcCasefold = Normalizer2Factory::getNFKC_CFInstance(errorCode);
    return nfkcCasefold;
}
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
const char *name,
@ -682,6 +707,31 @@ U_NAMESPACE_END
U_NAMESPACE_USE
// C API counterpart of Normalizer2::getNFCInstance().
// pErrorCode is dereferenced unconditionally, so it must not be NULL.
// The C++ object pointer is returned as the C handle type via a plain cast.
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfc = Normalizer2::getNFCInstance(*pErrorCode);
    return (const UNormalizer2 *)nfc;
}
// C API counterpart of Normalizer2::getNFDInstance().
// pErrorCode is dereferenced unconditionally, so it must not be NULL.
// The C++ object pointer is returned as the C handle type via a plain cast.
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfd = Normalizer2::getNFDInstance(*pErrorCode);
    return (const UNormalizer2 *)nfd;
}
// C API counterpart of Normalizer2::getNFKCInstance().
// pErrorCode is dereferenced unconditionally, so it must not be NULL.
// The C++ object pointer is returned as the C handle type via a plain cast.
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkc = Normalizer2::getNFKCInstance(*pErrorCode);
    return (const UNormalizer2 *)nfkc;
}
// C API counterpart of Normalizer2::getNFKDInstance().
// pErrorCode is dereferenced unconditionally, so it must not be NULL.
// The C++ object pointer is returned as the C handle type via a plain cast.
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkd = Normalizer2::getNFKDInstance(*pErrorCode);
    return (const UNormalizer2 *)nfkd;
}
// C API counterpart of Normalizer2::getNFKCCasefoldInstance().
// pErrorCode is dereferenced unconditionally, so it must not be NULL.
// The C++ object pointer is returned as the C handle type via a plain cast.
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkcCasefold = Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return (const UNormalizer2 *)nfkcCasefold;
}
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
const char *name,

View file

@ -83,6 +83,76 @@ public:
*/
~Normalizer2();
/**
* Returns a Normalizer2 instance for Unicode NFC normalization.
* Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
* @draft ICU 49
*/
static const Normalizer2 *
getNFCInstance(UErrorCode &errorCode);
/**
* Returns a Normalizer2 instance for Unicode NFD normalization.
* Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
* @draft ICU 49
*/
static const Normalizer2 *
getNFDInstance(UErrorCode &errorCode);
/**
* Returns a Normalizer2 instance for Unicode NFKC normalization.
* Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
* @draft ICU 49
*/
static const Normalizer2 *
getNFKCInstance(UErrorCode &errorCode);
/**
* Returns a Normalizer2 instance for Unicode NFKD normalization.
* Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
* @draft ICU 49
*/
static const Normalizer2 *
getNFKDInstance(UErrorCode &errorCode);
/**
* Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
* Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested Normalizer2, if successful
* @draft ICU 49
*/
static const Normalizer2 *
getNFKCCasefoldInstance(UErrorCode &errorCode);
/**
* Returns a Normalizer2 instance which uses the specified data file
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)

View file

@ -118,6 +118,76 @@ typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer
#if !UCONFIG_NO_NORMALIZATION
/**
* Returns a UNormalizer2 instance for Unicode NFC normalization.
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @draft ICU 49
*/
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFCInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFD normalization.
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @draft ICU 49
*/
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFDInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFKC normalization.
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @draft ICU 49
*/
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFKD normalization.
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @draft ICU 49
*/
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the requested UNormalizer2, if successful
* @draft ICU 49
*/
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
/**
* Returns a UNormalizer2 instance which uses the specified data file
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)

View file

@ -701,7 +701,7 @@ void AlphabeticIndex::staticInit(UErrorCode &status) {
EMPTY_STRING = new UnicodeString();
nfkdNormalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, status);
nfkdNormalizer = Normalizer2::getNFKDInstance(status);
if (nfkdNormalizer == NULL) {
goto err;
}
@ -812,7 +812,7 @@ UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErr
ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
extras.addAll(expansions).removeAll(*TO_TRY);
if (extras.size() != 0) {
const Normalizer2 *normalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);
const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
UnicodeSetIterator extrasIter(extras);
while (extrasIter.next()) {
const UnicodeString &current = extrasIter.next();

View file

@ -1525,9 +1525,9 @@ TestGetRawDecomposition() {
int32_t length;
UErrorCode errorCode=U_ZERO_ERROR;
const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode);
const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
if(U_FAILURE(errorCode)) {
log_err_status(errorCode, "unorm2_getInstance(nfkc) failed: %s\n", u_errorName(errorCode));
log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
return;
}
/*
@ -1592,9 +1592,9 @@ TestAppendRestoreMiddle() {
static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
int32_t length;
UErrorCode errorCode=U_ZERO_ERROR;
const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode);
const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
if(U_FAILURE(errorCode)) {
log_err_status(errorCode, "unorm2_getInstance(nfc/COMPOSE) failed: %s\n", u_errorName(errorCode));
log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
return;
}
/*
@ -1621,4 +1621,74 @@ TestAppendRestoreMiddle() {
}
}
/*
 * Verifies that each of the convenience getters
 * (unorm2_getNFCInstance(), unorm2_getNFDInstance(), unorm2_getNFKCInstance(),
 * unorm2_getNFKDInstance(), unorm2_getNFKCCasefoldInstance())
 * returns an instance of the normalization form that its name promises.
 *
 * The same three-unit input is normalized with every instance.
 * It is chosen so that the five forms produce five different results:
 *   NFC            00A0 1E08
 *   NFD            00A0 0043 0327 0301
 *   NFKC           0020 1E08
 *   NFKD           0020 0043 0327 0301
 *   NFKC_Casefold  0020 1E09
 *
 * A getter failure is logged and ends the test early.
 * A wrong normalization result is logged and the test continues with
 * the next form, after resetting errorCode.
 */
static void
TestGetEasyToUseInstance() {
    static const UChar in[]={
        0xA0,  /* -> <noBreak> 0020 */
        0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
    };
    UChar out[32];
    int32_t length;

    /* NFC: no compatibility mapping, canonical composition. */
    UErrorCode errorCode=U_ZERO_ERROR;
    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
        return;
    }
    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
        log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
                (int)length, u_errorName(errorCode));
    }

    /* NFD: no compatibility mapping, canonical decomposition. */
    errorCode=U_ZERO_ERROR;
    n2=unorm2_getNFDInstance(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
        return;
    }
    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
        log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
                (int)length, u_errorName(errorCode));
    }

    /* NFKC: compatibility mapping (00A0 -> 0020), then composition. */
    errorCode=U_ZERO_ERROR;
    n2=unorm2_getNFKCInstance(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
        return;
    }
    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
        log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
                (int)length, u_errorName(errorCode));
    }

    /*
     * NFKD: compatibility mapping (00A0 -> 0020), fully decomposed.
     * The result has four code units, so the length must be checked against 4,
     * the same as for NFD above.
     */
    errorCode=U_ZERO_ERROR;
    n2=unorm2_getNFKDInstance(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
        return;
    }
    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
        log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
                (int)length, u_errorName(errorCode));
    }

    /* NFKC_Casefold: like NFKC, plus case folding (1E08 -> 1E09). */
    errorCode=U_ZERO_ERROR;
    n2=unorm2_getNFKCCasefoldInstance(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
        return;
    }
    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
        log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
                (int)length, u_errorName(errorCode));
    }
}
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -1324,8 +1324,8 @@ static void TestUnicodeData()
errorCode=U_ZERO_ERROR;
#if !UCONFIG_NO_NORMALIZATION
context.nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode);
context.nfkc=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode);
context.nfc=unorm2_getNFCInstance(&errorCode);
context.nfkc=unorm2_getNFKCInstance(&errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
return;

View file

@ -352,7 +352,7 @@ void LocalPointerTest::TestLocalXyzPointer() {
#endif /* UCONFIG_NO_FORMATTING */
#if !UCONFIG_NO_NORMALIZATION
const UNormalizer2 *nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode);
const UNormalizer2 *nfc=unorm2_getNFCInstance(errorCode);
UnicodeSet emptySet;
LocalUNormalizer2Pointer fn2(unorm2_openFiltered(nfc, emptySet.toUSet(), errorCode));
if(errorCode.logIfFailureAndReset("unorm2_openFiltered()")) {

View file

@ -1346,7 +1346,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorC
// For each character about which we are unsure, see if it changes when we add
// one of the back-combining characters.
const Normalizer2 *norm2=Normalizer2::getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode);
const Normalizer2 *norm2=Normalizer2::getNFCInstance(errorCode);
UnicodeString s;
iter.reset(*unsure);
while(iter.next()) {

View file

@ -340,7 +340,7 @@ void UnicodeTest::TestConsistency() {
* of the set for the first.
*/
IcuTestErrorCode errorCode(*this, "TestConsistency");
const Normalizer2 *nfd=Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode);
const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) {
dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n",