mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-8246 add Normalizer2::getNFCInstance(), getNFKDInstance(), ...
X-SVN-Rev: 30994
This commit is contained in:
parent
7d66d16531
commit
3a86b119b0
9 changed files with 271 additions and 11 deletions
|
@ -600,6 +600,31 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
|
|||
return &((Normalizer2WithImpl *)norm2)->impl;
|
||||
}
|
||||
|
||||
// Public convenience getter for the NFC instance.
// Forwards to Normalizer2Factory::getNFCInstance(); errorCode is passed through unchanged.
const Normalizer2 *Normalizer2::getNFCInstance(UErrorCode &errorCode)
{
    const Normalizer2 *nfc = Normalizer2Factory::getNFCInstance(errorCode);
    return nfc;
}
|
||||
|
||||
// Public convenience getter for the NFD instance.
// Forwards to Normalizer2Factory::getNFDInstance(); errorCode is passed through unchanged.
const Normalizer2 *Normalizer2::getNFDInstance(UErrorCode &errorCode)
{
    const Normalizer2 *nfd = Normalizer2Factory::getNFDInstance(errorCode);
    return nfd;
}
|
||||
|
||||
// Public convenience getter for the NFKC instance.
// Forwards to Normalizer2Factory::getNFKCInstance(); errorCode is passed through unchanged.
const Normalizer2 *Normalizer2::getNFKCInstance(UErrorCode &errorCode)
{
    const Normalizer2 *nfkc = Normalizer2Factory::getNFKCInstance(errorCode);
    return nfkc;
}
|
||||
|
||||
// Public convenience getter for the NFKD instance.
// Forwards to Normalizer2Factory::getNFKDInstance(); errorCode is passed through unchanged.
const Normalizer2 *Normalizer2::getNFKDInstance(UErrorCode &errorCode)
{
    const Normalizer2 *nfkd = Normalizer2Factory::getNFKDInstance(errorCode);
    return nfkd;
}
|
||||
|
||||
// Public convenience getter for the NFKC_Casefold instance.
// Note the naming difference: the factory method it forwards to is getNFKC_CFInstance().
const Normalizer2 *Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode)
{
    const Normalizer2 *nfkcCasefold = Normalizer2Factory::getNFKC_CFInstance(errorCode);
    return nfkcCasefold;
}
|
||||
|
||||
const Normalizer2 *
|
||||
Normalizer2::getInstance(const char *packageName,
|
||||
const char *name,
|
||||
|
@ -682,6 +707,31 @@ U_NAMESPACE_END
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* C API wrapper: returns the C++ NFC instance viewed through the C handle type UNormalizer2. */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfc = Normalizer2::getNFCInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfc);
}
|
||||
|
||||
/* C API wrapper: returns the C++ NFD instance viewed through the C handle type UNormalizer2. */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfd = Normalizer2::getNFDInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfd);
}
|
||||
|
||||
/* C API wrapper: returns the C++ NFKC instance viewed through the C handle type UNormalizer2. */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkc = Normalizer2::getNFKCInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfkc);
}
|
||||
|
||||
/* C API wrapper: returns the C++ NFKD instance viewed through the C handle type UNormalizer2. */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkd = Normalizer2::getNFKDInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfkd);
}
|
||||
|
||||
/* C API wrapper: returns the C++ NFKC_Casefold instance viewed through the C handle type UNormalizer2. */
U_DRAFT const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkcCasefold = Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfkcCasefold);
}
|
||||
|
||||
U_DRAFT const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getInstance(const char *packageName,
|
||||
const char *name,
|
||||
|
|
|
@ -83,6 +83,76 @@ public:
|
|||
*/
|
||||
~Normalizer2();
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFC normalization.
|
||||
* Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFCInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFD normalization.
|
||||
* Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFDInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKC normalization.
|
||||
* Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKCInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKD normalization.
|
||||
* Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKDInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
|
||||
* Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKCCasefoldInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance which uses the specified data file
|
||||
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
||||
|
|
|
@ -118,6 +118,76 @@ typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer
|
|||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFC normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
U_DRAFT const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFCInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFD normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
U_DRAFT const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFDInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFKC normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
U_DRAFT const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFKD normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
U_DRAFT const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKDInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
*/
|
||||
U_DRAFT const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance which uses the specified data file
|
||||
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
||||
|
|
|
@ -701,7 +701,7 @@ void AlphabeticIndex::staticInit(UErrorCode &status) {
|
|||
|
||||
EMPTY_STRING = new UnicodeString();
|
||||
|
||||
nfkdNormalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, status);
|
||||
nfkdNormalizer = Normalizer2::getNFKDInstance(status);
|
||||
if (nfkdNormalizer == NULL) {
|
||||
goto err;
|
||||
}
|
||||
|
@ -812,7 +812,7 @@ UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErr
|
|||
ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
|
||||
extras.addAll(expansions).removeAll(*TO_TRY);
|
||||
if (extras.size() != 0) {
|
||||
const Normalizer2 *normalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);
|
||||
const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
|
||||
UnicodeSetIterator extrasIter(extras);
|
||||
while (extrasIter.next()) {
|
||||
const UnicodeString &current = extrasIter.next();
|
||||
|
|
|
@ -1525,9 +1525,9 @@ TestGetRawDecomposition() {
|
|||
int32_t length;
|
||||
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode);
|
||||
const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getInstance(nfkc) failed: %s\n", u_errorName(errorCode));
|
||||
log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
/*
|
||||
|
@ -1592,9 +1592,9 @@ TestAppendRestoreMiddle() {
|
|||
static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
|
||||
int32_t length;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode);
|
||||
const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getInstance(nfc/COMPOSE) failed: %s\n", u_errorName(errorCode));
|
||||
log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
/*
|
||||
|
@ -1621,4 +1621,74 @@ TestAppendRestoreMiddle() {
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
TestGetEasyToUseInstance() {
|
||||
static const UChar in[]={
|
||||
0xA0, /* -> <noBreak> 0020 */
|
||||
0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
|
||||
};
|
||||
UChar out[32];
|
||||
int32_t length;
|
||||
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
|
||||
log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
n2=unorm2_getNFDInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
|
||||
log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
n2=unorm2_getNFKCInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
|
||||
log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
n2=unorm2_getNFKDInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
|
||||
log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
n2=unorm2_getNFKCCasefoldInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
|
||||
log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -1324,8 +1324,8 @@ static void TestUnicodeData()
|
|||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
context.nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, &errorCode);
|
||||
context.nfkc=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode);
|
||||
context.nfc=unorm2_getNFCInstance(&errorCode);
|
||||
context.nfkc=unorm2_getNFKCInstance(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
|
|
|
@ -352,7 +352,7 @@ void LocalPointerTest::TestLocalXyzPointer() {
|
|||
#endif /* UCONFIG_NO_FORMATTING */
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
const UNormalizer2 *nfc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode);
|
||||
const UNormalizer2 *nfc=unorm2_getNFCInstance(errorCode);
|
||||
UnicodeSet emptySet;
|
||||
LocalUNormalizer2Pointer fn2(unorm2_openFiltered(nfc, emptySet.toUSet(), errorCode));
|
||||
if(errorCode.logIfFailureAndReset("unorm2_openFiltered()")) {
|
||||
|
|
|
@ -1346,7 +1346,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorC
|
|||
|
||||
// For each character about which we are unsure, see if it changes when we add
|
||||
// one of the back-combining characters.
|
||||
const Normalizer2 *norm2=Normalizer2::getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode);
|
||||
const Normalizer2 *norm2=Normalizer2::getNFCInstance(errorCode);
|
||||
UnicodeString s;
|
||||
iter.reset(*unsure);
|
||||
while(iter.next()) {
|
||||
|
|
|
@ -340,7 +340,7 @@ void UnicodeTest::TestConsistency() {
|
|||
* of the set for the first.
|
||||
*/
|
||||
IcuTestErrorCode errorCode(*this, "TestConsistency");
|
||||
const Normalizer2 *nfd=Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode);
|
||||
const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
|
||||
const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
|
||||
if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) {
|
||||
dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n",
|
||||
|
|
Loading…
Add table
Reference in a new issue