mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-8246 add Normalizer2.getNFCInstance(), getNFKDInstance(), ...
X-SVN-Rev: 30996
This commit is contained in:
parent
c50c6a20d7
commit
81e9b13f7b
8 changed files with 121 additions and 19 deletions
|
@ -105,6 +105,66 @@ public abstract class Normalizer2 {
|
|||
COMPOSE_CONTIGUOUS
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFC normalization.
|
||||
* Same as getInstance(null, "nfc", Mode.COMPOSE).
|
||||
* Returns an unmodifiable singleton instance.
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Normalizer2 getNFCInstance() {
|
||||
return Norm2AllModes.getNFCInstance().comp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFD normalization.
|
||||
* Same as getInstance(null, "nfc", Mode.DECOMPOSE); NFD is the decompose mode of the "nfc" data.
|
||||
* Returns an unmodifiable singleton instance.
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Normalizer2 getNFDInstance() {
|
||||
return Norm2AllModes.getNFCInstance().decomp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKC normalization.
|
||||
* Same as getInstance(null, "nfkc", Mode.COMPOSE).
|
||||
* Returns an unmodifiable singleton instance.
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Normalizer2 getNFKCInstance() {
|
||||
return Norm2AllModes.getNFKCInstance().comp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKD normalization.
|
||||
* Same as getInstance(null, "nfkc", Mode.DECOMPOSE); NFKD is the decompose mode of the "nfkc" data.
|
||||
* Returns an unmodifiable singleton instance.
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Normalizer2 getNFKDInstance() {
|
||||
return Norm2AllModes.getNFKCInstance().decomp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
|
||||
* Same as getInstance(null, "nfkc_cf", Mode.COMPOSE).
|
||||
* Returns an unmodifiable singleton instance.
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 49
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Normalizer2 getNFKCCasefoldInstance() {
|
||||
return Norm2AllModes.getNFKC_CFInstance().comp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance which uses the specified data file
|
||||
* (an ICU data file if data=null, or else custom binary data)
|
||||
|
|
|
@ -10,7 +10,6 @@ import java.util.HashSet;
|
|||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.lang.CharSequences;
|
||||
import com.ibm.icu.text.Normalizer2.Mode;
|
||||
|
||||
/**
|
||||
* Simple internal utility class for helping with getSource/TargetSet
|
||||
|
@ -20,7 +19,7 @@ class SourceTargetUtility {
|
|||
final UnicodeSet sourceCache;
|
||||
final Set<String> sourceStrings;
|
||||
static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();
|
||||
static Normalizer2 NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
|
||||
static Normalizer2 NFC = Normalizer2.getNFCInstance();
|
||||
//static final UnicodeSet TRAILING_COMBINING = new UnicodeSet();
|
||||
|
||||
public SourceTargetUtility(Transform<String, String> transform) {
|
||||
|
|
|
@ -30,7 +30,6 @@ import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
|
|||
import com.ibm.icu.text.AlphabeticIndex.Record;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.text.Normalizer2.Mode;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
@ -662,7 +661,7 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
|
||||
extras.addAll(expansions).removeAll(TO_TRY);
|
||||
if (extras.size() != 0) {
|
||||
Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
|
||||
Normalizer2 normalizer = Normalizer2.getNFKCInstance();
|
||||
for (String current : extras) {
|
||||
if (!TO_TRY.containsAll(current))
|
||||
continue;
|
||||
|
|
|
@ -678,8 +678,8 @@ public final class UCharacterTest extends TestFmwk
|
|||
final String DIR =
|
||||
"L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN ";
|
||||
|
||||
Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
|
||||
Normalizer2 nfkc = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
|
||||
Normalizer2 nfc = Normalizer2.getNFCInstance();
|
||||
Normalizer2 nfkc = Normalizer2.getNFKCInstance();
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -2492,7 +2492,7 @@ public final class UCharacterTest extends TestFmwk
|
|||
* In general, the set for the middle such character should be a subset
|
||||
* of the set for the first.
|
||||
*/
|
||||
Normalizer2 norm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
|
||||
Normalizer2 norm2=Normalizer2.getNFDInstance();
|
||||
set1=new UnicodeSet();
|
||||
Norm2AllModes.getNFCInstance().impl.
|
||||
ensureCanonIterData().getCanonStartSet(0x49, set1);
|
||||
|
|
|
@ -2015,7 +2015,7 @@ public class BasicTest extends TestFmwk {
|
|||
}
|
||||
|
||||
// test all of these precomposed characters
|
||||
Normalizer2 nfcNorm2 = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
|
||||
Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
|
||||
UnicodeSetIterator it = new UnicodeSetIterator(set);
|
||||
int c;
|
||||
while(it.next() && (c=it.codepoint)!=UnicodeSetIterator.IS_STRING) {
|
||||
|
@ -2322,7 +2322,7 @@ public class BasicTest extends TestFmwk {
|
|||
|
||||
// For each character about which we are unsure, see if it changes when we add
|
||||
// one of the back-combining characters.
|
||||
Normalizer2 norm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
|
||||
Normalizer2 norm2=Normalizer2.getNFCInstance();
|
||||
StringBuilder s=new StringBuilder();
|
||||
iter.reset(unsure);
|
||||
while(iter.next()) {
|
||||
|
@ -2526,7 +2526,7 @@ public class BasicTest extends TestFmwk {
|
|||
}
|
||||
|
||||
public void TestGetRawDecomposition() {
|
||||
Normalizer2 n2=Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
|
||||
Normalizer2 n2=Normalizer2.getNFKCInstance();
|
||||
/*
|
||||
* Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
|
||||
* without recursive decomposition.
|
||||
|
@ -2637,7 +2637,7 @@ public class BasicTest extends TestFmwk {
|
|||
}
|
||||
|
||||
public void TestFilteredNormalizer2() {
|
||||
Normalizer2 nfcNorm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
|
||||
Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
|
||||
UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
|
||||
FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
|
||||
int c;
|
||||
|
@ -2650,4 +2650,45 @@ public class BasicTest extends TestFmwk {
|
|||
expectedCC, cc);
|
||||
}
|
||||
}
|
||||
|
||||
public void TestGetEasyToUseInstance() {
|
||||
// Test input string:
|
||||
// U+00A0 -> <noBreak> 0020
|
||||
// U+00C7 0301 = 1E08 = 0043 0327 0301
|
||||
String in="\u00A0\u00C7\u0301";
|
||||
Normalizer2 n2=Normalizer2.getNFCInstance();
|
||||
String out=n2.normalize(in);
|
||||
assertEquals(
|
||||
"getNFCInstance() did not return an NFC instance " +
|
||||
"(normalizes to " + prettify(out) + ')',
|
||||
"\u00A0\u1E08", out);
|
||||
|
||||
n2=Normalizer2.getNFDInstance();
|
||||
out=n2.normalize(in);
|
||||
assertEquals(
|
||||
"getNFDInstance() did not return an NFD instance " +
|
||||
"(normalizes to " + prettify(out) + ')',
|
||||
"\u00A0C\u0327\u0301", out);
|
||||
|
||||
n2=Normalizer2.getNFKCInstance();
|
||||
out=n2.normalize(in);
|
||||
assertEquals(
|
||||
"getNFKCInstance() did not return an NFKC instance " +
|
||||
"(normalizes to " + prettify(out) + ')',
|
||||
" \u1E08", out);
|
||||
|
||||
n2=Normalizer2.getNFKDInstance();
|
||||
out=n2.normalize(in);
|
||||
assertEquals(
|
||||
"getNFKDInstance() did not return an NFKD instance " +
|
||||
"(normalizes to " + prettify(out) + ')',
|
||||
" C\u0327\u0301", out);
|
||||
|
||||
n2=Normalizer2.getNFKCCasefoldInstance();
|
||||
out=n2.normalize(in);
|
||||
assertEquals(
|
||||
"getNFKCCasefoldInstance() did not return an NFKC_Casefold instance " +
|
||||
"(normalizes to " + prettify(out) + ')',
|
||||
" \u1E09", out);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -484,7 +484,7 @@ public class SpoofCheckerTest extends TestFmwk {
|
|||
"\\ufeff?" + "(?:([0-9A-F\\s]+);([0-9A-F\\s]+);\\s*(SL|ML|SA|MA)\\s*(?:#.*?)?$)"
|
||||
+ "|\\ufeff?(\\s*(?:#.*)?)"). // Comment line
|
||||
matcher("");
|
||||
Normalizer2 normalizer = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
|
||||
Normalizer2 normalizer = Normalizer2.getNFDInstance();
|
||||
int lineNum = 0;
|
||||
String inputLine;
|
||||
while ((inputLine = confusablesRdr.readLine()) != null) {
|
||||
|
|
|
@ -25,7 +25,6 @@ import com.ibm.icu.lang.UCharacter;
|
|||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.CanonicalIterator;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.text.Normalizer2.Mode;
|
||||
import com.ibm.icu.text.Replaceable;
|
||||
import com.ibm.icu.text.ReplaceableString;
|
||||
import com.ibm.icu.text.StringTransform;
|
||||
|
@ -3016,8 +3015,8 @@ public class TransliteratorTest extends TestFmwk {
|
|||
public void TestSourceTargetSet2() {
|
||||
|
||||
|
||||
Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
|
||||
Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Mode.DECOMPOSE);
|
||||
Normalizer2 nfc = Normalizer2.getNFCInstance();
|
||||
Normalizer2 nfd = Normalizer2.getNFDInstance();
|
||||
|
||||
// Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
|
||||
// UnicodeSet nfkdSource = new UnicodeSet();
|
||||
|
|
|
@ -19,10 +19,14 @@ public class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory {
|
|||
|
||||
public UnicodeTransform getInstance(Type type) {
|
||||
switch (type) {
|
||||
case NFC: case NFKC:
|
||||
return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type.toString(), Mode.COMPOSE));
|
||||
case NFD: case NFKD:
|
||||
return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type == Type.NFD ? "NFC" : "NFKC", Mode.DECOMPOSE));
|
||||
case NFC:
|
||||
return new IcuUnicodeNormalizer(Normalizer2.getNFCInstance());
|
||||
case NFKC:
|
||||
return new IcuUnicodeNormalizer(Normalizer2.getNFKCInstance());
|
||||
case NFD:
|
||||
return new IcuUnicodeNormalizer(Normalizer2.getNFDInstance());
|
||||
case NFKD:
|
||||
return new IcuUnicodeNormalizer(Normalizer2.getNFKDInstance());
|
||||
case CASEFOLD:
|
||||
return new CaseFolder();
|
||||
default:
|
||||
|
|
Loading…
Add table
Reference in a new issue