mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-7264 merge Unicode 6.0 into trunk from branches/markus/uni60 -r 28341:28656
X-SVN-Rev: 28662
This commit is contained in:
parent
b5e1330176
commit
114432eec8
40 changed files with 3435 additions and 1952 deletions
|
@ -47,7 +47,7 @@ final class CollationParsedRuleBuilder {
|
|||
* thrown when argument rules have an invalid syntax
|
||||
*/
|
||||
CollationParsedRuleBuilder(String rules) throws ParseException {
|
||||
m_nfcImpl_.getFCDTrie(); // initialize the optional FCD trie
|
||||
m_nfcImpl_.getFCDTrie(); // initialize the optional FCD trie
|
||||
m_parser_ = new CollationRuleParser(rules);
|
||||
m_parser_.assembleTokenList();
|
||||
m_utilColEIter_ = RuleBasedCollator.UCA_
|
||||
|
|
|
@ -45,7 +45,6 @@ public class CharTrie extends Trie
|
|||
throw new IllegalArgumentException(
|
||||
"Data given does not belong to a char trie.");
|
||||
}
|
||||
m_friendAgent_ = new FriendAgent();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -105,53 +104,10 @@ public class CharTrie extends Trie
|
|||
m_data_[i]=(char)leadUnitValue;
|
||||
}
|
||||
}
|
||||
|
||||
m_friendAgent_ = new FriendAgent();
|
||||
}
|
||||
|
||||
/**
|
||||
* Java friend implementation
|
||||
*/
|
||||
public class FriendAgent
|
||||
{
|
||||
/**
|
||||
* Gives out the index array of the trie
|
||||
* @return index array of trie
|
||||
*/
|
||||
public char[] getPrivateIndex()
|
||||
{
|
||||
return m_index_;
|
||||
}
|
||||
/**
|
||||
* Gives out the data array of the trie
|
||||
* @return data array of trie
|
||||
*/
|
||||
public char[] getPrivateData()
|
||||
{
|
||||
return m_data_;
|
||||
}
|
||||
/**
|
||||
* Gives out the data offset in the trie
|
||||
* @return data offset in the trie
|
||||
*/
|
||||
public int getPrivateInitialValue()
|
||||
{
|
||||
return m_initialValue_;
|
||||
}
|
||||
}
|
||||
|
||||
// public methods --------------------------------------------------
|
||||
|
||||
/**
|
||||
* Java friend implementation
|
||||
* To store the index and data array into the argument.
|
||||
* @param friend java friend UCharacterProperty object to store the array
|
||||
*/
|
||||
public void putIndexData(UCharacterProperty friend)
|
||||
{
|
||||
friend.setIndexData(m_friendAgent_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the value associated with the codepoint.
|
||||
* If no value is associated with the codepoint, a default value will be
|
||||
|
@ -350,8 +306,4 @@ public class CharTrie extends Trie
|
|||
* Array of char data
|
||||
*/
|
||||
private char m_data_[];
|
||||
/**
|
||||
* Agent for friends
|
||||
*/
|
||||
private FriendAgent m_friendAgent_;
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
public final class ICUBinary
|
||||
{
|
||||
// public inner interface ------------------------------------------------
|
||||
|
@ -131,7 +133,19 @@ public final class ICUBinary
|
|||
}
|
||||
return unicodeVersion;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Same as readHeader(), but returns a VersionInfo rather than a byte[].
|
||||
*/
|
||||
public static final VersionInfo readHeaderAndDataVersion(InputStream inputStream,
|
||||
byte dataFormatIDExpected[],
|
||||
Authenticate authenticate)
|
||||
throws IOException {
|
||||
byte[] dataVersion = readHeader(inputStream, dataFormatIDExpected, authenticate);
|
||||
return VersionInfo.getInstance(dataVersion[0], dataVersion[1],
|
||||
dataVersion[2], dataVersion[3]);
|
||||
}
|
||||
|
||||
// private variables -------------------------------------------------
|
||||
|
||||
/**
|
||||
|
|
|
@ -385,23 +385,19 @@ public final class Normalizer2Impl {
|
|||
|
||||
public Normalizer2Impl() {}
|
||||
|
||||
private static final class Reader implements ICUBinary.Authenticate {
|
||||
private static final class IsAcceptable implements ICUBinary.Authenticate {
|
||||
// @Override when we switch to Java 6
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0]==1;
|
||||
}
|
||||
public VersionInfo readHeader(InputStream data) throws IOException {
|
||||
byte[] dataVersion=ICUBinary.readHeader(data, DATA_FORMAT, this);
|
||||
return VersionInfo.getInstance(dataVersion[0], dataVersion[1],
|
||||
dataVersion[2], dataVersion[3]);
|
||||
}
|
||||
private static final byte DATA_FORMAT[] = { 0x4e, 0x72, 0x6d, 0x32 }; // "Nrm2"
|
||||
}
|
||||
private static final Reader READER=new Reader();
|
||||
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
|
||||
private static final byte DATA_FORMAT[] = { 0x4e, 0x72, 0x6d, 0x32 }; // "Nrm2"
|
||||
|
||||
public Normalizer2Impl load(InputStream data) {
|
||||
try {
|
||||
BufferedInputStream bis=new BufferedInputStream(data);
|
||||
dataVersion=READER.readHeader(bis);
|
||||
dataVersion=ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
|
||||
DataInputStream ds=new DataInputStream(bis);
|
||||
int indexesLength=ds.readInt()/4; // inIndexes[IX_NORM_TRIE_OFFSET]/4
|
||||
if(indexesLength<=IX_MIN_MAYBE_YES) {
|
||||
|
|
|
@ -23,11 +23,11 @@ import java.io.BufferedInputStream;
|
|||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
|
||||
public final class UBiDiProps {
|
||||
// constructors etc. --------------------------------------------------- ***
|
||||
|
@ -41,13 +41,6 @@ public final class UBiDiProps {
|
|||
is.close();
|
||||
}
|
||||
|
||||
private UBiDiProps(boolean makeDummy) { // ignore makeDummy, only creates a unique signature
|
||||
indexes=new int[IX_TOP];
|
||||
indexes[0]=IX_TOP;
|
||||
trie=new CharTrie(0, 0, null); // dummy trie, always returns 0
|
||||
}
|
||||
|
||||
|
||||
private void readData(InputStream is) throws IOException {
|
||||
DataInputStream inputStream=new DataInputStream(is);
|
||||
|
||||
|
@ -57,7 +50,7 @@ public final class UBiDiProps {
|
|||
// read indexes[]
|
||||
int i, count;
|
||||
count=inputStream.readInt();
|
||||
if(count<IX_INDEX_TOP) {
|
||||
if(count<IX_TOP) {
|
||||
throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
|
||||
}
|
||||
indexes=new int[count];
|
||||
|
@ -68,7 +61,14 @@ public final class UBiDiProps {
|
|||
}
|
||||
|
||||
// read the trie
|
||||
trie=new CharTrie(inputStream, null);
|
||||
trie=Trie2_16.createFromSerialized(inputStream);
|
||||
int expectedTrieLength=indexes[IX_TRIE_SIZE];
|
||||
int trieLength=trie.getSerializedLength();
|
||||
if(trieLength>expectedTrieLength) {
|
||||
throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
|
||||
}
|
||||
// skip padding after trie bytes
|
||||
inputStream.skipBytes(expectedTrieLength-trieLength);
|
||||
|
||||
// read mirrors[]
|
||||
count=indexes[IX_MIRROR_LENGTH];
|
||||
|
@ -90,43 +90,10 @@ public final class UBiDiProps {
|
|||
// implement ICUBinary.Authenticate
|
||||
private final class IsAcceptable implements ICUBinary.Authenticate {
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0]==1 &&
|
||||
version[2]==Trie.INDEX_STAGE_1_SHIFT_ && version[3]==Trie.INDEX_STAGE_2_SHIFT_;
|
||||
return version[0]==2;
|
||||
}
|
||||
}
|
||||
|
||||
// port of ubidi_getSingleton()
|
||||
//
|
||||
// Note: Do we really need this API?
|
||||
public static UBiDiProps getSingleton() throws IOException {
|
||||
if (FULL_INSTANCE == null) {
|
||||
synchronized (UBiDiProps.class) {
|
||||
if (FULL_INSTANCE == null) {
|
||||
FULL_INSTANCE = new UBiDiProps();
|
||||
}
|
||||
}
|
||||
}
|
||||
return FULL_INSTANCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a singleton dummy object, one that works with no real data.
|
||||
* This can be used when the real data is not available.
|
||||
* Using the dummy can reduce checks for available data after an initial failure.
|
||||
* Port of ucase_getDummy().
|
||||
*/
|
||||
// Note: do we really need this API?
|
||||
public static UBiDiProps getDummy() {
|
||||
if (DUMMY_INSTANCE == null) {
|
||||
synchronized (UBiDiProps.class) {
|
||||
if (DUMMY_INSTANCE == null) {
|
||||
DUMMY_INSTANCE = new UBiDiProps(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
return DUMMY_INSTANCE;
|
||||
}
|
||||
|
||||
// set of property starts for UnicodeSet ------------------------------- ***
|
||||
|
||||
public final void addPropertyStarts(UnicodeSet set) {
|
||||
|
@ -136,11 +103,10 @@ public final class UBiDiProps {
|
|||
byte prev, jg;
|
||||
|
||||
/* add the start code point of each same-value range of the trie */
|
||||
TrieIterator iter=new TrieIterator(trie);
|
||||
RangeValueIterator.Element element=new RangeValueIterator.Element();
|
||||
|
||||
while(iter.next(element)){
|
||||
set.add(element.start);
|
||||
Iterator<Trie2.Range> trieIterator=trie.iterator();
|
||||
Trie2.Range range;
|
||||
while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
|
||||
set.add(range.startCodePoint);
|
||||
}
|
||||
|
||||
/* add the code points from the bidi mirroring table */
|
||||
|
@ -192,18 +158,18 @@ public final class UBiDiProps {
|
|||
}
|
||||
|
||||
public final int getClass(int c) {
|
||||
return getClassFromProps(trie.getCodePointValue(c));
|
||||
return getClassFromProps(trie.get(c));
|
||||
}
|
||||
|
||||
public final boolean isMirrored(int c) {
|
||||
return getFlagFromProps(trie.getCodePointValue(c), IS_MIRRORED_SHIFT);
|
||||
return getFlagFromProps(trie.get(c), IS_MIRRORED_SHIFT);
|
||||
}
|
||||
|
||||
public final int getMirror(int c) {
|
||||
int props;
|
||||
int delta;
|
||||
|
||||
props=trie.getCodePointValue(c);
|
||||
props=trie.get(c);
|
||||
delta=((short)props)>>MIRROR_DELTA_SHIFT;
|
||||
if(delta!=ESC_MIRROR_DELTA) {
|
||||
return c+delta;
|
||||
|
@ -233,15 +199,15 @@ public final class UBiDiProps {
|
|||
}
|
||||
|
||||
public final boolean isBidiControl(int c) {
|
||||
return getFlagFromProps(trie.getCodePointValue(c), BIDI_CONTROL_SHIFT);
|
||||
return getFlagFromProps(trie.get(c), BIDI_CONTROL_SHIFT);
|
||||
}
|
||||
|
||||
public final boolean isJoinControl(int c) {
|
||||
return getFlagFromProps(trie.getCodePointValue(c), JOIN_CONTROL_SHIFT);
|
||||
return getFlagFromProps(trie.get(c), JOIN_CONTROL_SHIFT);
|
||||
}
|
||||
|
||||
public final int getJoiningType(int c) {
|
||||
return (trie.getCodePointValue(c)&JT_MASK)>>JT_SHIFT;
|
||||
return (trie.get(c)&JT_MASK)>>JT_SHIFT;
|
||||
}
|
||||
|
||||
public final int getJoiningGroup(int c) {
|
||||
|
@ -261,7 +227,7 @@ public final class UBiDiProps {
|
|||
private int mirrors[];
|
||||
private byte jgArray[];
|
||||
|
||||
private CharTrie trie;
|
||||
private Trie2_16 trie;
|
||||
|
||||
// data format constants ----------------------------------------------- ***
|
||||
private static final String DATA_NAME="ubidi";
|
||||
|
@ -272,9 +238,9 @@ public final class UBiDiProps {
|
|||
private static final byte FMT[]={ 0x42, 0x69, 0x44, 0x69 };
|
||||
|
||||
/* indexes into indexes[] */
|
||||
private static final int IX_INDEX_TOP=0;
|
||||
//private static final int IX_INDEX_TOP=0;
|
||||
//private static final int IX_LENGTH=1;
|
||||
//private static final int IX_TRIE_SIZE=2;
|
||||
private static final int IX_TRIE_SIZE=2;
|
||||
private static final int IX_MIRROR_LENGTH=3;
|
||||
|
||||
private static final int IX_JG_START=4;
|
||||
|
@ -333,21 +299,13 @@ public final class UBiDiProps {
|
|||
*/
|
||||
public static final UBiDiProps INSTANCE;
|
||||
|
||||
private static volatile UBiDiProps FULL_INSTANCE;
|
||||
private static volatile UBiDiProps DUMMY_INSTANCE;
|
||||
|
||||
// This static initializer block must be placed after
|
||||
// other static member initialization
|
||||
static {
|
||||
UBiDiProps bp;
|
||||
try {
|
||||
bp = new UBiDiProps();
|
||||
FULL_INSTANCE = bp;
|
||||
INSTANCE = new UBiDiProps();
|
||||
} catch (IOException e) {
|
||||
// creating dummy
|
||||
bp = new UBiDiProps(true);
|
||||
DUMMY_INSTANCE = bp;
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
INSTANCE = bp;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,12 +23,12 @@ import java.io.BufferedInputStream;
|
|||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
public final class UCaseProps {
|
||||
|
@ -44,12 +44,6 @@ public final class UCaseProps {
|
|||
is.close();
|
||||
}
|
||||
|
||||
private UCaseProps(boolean makeDummy) { // ignore makeDummy, only creates a unique signature
|
||||
indexes=new int[IX_TOP];
|
||||
indexes[0]=IX_TOP;
|
||||
trie=new CharTrie(0, 0, null); // dummy trie, always returns 0
|
||||
}
|
||||
|
||||
private final void readData(InputStream is) throws IOException {
|
||||
DataInputStream inputStream=new DataInputStream(is);
|
||||
|
||||
|
@ -59,7 +53,7 @@ public final class UCaseProps {
|
|||
// read indexes[]
|
||||
int i, count;
|
||||
count=inputStream.readInt();
|
||||
if(count<IX_INDEX_TOP) {
|
||||
if(count<IX_TOP) {
|
||||
throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
|
||||
}
|
||||
indexes=new int[count];
|
||||
|
@ -70,7 +64,14 @@ public final class UCaseProps {
|
|||
}
|
||||
|
||||
// read the trie
|
||||
trie=new CharTrie(inputStream, null);
|
||||
trie=Trie2_16.createFromSerialized(inputStream);
|
||||
int expectedTrieLength=indexes[IX_TRIE_SIZE];
|
||||
int trieLength=trie.getSerializedLength();
|
||||
if(trieLength>expectedTrieLength) {
|
||||
throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
|
||||
}
|
||||
// skip padding after trie bytes
|
||||
inputStream.skipBytes(expectedTrieLength-trieLength);
|
||||
|
||||
// read exceptions[]
|
||||
count=indexes[IX_EXC_LENGTH];
|
||||
|
@ -93,53 +94,20 @@ public final class UCaseProps {
|
|||
|
||||
// implement ICUBinary.Authenticate
|
||||
private final class IsAcceptable implements ICUBinary.Authenticate {
|
||||
// @Override when we switch to Java 6
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0]==1 &&
|
||||
version[2]==Trie.INDEX_STAGE_1_SHIFT_ && version[3]==Trie.INDEX_STAGE_2_SHIFT_;
|
||||
return version[0]==2;
|
||||
}
|
||||
}
|
||||
|
||||
// port of ucase_getSingleton()
|
||||
//
|
||||
// Note: Do we really need this API?
|
||||
public static UCaseProps getSingleton() throws IOException {
|
||||
if (FULL_INSTANCE == null) {
|
||||
synchronized (UCaseProps.class) {
|
||||
if (FULL_INSTANCE == null) {
|
||||
FULL_INSTANCE = new UCaseProps();
|
||||
}
|
||||
}
|
||||
}
|
||||
return FULL_INSTANCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a singleton dummy object, one that works with no real data.
|
||||
* This can be used when the real data is not available.
|
||||
* Using the dummy can reduce checks for available data after an initial failure.
|
||||
* Port of ucase_getDummy().
|
||||
*/
|
||||
// Note: do we really need this API?
|
||||
public static UCaseProps getDummy() {
|
||||
if (DUMMY_INSTANCE == null) {
|
||||
synchronized (UCaseProps.class) {
|
||||
if (DUMMY_INSTANCE == null) {
|
||||
DUMMY_INSTANCE = new UCaseProps(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
return DUMMY_INSTANCE;
|
||||
}
|
||||
|
||||
// set of property starts for UnicodeSet ------------------------------- ***
|
||||
|
||||
public final void addPropertyStarts(UnicodeSet set) {
|
||||
/* add the start code point of each same-value range of the trie */
|
||||
TrieIterator iter=new TrieIterator(trie);
|
||||
RangeValueIterator.Element element=new RangeValueIterator.Element();
|
||||
|
||||
while(iter.next(element)){
|
||||
set.add(element.start);
|
||||
Iterator<Trie2.Range> trieIterator=trie.iterator();
|
||||
Trie2.Range range;
|
||||
while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
|
||||
set.add(range.startCodePoint);
|
||||
}
|
||||
|
||||
/* add code points with hardcoded properties, plus the ones following them */
|
||||
|
@ -227,7 +195,7 @@ public final class UCaseProps {
|
|||
// simple case mappings ------------------------------------------------ ***
|
||||
|
||||
public final int tolower(int c) {
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)>=UPPER) {
|
||||
c+=getDelta(props);
|
||||
|
@ -243,7 +211,7 @@ public final class UCaseProps {
|
|||
}
|
||||
|
||||
public final int toupper(int c) {
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)==LOWER) {
|
||||
c+=getDelta(props);
|
||||
|
@ -259,7 +227,7 @@ public final class UCaseProps {
|
|||
}
|
||||
|
||||
public final int totitle(int c) {
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)==LOWER) {
|
||||
c+=getDelta(props);
|
||||
|
@ -318,7 +286,7 @@ public final class UCaseProps {
|
|||
break;
|
||||
}
|
||||
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)!=NONE) {
|
||||
/* add the one simple case mapping, no matter what type it is */
|
||||
|
@ -497,12 +465,12 @@ public final class UCaseProps {
|
|||
|
||||
/** @return NONE, LOWER, UPPER, TITLE */
|
||||
public final int getType(int c) {
|
||||
return getTypeFromProps(trie.getCodePointValue(c));
|
||||
return getTypeFromProps(trie.get(c));
|
||||
}
|
||||
|
||||
/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
|
||||
public final int getTypeOrIgnorable(int c) {
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
int type=getTypeFromProps(props);
|
||||
if(propsHasException(props)) {
|
||||
if((exceptions[getExceptionsOffset(props)]&EXC_CASE_IGNORABLE)!=0) {
|
||||
|
@ -516,7 +484,7 @@ public final class UCaseProps {
|
|||
|
||||
/** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */
|
||||
public final int getDotType(int c) {
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
return props&DOT_MASK;
|
||||
} else {
|
||||
|
@ -529,7 +497,7 @@ public final class UCaseProps {
|
|||
}
|
||||
|
||||
public final boolean isCaseSensitive(int c) {
|
||||
return (trie.getCodePointValue(c)&SENSITIVE)!=0;
|
||||
return (trie.get(c)&SENSITIVE)!=0;
|
||||
}
|
||||
|
||||
// string casing ------------------------------------------------------- ***
|
||||
|
@ -862,7 +830,7 @@ public final class UCaseProps {
|
|||
int result, props;
|
||||
|
||||
result=c;
|
||||
props=trie.getCodePointValue(c);
|
||||
props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)>=UPPER) {
|
||||
result=c+getDelta(props);
|
||||
|
@ -1010,7 +978,7 @@ public final class UCaseProps {
|
|||
int props;
|
||||
|
||||
result=c;
|
||||
props=trie.getCodePointValue(c);
|
||||
props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)==LOWER) {
|
||||
result=c+getDelta(props);
|
||||
|
@ -1159,7 +1127,7 @@ public final class UCaseProps {
|
|||
|
||||
/* return the simple case folding mapping for c */
|
||||
public final int fold(int c, int options) {
|
||||
int props=trie.getCodePointValue(c);
|
||||
int props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)>=UPPER) {
|
||||
c+=getDelta(props);
|
||||
|
@ -1222,7 +1190,7 @@ public final class UCaseProps {
|
|||
int props;
|
||||
|
||||
result=c;
|
||||
props=trie.getCodePointValue(c);
|
||||
props=trie.get(c);
|
||||
if(!propsHasException(props)) {
|
||||
if(getTypeFromProps(props)>=UPPER) {
|
||||
result=c+getDelta(props);
|
||||
|
@ -1355,7 +1323,7 @@ public final class UCaseProps {
|
|||
private char exceptions[];
|
||||
private char unfold[];
|
||||
|
||||
private CharTrie trie;
|
||||
private Trie2_16 trie;
|
||||
|
||||
// data format constants ----------------------------------------------- ***
|
||||
private static final String DATA_NAME="ucase";
|
||||
|
@ -1366,9 +1334,9 @@ public final class UCaseProps {
|
|||
private static final byte FMT[]={ 0x63, 0x41, 0x53, 0x45 };
|
||||
|
||||
/* indexes into indexes[] */
|
||||
private static final int IX_INDEX_TOP=0;
|
||||
//private static final int IX_INDEX_TOP=0;
|
||||
//private static final int IX_LENGTH=1;
|
||||
//private static final int IX_TRIE_SIZE=2;
|
||||
private static final int IX_TRIE_SIZE=2;
|
||||
private static final int IX_EXC_LENGTH=3;
|
||||
private static final int IX_UNFOLD_LENGTH=4;
|
||||
|
||||
|
@ -1464,27 +1432,18 @@ public final class UCaseProps {
|
|||
private static final int UNFOLD_ROW_WIDTH=1;
|
||||
private static final int UNFOLD_STRING_WIDTH=2;
|
||||
|
||||
|
||||
/*
|
||||
* public singleton instance
|
||||
*/
|
||||
public static final UCaseProps INSTANCE;
|
||||
|
||||
private static volatile UCaseProps FULL_INSTANCE;
|
||||
private static volatile UCaseProps DUMMY_INSTANCE;
|
||||
|
||||
// This static initializer block must be placed after
|
||||
// other static member initialization
|
||||
static {
|
||||
UCaseProps cp;
|
||||
try {
|
||||
cp = new UCaseProps();
|
||||
FULL_INSTANCE = cp;
|
||||
INSTANCE = new UCaseProps();
|
||||
} catch (IOException e) {
|
||||
// creating dummy
|
||||
cp = new UCaseProps(true);
|
||||
DUMMY_INSTANCE = cp;
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
INSTANCE = cp;
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,162 +0,0 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
/**
|
||||
* <p>Internal reader class for ICU data file uprops.icu containing
|
||||
* Unicode codepoint data.</p>
|
||||
* <p>This class simply reads uprops.icu, authenticates that it is a valid
|
||||
* ICU data file and splits its contents up into blocks of data for use in
|
||||
* <a href=UCharacterProperty.html>com.ibm.icu.impl.UCharacterProperty</a>.
|
||||
* </p>
|
||||
* <p>uprops.icu, which is in big-endian format, is packaged in the same jar as this
|
||||
* package.</p>
|
||||
*
|
||||
* Unicode character properties file format see
|
||||
* (ICU4C)/source/tools/genprops/store.c
|
||||
*
|
||||
* @author Syn Wee Quek
|
||||
* @since release 2.1, February 1st 2002
|
||||
*/
|
||||
final class UCharacterPropertyReader implements ICUBinary.Authenticate
|
||||
{
|
||||
// public methods ----------------------------------------------------
|
||||
|
||||
public boolean isDataVersionAcceptable(byte version[])
|
||||
{
|
||||
return version[0] == DATA_FORMAT_VERSION_[0]
|
||||
&& version[2] == DATA_FORMAT_VERSION_[2]
|
||||
&& version[3] == DATA_FORMAT_VERSION_[3];
|
||||
}
|
||||
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Protected constructor.</p>
|
||||
* @param inputStream ICU uprops.icu file input stream
|
||||
* @exception IOException thrown if the data file fails authentication
|
||||
*/
|
||||
protected UCharacterPropertyReader(InputStream inputStream)
|
||||
throws IOException
|
||||
{
|
||||
m_unicodeVersion_ = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_,
|
||||
this);
|
||||
m_dataInputStream_ = new DataInputStream(inputStream);
|
||||
}
|
||||
|
||||
// protected methods -------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Reads uprops.icu and parses it into blocks of data to be stored in
|
||||
* UCharacterProperty.</p>
|
||||
* @param ucharppty UCharacterProperty instance
|
||||
* @exception IOException thrown when data reading fails
|
||||
*/
|
||||
protected void read(UCharacterProperty ucharppty) throws IOException
|
||||
{
|
||||
// read the indexes
|
||||
int count = INDEX_SIZE_;
|
||||
m_propertyOffset_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_exceptionOffset_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_caseOffset_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_additionalOffset_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_additionalVectorsOffset_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_additionalColumnsCount_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_reservedOffset_ = m_dataInputStream_.readInt();
|
||||
count --;
|
||||
m_dataInputStream_.skipBytes(3 << 2);
|
||||
count -= 3;
|
||||
ucharppty.m_maxBlockScriptValue_ = m_dataInputStream_.readInt();
|
||||
count --; // 10
|
||||
ucharppty.m_maxJTGValue_ = m_dataInputStream_.readInt();
|
||||
count --; // 11
|
||||
m_dataInputStream_.skipBytes(count << 2);
|
||||
|
||||
// read the trie index block
|
||||
// m_props_index_ in terms of ints
|
||||
ucharppty.m_trie_ = new CharTrie(m_dataInputStream_, null);
|
||||
|
||||
// skip the 32 bit properties block
|
||||
int size = m_exceptionOffset_ - m_propertyOffset_;
|
||||
m_dataInputStream_.skipBytes(size * 4);
|
||||
|
||||
// skip the 32 bit exceptions block
|
||||
size = m_caseOffset_ - m_exceptionOffset_;
|
||||
m_dataInputStream_.skipBytes(size * 4);
|
||||
|
||||
// skip the 32 bit case block
|
||||
size = (m_additionalOffset_ - m_caseOffset_) << 1;
|
||||
m_dataInputStream_.skipBytes(size * 2);
|
||||
|
||||
if(m_additionalColumnsCount_ > 0) {
|
||||
// reads the additional property block
|
||||
ucharppty.m_additionalTrie_ = new CharTrie(m_dataInputStream_, null);
|
||||
|
||||
// additional properties
|
||||
size = m_reservedOffset_ - m_additionalVectorsOffset_;
|
||||
ucharppty.m_additionalVectors_ = new int[size];
|
||||
for (int i = 0; i < size; i ++) {
|
||||
ucharppty.m_additionalVectors_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
}
|
||||
|
||||
m_dataInputStream_.close();
|
||||
ucharppty.m_additionalColumnsCount_ = m_additionalColumnsCount_;
|
||||
ucharppty.m_unicodeVersion_ = VersionInfo.getInstance(
|
||||
(int)m_unicodeVersion_[0], (int)m_unicodeVersion_[1],
|
||||
(int)m_unicodeVersion_[2], (int)m_unicodeVersion_[3]);
|
||||
}
|
||||
|
||||
// private variables -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Index size
|
||||
*/
|
||||
private static final int INDEX_SIZE_ = 16;
|
||||
|
||||
/**
|
||||
* ICU data file input stream
|
||||
*/
|
||||
private DataInputStream m_dataInputStream_;
|
||||
|
||||
/**
|
||||
* Offset information in the indexes.
|
||||
*/
|
||||
private int m_propertyOffset_;
|
||||
private int m_exceptionOffset_;
|
||||
private int m_caseOffset_;
|
||||
private int m_additionalOffset_;
|
||||
private int m_additionalVectorsOffset_;
|
||||
private int m_additionalColumnsCount_;
|
||||
private int m_reservedOffset_;
|
||||
private byte m_unicodeVersion_[];
|
||||
|
||||
/**
|
||||
* Data format "UPro".
|
||||
*/
|
||||
private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x50,
|
||||
(byte)0x72, (byte)0x6F};
|
||||
/**
|
||||
* Format version; this code works with all versions with the same major
|
||||
* version number and the same Trie bit distribution.
|
||||
*/
|
||||
private static final byte DATA_FORMAT_VERSION_[] = {(byte)6, (byte)0,
|
||||
(byte)Trie.INDEX_STAGE_1_SHIFT_,
|
||||
(byte)Trie.INDEX_STAGE_2_SHIFT_};
|
||||
}
|
|
@ -288,6 +288,14 @@ public final class UTS46 extends IDNA {
|
|||
}
|
||||
return length;
|
||||
}
|
||||
// Some non-ASCII characters are equivalent to sequences with
|
||||
// non-LDH ASCII characters. To find them:
|
||||
// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
|
||||
private static boolean
|
||||
isNonASCIIDisallowedSTD3Valid(int c) {
|
||||
return c==0x2260 || c==0x226E || c==0x226F;
|
||||
}
|
||||
|
||||
|
||||
// Replace the label in dest with the label string, if the label was modified.
|
||||
// If label==dest then the label was modified in-place and labelLength
|
||||
|
@ -393,9 +401,11 @@ public final class UTS46 extends IDNA {
|
|||
}
|
||||
} else {
|
||||
oredChars|=c;
|
||||
if(c==0xfffd) {
|
||||
if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
|
||||
addLabelError(info, Error.DISALLOWED);
|
||||
labelString.setCharAt(i, '\ufffd');
|
||||
} else if(c==0xfffd) {
|
||||
addLabelError(info, Error.DISALLOWED);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
++i;
|
||||
|
|
|
@ -9,12 +9,14 @@ package com.ibm.icu.lang;
|
|||
|
||||
import java.lang.ref.SoftReference;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.icu.impl.IllegalIcuArgumentException;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.impl.Normalizer2Impl;
|
||||
import com.ibm.icu.impl.Trie2;
|
||||
import com.ibm.icu.impl.UBiDiProps;
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.impl.UCharacterName;
|
||||
|
@ -996,10 +998,37 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
/** @stable ICU 4.4 */
|
||||
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
|
||||
|
||||
/* New blocks in Unicode 6.0 */
|
||||
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int MANDAIC_ID = 198; /*[0840]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int BATAK_ID = 199; /*[1BC0]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int BRAHMI_ID = 201; /*[11000]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int EMOTICONS_ID = 206; /*[1F600]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
|
||||
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int COUNT = 198;
|
||||
public static final int COUNT = 210;
|
||||
|
||||
// blocks objects ---------------------------------------------------
|
||||
|
||||
|
@ -2042,6 +2071,47 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
|
||||
|
||||
/* New blocks in Unicode 6.0 */
|
||||
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock MANDAIC =
|
||||
new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock BATAK =
|
||||
new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
|
||||
new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock BRAHMI =
|
||||
new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock BAMUM_SUPPLEMENT =
|
||||
new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock KANA_SUPPLEMENT =
|
||||
new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock PLAYING_CARDS =
|
||||
new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
|
||||
new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
|
||||
MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock EMOTICONS =
|
||||
new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
|
||||
new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
|
||||
new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
|
||||
/** @stable ICU 4.6 */
|
||||
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
|
||||
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
|
||||
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -2089,22 +2159,8 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
return INVALID_CODE;
|
||||
}
|
||||
|
||||
return UnicodeBlock.getInstance((UCharacterProperty.INSTANCE.getAdditional(ch, 0)
|
||||
& BLOCK_MASK_) >> BLOCK_SHIFT_);
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal function returning of(ch).getID().
|
||||
*
|
||||
* @param ch
|
||||
* @return numeric block value
|
||||
*/
|
||||
static int idOf(int ch) {
|
||||
if (ch < 0 || ch > MAX_VALUE) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (UCharacterProperty.INSTANCE.getAdditional(ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
|
||||
return UnicodeBlock.getInstance(
|
||||
UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2411,10 +2467,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int HAH = 13;
|
||||
/** @stable ICU 4.6 */
|
||||
public static final int TEH_MARBUTA_GOAL = 14;
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int HAMZA_ON_HEH_GOAL = 14;
|
||||
public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
|
||||
/**
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -3140,7 +3198,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
int value = digit(ch);
|
||||
if (value < 0) {
|
||||
// ch is not a decimal digit, try latin letters
|
||||
value = getEuropeanDigit(ch);
|
||||
value = UCharacterProperty.getEuropeanDigit(ch);
|
||||
}
|
||||
return (value < radix) ? value : -1;
|
||||
} else {
|
||||
|
@ -3163,13 +3221,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static int digit(int ch)
|
||||
{
|
||||
int props = getProperty(ch);
|
||||
int value = getNumericTypeValue(props) - NTV_DECIMAL_START_;
|
||||
if(value<=9) {
|
||||
return value;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
return UCharacterProperty.INSTANCE.digit(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3188,41 +3240,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static int getNumericValue(int ch)
|
||||
{
|
||||
// slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
|
||||
int props = UCharacterProperty.INSTANCE.getProperty(ch);
|
||||
int ntv = getNumericTypeValue(props);
|
||||
|
||||
if(ntv==NTV_NONE_) {
|
||||
return getEuropeanDigit(ch);
|
||||
} else if(ntv<NTV_DIGIT_START_) {
|
||||
/* decimal digit */
|
||||
return ntv-NTV_DECIMAL_START_;
|
||||
} else if(ntv<NTV_NUMERIC_START_) {
|
||||
/* other digit */
|
||||
return ntv-NTV_DIGIT_START_;
|
||||
} else if(ntv<NTV_FRACTION_START_) {
|
||||
/* small integer */
|
||||
return ntv-NTV_NUMERIC_START_;
|
||||
} else if(ntv<NTV_LARGE_START_) {
|
||||
/* fraction */
|
||||
return -2;
|
||||
} else if(ntv<NTV_RESERVED_START_) {
|
||||
/* large, single-significant-digit integer */
|
||||
int mant=(ntv>>5)-14;
|
||||
int exp=(ntv&0x1f)+2;
|
||||
if(exp<9 || (exp==9 && mant<=2)) {
|
||||
int numValue=mant;
|
||||
do {
|
||||
numValue*=10;
|
||||
} while(--exp>0);
|
||||
return numValue;
|
||||
} else {
|
||||
return -2;
|
||||
}
|
||||
} else {
|
||||
/* reserved */
|
||||
return -2;
|
||||
}
|
||||
return UCharacterProperty.INSTANCE.getNumericValue(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3243,58 +3261,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static double getUnicodeNumericValue(int ch)
|
||||
{
|
||||
// equivalent to c version double u_getNumericValue(UChar32 c)
|
||||
int props = UCharacterProperty.INSTANCE.getProperty(ch);
|
||||
int ntv = getNumericTypeValue(props);
|
||||
|
||||
if(ntv==NTV_NONE_) {
|
||||
return NO_NUMERIC_VALUE;
|
||||
} else if(ntv<NTV_DIGIT_START_) {
|
||||
/* decimal digit */
|
||||
return ntv-NTV_DECIMAL_START_;
|
||||
} else if(ntv<NTV_NUMERIC_START_) {
|
||||
/* other digit */
|
||||
return ntv-NTV_DIGIT_START_;
|
||||
} else if(ntv<NTV_FRACTION_START_) {
|
||||
/* small integer */
|
||||
return ntv-NTV_NUMERIC_START_;
|
||||
} else if(ntv<NTV_LARGE_START_) {
|
||||
/* fraction */
|
||||
int numerator=(ntv>>4)-12;
|
||||
int denominator=(ntv&0xf)+1;
|
||||
return (double)numerator/denominator;
|
||||
} else if(ntv<NTV_RESERVED_START_) {
|
||||
/* large, single-significant-digit integer */
|
||||
double numValue;
|
||||
int mant=(ntv>>5)-14;
|
||||
int exp=(ntv&0x1f)+2;
|
||||
numValue=mant;
|
||||
|
||||
/* multiply by 10^exp without math.h */
|
||||
while(exp>=4) {
|
||||
numValue*=10000.;
|
||||
exp-=4;
|
||||
}
|
||||
switch(exp) {
|
||||
case 3:
|
||||
numValue*=1000.;
|
||||
break;
|
||||
case 2:
|
||||
numValue*=100.;
|
||||
break;
|
||||
case 1:
|
||||
numValue*=10.;
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return numValue;
|
||||
} else {
|
||||
/* reserved */
|
||||
return NO_NUMERIC_VALUE;
|
||||
}
|
||||
return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3328,7 +3295,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static int getType(int ch)
|
||||
{
|
||||
return getProperty(ch) & UCharacterProperty.TYPE_MASK;
|
||||
return UCharacterProperty.INSTANCE.getType(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5098,7 +5065,41 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static RangeValueIterator getTypeIterator()
|
||||
{
|
||||
return new UCharacterTypeIterator(UCharacterProperty.INSTANCE);
|
||||
return new UCharacterTypeIterator();
|
||||
}
|
||||
|
||||
private static final class UCharacterTypeIterator implements RangeValueIterator {
|
||||
UCharacterTypeIterator() {
|
||||
reset();
|
||||
}
|
||||
|
||||
// implements RangeValueIterator
|
||||
public boolean next(Element element) {
|
||||
if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
|
||||
element.start=range.startCodePoint;
|
||||
element.limit=range.endCodePoint+1;
|
||||
element.value=range.value;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// implements RangeValueIterator
|
||||
public void reset() {
|
||||
trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
|
||||
}
|
||||
|
||||
private Iterator<Trie2.Range> trieIterator;
|
||||
private Trie2.Range range;
|
||||
|
||||
private static final class MaskType implements Trie2.ValueMapper {
|
||||
// Extracts the general category ("character type") from the trie value.
|
||||
public int map(int value) {
|
||||
return value & UCharacterProperty.TYPE_MASK;
|
||||
}
|
||||
}
|
||||
private static final MaskType MASK_TYPE=new MaskType();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5276,27 +5277,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
|
||||
* Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
|
||||
*/
|
||||
private static final int /* UHangulSyllableType */ gcbToHst[]={
|
||||
HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */
|
||||
HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */
|
||||
HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */
|
||||
HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */
|
||||
HangulSyllableType.LEADING_JAMO, /* U_GCB_L */
|
||||
HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */
|
||||
HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */
|
||||
HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */
|
||||
HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */
|
||||
HangulSyllableType.VOWEL_JAMO /* U_GCB_V */
|
||||
/*
|
||||
* Omit GCB values beyond what we need for hst.
|
||||
* The code below checks for the array length.
|
||||
*/
|
||||
};
|
||||
|
||||
/**
|
||||
* {@icu} <p>Returns the property value for an Unicode property type of a code point.
|
||||
* Also returns binary and mask property values.</p>
|
||||
|
@ -5338,78 +5318,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static int getIntPropertyValue(int ch, int type)
|
||||
{
|
||||
if (type < UProperty.BINARY_START) {
|
||||
return 0; // undefined
|
||||
}
|
||||
else if (type < UProperty.BINARY_LIMIT) {
|
||||
return hasBinaryProperty(ch, type) ? 1 : 0;
|
||||
}
|
||||
else if (type < UProperty.INT_START) {
|
||||
return 0; // undefined
|
||||
}
|
||||
else if (type < UProperty.INT_LIMIT) {
|
||||
switch (type) {
|
||||
case UProperty.BIDI_CLASS:
|
||||
return getDirection(ch);
|
||||
case UProperty.BLOCK:
|
||||
return UnicodeBlock.idOf(ch);
|
||||
case UProperty.CANONICAL_COMBINING_CLASS:
|
||||
return getCombiningClass(ch);
|
||||
case UProperty.DECOMPOSITION_TYPE:
|
||||
return UCharacterProperty.INSTANCE.getAdditional(ch, 2)
|
||||
& DECOMPOSITION_TYPE_MASK_;
|
||||
case UProperty.EAST_ASIAN_WIDTH:
|
||||
return (UCharacterProperty.INSTANCE.getAdditional(ch, 0)
|
||||
& EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
|
||||
case UProperty.GENERAL_CATEGORY:
|
||||
return getType(ch);
|
||||
case UProperty.JOINING_GROUP:
|
||||
return UBiDiProps.INSTANCE.getJoiningGroup(ch);
|
||||
case UProperty.JOINING_TYPE:
|
||||
return UBiDiProps.INSTANCE.getJoiningType(ch);
|
||||
case UProperty.LINE_BREAK:
|
||||
return (UCharacterProperty.INSTANCE
|
||||
.getAdditional(ch, LB_VWORD)& LB_MASK)>>LB_SHIFT;
|
||||
case UProperty.NUMERIC_TYPE:
|
||||
return ntvGetType(getNumericTypeValue(UCharacterProperty
|
||||
.INSTANCE.getProperty(ch)));
|
||||
case UProperty.SCRIPT:
|
||||
return UScript.getScript(ch);
|
||||
case UProperty.HANGUL_SYLLABLE_TYPE: {
|
||||
/* see comments on gcbToHst[] above */
|
||||
int gcb=(UCharacterProperty.INSTANCE.getAdditional(ch, 2)&GCB_MASK)>>GCB_SHIFT;
|
||||
if(gcb<gcbToHst.length) {
|
||||
return gcbToHst[gcb];
|
||||
} else {
|
||||
return HangulSyllableType.NOT_APPLICABLE;
|
||||
}
|
||||
}
|
||||
case UProperty.NFD_QUICK_CHECK:
|
||||
case UProperty.NFKD_QUICK_CHECK:
|
||||
case UProperty.NFC_QUICK_CHECK:
|
||||
case UProperty.NFKC_QUICK_CHECK:
|
||||
return Norm2AllModes.getN2WithImpl(type-UProperty.NFD_QUICK_CHECK).getQuickCheck(ch);
|
||||
case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
|
||||
return Norm2AllModes.getNFCInstance().impl.getFCDTrie().get(ch)>>8;
|
||||
case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return Norm2AllModes.getNFCInstance().impl.getFCDTrie().get(ch)&0xff;
|
||||
case UProperty.GRAPHEME_CLUSTER_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getAdditional(ch, 2)& GCB_MASK)>>GCB_SHIFT;
|
||||
case UProperty.SENTENCE_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getAdditional(ch, 2)& SB_MASK)>>SB_SHIFT;
|
||||
case UProperty.WORD_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getAdditional(ch, 2)& WB_MASK)>>WB_SHIFT;
|
||||
/* Values were tested for variable type from Integer.MIN_VALUE
|
||||
* to UProperty.INT_LIMIT and none of them reached the default case.
|
||||
*/
|
||||
///CLOVER:OFF
|
||||
default: return 0; /* undefined */
|
||||
///CLOVER:ON
|
||||
}
|
||||
} else if (type == UProperty.GENERAL_CATEGORY_MASK) {
|
||||
return UCharacterProperty.getMask(getType(ch));
|
||||
}
|
||||
return 0; // undefined
|
||||
return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
|
||||
}
|
||||
/**
|
||||
* {@icu} Returns a string version of the property value.
|
||||
|
@ -5501,66 +5410,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static int getIntPropertyMaxValue(int type)
|
||||
{
|
||||
if (type < UProperty.BINARY_START) {
|
||||
return -1; // undefined
|
||||
}
|
||||
else if (type < UProperty.BINARY_LIMIT) {
|
||||
return 1; // maximum TRUE for all binary properties
|
||||
}
|
||||
else if (type < UProperty.INT_START) {
|
||||
return -1; // undefined
|
||||
}
|
||||
else if (type < UProperty.INT_LIMIT) {
|
||||
switch (type) {
|
||||
case UProperty.BIDI_CLASS:
|
||||
case UProperty.JOINING_GROUP:
|
||||
case UProperty.JOINING_TYPE:
|
||||
return UBiDiProps.INSTANCE.getMaxValue(type);
|
||||
case UProperty.BLOCK:
|
||||
return (UCharacterProperty.INSTANCE.getMaxValues(0) & BLOCK_MASK_)
|
||||
>> BLOCK_SHIFT_;
|
||||
case UProperty.CANONICAL_COMBINING_CLASS:
|
||||
case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
|
||||
case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return 0xff; // TODO do we need to be more precise,
|
||||
// getting the actual maximum?
|
||||
case UProperty.DECOMPOSITION_TYPE:
|
||||
return UCharacterProperty.INSTANCE.getMaxValues(2) & DECOMPOSITION_TYPE_MASK_;
|
||||
case UProperty.EAST_ASIAN_WIDTH:
|
||||
return (UCharacterProperty.INSTANCE.getMaxValues(0) & EAST_ASIAN_MASK_)
|
||||
>> EAST_ASIAN_SHIFT_;
|
||||
case UProperty.GENERAL_CATEGORY:
|
||||
return UCharacterCategory.CHAR_CATEGORY_COUNT - 1;
|
||||
case UProperty.LINE_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getMaxValues(LB_VWORD) & LB_MASK)
|
||||
>> LB_SHIFT;
|
||||
case UProperty.NUMERIC_TYPE:
|
||||
return NumericType.COUNT - 1;
|
||||
case UProperty.SCRIPT:
|
||||
return UCharacterProperty.INSTANCE.getMaxValues(0) & SCRIPT_MASK_;
|
||||
case UProperty.HANGUL_SYLLABLE_TYPE:
|
||||
return HangulSyllableType.COUNT-1;
|
||||
case UProperty.NFD_QUICK_CHECK:
|
||||
case UProperty.NFKD_QUICK_CHECK:
|
||||
return 1; // YES -- these are never "maybe", only "no" or "yes"
|
||||
case UProperty.NFC_QUICK_CHECK:
|
||||
case UProperty.NFKC_QUICK_CHECK:
|
||||
return 2; // MAYBE
|
||||
case UProperty.GRAPHEME_CLUSTER_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getMaxValues(2) & GCB_MASK) >> GCB_SHIFT;
|
||||
case UProperty.SENTENCE_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getMaxValues(2) & SB_MASK) >> SB_SHIFT;
|
||||
case UProperty.WORD_BREAK:
|
||||
return (UCharacterProperty.INSTANCE.getMaxValues(2) & WB_MASK) >> WB_SHIFT;
|
||||
/* Values were tested for variable type from Integer.MIN_VALUE
|
||||
* to UProperty.INT_LIMIT and none of them reached the default case.
|
||||
*/
|
||||
///CLOVER:OFF
|
||||
default: return -1; // undefined
|
||||
///CLOVER:ON
|
||||
}
|
||||
}
|
||||
return -1; // undefined
|
||||
return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -6189,29 +6039,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* Delete code point
|
||||
*/
|
||||
private static final int DELETE_ = 0x007F;
|
||||
/**
|
||||
* Numeric types and values in the main properties words.
|
||||
*/
|
||||
private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
|
||||
private static final int getNumericTypeValue(int props) {
|
||||
return props >> NUMERIC_TYPE_VALUE_SHIFT_;
|
||||
}
|
||||
/* constants for the storage form of numeric types and values */
|
||||
private static final int NTV_NONE_ = 0;
|
||||
private static final int NTV_DECIMAL_START_ = 1;
|
||||
private static final int NTV_DIGIT_START_ = 11;
|
||||
private static final int NTV_NUMERIC_START_ = 21;
|
||||
private static final int NTV_FRACTION_START_ = 0xb0;
|
||||
private static final int NTV_LARGE_START_ = 0x1e0;
|
||||
private static final int NTV_RESERVED_START_ = 0x300;
|
||||
|
||||
private static final int ntvGetType(int ntv) {
|
||||
return
|
||||
(ntv==NTV_NONE_) ? NumericType.NONE :
|
||||
(ntv<NTV_DIGIT_START_) ? NumericType.DECIMAL :
|
||||
(ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
|
||||
NumericType.NUMERIC;
|
||||
}
|
||||
|
||||
/**
|
||||
* Han digit characters
|
||||
|
@ -6235,82 +6062,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c;
|
||||
private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104;
|
||||
|
||||
// /**
|
||||
// * Zero Width Non Joiner.
|
||||
// * Equivalent to icu4c ZWNJ.
|
||||
// */
|
||||
// private static final int ZERO_WIDTH_NON_JOINER_ = 0x200c;
|
||||
// /**
|
||||
// * Zero Width Joiner
|
||||
// * Equivalent to icu4c ZWJ.
|
||||
// */
|
||||
// private static final int ZERO_WIDTH_JOINER_ = 0x200d;
|
||||
|
||||
/*
|
||||
* Properties in vector word 2
|
||||
* Bits
|
||||
* 31..26 reserved
|
||||
* 25..20 Line Break
|
||||
* 19..15 Sentence Break
|
||||
* 14..10 Word Break
|
||||
* 9.. 5 Grapheme Cluster Break
|
||||
* 4.. 0 Decomposition Type
|
||||
*/
|
||||
private static final int LB_MASK = 0x03f00000;
|
||||
private static final int LB_SHIFT = 20;
|
||||
private static final int LB_VWORD = 2;
|
||||
|
||||
private static final int SB_MASK = 0x000f8000;
|
||||
private static final int SB_SHIFT = 15;
|
||||
|
||||
private static final int WB_MASK = 0x00007c00;
|
||||
private static final int WB_SHIFT = 10;
|
||||
|
||||
private static final int GCB_MASK = 0x000003e0;
|
||||
private static final int GCB_SHIFT = 5;
|
||||
|
||||
/**
|
||||
* Integer properties mask for decomposition type.
|
||||
* Equivalent to icu4c UPROPS_DT_MASK.
|
||||
*/
|
||||
private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
|
||||
|
||||
/*
|
||||
* Properties in vector word 0
|
||||
* Bits
|
||||
* 31..24 DerivedAge version major/minor one nibble each
|
||||
* 23..20 reserved
|
||||
* 19..17 East Asian Width
|
||||
* 16.. 8 UBlockCode
|
||||
* 7.. 0 UScriptCode
|
||||
*/
|
||||
|
||||
/**
|
||||
* Integer properties mask and shift values for East Asian cell width.
|
||||
* Equivalent to icu4c UPROPS_EA_MASK
|
||||
*/
|
||||
private static final int EAST_ASIAN_MASK_ = 0x000e0000;
|
||||
/**
|
||||
* Integer properties mask and shift values for East Asian cell width.
|
||||
* Equivalent to icu4c UPROPS_EA_SHIFT
|
||||
*/
|
||||
private static final int EAST_ASIAN_SHIFT_ = 17;
|
||||
/**
|
||||
* Integer properties mask and shift values for blocks.
|
||||
* Equivalent to icu4c UPROPS_BLOCK_MASK
|
||||
*/
|
||||
private static final int BLOCK_MASK_ = 0x0001ff00;
|
||||
/**
|
||||
* Integer properties mask and shift values for blocks.
|
||||
* Equivalent to icu4c UPROPS_BLOCK_SHIFT
|
||||
*/
|
||||
private static final int BLOCK_SHIFT_ = 8;
|
||||
/**
|
||||
* Integer properties mask and shift values for scripts.
|
||||
* Equivalent to icu4c UPROPS_SCRIPT_MASK
|
||||
*/
|
||||
static final int SCRIPT_MASK_ = 0x000000ff;
|
||||
|
||||
// private constructor -----------------------------------------------
|
||||
///CLOVER:OFF
|
||||
/**
|
||||
|
@ -6320,85 +6071,4 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
{
|
||||
}
|
||||
///CLOVER:ON
|
||||
// private methods ---------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the digit values of characters like 'A' - 'Z', normal,
|
||||
* half-width and full-width. This method assumes that the other digit
|
||||
* characters are checked by the calling method.
|
||||
* @param ch character to test
|
||||
* @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
|
||||
* its corresponding digit will be returned.
|
||||
*/
|
||||
private static int getEuropeanDigit(int ch) {
|
||||
if ((ch > 0x7a && ch < 0xff21)
|
||||
|| ch < 0x41 || (ch > 0x5a && ch < 0x61)
|
||||
|| ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
|
||||
return -1;
|
||||
}
|
||||
if (ch <= 0x7a) {
|
||||
// ch is in 0x41..0x5a ('A'..'Z') or 0x61..0x7a ('a'..'z')
|
||||
return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
|
||||
}
|
||||
// ch >= 0xff21
|
||||
if (ch <= 0xff3a) {
|
||||
return ch + 10 - 0xff21;
|
||||
}
|
||||
// ch >= 0xff41 && ch <= 0xff5a
|
||||
return ch + 10 - 0xff41;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the property value at the index.
|
||||
* This is optimized.
|
||||
* Note this is a little different from CharTrie: the index m_trieData_
|
||||
* is never negative.
|
||||
* This is a duplicate of UCharacterProperty.getProperty. For optimization
|
||||
* purposes, this method calls the trie data directly instead of through
|
||||
* UCharacterProperty.getProperty.
|
||||
* @param ch code point whose property value is to be retrieved
|
||||
* @return property value of code point
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
private static final int getProperty(int ch)
|
||||
{
|
||||
if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
|
||||
|| (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
|
||||
&& ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
|
||||
// BMP codepoint 0000..D7FF or DC00..FFFF
|
||||
try { // using try for ch < 0 is faster than using an if statement
|
||||
return UCharacterProperty.INSTANCE.m_trieData_[
|
||||
(UCharacterProperty.INSTANCE.m_trieIndex_[ch >> 5] << 2)
|
||||
+ (ch & 0x1f)];
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
// TODO: Tested all the values from 0 ... UTF16.LEAD_SURROGATE_MIN_VALUE
|
||||
// and UTF16.LEAD_SURROGATE_MAX_VALUE ... UTF16.SUPPLEMENTARY_MIN_VALUE
|
||||
// but it never results into the catch section of the try-catch
|
||||
///CLOVER:OFF
|
||||
return UCharacterProperty.INSTANCE.m_trieInitialValue_;
|
||||
///CLOVER:ON
|
||||
}
|
||||
}
|
||||
if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
|
||||
// lead surrogate D800..DBFF
|
||||
return UCharacterProperty.INSTANCE.m_trieData_[
|
||||
(UCharacterProperty.INSTANCE.m_trieIndex_[(0x2800 >> 5) +
|
||||
(ch >> 5)] << 2)
|
||||
+ (ch & 0x1f)];
|
||||
}
|
||||
// for optimization
|
||||
if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
|
||||
// supplementary code point 10000..10FFFF
|
||||
// look at the construction of supplementary characters
|
||||
// trail forms the ends of it.
|
||||
return UCharacterProperty.INSTANCE.m_trie_.getSurrogateValue(
|
||||
UTF16.getLeadSurrogate(ch),
|
||||
(char)(ch & 0x3ff));
|
||||
}
|
||||
// return m_dataOffset_ if there is an error, in this case we return
|
||||
// the default value: m_initialValue_
|
||||
// we cannot assume that m_initialValue_ is at offset 0
|
||||
// this is for optimization.
|
||||
return UCharacterProperty.INSTANCE.m_trieInitialValue_;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,62 +0,0 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.lang;
|
||||
|
||||
import com.ibm.icu.impl.TrieIterator;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
|
||||
/**
|
||||
* Class enabling iteration of the codepoints according to their types.
|
||||
* Result of each iteration contains the interval of codepoints that have
|
||||
* the same type.
|
||||
* Example of use:<br>
|
||||
* <pre>
|
||||
* RangeValueIterator iterator = UCharacter.getTypeIterator();
|
||||
* RangeValueIterator.Element element = new RangeValueIterator.Element();
|
||||
* while (iterator.next(element)) {
|
||||
* System.out.println("Codepoint \\u" +
|
||||
* Integer.toHexString(element.start) +
|
||||
* " to codepoint \\u" +
|
||||
* Integer.toHexString(element.limit - 1) +
|
||||
* " has the character type " +
|
||||
* element.value);
|
||||
* }
|
||||
* </pre>
|
||||
* @author synwee
|
||||
* @see com.ibm.icu.impl.TrieIterator
|
||||
* @since release 2.1, Jan 24 2002
|
||||
*/
|
||||
class UCharacterTypeIterator extends TrieIterator
|
||||
{
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
|
||||
* TrieEnumeration constructor
|
||||
* @param property the unicode character properties to be used
|
||||
*/
|
||||
protected UCharacterTypeIterator(UCharacterProperty property)
|
||||
{
|
||||
super(property.m_trie_);
|
||||
}
|
||||
|
||||
// protected methods ----------------------------------------------
|
||||
|
||||
/**
|
||||
* Called by nextElement() to extract a 32-bit value from a trie value
|
||||
* used for comparison.
|
||||
* This method is to be overridden if special manipulation is to be done
|
||||
* to retrieve a relevant comparison.
|
||||
* The default function is to return the value as it is.
|
||||
* @param value a value from the trie
|
||||
* @return extracted value
|
||||
*/
|
||||
protected int extract(int value)
|
||||
{
|
||||
return value & UCharacterProperty.TYPE_MASK;
|
||||
}
|
||||
}
|
|
@ -839,6 +839,30 @@ public interface UProperty
|
|||
*/
|
||||
public static final int STRING_LIMIT = 0x400D;
|
||||
|
||||
/**
|
||||
* Provisional property Script_Extensions (new in Unicode 6.0).
|
||||
* As a provisional property, it may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
* Corresponds to UScript.hasScript and UScript.getScriptExtensions.
|
||||
* @draft ICU 4.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int SCRIPT_EXTENSIONS=0x7000;
|
||||
/**
|
||||
* First constant for Unicode properties with unusual value types.
|
||||
* @draft ICU 4.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int OTHER_PROPERTY_START=SCRIPT_EXTENSIONS;
|
||||
/**
|
||||
* One more than the last constant for Unicode properties with unusual value types.
|
||||
* @draft ICU 4.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int OTHER_PROPERTY_LIMIT=0x7001;
|
||||
|
||||
/**
|
||||
* Selector constants for UCharacter.getPropertyName() and
|
||||
* UCharacter.getPropertyValueName(). These selectors are used to
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
package com.ibm.icu.lang;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Locale;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
|
@ -472,21 +473,31 @@ public final class UScript {
|
|||
* @stable ICU 3.6
|
||||
*/
|
||||
public static final int LINEAR_A = 83; /* Lina */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int MANDAIC = 84; /* Mand */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public static final int MANDAEAN = 84; /* Mand */
|
||||
public static final int MANDAEAN = MANDAIC;
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public static final int MAYAN_HIEROGLYPHS = 85; /* Maya */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int MEROITIC_HIEROGLYPHS = 86; /* Mero */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public static final int MEROITIC = 86; /* Mero */
|
||||
public static final int MEROITIC = MEROITIC_HIEROGLYPHS;
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 3.6
|
||||
|
@ -741,10 +752,78 @@ public final class UScript {
|
|||
public static final int OLD_SOUTH_ARABIAN = 133;/* Sarb */
|
||||
|
||||
/**
|
||||
* Limit
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int BASSA_VAH = 134;/* Bass */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int DUPLOYAN_SHORTAND = 135;/* Dupl */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int ELBASAN = 136;/* Elba */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int GRANTHA = 137;/* Gran */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int KPELLE = 138;/* Kpel */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int LOMA = 139;/* Loma */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int MENDE = 140;/* Mend */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int MEROITIC_CURSIVE = 141;/* Merc */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int OLD_NORTH_ARABIAN = 142;/* Narb */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int NABATAEAN = 143;/* Nbat */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int PALMYRENE = 144;/* Palm */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int SINDHI = 145;/* Sind */
|
||||
/**
|
||||
* ISO 15924 script code
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final int WARANG_CITI = 146;/* Wara */
|
||||
|
||||
/**
|
||||
* One higher than the last ISO 15924 script code integer.
|
||||
* This value will increase as ISO 15924 adds script codes
|
||||
* for which integer constants are added above.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static final int CODE_LIMIT = 134;
|
||||
public static final int CODE_LIMIT = 147;
|
||||
|
||||
private static final String kLocaleScript = "LocaleScript";
|
||||
|
||||
|
@ -870,12 +949,98 @@ public final class UScript {
|
|||
*/
|
||||
public static final int getScript(int codepoint){
|
||||
if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) {
|
||||
return (UCharacterProperty.INSTANCE.getAdditional(codepoint,0) & UCharacter.SCRIPT_MASK_);
|
||||
int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK;
|
||||
if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
|
||||
return scriptX;
|
||||
} else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) {
|
||||
return UScript.COMMON;
|
||||
} else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) {
|
||||
return UScript.INHERITED;
|
||||
} else {
|
||||
return UCharacterProperty.INSTANCE.m_scriptExtensions_[scriptX&UCharacterProperty.SCRIPT_MASK_];
|
||||
}
|
||||
}else{
|
||||
throw new IllegalArgumentException(Integer.toString(codepoint));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is code point c used in script sc?
|
||||
* That is, does code point c have the Script property value sc,
|
||||
* or do code point c's Script_Extensions include script code sc?
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
*
|
||||
* The Script_Extensions property is provisional. It may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* @param c code point
|
||||
* @param sc script code
|
||||
* @return true if Script(c)==sc or sc is in Script_Extensions(c)
|
||||
* @draft ICU 4.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final boolean hasScript(int c, int sc) {
|
||||
int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
|
||||
if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
|
||||
return sc==scriptX;
|
||||
}
|
||||
|
||||
char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
|
||||
int scx=scriptX&UCharacterProperty.SCRIPT_MASK_; // index into scriptExtensions
|
||||
int script;
|
||||
if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) {
|
||||
script=UScript.COMMON;
|
||||
} else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) {
|
||||
script=UScript.INHERITED;
|
||||
} else {
|
||||
script=scriptExtensions[scx];
|
||||
scx=scriptExtensions[scx+1];
|
||||
}
|
||||
if(sc==script) {
|
||||
return true;
|
||||
}
|
||||
while(sc>scriptExtensions[scx]) {
|
||||
++scx;
|
||||
}
|
||||
return sc==(scriptExtensions[scx]&0x7fff);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets code point c's Script_Extensions as script code integers into the output BitSet.
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
*
|
||||
* The Script_Extensions property is provisional. It may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* @param c code point
|
||||
* @param set set of script code integers; will be cleared, then bits are set
|
||||
* corresponding to c's Script_Extensions
|
||||
* @return set
|
||||
* @draft ICU 4.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final BitSet getScriptExtensions(int c, BitSet set) {
|
||||
set.clear();
|
||||
int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
|
||||
if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
|
||||
return set;
|
||||
}
|
||||
|
||||
char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
|
||||
int scx=scriptX&UCharacterProperty.SCRIPT_MASK_; // index into scriptExtensions
|
||||
if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
|
||||
scx=scriptExtensions[scx+1];
|
||||
}
|
||||
int sx;
|
||||
do {
|
||||
sx=scriptExtensions[scx++];
|
||||
set.set(sx&0x7fff);
|
||||
} while(sx<0x8000);
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a script name associated with the given script code.
|
||||
* Returns "Malayalam" given MALAYALAM
|
||||
|
|
|
@ -7,9 +7,6 @@
|
|||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
import com.ibm.icu.impl.UBiDiProps;
|
||||
import com.ibm.icu.lang.UCharacterDirection;
|
||||
|
||||
|
@ -851,15 +848,7 @@ public final class ArabicShaping {
|
|||
int length,
|
||||
char digitBase,
|
||||
boolean lastStrongWasAL) {
|
||||
UBiDiProps bdp;
|
||||
try {
|
||||
bdp=UBiDiProps.getSingleton();
|
||||
} catch (IOException e) {
|
||||
///CLOVER:OFF
|
||||
// This is dependent on the UBiDiProps object
|
||||
throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");
|
||||
///CLOVER:ON
|
||||
}
|
||||
UBiDiProps bdp=UBiDiProps.INSTANCE;
|
||||
digitBase -= '0'; // move common adjustment out of loop
|
||||
|
||||
for(int i = start + length; --i >= start;) {
|
||||
|
|
|
@ -27,11 +27,9 @@ package com.ibm.icu.text;
|
|||
|
||||
import java.awt.font.NumericShaper;
|
||||
import java.awt.font.TextAttribute;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Array;
|
||||
import java.text.AttributedCharacterIterator;
|
||||
import java.util.Arrays;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
import com.ibm.icu.impl.UBiDiProps;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
@ -1157,14 +1155,7 @@ public class Bidi {
|
|||
direction = 0;
|
||||
*/
|
||||
/* get Bidi properties */
|
||||
try {
|
||||
bdp = UBiDiProps.getSingleton();
|
||||
}
|
||||
catch (IOException e) {
|
||||
///CLOVER:OFF
|
||||
throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");
|
||||
///CLOVER:ON
|
||||
}
|
||||
bdp = UBiDiProps.INSTANCE;
|
||||
|
||||
/* allocate memory for arrays as requested */
|
||||
if (maxLength > 0) {
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
import java.io.IOException;
|
||||
import java.nio.CharBuffer;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
|
@ -1373,12 +1372,7 @@ public final class Normalizer implements Cloneable {
|
|||
// case folding and NFKC.)
|
||||
// For the derivation, see Unicode's DerivedNormalizationProps.txt.
|
||||
Normalizer2 nfkc=NFKCModeImpl.INSTANCE.normalizer2;
|
||||
UCaseProps csp;
|
||||
try {
|
||||
csp=UCaseProps.getSingleton();
|
||||
} catch(IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
UCaseProps csp=UCaseProps.INSTANCE;
|
||||
// first: b = NFKC(Fold(a))
|
||||
StringBuffer folded=new StringBuffer();
|
||||
int folded1Length=csp.toFullFolding(c, folded, 0);
|
||||
|
@ -2054,11 +2048,7 @@ public final class Normalizer implements Cloneable {
|
|||
nfcImpl=null;
|
||||
}
|
||||
if((options&COMPARE_IGNORE_CASE)!=0) {
|
||||
try {
|
||||
csp=UCaseProps.getSingleton();
|
||||
} catch(IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
csp=UCaseProps.INSTANCE;
|
||||
fold1=new StringBuffer();
|
||||
fold2=new StringBuffer();
|
||||
} else {
|
||||
|
|
|
@ -308,7 +308,7 @@ public final class StringPrep {
|
|||
b.close();
|
||||
|
||||
if(checkBiDi) {
|
||||
bdp=UBiDiProps.getSingleton();
|
||||
bdp=UBiDiProps.INSTANCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,13 +6,11 @@
|
|||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.BMPSet;
|
||||
|
@ -27,6 +25,7 @@ import com.ibm.icu.impl.UnicodeSetStringSpan;
|
|||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.util.Freezable;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
@ -3058,8 +3057,16 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
}
|
||||
|
||||
private static class ScriptExtensionsFilter implements Filter {
|
||||
int script;
|
||||
ScriptExtensionsFilter(int script) { this.script = script; }
|
||||
public boolean contains(int c) {
|
||||
return UScript.hasScript(c, script);
|
||||
}
|
||||
}
|
||||
|
||||
// VersionInfo for unassigned characters
|
||||
static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
|
||||
private static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
|
||||
|
||||
private static class VersionFilter implements Filter {
|
||||
VersionInfo version;
|
||||
|
@ -3079,45 +3086,41 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
if(INCLUSIONS[src] == null) {
|
||||
UnicodeSet incl = new UnicodeSet();
|
||||
try {
|
||||
switch(src) {
|
||||
case UCharacterProperty.SRC_CHAR:
|
||||
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_PROPSVEC:
|
||||
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
|
||||
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
||||
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_CASE_AND_NORM:
|
||||
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
||||
UCaseProps.getSingleton().addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFC:
|
||||
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFKC:
|
||||
Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFKC_CF:
|
||||
Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFC_CANON_ITER:
|
||||
Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_CASE:
|
||||
UCaseProps.getSingleton().addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_BIDI:
|
||||
UBiDiProps.getSingleton().addPropertyStarts(incl);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
|
||||
}
|
||||
} catch(IOException e) {
|
||||
throw new MissingResourceException(e.getMessage(),"","");
|
||||
switch(src) {
|
||||
case UCharacterProperty.SRC_CHAR:
|
||||
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_PROPSVEC:
|
||||
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
|
||||
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
||||
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_CASE_AND_NORM:
|
||||
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
||||
UCaseProps.INSTANCE.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFC:
|
||||
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFKC:
|
||||
Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFKC_CF:
|
||||
Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_NFC_CANON_ITER:
|
||||
Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_CASE:
|
||||
UCaseProps.INSTANCE.addPropertyStarts(incl);
|
||||
break;
|
||||
case UCharacterProperty.SRC_BIDI:
|
||||
UBiDiProps.INSTANCE.addPropertyStarts(incl);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
|
||||
}
|
||||
INCLUSIONS[src] = incl;
|
||||
}
|
||||
|
@ -3128,19 +3131,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* Generic filter-based scanning code for UCD property UnicodeSets.
|
||||
*/
|
||||
private UnicodeSet applyFilter(Filter filter, int src) {
|
||||
// Walk through all Unicode characters, noting the start
|
||||
// Logically, walk through all Unicode characters, noting the start
|
||||
// and end of each range for which filter.contains(c) is
|
||||
// true. Add each range to a set.
|
||||
//
|
||||
// To improve performance, use the INCLUSIONS set, which
|
||||
// To improve performance, use an inclusions set which
|
||||
// encodes information about character ranges that are known
|
||||
// to have identical properties, such as the CJK Ideographs
|
||||
// from U+4E00 to U+9FA5. INCLUSIONS contains all characters
|
||||
// except the first characters of such ranges.
|
||||
//
|
||||
// TODO Where possible, instead of scanning over code points,
|
||||
// use internal property data to initialize UnicodeSets for
|
||||
// those properties. Scanning code points is slow.
|
||||
// to have identical properties.
|
||||
// getInclusions(src) contains exactly the first characters of
|
||||
// same-value ranges for the given properties "source".
|
||||
|
||||
clear();
|
||||
|
||||
|
@ -3233,6 +3232,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
checkFrozen();
|
||||
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
||||
applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
|
||||
} else if (prop == UProperty.SCRIPT_EXTENSIONS) {
|
||||
applyFilter(new ScriptExtensionsFilter(value), UCharacterProperty.SRC_PROPSVEC);
|
||||
} else {
|
||||
applyFilter(new IntPropertyFilter(prop, value), UCharacterProperty.INSTANCE.getSource(prop));
|
||||
}
|
||||
|
@ -3327,7 +3328,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
|
||||
else {
|
||||
|
||||
switch (p) {
|
||||
case UProperty.NUMERIC_VALUE:
|
||||
{
|
||||
|
@ -3344,14 +3344,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
String buf = mungeCharName(valueAlias);
|
||||
int ch =
|
||||
(p == UProperty.NAME) ?
|
||||
UCharacter.getCharFromExtendedName(buf) :
|
||||
UCharacter.getCharFromName1_0(buf);
|
||||
if (ch == -1) {
|
||||
throw new IllegalArgumentException("Invalid character name");
|
||||
}
|
||||
clear();
|
||||
add_unchecked(ch);
|
||||
return this;
|
||||
UCharacter.getCharFromExtendedName(buf) :
|
||||
UCharacter.getCharFromName1_0(buf);
|
||||
if (ch == -1) {
|
||||
throw new IllegalArgumentException("Invalid character name");
|
||||
}
|
||||
clear();
|
||||
add_unchecked(ch);
|
||||
return this;
|
||||
}
|
||||
case UProperty.AGE:
|
||||
{
|
||||
|
@ -3362,11 +3362,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
|
||||
return this;
|
||||
}
|
||||
case UProperty.SCRIPT_EXTENSIONS:
|
||||
v = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, valueAlias);
|
||||
// fall through to calling applyIntPropertyValue()
|
||||
break;
|
||||
default:
|
||||
// p is a non-binary, non-enumerated property that we
|
||||
// don't support (yet).
|
||||
throw new IllegalArgumentException("Unsupported property");
|
||||
}
|
||||
|
||||
// p is a non-binary, non-enumerated property that we
|
||||
// don't support (yet).
|
||||
throw new IllegalArgumentException("Unsupported property");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3690,12 +3694,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet closeOver(int attribute) {
|
||||
checkFrozen();
|
||||
if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
|
||||
UCaseProps csp;
|
||||
try {
|
||||
csp = UCaseProps.getSingleton();
|
||||
} catch(IOException e) {
|
||||
return this;
|
||||
}
|
||||
UCaseProps csp = UCaseProps.INSTANCE;
|
||||
UnicodeSet foldSet = new UnicodeSet(this);
|
||||
ULocale root = ULocale.ROOT;
|
||||
|
||||
|
|
|
@ -125,6 +125,12 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
|||
*/
|
||||
public static final VersionInfo UNICODE_5_2;
|
||||
|
||||
/**
|
||||
* Unicode 6.0 version
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
public static final VersionInfo UNICODE_6_0;
|
||||
|
||||
/**
|
||||
* ICU4J current release version
|
||||
* @stable ICU 2.8
|
||||
|
@ -474,10 +480,11 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
|||
UNICODE_5_0 = getInstance(5, 0, 0, 0);
|
||||
UNICODE_5_1 = getInstance(5, 1, 0, 0);
|
||||
UNICODE_5_2 = getInstance(5, 2, 0, 0);
|
||||
UNICODE_6_0 = getInstance(6, 0, 0, 0);
|
||||
|
||||
ICU_VERSION = getInstance(4, 5, 2, 0);
|
||||
ICU_DATA_VERSION = getInstance(4, 5, 0, 0);
|
||||
UNICODE_VERSION = UNICODE_5_2;
|
||||
UNICODE_VERSION = UNICODE_6_0;
|
||||
|
||||
UCOL_RUNTIME_VERSION = getInstance(6);
|
||||
UCOL_BUILDER_VERSION = getInstance(7);
|
||||
|
|
|
@ -1,13 +1,11 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2009, Google, International Business Machines Corporation *
|
||||
* and others. All Rights Reserved. *
|
||||
* Copyright (C) 2009-2010, Google, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
|
||||
/**
|
||||
|
@ -46,11 +44,7 @@ class CaseFoldTransliterator extends Transliterator{
|
|||
|
||||
public CaseFoldTransliterator() {
|
||||
super(_ID, null);
|
||||
try {
|
||||
csp=UCaseProps.getSingleton();
|
||||
} catch (IOException e) {
|
||||
csp=null;
|
||||
}
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuffer();
|
||||
}
|
||||
|
|
|
@ -6,8 +6,6 @@
|
|||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -51,11 +49,7 @@ class LowercaseTransliterator extends Transliterator{
|
|||
public LowercaseTransliterator(ULocale loc) {
|
||||
super(_ID, null);
|
||||
locale = loc;
|
||||
try {
|
||||
csp=UCaseProps.getSingleton();
|
||||
} catch (IOException e) {
|
||||
csp=null;
|
||||
}
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuffer();
|
||||
locCache = new int[1];
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -49,11 +47,7 @@ class TitlecaseTransliterator extends Transliterator {
|
|||
locale = loc;
|
||||
// Need to look back 2 characters in the case of "can't"
|
||||
setMaximumContextLength(2);
|
||||
try {
|
||||
csp=UCaseProps.getSingleton();
|
||||
} catch (IOException e) {
|
||||
csp=null;
|
||||
}
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuffer();
|
||||
locCache = new int[1];
|
||||
|
|
|
@ -6,8 +6,6 @@
|
|||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -47,11 +45,7 @@ class UppercaseTransliterator extends Transliterator {
|
|||
public UppercaseTransliterator(ULocale loc) {
|
||||
super(_ID, null);
|
||||
locale = loc;
|
||||
try {
|
||||
csp=UCaseProps.getSingleton();
|
||||
} catch (IOException e) {
|
||||
csp=null;
|
||||
}
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuffer();
|
||||
locCache = new int[1];
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6c7451af00c4f9bcf0a42b126ff9170c525eb83849f01ff34245daf947875765
|
||||
size 7080727
|
||||
oid sha256:031fed38d5a135f0db95c36923acce57c5051906bbdf8a77ace1ee26bc8c84fc
|
||||
size 7484296
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:90492aac38e08b91140a948112b587ddc520ad3a9680bb7f8b0fdd28ca079ccd
|
||||
size 717903
|
||||
oid sha256:fd3e33be61fcefc049d4da9d4aec0adbe25c1444736fe96d4dfa494feac94351
|
||||
size 717911
|
||||
|
|
|
@ -21,6 +21,8 @@ import java.util.MissingResourceException;
|
|||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.CollationElementIterator;
|
||||
import com.ibm.icu.text.CollationKey;
|
||||
import com.ibm.icu.text.Collator;
|
||||
|
@ -424,15 +426,18 @@ public class CollationAPITest extends TestFmwk {
|
|||
logln("Test ctors : ");
|
||||
Collator col = Collator.getInstance(Locale.ENGLISH);
|
||||
|
||||
|
||||
logln("Test getVersion");
|
||||
VersionInfo expectedVersion = VersionInfo.getInstance(0x31, 0xC0, 0x00, 0x05);
|
||||
doAssert(col.getVersion().equals(expectedVersion), "Expected version "+expectedVersion.toString()+" got "+col.getVersion().toString());
|
||||
|
||||
// Check for a version greater than some value rather than equality
|
||||
// so that we need not update the expected version each time.
|
||||
VersionInfo expectedVersion = VersionInfo.getInstance(0x31, 0xC0, 0x00, 0x05); // from ICU 4.4/UCA 5.2
|
||||
doAssert(col.getVersion().compareTo(expectedVersion) >= 0, "Expected minimum version "+expectedVersion.toString()+" got "+col.getVersion().toString());
|
||||
|
||||
logln("Test getUCAVersion");
|
||||
VersionInfo expectedUCAVersion = VersionInfo.getInstance(5, 2, 0, 0);
|
||||
doAssert(col.getUCAVersion().equals(expectedUCAVersion), "Expected UCA version "+expectedUCAVersion.toString()+" got "+col.getUCAVersion().toString());
|
||||
|
||||
// Assume that the UCD and UCA versions are the same,
|
||||
// rather than hardcoding (and updating each time) a particular UCA version.
|
||||
VersionInfo ucdVersion = UCharacter.getUnicodeVersion();
|
||||
doAssert(col.getUCAVersion().equals(ucdVersion), "Expected UCA version "+ucdVersion.toString()+" got "+col.getUCAVersion().toString());
|
||||
|
||||
doAssert((col.compare("ab", "abc") < 0), "ab < abc comparison failed");
|
||||
doAssert((col.compare("ab", "AB") < 0), "ab < AB comparison failed");
|
||||
doAssert((col.compare("blackbird", "black-bird") > 0), "black-bird > blackbird comparison failed");
|
||||
|
@ -998,21 +1003,26 @@ public class CollationAPITest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
private void
|
||||
private boolean
|
||||
doSetsTest(UnicodeSet ref, UnicodeSet set, String inSet, String outSet) {
|
||||
|
||||
boolean ok = true;
|
||||
set.clear();
|
||||
set.applyPattern(inSet);
|
||||
|
||||
if(!ref.containsAll(set)) {
|
||||
err("Some stuff from "+inSet+" is not present in the set\n");
|
||||
err("Some stuff from "+inSet+" is not present in the set.\nMissing:"+
|
||||
set.removeAll(ref).toPattern(true)+"\n");
|
||||
ok = false;
|
||||
}
|
||||
|
||||
|
||||
set.clear();
|
||||
set.applyPattern(outSet);
|
||||
if(!ref.containsNone(set)) {
|
||||
err("Some stuff from "+outSet+" is present in the set\n");
|
||||
err("Some stuff from "+outSet+" is present in the set.\nUnexpected:"+
|
||||
set.retainAll(ref).toPattern(true)+"\n");
|
||||
ok = false;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
public void TestGetContractions()throws Exception {
|
||||
|
@ -1074,11 +1084,19 @@ public class CollationAPITest extends TestFmwk {
|
|||
logln("Testing locale: "+ tests[i][0]);
|
||||
coll = (RuleBasedCollator)Collator.getInstance(new ULocale(tests[i][0]));
|
||||
coll.getContractionsAndExpansions(conts, exp, true);
|
||||
boolean ok = true;
|
||||
logln("Contractions "+conts.size()+":\n"+conts.toPattern(true));
|
||||
doSetsTest(conts, set, tests[i][1], tests[i][2]);
|
||||
ok &= doSetsTest(conts, set, tests[i][1], tests[i][2]);
|
||||
logln("Expansions "+exp.size()+":\n"+exp.toPattern(true));
|
||||
doSetsTest(exp, set, tests[i][3], tests[i][4]);
|
||||
|
||||
ok &= doSetsTest(exp, set, tests[i][3], tests[i][4]);
|
||||
if(!ok) {
|
||||
// In case of failure, log the rule string for better diagnostics.
|
||||
String rules = coll.getRules(false);
|
||||
logln("Collation rules (getLocale()="+
|
||||
coll.getLocale(ULocale.ACTUAL_LOCALE).toString()+"): "+
|
||||
Utility.escape(rules));
|
||||
}
|
||||
|
||||
// No unsafe set in ICU4J
|
||||
//noConts = ucol_getUnsafeSet(coll, conts, &status);
|
||||
//doSetsTest(conts, set, tests[i][5], tests[i][6]);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# CompositionExclusions-5.2.0.txt
|
||||
# Date: 2009-05-22, 12:52:00 PDT [KW]
|
||||
# CompositionExclusions-6.0.0.txt
|
||||
# Date: 2010-06-25, 14:34:00 PDT [KW]
|
||||
#
|
||||
# This file lists the characters for the Composition Exclusion Table
|
||||
# defined in UAX #15, Unicode Normalization Forms.
|
||||
|
@ -7,11 +7,11 @@
|
|||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# For more information, see
|
||||
# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
|
||||
# http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
|
||||
#
|
||||
# For a full derivation of composition exclusions, see the derived property
|
||||
# Full_Composition_Exclusion in DerivedNormalizationProps.txt
|
||||
|
@ -126,8 +126,8 @@ FB4E # HEBREW LETTER PE WITH RAFE
|
|||
# (3) Singleton Decompositions
|
||||
#
|
||||
# These characters can be derived from the UnicodeData.txt file
|
||||
# by including all characters whose canonical decomposition
|
||||
# consists of a single character.
|
||||
# by including all canonically decomposable characters whose
|
||||
# canonical decomposition consists of a single character.
|
||||
#
|
||||
# These characters are simply quoted here for reference.
|
||||
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
|
||||
|
@ -180,9 +180,18 @@ FB4E # HEBREW LETTER PE WITH RAFE
|
|||
# (4) Non-Starter Decompositions
|
||||
#
|
||||
# These characters can be derived from the UnicodeData file
|
||||
# by including all characters whose canonical decomposition consists
|
||||
# of a sequence of characters, the first of which has a non-zero
|
||||
# combining class.
|
||||
# by including each expanding canonical decomposition
|
||||
# (i.e., those which canonically decompose to a sequence
|
||||
# of characters instead of a single character), such that:
|
||||
#
|
||||
# A. The character is not a Starter.
|
||||
#
|
||||
# OR (inclusive)
|
||||
#
|
||||
# B. The character's canonical decomposition begins
|
||||
# with a character that is not a Starter.
|
||||
#
|
||||
# Note that a "Starter" is any character with a zero combining class.
|
||||
#
|
||||
# These characters are simply quoted here for reference.
|
||||
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# NormalizationCorrections-5.2.0.txt
|
||||
# Date: 2009-05-22, 13:54:00 PDT [KW]
|
||||
# NormalizationCorrections-6.0.0.txt
|
||||
# Date: 2010-05-19, 11:21:00 PDT [KW]
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# The normalization stabilization policy of the Unicode
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# NormalizationTest-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:39 GMT [MD]
|
||||
# NormalizationTest-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:30 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -1196,6 +1196,14 @@
|
|||
2092;2092;2092;006F;006F;
|
||||
2093;2093;2093;0078;0078;
|
||||
2094;2094;2094;0259;0259;
|
||||
2095;2095;2095;0068;0068;
|
||||
2096;2096;2096;006B;006B;
|
||||
2097;2097;2097;006C;006C;
|
||||
2098;2098;2098;006D;006D;
|
||||
2099;2099;2099;006E;006E;
|
||||
209A;209A;209A;0070;0070;
|
||||
209B;209B;209B;0073;0073;
|
||||
209C;209C;209C;0074;0074;
|
||||
20A8;20A8;20A8;0052 0073;0052 0073;
|
||||
2100;2100;2100;0061 002F 0063;0061 002F 0063;
|
||||
2101;2101;2101;0061 002F 0073;0061 002F 0073;
|
||||
|
@ -16155,18 +16163,42 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
1F12C;1F12C;1F12C;0052;0052;
|
||||
1F12D;1F12D;1F12D;0043 0044;0043 0044;
|
||||
1F12E;1F12E;1F12E;0057 005A;0057 005A;
|
||||
1F130;1F130;1F130;0041;0041;
|
||||
1F131;1F131;1F131;0042;0042;
|
||||
1F132;1F132;1F132;0043;0043;
|
||||
1F133;1F133;1F133;0044;0044;
|
||||
1F134;1F134;1F134;0045;0045;
|
||||
1F135;1F135;1F135;0046;0046;
|
||||
1F136;1F136;1F136;0047;0047;
|
||||
1F137;1F137;1F137;0048;0048;
|
||||
1F138;1F138;1F138;0049;0049;
|
||||
1F139;1F139;1F139;004A;004A;
|
||||
1F13A;1F13A;1F13A;004B;004B;
|
||||
1F13B;1F13B;1F13B;004C;004C;
|
||||
1F13C;1F13C;1F13C;004D;004D;
|
||||
1F13D;1F13D;1F13D;004E;004E;
|
||||
1F13E;1F13E;1F13E;004F;004F;
|
||||
1F13F;1F13F;1F13F;0050;0050;
|
||||
1F140;1F140;1F140;0051;0051;
|
||||
1F141;1F141;1F141;0052;0052;
|
||||
1F142;1F142;1F142;0053;0053;
|
||||
1F143;1F143;1F143;0054;0054;
|
||||
1F144;1F144;1F144;0055;0055;
|
||||
1F145;1F145;1F145;0056;0056;
|
||||
1F146;1F146;1F146;0057;0057;
|
||||
1F147;1F147;1F147;0058;0058;
|
||||
1F148;1F148;1F148;0059;0059;
|
||||
1F149;1F149;1F149;005A;005A;
|
||||
1F14A;1F14A;1F14A;0048 0056;0048 0056;
|
||||
1F14B;1F14B;1F14B;004D 0056;004D 0056;
|
||||
1F14C;1F14C;1F14C;0053 0044;0053 0044;
|
||||
1F14D;1F14D;1F14D;0053 0053;0053 0053;
|
||||
1F14E;1F14E;1F14E;0050 0050 0056;0050 0050 0056;
|
||||
1F14F;1F14F;1F14F;0057 0043;0057 0043;
|
||||
1F190;1F190;1F190;0044 004A;0044 004A;
|
||||
1F200;1F200;1F200;307B 304B;307B 304B;
|
||||
1F201;1F201;1F201;30B3 30B3;30B3 30B3;
|
||||
1F202;1F202;1F202;30B5;30B5;
|
||||
1F210;1F210;1F210;624B;624B;
|
||||
1F211;1F211;1F211;5B57;5B57;
|
||||
1F212;1F212;1F212;53CC;53CC;
|
||||
|
@ -16201,6 +16233,15 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
1F22F;1F22F;1F22F;6307;6307;
|
||||
1F230;1F230;1F230;8D70;8D70;
|
||||
1F231;1F231;1F231;6253;6253;
|
||||
1F232;1F232;1F232;7981;7981;
|
||||
1F233;1F233;1F233;7A7A;7A7A;
|
||||
1F234;1F234;1F234;5408;5408;
|
||||
1F235;1F235;1F235;6E80;6E80;
|
||||
1F236;1F236;1F236;6709;6709;
|
||||
1F237;1F237;1F237;6708;6708;
|
||||
1F238;1F238;1F238;7533;7533;
|
||||
1F239;1F239;1F239;5272;5272;
|
||||
1F23A;1F23A;1F23A;55B6;55B6;
|
||||
1F240;1F240;1F240;3014 672C 3015;3014 672C 3015;
|
||||
1F241;1F241;1F241;3014 4E09 3015;3014 4E09 3015;
|
||||
1F242;1F242;1F242;3014 4E8C 3015;3014 4E8C 3015;
|
||||
|
@ -16210,6 +16251,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
1F246;1F246;1F246;3014 76D7 3015;3014 76D7 3015;
|
||||
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015;
|
||||
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015;
|
||||
1F250;1F250;1F250;5F97;5F97;
|
||||
1F251;1F251;1F251;53EF;53EF;
|
||||
2F800;4E3D;4E3D;4E3D;4E3D;
|
||||
2F801;4E38;4E38;4E38;4E38;
|
||||
2F802;4E41;4E41;4E41;4E41;
|
||||
|
@ -17151,6 +17194,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 065D 0315 0300 05AE 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;
|
||||
0061 0315 0300 05AE 065E 0062;00E0 05AE 065E 0315 0062;0061 05AE 0300 065E 0315 0062;00E0 05AE 065E 0315 0062;0061 05AE 0300 065E 0315 0062;
|
||||
0061 065E 0315 0300 05AE 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;
|
||||
0061 059A 0316 302A 065F 0062;0061 302A 0316 065F 059A 0062;0061 302A 0316 065F 059A 0062;0061 302A 0316 065F 059A 0062;0061 302A 0316 065F 059A 0062;
|
||||
0061 065F 059A 0316 302A 0062;0061 302A 065F 0316 059A 0062;0061 302A 065F 0316 059A 0062;0061 302A 065F 0316 059A 0062;0061 302A 065F 0316 059A 0062;
|
||||
0061 0711 0670 0652 0670 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;
|
||||
0061 0670 0711 0670 0652 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;
|
||||
0061 0315 0300 05AE 06D6 0062;00E0 05AE 06D6 0315 0062;0061 05AE 0300 06D6 0315 0062;00E0 05AE 06D6 0315 0062;0061 05AE 0300 06D6 0315 0062;
|
||||
|
@ -17307,6 +17352,12 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 082C 0315 0300 05AE 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;
|
||||
0061 0315 0300 05AE 082D 0062;00E0 05AE 082D 0315 0062;0061 05AE 0300 082D 0315 0062;00E0 05AE 082D 0315 0062;0061 05AE 0300 082D 0315 0062;
|
||||
0061 082D 0315 0300 05AE 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;
|
||||
0061 059A 0316 302A 0859 0062;0061 302A 0316 0859 059A 0062;0061 302A 0316 0859 059A 0062;0061 302A 0316 0859 059A 0062;0061 302A 0316 0859 059A 0062;
|
||||
0061 0859 059A 0316 302A 0062;0061 302A 0859 0316 059A 0062;0061 302A 0859 0316 059A 0062;0061 302A 0859 0316 059A 0062;0061 302A 0859 0316 059A 0062;
|
||||
0061 059A 0316 302A 085A 0062;0061 302A 0316 085A 059A 0062;0061 302A 0316 085A 059A 0062;0061 302A 0316 085A 059A 0062;0061 302A 0316 085A 059A 0062;
|
||||
0061 085A 059A 0316 302A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;
|
||||
0061 059A 0316 302A 085B 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;
|
||||
0061 085B 059A 0316 302A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;
|
||||
0061 3099 093C 0334 093C 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;
|
||||
0061 093C 3099 093C 0334 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;
|
||||
0061 05B0 094D 3099 094D 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;
|
||||
|
@ -17423,6 +17474,10 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 103A 05B0 094D 3099 0062;0061 3099 103A 094D 05B0 0062;0061 3099 103A 094D 05B0 0062;0061 3099 103A 094D 05B0 0062;0061 3099 103A 094D 05B0 0062;
|
||||
0061 059A 0316 302A 108D 0062;0061 302A 0316 108D 059A 0062;0061 302A 0316 108D 059A 0062;0061 302A 0316 108D 059A 0062;0061 302A 0316 108D 059A 0062;
|
||||
0061 108D 059A 0316 302A 0062;0061 302A 108D 0316 059A 0062;0061 302A 108D 0316 059A 0062;0061 302A 108D 0316 059A 0062;0061 302A 108D 0316 059A 0062;
|
||||
0061 0315 0300 05AE 135D 0062;00E0 05AE 135D 0315 0062;0061 05AE 0300 135D 0315 0062;00E0 05AE 135D 0315 0062;0061 05AE 0300 135D 0315 0062;
|
||||
0061 135D 0315 0300 05AE 0062;0061 05AE 135D 0300 0315 0062;0061 05AE 135D 0300 0315 0062;0061 05AE 135D 0300 0315 0062;0061 05AE 135D 0300 0315 0062;
|
||||
0061 0315 0300 05AE 135E 0062;00E0 05AE 135E 0315 0062;0061 05AE 0300 135E 0315 0062;00E0 05AE 135E 0315 0062;0061 05AE 0300 135E 0315 0062;
|
||||
0061 135E 0315 0300 05AE 0062;0061 05AE 135E 0300 0315 0062;0061 05AE 135E 0300 0315 0062;0061 05AE 135E 0300 0315 0062;0061 05AE 135E 0300 0315 0062;
|
||||
0061 0315 0300 05AE 135F 0062;00E0 05AE 135F 0315 0062;0061 05AE 0300 135F 0315 0062;00E0 05AE 135F 0315 0062;0061 05AE 0300 135F 0315 0062;
|
||||
0061 135F 0315 0300 05AE 0062;0061 05AE 135F 0300 0315 0062;0061 05AE 135F 0300 0315 0062;0061 05AE 135F 0300 0315 0062;0061 05AE 135F 0300 0315 0062;
|
||||
0061 05B0 094D 3099 1714 0062;0061 3099 094D 1714 05B0 0062;0061 3099 094D 1714 05B0 0062;0061 3099 094D 1714 05B0 0062;0061 3099 094D 1714 05B0 0062;
|
||||
|
@ -17489,6 +17544,12 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 1B73 0315 0300 05AE 0062;0061 05AE 1B73 0300 0315 0062;0061 05AE 1B73 0300 0315 0062;0061 05AE 1B73 0300 0315 0062;0061 05AE 1B73 0300 0315 0062;
|
||||
0061 05B0 094D 3099 1BAA 0062;0061 3099 094D 1BAA 05B0 0062;0061 3099 094D 1BAA 05B0 0062;0061 3099 094D 1BAA 05B0 0062;0061 3099 094D 1BAA 05B0 0062;
|
||||
0061 1BAA 05B0 094D 3099 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;
|
||||
0061 3099 093C 0334 1BE6 0062;0061 0334 093C 1BE6 3099 0062;0061 0334 093C 1BE6 3099 0062;0061 0334 093C 1BE6 3099 0062;0061 0334 093C 1BE6 3099 0062;
|
||||
0061 1BE6 3099 093C 0334 0062;0061 0334 1BE6 093C 3099 0062;0061 0334 1BE6 093C 3099 0062;0061 0334 1BE6 093C 3099 0062;0061 0334 1BE6 093C 3099 0062;
|
||||
0061 05B0 094D 3099 1BF2 0062;0061 3099 094D 1BF2 05B0 0062;0061 3099 094D 1BF2 05B0 0062;0061 3099 094D 1BF2 05B0 0062;0061 3099 094D 1BF2 05B0 0062;
|
||||
0061 1BF2 05B0 094D 3099 0062;0061 3099 1BF2 094D 05B0 0062;0061 3099 1BF2 094D 05B0 0062;0061 3099 1BF2 094D 05B0 0062;0061 3099 1BF2 094D 05B0 0062;
|
||||
0061 05B0 094D 3099 1BF3 0062;0061 3099 094D 1BF3 05B0 0062;0061 3099 094D 1BF3 05B0 0062;0061 3099 094D 1BF3 05B0 0062;0061 3099 094D 1BF3 05B0 0062;
|
||||
0061 1BF3 05B0 094D 3099 0062;0061 3099 1BF3 094D 05B0 0062;0061 3099 1BF3 094D 05B0 0062;0061 3099 1BF3 094D 05B0 0062;0061 3099 1BF3 094D 05B0 0062;
|
||||
0061 3099 093C 0334 1C37 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;
|
||||
0061 1C37 3099 093C 0334 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;
|
||||
0061 0315 0300 05AE 1CD0 0062;00E0 05AE 1CD0 0315 0062;0061 05AE 0300 1CD0 0315 0062;00E0 05AE 1CD0 0315 0062;0061 05AE 0300 1CD0 0315 0062;
|
||||
|
@ -17617,6 +17678,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 1DE5 0315 0300 05AE 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;
|
||||
0061 0315 0300 05AE 1DE6 0062;00E0 05AE 1DE6 0315 0062;0061 05AE 0300 1DE6 0315 0062;00E0 05AE 1DE6 0315 0062;0061 05AE 0300 1DE6 0315 0062;
|
||||
0061 1DE6 0315 0300 05AE 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;
|
||||
0061 035D 035C 0315 1DFC 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;
|
||||
0061 1DFC 035D 035C 0315 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;
|
||||
0061 059A 0316 302A 1DFD 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;
|
||||
0061 1DFD 059A 0316 302A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;
|
||||
0061 0315 0300 05AE 1DFE 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;
|
||||
|
@ -17681,6 +17744,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 2CF0 0315 0300 05AE 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;
|
||||
0061 0315 0300 05AE 2CF1 0062;00E0 05AE 2CF1 0315 0062;0061 05AE 0300 2CF1 0315 0062;00E0 05AE 2CF1 0315 0062;0061 05AE 0300 2CF1 0315 0062;
|
||||
0061 2CF1 0315 0300 05AE 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;
|
||||
0061 05B0 094D 3099 2D7F 0062;0061 3099 094D 2D7F 05B0 0062;0061 3099 094D 2D7F 05B0 0062;0061 3099 094D 2D7F 05B0 0062;0061 3099 094D 2D7F 05B0 0062;
|
||||
0061 2D7F 05B0 094D 3099 0062;0061 3099 2D7F 094D 05B0 0062;0061 3099 2D7F 094D 05B0 0062;0061 3099 2D7F 094D 05B0 0062;0061 3099 2D7F 094D 05B0 0062;
|
||||
0061 0315 0300 05AE 2DE0 0062;00E0 05AE 2DE0 0315 0062;0061 05AE 0300 2DE0 0315 0062;00E0 05AE 2DE0 0315 0062;0061 05AE 0300 2DE0 0315 0062;
|
||||
0061 2DE0 0315 0300 05AE 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;
|
||||
0061 0315 0300 05AE 2DE1 0062;00E0 05AE 2DE1 0315 0062;0061 05AE 0300 2DE1 0315 0062;00E0 05AE 2DE1 0315 0062;0061 05AE 0300 2DE1 0315 0062;
|
||||
|
@ -17873,6 +17938,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 10A3A 059A 0316 302A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;
|
||||
0061 05B0 094D 3099 10A3F 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;
|
||||
0061 10A3F 05B0 094D 3099 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;
|
||||
0061 05B0 094D 3099 11046 0062;0061 3099 094D 11046 05B0 0062;0061 3099 094D 11046 05B0 0062;0061 3099 094D 11046 05B0 0062;0061 3099 094D 11046 05B0 0062;
|
||||
0061 11046 05B0 094D 3099 0062;0061 3099 11046 094D 05B0 0062;0061 3099 11046 094D 05B0 0062;0061 3099 11046 094D 05B0 0062;0061 3099 11046 094D 05B0 0062;
|
||||
0061 05B0 094D 3099 110B9 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;
|
||||
0061 110B9 05B0 094D 3099 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;
|
||||
0061 3099 093C 0334 110BA 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SpecialCasing-5.2.0.txt
|
||||
# Date: 2009-09-22, 23:25:59 GMT [MD]
|
||||
# SpecialCasing-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:39 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -7,6 +7,7 @@
|
|||
|
||||
package com.ibm.icu.dev.test.lang;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Locale;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
|
@ -318,6 +319,81 @@ public class TestUScript extends TestFmwk {
|
|||
errln("UScript.getScript failed.");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestGetScriptOfCharsWithScriptExtensions() {
|
||||
/* test characters which have Script_Extensions */
|
||||
if(!(
|
||||
UScript.COMMON==UScript.getScript(0x0640) &&
|
||||
UScript.INHERITED==UScript.getScript(0x0650) &&
|
||||
UScript.ARABIC==UScript.getScript(0xfdf2))
|
||||
) {
|
||||
errln("UScript.getScript(character with Script_Extensions) failed");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestHasScript() {
|
||||
if(!(
|
||||
!UScript.hasScript(0x063f, UScript.COMMON) &&
|
||||
UScript.hasScript(0x063f, UScript.ARABIC) && /* main Script value */
|
||||
!UScript.hasScript(0x063f, UScript.SYRIAC) &&
|
||||
!UScript.hasScript(0x063f, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+063F, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */
|
||||
UScript.hasScript(0x0640, UScript.ARABIC) &&
|
||||
UScript.hasScript(0x0640, UScript.SYRIAC) &&
|
||||
!UScript.hasScript(0x0640, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+0640, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */
|
||||
UScript.hasScript(0x0650, UScript.ARABIC) &&
|
||||
UScript.hasScript(0x0650, UScript.SYRIAC) &&
|
||||
!UScript.hasScript(0x0650, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+0650, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */
|
||||
UScript.hasScript(0x0660, UScript.ARABIC) &&
|
||||
!UScript.hasScript(0x0660, UScript.SYRIAC) &&
|
||||
UScript.hasScript(0x0660, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+0660, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
!UScript.hasScript(0xfdf2, UScript.COMMON) &&
|
||||
UScript.hasScript(0xfdf2, UScript.ARABIC) && /* main Script value */
|
||||
!UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
|
||||
UScript.hasScript(0xfdf2, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+FDF2, ...) is wrong\n");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestGetScriptExtensions() {
|
||||
BitSet scripts=new BitSet(UScript.CODE_LIMIT);
|
||||
|
||||
/* normal usage */
|
||||
if(!UScript.getScriptExtensions(0x063f, scripts).isEmpty()) {
|
||||
errln("UScript.getScriptExtensions(U+063F) is not empty");
|
||||
}
|
||||
if(UScript.getScriptExtensions(0x0640, scripts).cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC)) {
|
||||
errln("UScript.getScriptExtensions(U+0640) failed");
|
||||
}
|
||||
UScript.getScriptExtensions(0xfdf2, scripts);
|
||||
if(scripts.cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
|
||||
errln("UScript.getScriptExtensions(U+FDF2) failed");
|
||||
}
|
||||
UScript.getScriptExtensions(0xff65, scripts);
|
||||
if(scripts.cardinality()!=6 || !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
|
||||
errln("UScript.getScriptExtensions(U+FF65) failed");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestScriptNames(){
|
||||
for(int i=0; i<UScript.CODE_LIMIT;i++){
|
||||
String name = UScript.getName(i);
|
||||
|
@ -360,9 +436,9 @@ public class TestUScript extends TestFmwk {
|
|||
* Whenever this happens, the long script names here need to be updated.
|
||||
*/
|
||||
String[] expectedLong = new String[]{
|
||||
"Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
|
||||
"Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
|
||||
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
|
||||
"Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
|
||||
"Lepcha", "Lina", "Mandaic", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
|
||||
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
|
||||
"Zxxx", "Unknown",
|
||||
"Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
|
||||
|
@ -374,6 +450,9 @@ public class TestUScript extends TestFmwk {
|
|||
"Zmth", "Zsym",
|
||||
/* new in ICU 4.4 */
|
||||
"Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
|
||||
/* new in ICU 4.6 */
|
||||
"Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
|
||||
"Narb", "Nbat", "Palm", "Sind", "Wara",
|
||||
};
|
||||
String[] expectedShort = new String[]{
|
||||
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
|
||||
|
@ -389,6 +468,9 @@ public class TestUScript extends TestFmwk {
|
|||
"Samr", "Tavt", "Zmth", "Zsym",
|
||||
/* new in ICU 4.4 */
|
||||
"Bamu", "Lisu", "Nkgb", "Sarb",
|
||||
/* new in ICU 4.6 */
|
||||
"Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
|
||||
"Narb", "Nbat", "Palm", "Sind", "Wara",
|
||||
};
|
||||
if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
|
||||
errln("need to add new script codes in lang.TestUScript.java!");
|
||||
|
|
|
@ -15,8 +15,6 @@ import com.ibm.icu.dev.test.TestFmwk;
|
|||
import com.ibm.icu.dev.test.TestUtil;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.impl.Normalizer2Impl;
|
||||
import com.ibm.icu.impl.UBiDiProps;
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.impl.UCharacterName;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
@ -48,7 +46,7 @@ public final class UCharacterTest extends TestFmwk
|
|||
/**
|
||||
* ICU4J data version number
|
||||
*/
|
||||
private final VersionInfo VERSION_ = VersionInfo.getInstance("5.2.0.0");
|
||||
private final VersionInfo VERSION_ = VersionInfo.getInstance("6.0.0.0");
|
||||
|
||||
// constructor ===================================================
|
||||
|
||||
|
@ -399,7 +397,7 @@ public final class UCharacterTest extends TestFmwk
|
|||
public void TestVersion()
|
||||
{
|
||||
if (!UCharacter.getUnicodeVersion().equals(VERSION_))
|
||||
errln("FAIL expected: " + VERSION_ + "got: " + UCharacter.getUnicodeVersion());
|
||||
errln("FAIL expected: " + VERSION_ + " got: " + UCharacter.getUnicodeVersion());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1815,7 +1813,6 @@ public final class UCharacterTest extends TestFmwk
|
|||
{ 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH },
|
||||
{ 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH },
|
||||
{ 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL },
|
||||
{ 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HAMZA_ON_HEH_GOAL },
|
||||
|
||||
{ 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING },
|
||||
{ 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },
|
||||
|
@ -1948,6 +1945,11 @@ public final class UCharacterTest extends TestFmwk
|
|||
{ 0xa4d0, UProperty.SCRIPT, UScript.LISU },
|
||||
{ 0x10a7f, UProperty.SCRIPT, UScript.OLD_SOUTH_ARABIAN },
|
||||
|
||||
{ -1, 0x600, 0 }, /* version break for Unicode 6.0 */
|
||||
|
||||
/* value changed in Unicode 6.0 */
|
||||
{ 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.TEH_MARBUTA_GOAL },
|
||||
|
||||
/* undefined UProperty values */
|
||||
{ 0x61, 0x4a7, 0 },
|
||||
{ 0x234bc, 0x15ed, 0 }
|
||||
|
@ -2253,19 +2255,11 @@ public final class UCharacterTest extends TestFmwk
|
|||
String a_name, String b_name,
|
||||
boolean expect,
|
||||
boolean diffIsError){
|
||||
int i, start, end, length;
|
||||
boolean equal;
|
||||
equal=true;
|
||||
i=0;
|
||||
for(;;) {
|
||||
int i, start, end;
|
||||
boolean equal=true;
|
||||
for(i=0; i < a.getRangeCount(); ++i) {
|
||||
start = a.getRangeStart(i);
|
||||
length = (i < a.getRangeCount()) ? 0 : a.getRangeCount();
|
||||
end = a.getRangeEnd(i);
|
||||
|
||||
if(length!=0) {
|
||||
return equal; /* done with code points, got a string or -1 */
|
||||
}
|
||||
|
||||
if(expect!=b.contains(start, end)) {
|
||||
equal=false;
|
||||
while(start<=end) {
|
||||
|
@ -2287,9 +2281,8 @@ public final class UCharacterTest extends TestFmwk
|
|||
++start;
|
||||
}
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
return equal;
|
||||
}
|
||||
private boolean showAMinusB(UnicodeSet a, UnicodeSet b,
|
||||
String a_name, String b_name,
|
||||
|
@ -2332,7 +2325,7 @@ public final class UCharacterTest extends TestFmwk
|
|||
*
|
||||
* Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
|
||||
* but not from Hyphen.
|
||||
* UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
|
||||
* UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
|
||||
* Therefore, do not show errors when testing the Hyphen property.
|
||||
*/
|
||||
logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
|
||||
|
@ -2442,20 +2435,6 @@ public final class UCharacterTest extends TestFmwk
|
|||
}
|
||||
}
|
||||
|
||||
public void TestCasePropsDummy() {
|
||||
// code coverage for UCaseProps.getDummy()
|
||||
if(UCaseProps.getDummy().tolower(0x41)!=0x41) {
|
||||
errln("UCaseProps.getDummy().tolower(0x41)!=0x41");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestBiDiPropsDummy() {
|
||||
// code coverage for UBiDiProps.getDummy()
|
||||
if(UBiDiProps.getDummy().getClass(0x20)!=0) {
|
||||
errln("UBiDiProps.getDummy().getClass(0x20)!=0");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestBlockData()
|
||||
{
|
||||
Class ubc = UCharacter.UnicodeBlock.class;
|
||||
|
@ -2510,30 +2489,6 @@ public final class UCharacterTest extends TestFmwk
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The following method tests
|
||||
* static int idOf(int ch)
|
||||
*/
|
||||
public void TestIDOf(){
|
||||
int[] invalid_test = {-2, -1, UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2};
|
||||
|
||||
for(int i=0; i < invalid_test.length; i++){
|
||||
int result = UCharacter.getIntPropertyValue(invalid_test[i], UProperty.BLOCK);
|
||||
if(result != -1){
|
||||
errln("UCharacter.UnicodeBlock.idOf() was suppose to return -1. Got " + result);
|
||||
}
|
||||
}
|
||||
|
||||
int[] valid_test = {0, 1, UTF16.CODEPOINT_MAX_VALUE, UTF16.CODEPOINT_MAX_VALUE-1};
|
||||
|
||||
for(int i=0; i < valid_test.length; i++){
|
||||
int result = UCharacter.getIntPropertyValue(valid_test[i], UProperty.BLOCK);
|
||||
if(result == -1){
|
||||
errln("UCharacter.UnicodeBlock.idOf() was not suppose to return -1. Got " + result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The following method tests
|
||||
* public static final UnicodeBlock forName(String blockName)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.lang;
|
||||
|
@ -215,9 +215,10 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
|
||||
s.clear();
|
||||
s.applyPropertyAlias("nv", "0.5");
|
||||
expectToPattern(s, "[\\u00BD\\u0D74\\u0F2A\\u2CFD\\uA831\\U00010141\\U00010175\\U00010176\\U00010E7B]", null);
|
||||
expectToPattern(s, "[\\u00BD\\u0B73\\u0D74\\u0F2A\\u2CFD\\uA831\\U00010141\\U00010175\\U00010176\\U00010E7B]", null);
|
||||
// Unicode 5.1 adds Malayalam 1/2 (\u0D74)
|
||||
// Unicode 5.2 adds U+A831 NORTH INDIC FRACTION ONE HALF and U+10E7B RUMI FRACTION ONE HALF
|
||||
// Unicode 6.0 adds U+0B73 ORIYA FRACTION ONE HALF
|
||||
|
||||
s.clear();
|
||||
s.applyPropertyAlias("gc", "Lu");
|
||||
|
@ -1134,6 +1135,16 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
"A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
|
||||
"\\u0888\\uFDD3\\uFFFE\\U00050005",
|
||||
|
||||
// Script_Extensions, new in Unicode 6.0
|
||||
"[:scx=Arab:]",
|
||||
"\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
|
||||
"\\u061D\\u065F\\uFDEF\\uFDFE",
|
||||
|
||||
// U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
|
||||
// so scx-sc is missing U+FDF2.
|
||||
"[[:Script_Extensions=Arabic:]-[:Arab:]]",
|
||||
"\\u0640\\u064B\\u0650\\u0655\\uFDFD",
|
||||
"\\uFDF2"
|
||||
};
|
||||
|
||||
for (int i=0; i<DATA.length; i+=3) {
|
||||
|
|
|
@ -2263,432 +2263,61 @@ public class BasicTest extends TestFmwk {
|
|||
|
||||
}
|
||||
|
||||
static final int D = 0, C = 1, KD= 2, KC = 3, FCD=4, NONE=5;
|
||||
private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets){
|
||||
if( skipSets.length < 4 ){
|
||||
return null;
|
||||
}
|
||||
skipSets[D].applyPattern(
|
||||
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
|
||||
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
|
||||
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
|
||||
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
|
||||
+ "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
|
||||
+ "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
|
||||
+ "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
|
||||
+ "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
|
||||
+ "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
|
||||
+ "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
|
||||
+ "-\\u045E\\u0476\\u0477\\u0483-\\u0487\\u04C1\\u04C2\\u04D0-"
|
||||
+ "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
|
||||
+ "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
|
||||
+ "\\u05C5\\u05C7\\u0610-\\u061A\\u0622-\\u0626\\u064B-\\u065E"
|
||||
+ "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
|
||||
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
+ "\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-"
|
||||
+ "\\u082D\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
+ "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
|
||||
+ "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
|
||||
+ "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
|
||||
+ "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
|
||||
+ "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
|
||||
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u135F\\u1714\\u1734"
|
||||
+ "\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75"
|
||||
+ "-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34"
|
||||
+ "\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA"
|
||||
+ "\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED"
|
||||
+ "\\u1DC0-\\u1DE6\\u1DFD-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-"
|
||||
+ "\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-"
|
||||
+ "\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4"
|
||||
+ "\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-"
|
||||
+ "\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000"
|
||||
+ "\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A"
|
||||
+ "\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C"
|
||||
+ "\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
|
||||
+ "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
|
||||
+ "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
|
||||
+ "\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-"
|
||||
+ "\\u302F\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A"
|
||||
+ "\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071"
|
||||
+ "\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094"
|
||||
+ "\\u3099\\u309A\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6"
|
||||
+ "\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9"
|
||||
+ "\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC"
|
||||
+ "\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\uA66F\\uA67C\\uA67D\\uA6F0"
|
||||
+ "\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D\\uA953"
|
||||
+ "\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF"
|
||||
+ "\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12"
|
||||
+ "\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D"
|
||||
+ "\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36"
|
||||
+ "\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-"
|
||||
+ "\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A"
|
||||
+ "38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB"
|
||||
+ "\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
|
||||
+ "F800-\\U0002FA1D]", false);
|
||||
// Indices into the skipSets array: initSkippables() fills skipSets[D],
// skipSets[C], skipSets[KD] and skipSets[KC] from the NFD, NFC, NFKD and
// NFKC quick-check properties respectively.
// NOTE(review): FCD and NONE are not used as skipSets indices by
// initSkippables(); confirm how they are used elsewhere in this class.
static final int D = 0, C = 1, KD= 2, KC = 3, FCD=4, NONE=5;
|
||||
|
||||
skipSets[C].applyPattern(
|
||||
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
|
||||
+ "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
|
||||
+ "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124"
|
||||
+ "\\u0125\\u0128-\\u012D\\u0130\\u0139\\u013A\\u013D\\u013E\\u0143"
|
||||
+ "\\u0144\\u0147\\u0148\\u014C-\\u0151\\u0154\\u0155\\u0158-"
|
||||
+ "\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168-\\u0171\\u0174-"
|
||||
+ "\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7\\u01CD-\\u01DC\\u01DE"
|
||||
+ "-\\u01E1\\u01E6-\\u01EB\\u01F4\\u01F5\\u01F8-\\u01FB\\u0200-"
|
||||
+ "\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0292\\u0300-\\u034E"
|
||||
+ "\\u0350-\\u036F\\u0374\\u037E\\u0387\\u0391\\u0395\\u0397\\u0399"
|
||||
+ "\\u039F\\u03A1\\u03A5\\u03A9\\u03AC\\u03AE\\u03B1\\u03B5\\u03B7"
|
||||
+ "\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-\\u03CB\\u03CE\\u03D2\\u0406"
|
||||
+ "\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B"
|
||||
+ "\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447"
|
||||
+ "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487\\u04D8\\u04D9"
|
||||
+ "\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5"
|
||||
+ "\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627\\u0648\\u064A-"
|
||||
+ "\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4"
|
||||
+ "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
+ "\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-"
|
||||
+ "\\u082D\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
+ "-\\u095F\\u09BC\\u09BE\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF"
|
||||
+ "\\u0A33\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD"
|
||||
+ "\\u0B3C\\u0B3E\\u0B47\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92"
|
||||
+ "\\u0BBE\\u0BC6\\u0BC7\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56"
|
||||
+ "\\u0CBC\\u0CBF\\u0CC2\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E"
|
||||
+ "\\u0D46\\u0D47\\u0D4D\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
|
||||
+ "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
+ "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
+ "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u1100-\\u1112"
|
||||
+ "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
|
||||
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-"
|
||||
+ "\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34"
|
||||
+ "\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73"
|
||||
+ "\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
|
||||
+ "\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F\\u1E12-"
|
||||
+ "\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-"
|
||||
+ "\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-"
|
||||
+ "\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-"
|
||||
+ "\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51"
|
||||
+ "\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-"
|
||||
+ "\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99"
|
||||
+ "\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE"
|
||||
+ "\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3"
|
||||
+ "\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000"
|
||||
+ "\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A"
|
||||
+ "\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
|
||||
+ "\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261"
|
||||
+ "\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282"
|
||||
+ "\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB"
|
||||
+ "\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-"
|
||||
+ "\\u2DFF\\u302A-\\u302F\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053"
|
||||
+ "\\u3055\\u3057\\u3059\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066"
|
||||
+ "\\u3068\\u306F\\u3072\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D"
|
||||
+ "\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9"
|
||||
+ "\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2"
|
||||
+ "\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2\\u30FD\\uA66F\\uA67C\\uA67D"
|
||||
+ "\\uA6F0\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D"
|
||||
+ "\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE"
|
||||
+ "\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C"
|
||||
+ "\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88"
|
||||
+ "\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84"
|
||||
+ "\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80"
|
||||
+ "\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C"
|
||||
+ "\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178"
|
||||
+ "\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274"
|
||||
+ "\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370"
|
||||
+ "\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C"
|
||||
+ "\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568"
|
||||
+ "\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664"
|
||||
+ "\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760"
|
||||
+ "\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C"
|
||||
+ "\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958"
|
||||
+ "\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54"
|
||||
+ "\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50"
|
||||
+ "\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C"
|
||||
+ "\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48"
|
||||
+ "\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44"
|
||||
+ "\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40"
|
||||
+ "\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C"
|
||||
+ "\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138"
|
||||
+ "\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234"
|
||||
+ "\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330"
|
||||
+ "\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C"
|
||||
+ "\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528"
|
||||
+ "\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624"
|
||||
+ "\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720"
|
||||
+ "\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C"
|
||||
+ "\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918"
|
||||
+ "\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14"
|
||||
+ "\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10"
|
||||
+ "\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C"
|
||||
+ "\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08"
|
||||
+ "\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04"
|
||||
+ "\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00"
|
||||
+ "\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC"
|
||||
+ "\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8"
|
||||
+ "\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4"
|
||||
+ "\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0"
|
||||
+ "\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC"
|
||||
+ "\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8"
|
||||
+ "\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4"
|
||||
+ "\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0"
|
||||
+ "\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10"
|
||||
+ "\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-"
|
||||
+ "\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
|
||||
+ "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
|
||||
+ "-\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010"
|
||||
+ "A38-\\U00010A3A\\U00010A3F\\U00011099\\U0001109B\\U000110A5"
|
||||
+ "\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
|
||||
+ "F800-\\U0002FA1D]", false);
|
||||
|
||||
skipSets[KD].applyPattern(
|
||||
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
|
||||
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
|
||||
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
|
||||
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
|
||||
+ "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
|
||||
+ "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
|
||||
+ "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
|
||||
+ "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
|
||||
+ "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
|
||||
+ "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
|
||||
+ "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
+ "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
|
||||
+ "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
|
||||
+ "-\\u0487\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
|
||||
+ "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
|
||||
+ "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
|
||||
+ "-\\u061A\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
|
||||
+ "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
|
||||
+ "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
|
||||
+ "\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D"
|
||||
+ "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
|
||||
+ "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
|
||||
+ "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
|
||||
+ "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
|
||||
+ "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
|
||||
+ "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
|
||||
+ "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
|
||||
+ "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
|
||||
+ "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
|
||||
+ "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
+ "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u10FC\\u135F\\u1714"
|
||||
+ "\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60"
|
||||
+ "\\u1A75-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12"
|
||||
+ "\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73"
|
||||
+ "\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
|
||||
+ "\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-"
|
||||
+ "\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E9B\\u1EA0-\\u1EF9"
|
||||
+ "\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D"
|
||||
+ "\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-"
|
||||
+ "\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-"
|
||||
+ "\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011"
|
||||
+ "\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C"
|
||||
+ "\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-"
|
||||
+ "\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-"
|
||||
+ "\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115"
|
||||
+ "\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128"
|
||||
+ "\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140"
|
||||
+ "\\u2145-\\u2149\\u2150-\\u217F\\u2189\\u219A\\u219B\\u21AE"
|
||||
+ "\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D"
|
||||
+ "\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
|
||||
+ "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
|
||||
+ "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
|
||||
+ "\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC"
|
||||
+ "\\u2C7C\\u2C7D\\u2CEF-\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F"
|
||||
+ "\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-"
|
||||
+ "\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A"
|
||||
+ "\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071"
|
||||
+ "\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094"
|
||||
+ "\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4"
|
||||
+ "\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7"
|
||||
+ "\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA"
|
||||
+ "\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF\\u3131-"
|
||||
+ "\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3247\\u3250-"
|
||||
+ "\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D"
|
||||
+ "\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-"
|
||||
+ "\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8"
|
||||
+ "\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D"
|
||||
+ "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
|
||||
+ "-\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-"
|
||||
+ "\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41"
|
||||
+ "\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F"
|
||||
+ "\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26"
|
||||
+ "\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B"
|
||||
+ "\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-"
|
||||
+ "\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-"
|
||||
+ "\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A"
|
||||
+ "38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB"
|
||||
+ "\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
|
||||
+ "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
|
||||
+ "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
|
||||
+ "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
|
||||
+ "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
|
||||
+ "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
|
||||
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
|
||||
+ "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
|
||||
+ "\\U0001F100-\\U0001F10A\\U0001F110-\\U0001F12E\\U0001F131\\U0001"
|
||||
+ "F13D\\U0001F13F\\U0001F142\\U0001F146\\U0001F14A-\\U0001F14E"
|
||||
+ "\\U0001F190\\U0001F200\\U0001F210-\\U0001F231\\U0001F240-\\U0001"
|
||||
+ "F248\\U0002F800-\\U0002FA1D]", false);
|
||||
|
||||
skipSets[KC].applyPattern(
|
||||
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
|
||||
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
|
||||
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
|
||||
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
|
||||
+ "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
|
||||
+ "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
|
||||
+ "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
|
||||
+ "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
|
||||
+ "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
|
||||
+ "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
|
||||
+ "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
|
||||
+ "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
|
||||
+ "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
|
||||
+ "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
|
||||
+ "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
+ "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
|
||||
+ "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
|
||||
+ "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487"
|
||||
+ "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
|
||||
+ "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627"
|
||||
+ "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
|
||||
+ "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
|
||||
+ "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-"
|
||||
+ "\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0928\\u0930\\u0933"
|
||||
+ "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
|
||||
+ "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
|
||||
+ "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
|
||||
+ "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
|
||||
+ "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
|
||||
+ "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
|
||||
+ "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
|
||||
+ "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
|
||||
+ "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
|
||||
+ "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
|
||||
+ "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
|
||||
+ "\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u10FC\\u1100-\\u1112"
|
||||
+ "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
|
||||
+ "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-"
|
||||
+ "\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34"
|
||||
+ "\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73"
|
||||
+ "\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
|
||||
+ "\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-"
|
||||
+ "\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F"
|
||||
+ "\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53"
|
||||
+ "\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B"
|
||||
+ "\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19"
|
||||
+ "\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50"
|
||||
+ "\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79"
|
||||
+ "\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98"
|
||||
+ "\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1"
|
||||
+ "\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-"
|
||||
+ "\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB"
|
||||
+ "-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F"
|
||||
+ "\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057"
|
||||
+ "\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094\\u20A8"
|
||||
+ "\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-"
|
||||
+ "\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D\\u2120-"
|
||||
+ "\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2131"
|
||||
+ "\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2150-\\u217F"
|
||||
+ "\\u2189\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
|
||||
+ "\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243"
|
||||
+ "\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276"
|
||||
+ "\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292"
|
||||
+ "\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460"
|
||||
+ "-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2CEF-"
|
||||
+ "\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5"
|
||||
+ "\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A\\u3046\\u304B"
|
||||
+ "\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D"
|
||||
+ "\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078"
|
||||
+ "\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1"
|
||||
+ "\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4"
|
||||
+ "\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2"
|
||||
+ "\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E"
|
||||
+ "\\u3220-\\u3247\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF"
|
||||
+ "\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-"
|
||||
+ "\\uA8F1\\uA92B-\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-"
|
||||
+ "\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C"
|
||||
+ "\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18"
|
||||
+ "\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14"
|
||||
+ "\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10"
|
||||
+ "\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C"
|
||||
+ "\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108"
|
||||
+ "\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204"
|
||||
+ "\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300"
|
||||
+ "\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC"
|
||||
+ "\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8"
|
||||
+ "\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4"
|
||||
+ "\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0"
|
||||
+ "\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC"
|
||||
+ "\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8"
|
||||
+ "\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4"
|
||||
+ "\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0"
|
||||
+ "\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC"
|
||||
+ "\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8"
|
||||
+ "\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4"
|
||||
+ "\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0"
|
||||
+ "\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC"
|
||||
+ "\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8"
|
||||
+ "\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4"
|
||||
+ "\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0"
|
||||
+ "\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC"
|
||||
+ "\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8"
|
||||
+ "\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4"
|
||||
+ "\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0"
|
||||
+ "\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC"
|
||||
+ "\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8"
|
||||
+ "\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4"
|
||||
+ "\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0"
|
||||
+ "\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C"
|
||||
+ "\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98"
|
||||
+ "\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94"
|
||||
+ "\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90"
|
||||
+ "\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C"
|
||||
+ "\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088"
|
||||
+ "\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184"
|
||||
+ "\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280"
|
||||
+ "\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C"
|
||||
+ "\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478"
|
||||
+ "\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574"
|
||||
+ "\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670"
|
||||
+ "\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C"
|
||||
+ "\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
|
||||
+ "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6D\\uFA70-"
|
||||
+ "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
|
||||
+ "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
|
||||
+ "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
|
||||
+ "\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
|
||||
+ "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
|
||||
+ "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
|
||||
+ "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010"
|
||||
+ "A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U00011099"
|
||||
+ "\\U0001109B\\U000110A5\\U000110B9\\U000110BA\\U0001D15E-\\U0001D"
|
||||
+ "169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
|
||||
+ "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001"
|
||||
+ "D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C"
|
||||
+ "\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4"
|
||||
+ "A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
|
||||
+ "\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001"
|
||||
+ "D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
|
||||
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
|
||||
+ "D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB"
|
||||
+ "\\U0001D7CE-\\U0001D7FF\\U0001F100-\\U0001F10A\\U0001F110-"
|
||||
+ "\\U0001F12E\\U0001F131\\U0001F13D\\U0001F13F\\U0001F142\\U0001F1"
|
||||
+ "46\\U0001F14A-\\U0001F14E\\U0001F190\\U0001F200\\U0001F210-"
|
||||
+ "\\U0001F231\\U0001F240-\\U0001F248\\U0002F800-\\U0002FA1D]", false);
|
||||
|
||||
private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
|
||||
skipSets[D].applyPattern("[[:NFD_QC=Yes:]&[:ccc=0:]]", false);
|
||||
skipSets[C].applyPattern("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
|
||||
skipSets[KD].applyPattern("[[:NFKD_QC=Yes:]&[:ccc=0:]]", false);
|
||||
skipSets[KC].applyPattern("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
|
||||
|
||||
// Remove from the NFC and NFKC sets all those characters that change
|
||||
// when a back-combining character is added.
|
||||
// First, get all of the back-combining characters and their combining classes.
|
||||
UnicodeSet combineBack=new UnicodeSet("[:NFC_QC=Maybe:]");
|
||||
int numCombineBack=combineBack.size();
|
||||
int[] combineBackCharsAndCc=new int[numCombineBack*2];
|
||||
UnicodeSetIterator iter=new UnicodeSetIterator(combineBack);
|
||||
for(int i=0; i<numCombineBack; ++i) {
|
||||
iter.next();
|
||||
int c=iter.codepoint;
|
||||
combineBackCharsAndCc[2*i]=c;
|
||||
combineBackCharsAndCc[2*i+1]=UCharacter.getCombiningClass(c);
|
||||
}
|
||||
|
||||
// We need not look at control codes, Han characters nor Hangul LVT syllables because they
|
||||
// do not combine forward. LV syllables are already removed.
|
||||
UnicodeSet notInteresting=new UnicodeSet("[[:C:][:Unified_Ideograph:][:HST=LVT:]]");
|
||||
UnicodeSet unsure=((UnicodeSet)(skipSets[C].clone())).removeAll(notInteresting);
|
||||
// System.out.format("unsure.size()=%d\n", unsure.size());
|
||||
|
||||
// For each character about which we are unsure, see if it changes when we add
|
||||
// one of the back-combining characters.
|
||||
Normalizer2 norm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
|
||||
StringBuilder s=new StringBuilder();
|
||||
iter.reset(unsure);
|
||||
while(iter.next()) {
|
||||
int c=iter.codepoint;
|
||||
s.delete(0, 0x7fffffff).appendCodePoint(c);
|
||||
int cLength=s.length();
|
||||
int tccc=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
|
||||
for(int i=0; i<numCombineBack; ++i) {
|
||||
// If c's decomposition ends with a character with non-zero combining class, then
|
||||
// c can only change if it combines with a character with a non-zero combining class.
|
||||
int cc2=combineBackCharsAndCc[2*i+1];
|
||||
if(tccc==0 || cc2!=0) {
|
||||
int c2=combineBackCharsAndCc[2*i];
|
||||
s.appendCodePoint(c2);
|
||||
if(!norm2.isNormalized(s)) {
|
||||
// System.out.format("remove U+%04x (tccc=%d) + U+%04x (cc=%d)\n", c, tccc, c2, cc2);
|
||||
skipSets[C].remove(c);
|
||||
skipSets[KC].remove(c);
|
||||
break;
|
||||
}
|
||||
s.delete(cLength, 0x7fffffff);
|
||||
}
|
||||
}
|
||||
}
|
||||
return skipSets;
|
||||
}
|
||||
|
||||
|
@ -2719,10 +2348,12 @@ public class BasicTest extends TestFmwk {
|
|||
}
|
||||
for(int i=0; i<expectSets.length; ++i) {
|
||||
if(!skipSets[i].equals(expectSets[i])) {
|
||||
errln("error: TestSkippable skipSets["+i+"]!=expectedSets["+i+"]\n"+
|
||||
"May need to update hardcoded UnicodeSet patterns in com.ibm.icu.dev.test.normalizer.BasicTest.java\n"+
|
||||
"See ICU4J - unicodetools.com.ibm.text.UCD.NFSkippable\n" +
|
||||
"Run com.ibm.text.UCD.Main with the option NFSkippable.");
|
||||
errln("error: TestSkippable skipSets["+i+"]!=expectedSets["+i+"]\n");
|
||||
// Note: This used to depend on hardcoded UnicodeSet patterns generated by
|
||||
// Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
|
||||
// running com.ibm.text.UCD.Main with the option NFSkippable.
|
||||
// Since ICU 4.6/Unicode 6, we are generating the
|
||||
// expectSets ourselves in initSkippables().
|
||||
|
||||
s=new StringBuilder();
|
||||
|
||||
|
|
|
@ -82,6 +82,13 @@ public class UTS46Test extends TestFmwk {
|
|||
if(!UTF16Plus.equal(result, input) || !info.getErrors().equals(EnumSet.of(IDNA.Error.BIDI))) {
|
||||
errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
|
||||
}
|
||||
// Characters that are canonically equivalent to sequences with non-LDH ASCII.
// The check below expects the result to equal the input with no errors reported.
input="a\u2260b\u226Ec\u226Fd";
not3.nameToUnicode(input, result, info);
if(!UTF16Plus.equal(result, input) || info.hasErrors()) {
    // info.getErrors() is a set of IDNA.Error values, so it is formatted with %s.
    // The C-style "%04lx" is not a valid java.util.Formatter conversion and would
    // throw UnknownFormatConversionException here instead of reporting the failure.
    errln(String.format("notSTD3.nameToUnicode(equiv to non-LDH ASCII) unexpected errors %s string %s",
                        info.getErrors(), prettify(result.toString())));
}
|
||||
}
|
||||
|
||||
private static final Map<String, IDNA.Error> errorNamesToErrors;
|
||||
|
@ -174,6 +181,10 @@ public class UTS46Test extends TestFmwk {
|
|||
{ "\u65E5\u672C\u8A9E\u3002\uFF2A\uFF30", "B", // Japanese with fullwidth ".jp"
|
||||
"\u65E5\u672C\u8A9E.jp", "" },
|
||||
{ "\u2615", "B", "\u2615", "" }, // Unicode 4.0 HOT BEVERAGE
|
||||
// some characters are disallowed because they are canonically equivalent
|
||||
// to sequences with non-LDH ASCII
|
||||
{ "a\u2260b\u226Ec\u226Fd", "B",
|
||||
"a\uFFFDb\uFFFDc\uFFFDd", "UIDNA_ERROR_DISALLOWED" },
|
||||
// many deviation characters, test the special mapping code
|
||||
{ "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
|
||||
"\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
|
||||
|
|
|
@ -623,7 +623,8 @@ public class RBBITest extends TestFmwk {
|
|||
public void doTest() {
|
||||
BreakIterator brkIter;
|
||||
switch( type ) {
|
||||
case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break;
|
||||
// TODO(andy): Match Thai grapheme break behavior to Unicode 6.0 and remove this time bomb.
|
||||
case BreakIterator.KIND_CHARACTER: if(skipIfBeforeICU(4, 5, 2))return;else brkIter = BreakIterator.getCharacterInstance(locale); break;
|
||||
case BreakIterator.KIND_WORD: brkIter = BreakIterator.getWordInstance(locale); break;
|
||||
case BreakIterator.KIND_LINE: brkIter = BreakIterator.getLineInstance(locale); break;
|
||||
case BreakIterator.KIND_SENTENCE: brkIter = BreakIterator.getSentenceInstance(locale); break;
|
||||
|
|
|
@ -14,7 +14,6 @@ import com.ibm.icu.impl.IntTrieBuilder;
|
|||
import com.ibm.icu.impl.Trie;
|
||||
import com.ibm.icu.impl.TrieBuilder;
|
||||
import com.ibm.icu.impl.TrieIterator;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
|
||||
|
@ -474,41 +473,6 @@ public final class TrieTest extends TestFmwk
|
|||
_testTrieRanges4(setRanges3, setRanges3.length, checkRanges3,
|
||||
checkRanges3.length);
|
||||
}
|
||||
|
||||
public void TestCharValues()
|
||||
{
|
||||
CharTrie trie = null;
|
||||
try {
|
||||
trie = UCharacterProperty.INSTANCE.m_trie_;
|
||||
} catch (Exception e) {
|
||||
warnln("Error creating ucharacter trie");
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 0xFFFF; i ++) {
|
||||
if (i < 0xFF
|
||||
&& trie.getBMPValue((char)i)
|
||||
!= trie.getLatin1LinearValue((char)i)) {
|
||||
errln("For latin 1 codepoint, getBMPValue should be the same " +
|
||||
"as getLatin1LinearValue");
|
||||
}
|
||||
if (trie.getBMPValue((char)i) != trie.getCodePointValue(i)) {
|
||||
errln("For BMP codepoint, getBMPValue should be the same " +
|
||||
"as getCodepointValue");
|
||||
}
|
||||
}
|
||||
for (int i = 0x10000; i < 0x10ffff; i ++) {
|
||||
char lead = UTF16.getLeadSurrogate(i);
|
||||
char trail = UTF16.getTrailSurrogate(i);
|
||||
char value = trie.getCodePointValue(i);
|
||||
if (value != trie.getSurrogateValue(lead, trail) ||
|
||||
value != trie.getTrailValue(trie.getLeadValue(lead),
|
||||
trail)) {
|
||||
errln("For Non-BMP codepoints, getSurrogateValue should be "
|
||||
+ "the same s getCodepointValue and getTrailValue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class DummyGetFoldingOffset implements Trie.DataManipulate {
|
||||
public int getFoldingOffset(int value) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2003-2009, International Business Machines Corporation and *
|
||||
* Copyright (C) 2003-2010, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,11 +15,15 @@ import com.ibm.icu.util.VersionInfo;
|
|||
public abstract class AbstractTestLog implements TestLog {
|
||||
|
||||
public static boolean dontSkipForVersion = false;
|
||||
/**
|
||||
* Returns true if the current ICU version is before, or equal to, the specified major.minor.micro version.
|
||||
* TODO: Why is this called "before" when it returns true for "before or equal"? Can we fix it?
|
||||
*/
|
||||
public boolean skipIfBeforeICU(int major, int minor, int micro) {
|
||||
if (dontSkipForVersion || VersionInfo.ICU_VERSION.compareTo(VersionInfo.getInstance(major, minor, micro)) > 0) {
|
||||
return false;
|
||||
}
|
||||
logln("Test skipped before ICU release " + major + "." + minor);
|
||||
logln("Test skipped before ICU release " + major + "." + minor + "." + micro);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -66,8 +66,8 @@ public class RoundTripTest extends TestFmwk {
|
|||
A bug has been filed to remind us to do this: #1979.
|
||||
*/
|
||||
|
||||
// NOTE(review): KATAKANA and HIRAGANA are each declared twice in this span. The
// second pair differs only by the trailing "-[:^age=5.2:]" subtraction; presumably
// the first pair is the superseded version shown by the diff view -- confirm.
// Set pattern: [:katakana:] plus U+30A1-U+30FA and U+30FC, minus U+30FF and U+31F0-U+31FF.
static String KATAKANA = "[[[:katakana:][\u30A1-\u30FA\u30FC]]-[\u30FF\u31F0-\u31FF]]";
|
||||
// Set pattern: [:hiragana:] plus U+3040-U+3094, minus U+3095-U+3096, U+309F-U+30A0
// and U+1F200-U+1F2FF.
static String HIRAGANA = "[[[:hiragana:][\u3040-\u3094]]-[\u3095-\u3096\u309F-\u30A0\\U0001F200-\\U0001F2FF]]";
|
||||
// Same Katakana pattern, additionally subtracting [:^age=5.2:].
static String KATAKANA = "[[[:katakana:][\u30A1-\u30FA\u30FC]]-[\u30FF\u31F0-\u31FF]-[:^age=5.2:]]";
|
||||
// Same Hiragana pattern, additionally subtracting [:^age=5.2:].
static String HIRAGANA = "[[[:hiragana:][\u3040-\u3094]]-[\u3095-\u3096\u309F-\u30A0\\U0001F200-\\U0001F2FF]-[:^age=5.2:]]";
|
||||
// Single-character set containing only U+30FC.
static String LENGTH = "[\u30FC]";
|
||||
// Range U+FF65-U+FF9D.
static String HALFWIDTH_KATAKANA = "[\uFF65-\uFF9D]";
|
||||
// The two characters U+30FD and U+30FE.
static String KATAKANA_ITERATION = "[\u30FD\u30FE]";
|
||||
|
|
Loading…
Add table
Reference in a new issue