mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-1707
New character name iteration X-SVN-Rev: 7913
This commit is contained in:
parent
2868b2a4d6
commit
51df46827d
5 changed files with 937 additions and 283 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
|
||||
* $Date: 2002/03/02 02:04:07 $
|
||||
* $Revision: 1.30 $
|
||||
* $Date: 2002/03/08 02:03:16 $
|
||||
* $Revision: 1.31 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -24,6 +24,7 @@ import com.ibm.icu.lang.UCharacter;
|
|||
import com.ibm.icu.lang.UCharacterCategory;
|
||||
import com.ibm.icu.lang.UCharacterDirection;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.icu.util.ValueIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
|
@ -54,6 +55,21 @@ public final class UCharacterTest extends TestFmwk
|
|||
|
||||
// public methods ================================================
|
||||
|
||||
public static void main(String[] arg)
|
||||
{
|
||||
try
|
||||
{
|
||||
UCharacterTest test = new UCharacterTest();
|
||||
UCharacter.getName1_0(0x1d18b);
|
||||
test.TestNameIteration();
|
||||
//test.run(arg);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing the uppercase and lowercase function of UCharacter
|
||||
*/
|
||||
|
@ -635,8 +651,7 @@ public final class UCharacterTest extends TestFmwk
|
|||
errln(
|
||||
"FAIL: 'LATin smALl letTER A' should result in character U+0061");
|
||||
}
|
||||
|
||||
|
||||
|
||||
// extra testing different from icu
|
||||
for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
|
||||
{
|
||||
|
@ -650,6 +665,123 @@ public final class UCharacterTest extends TestFmwk
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing name iteration
|
||||
*/
|
||||
public void TestNameIteration()
|
||||
{
|
||||
ValueIterator iterator = UCharacter.getNameIterator();
|
||||
ValueIterator.Element element = new ValueIterator.Element();
|
||||
ValueIterator.Element old = new ValueIterator.Element();
|
||||
// testing subrange
|
||||
iterator.setRange(0xF, 0x45);
|
||||
while (iterator.next(element)) {
|
||||
if (element.integer <= old.integer) {
|
||||
errln("FAIL next returned a less codepoint \\u" +
|
||||
Integer.toHexString(element.integer) + " than \\u" +
|
||||
Integer.toHexString(old.integer));
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.getName(element.integer).equals(element.value)) {
|
||||
errln("FAIL next codepoint \\u" +
|
||||
Integer.toHexString(element.integer) +
|
||||
" does not have the expected name " +
|
||||
UCharacter.getName(element.integer) +
|
||||
" instead have the name " + (String)element.value);
|
||||
break;
|
||||
}
|
||||
old.integer = element.integer;
|
||||
}
|
||||
|
||||
iterator.reset();
|
||||
iterator.next(element);
|
||||
if (element.integer != 0x20) {
|
||||
errln("FAIL reset in iterator");
|
||||
}
|
||||
|
||||
iterator.setRange(0, 0x110000);
|
||||
old.integer = 0;
|
||||
while (iterator.next(element)) {
|
||||
if (element.integer != 0 && element.integer <= old.integer) {
|
||||
errln("FAIL next returned a less codepoint \\u" +
|
||||
Integer.toHexString(element.integer) + " than \\u" +
|
||||
Integer.toHexString(old.integer));
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.getName(element.integer).equals(element.value)) {
|
||||
errln("FAIL next codepoint \\u" +
|
||||
Integer.toHexString(element.integer) +
|
||||
" does not have the expected name " +
|
||||
UCharacter.getName(element.integer) +
|
||||
" instead have the name " + (String)element.value);
|
||||
break;
|
||||
}
|
||||
for (int i = old.integer + 1; i < element.integer; i ++) {
|
||||
if (UCharacter.getName(i) != null) {
|
||||
errln("FAIL between codepoints are not null \\u" +
|
||||
Integer.toHexString(old.integer) + " and " +
|
||||
Integer.toHexString(element.integer) + " has " +
|
||||
Integer.toHexString(i) + " with a name " +
|
||||
UCharacter.getName(i));
|
||||
break;
|
||||
}
|
||||
}
|
||||
old.integer = element.integer;
|
||||
}
|
||||
|
||||
iterator = UCharacter.getExtendedNameIterator();
|
||||
old.integer = 0;
|
||||
while (iterator.next(element)) {
|
||||
if (element.integer != 0 && element.integer != old.integer) {
|
||||
errln("FAIL next returned a codepoint \\u" +
|
||||
Integer.toHexString(element.integer) +
|
||||
" different from \\u" +
|
||||
Integer.toHexString(old.integer));
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.getExtendedName(element.integer).equals(
|
||||
element.value)) {
|
||||
errln("FAIL next codepoint \\u" +
|
||||
Integer.toHexString(element.integer) + " name should be "
|
||||
+ UCharacter.getExtendedName(element.integer) +
|
||||
" instead of " + (String)element.value);
|
||||
break;
|
||||
}
|
||||
old.integer++;
|
||||
}
|
||||
iterator = UCharacter.getName1_0Iterator();
|
||||
old.integer = 0;
|
||||
while (iterator.next(element)) {
|
||||
System.out.println(Integer.toHexString(element.integer) + " " +
|
||||
(String)element.value);
|
||||
if (element.integer != 0 && element.integer <= old.integer) {
|
||||
errln("FAIL next returned a less codepoint \\u" +
|
||||
Integer.toHexString(element.integer) + " than \\u" +
|
||||
Integer.toHexString(old.integer));
|
||||
break;
|
||||
}
|
||||
if (!element.value.equals(UCharacter.getName1_0(element.integer))) {
|
||||
errln("FAIL next codepoint \\u" +
|
||||
Integer.toHexString(element.integer) +
|
||||
" name cannot be null");
|
||||
break;
|
||||
}
|
||||
for (int i = old.integer + 1; i < element.integer; i ++) {
|
||||
if (UCharacter.getName1_0(i) != null) {
|
||||
errln("FAIL between codepoints are not null \\u" +
|
||||
Integer.toHexString(old.integer) + " and " +
|
||||
Integer.toHexString(element.integer) + " has " +
|
||||
Integer.toHexString(i) + " with a name " +
|
||||
UCharacter.getName1_0(i));
|
||||
break;
|
||||
}
|
||||
}
|
||||
old.integer = element.integer;
|
||||
}
|
||||
|
||||
/* ### TODO: test error cases and other interesting things */
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing for illegal characters
|
||||
*/
|
||||
|
@ -1069,19 +1201,5 @@ public final class UCharacterTest extends TestFmwk
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static void main(String[] arg)
|
||||
{
|
||||
try
|
||||
{
|
||||
UCharacterTest test = new UCharacterTest();
|
||||
test.TestCaseTitle();
|
||||
//test.run(arg);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
|
||||
* $Date: 2002/03/02 02:04:09 $
|
||||
* $Revision: 1.27 $
|
||||
* $Date: 2002/03/08 02:04:00 $
|
||||
* $Revision: 1.28 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -18,6 +18,7 @@ import com.ibm.icu.impl.UnicodeProperty;
|
|||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.icu.util.ValueIterator;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
|
||||
/**
|
||||
|
@ -879,7 +880,7 @@ public final class UCharacter
|
|||
*/
|
||||
public static String getUnicodeVersion()
|
||||
{
|
||||
return PROPERTY_.m_unicodeVersion_;
|
||||
return PROPERTY_.m_unicodeVersion_.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1067,6 +1068,7 @@ public final class UCharacter
|
|||
* @param breakiter break iterator to determine the positions in which
|
||||
* the character should be title cased.
|
||||
* @return titlecase version of the argument string
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static String toTitleCase(String str, BreakIterator breakiter)
|
||||
{
|
||||
|
@ -1117,6 +1119,7 @@ public final class UCharacter
|
|||
* @param breakiter break iterator to determine the positions in which
|
||||
* the character should be title cased.
|
||||
* @return titlecase version of the argument string
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static String toTitleCase(Locale locale, String str,
|
||||
BreakIterator breakiter)
|
||||
|
@ -1340,13 +1343,14 @@ public final class UCharacter
|
|||
* Example of use:<br>
|
||||
* <pre>
|
||||
* RangeValueIterator iterator = UCharacter.getTypeIterator();
|
||||
* while (iterator.next()) {
|
||||
* RangeValueIterator.Element element = new RangeValueIterator.Element();
|
||||
* while (iterator.next(element)) {
|
||||
* System.out.println("Codepoint \\u" +
|
||||
* Integer.toHexString(iterator.getStart()) +
|
||||
* Integer.toHexString(element.start) +
|
||||
* " to codepoint \\u" +
|
||||
* Integer.toHexString(iterator.getLimit() - 1) +
|
||||
* Integer.toHexString(element.limit - 1) +
|
||||
* " has the character type " +
|
||||
* iterator.getValue());
|
||||
* element.value);
|
||||
* }
|
||||
* </pre>
|
||||
* @return an iterator
|
||||
|
@ -1356,6 +1360,98 @@ public final class UCharacter
|
|||
{
|
||||
return new UCharacterTypeIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets an iterator for character names, iterating over codepoints.</p>
|
||||
* <p>This API only gets the iterator for the modern, most up-to-date
|
||||
* Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
|
||||
* for extended names use getExtendedNameIterator().</p>
|
||||
* Example of use:<br>
|
||||
* <pre>
|
||||
* ValueIterator iterator = UCharacter.getNameIterator();
|
||||
* ValueIterator.Element element = new ValueIterator.Element();
|
||||
* while (iterator.next(element)) {
|
||||
* System.out.println("Codepoint \\u" +
|
||||
* Integer.toHexString(element.integer) +
|
||||
* " has the name " + (String)element.value);
|
||||
* }
|
||||
* </pre>
|
||||
* @return an iterator
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static ValueIterator getNameIterator()
|
||||
{
|
||||
return new UCharacterNameIterator(NAME_,
|
||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets an iterator for character names, iterating over codepoints.</p>
|
||||
* <p>This API only gets the iterator for the older 1.0 Unicode names.
|
||||
* For modern, most up-to-date Unicode names use getNameIterator() or
|
||||
* for extended names use getExtendedNameIterator().</p>
|
||||
* Example of use:<br>
|
||||
* <pre>
|
||||
* ValueIterator iterator = UCharacter.getName1_0Iterator();
|
||||
* ValueIterator.Element element = new ValueIterator.Element();
|
||||
* while (iterator.next(element)) {
|
||||
* System.out.println("Codepoint \\u" +
|
||||
* Integer.toHexString(element.integer) +
|
||||
* " has the name " + (String)element.value);
|
||||
* }
|
||||
* </pre>
|
||||
* @return an iterator
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static ValueIterator getName1_0Iterator()
|
||||
{
|
||||
return new UCharacterNameIterator(NAME_,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets an iterator for character names, iterating over codepoints.</p>
|
||||
* <p>This API only gets the iterator for the extended names.
|
||||
* For modern, most up-to-date Unicode names use getNameIterator() or
|
||||
* for older 1.0 Unicode names use getName1_0Iterator().</p>
|
||||
* Example of use:<br>
|
||||
* <pre>
|
||||
* ValueIterator iterator = UCharacter.getExtendedNameIterator();
|
||||
* ValueIterator.Element element = new ValueIterator.Element();
|
||||
* while (iterator.next(element)) {
|
||||
* System.out.println("Codepoint \\u" +
|
||||
* Integer.toHexString(element.integer) +
|
||||
* " has the name " + (String)element.value);
|
||||
* }
|
||||
* </pre>
|
||||
* @return an iterator
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static ValueIterator getExtendedNameIterator()
|
||||
{
|
||||
return new UCharacterNameIterator(NAME_,
|
||||
UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||
}
|
||||
|
||||
// protected data members --------------------------------------------
|
||||
|
||||
/**
|
||||
* Database storing the sets of character name
|
||||
*/
|
||||
protected static final UCharacterName NAME_;
|
||||
|
||||
// block to initialise name database and unicode 1.0 data indicator
|
||||
static
{
    // Loads the character name database once, when the class is
    // initialised.
    try
    {
        NAME_ = new UCharacterName();
    }
    catch (Exception e)
    {
        // Chain the original exception: rethrowing only its message lost
        // the exception type and stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
|
||||
|
||||
// protected methods -------------------------------------------------
|
||||
|
||||
|
@ -1382,24 +1478,6 @@ public final class UCharacter
|
|||
private static final UCharacterProperty PROPERTY_ =
|
||||
UnicodeProperty.PROPERTY;
|
||||
|
||||
/**
|
||||
* Database storing the sets of character name
|
||||
*/
|
||||
private static final UCharacterName NAME_;
|
||||
|
||||
// block to initialise name database and unicode 1.0 data indicator
|
||||
static
{
    // Loads the character name database once, when the class is
    // initialised.
    try
    {
        NAME_ = new UCharacterName();
    }
    catch (Exception e)
    {
        // Chain the original exception: rethrowing only its message lost
        // the exception type and stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
|
||||
|
||||
/**
|
||||
* To get the last character out from a data type
|
||||
*/
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||
* $Date: 2002/03/02 01:50:51 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2002/03/08 02:04:00 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -273,8 +273,7 @@ final class UCharacterName
|
|||
indexes[0] = offset;
|
||||
|
||||
// joining up the factorized strings
|
||||
if (compareFactorString(indexes,
|
||||
name.substring(prefixlen))) {
|
||||
if (compareFactorString(indexes, name, prefixlen)) {
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
@ -337,16 +336,18 @@ final class UCharacterName
|
|||
* the argument string
|
||||
* @param index array with each index corresponding to each factor block
|
||||
* @param str string to compare with
|
||||
* @param offset of str to start comparison
|
||||
* @return true if string matches
|
||||
*/
|
||||
private boolean compareFactorString(int index[], String str)
|
||||
private boolean compareFactorString(int index[], String str,
|
||||
int offset)
|
||||
{
|
||||
int size = m_factor_.length;
|
||||
if (index == null || index.length != size)
|
||||
return false;
|
||||
|
||||
int count = 0;
|
||||
int strcount = 0;
|
||||
int strcount = offset;
|
||||
int factor;
|
||||
size --;
|
||||
for (int i = 0; i <= size; i ++)
|
||||
|
@ -372,6 +373,22 @@ final class UCharacterName
|
|||
}
|
||||
}
|
||||
|
||||
// protected data members --------------------------------------------
|
||||
|
||||
/**
|
||||
* Maximum number of groups
|
||||
*/
|
||||
protected int m_groupcount_ = 0;
|
||||
/**
|
||||
* Size of each group
|
||||
*/
|
||||
protected int m_groupsize_ = 0;
|
||||
/**
|
||||
* Number of lines per group
|
||||
* 1 << GROUP_SHIFT_
|
||||
*/
|
||||
protected static final int LINES_PER_GROUP_ = 1 << 5;
|
||||
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
|
||||
|
@ -541,113 +558,6 @@ final class UCharacterName
|
|||
return false;
|
||||
}
|
||||
|
||||
// private data members ----------------------------------------------
|
||||
|
||||
/**
|
||||
* Data used in unames.dat
|
||||
*/
|
||||
private char m_tokentable_[];
|
||||
private byte m_tokenstring_[];
|
||||
private char m_groupinfo_[];
|
||||
private byte m_groupstring_[];
|
||||
private AlgorithmName m_algorithm_[];
|
||||
|
||||
/**
|
||||
* Number of group sets
|
||||
*/
|
||||
private int m_groupcount_ = 0;
|
||||
private int m_groupsize_ = 0;
|
||||
|
||||
/**
|
||||
* Default name of the name datafile
|
||||
*/
|
||||
private static final String NAME_FILE_NAME_ =
|
||||
"/com/ibm/icu/impl/data/unames.dat";
|
||||
|
||||
/**
|
||||
* Default buffer size of datafile
|
||||
*/
|
||||
private static final int NAME_BUFFER_SIZE_ = 100000;
|
||||
|
||||
/**
|
||||
* Shift count to retrieve group information
|
||||
*/
|
||||
private static final int GROUP_SHIFT_ = 5;
|
||||
|
||||
/**
|
||||
* Number of lines per group
|
||||
*/
|
||||
private static final int LINES_PER_GROUP_ = 1 << GROUP_SHIFT_;
|
||||
|
||||
/**
|
||||
* Mask to retrieve the offset for a particular character within a group
|
||||
*/
|
||||
private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
|
||||
|
||||
/**
|
||||
* Position of offsethigh in group information array
|
||||
*/
|
||||
private static final int OFFSET_HIGH_OFFSET_ = 1;
|
||||
|
||||
/**
|
||||
* Position of offsetlow in group information array
|
||||
*/
|
||||
private static final int OFFSET_LOW_OFFSET_ = 2;
|
||||
/**
|
||||
* Double nibble indicator, any nibble > this number has to be combined
|
||||
* with its following nibble
|
||||
*/
|
||||
private static final int SINGLE_NIBBLE_MAX_ = 11;
|
||||
|
||||
// private methods ---------------------------------------------------
|
||||
|
||||
/**
|
||||
* Gets the algorithmic name for the argument character
|
||||
* @param ch character to determine name for
|
||||
* @param choice name choice
|
||||
* @return the algorithmic name or null if not found
|
||||
*/
|
||||
private String getAlgName(int ch, int choice)
|
||||
{
|
||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||
// the same as the modern ones, extension A was only introduced with
|
||||
// Unicode 3.0, and the Hangul syllable block was moved and changed
|
||||
// around Unicode 1.1.5.
|
||||
if (choice != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
|
||||
// index in terms integer index
|
||||
StringBuffer s = new StringBuffer();
|
||||
|
||||
for (int index = m_algorithm_.length - 1; index >= 0; index --) {
|
||||
if (m_algorithm_[index].contains(ch)) {
|
||||
if (index >= 0) {
|
||||
m_algorithm_[index].appendName(ch, s);
|
||||
return s.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the character with the tokenized argument name
|
||||
* @param name of the character
|
||||
* @return character with the tokenized argument name or -1 if character
|
||||
* is not found
|
||||
*/
|
||||
private int getGroupChar(String name, int choice)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
for (int i = 0; i < m_groupcount_; i ++) {
|
||||
result = getGroupChar(i, name, choice);
|
||||
if (result != -1) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a block of compressed lengths of 32 strings and expands them into
|
||||
* offsets and lengths for each string. Lengths are stored with a
|
||||
|
@ -664,7 +574,7 @@ final class UCharacterName
|
|||
* @return next index of the data string immediately after the lengths
|
||||
* in terms of byte address
|
||||
*/
|
||||
private int getGroupLengths(int index, char offsets[], char lengths[])
|
||||
protected int getGroupLengths(int index, char offsets[], char lengths[])
|
||||
{
|
||||
char length = 0xffff;
|
||||
byte b = 0,
|
||||
|
@ -687,22 +597,22 @@ final class UCharacterName
|
|||
// getting nibble
|
||||
n = (byte)((b >> shift) & 0x0F);
|
||||
if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
|
||||
length = (char)((n - 12) << 4);
|
||||
length = (char)((n - 12) << 4);
|
||||
}
|
||||
else {
|
||||
if (length != 0xffff) {
|
||||
lengths[i] = (char)((length | n) + 12);
|
||||
}
|
||||
else {
|
||||
lengths[i] = (char)n;
|
||||
}
|
||||
if (length != 0xffff) {
|
||||
lengths[i] = (char)((length | n) + 12);
|
||||
}
|
||||
else {
|
||||
lengths[i] = (char)n;
|
||||
}
|
||||
|
||||
if (i < LINES_PER_GROUP_) {
|
||||
offsets[i + 1] = (char)(offsets[i] + lengths[i]);
|
||||
}
|
||||
if (i < LINES_PER_GROUP_) {
|
||||
offsets[i + 1] = (char)(offsets[i] + lengths[i]);
|
||||
}
|
||||
|
||||
length = 0xffff;
|
||||
i ++;
|
||||
length = 0xffff;
|
||||
i ++;
|
||||
}
|
||||
|
||||
shift -= 4;
|
||||
|
@ -710,7 +620,7 @@ final class UCharacterName
|
|||
}
|
||||
return stringoffset;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the name of the argument group index
|
||||
* @param index of the group name string in byte count
|
||||
|
@ -718,13 +628,13 @@ final class UCharacterName
|
|||
* @param choice of Unicode 1.0 name or the most current name
|
||||
* @return name of the group
|
||||
*/
|
||||
private String getGroupName(int index, int length, int choice)
|
||||
protected String getGroupName(int index, int length, int choice)
|
||||
{
|
||||
if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
|
||||
int oldindex = index;
|
||||
index += UCharacterUtil.skipByteSubString(m_groupstring_, index,
|
||||
length, (byte)';');
|
||||
length -= (index - oldindex);
|
||||
int oldindex = index;
|
||||
index += UCharacterUtil.skipByteSubString(m_groupstring_,
|
||||
index, length, (byte)';');
|
||||
length -= (index - oldindex);
|
||||
}
|
||||
|
||||
StringBuffer s = new StringBuffer();
|
||||
|
@ -736,7 +646,7 @@ final class UCharacterName
|
|||
|
||||
if (b >= m_tokentable_.length) {
|
||||
if (b == ';') {
|
||||
break;
|
||||
break;
|
||||
}
|
||||
s.append(b); // implicit letter
|
||||
}
|
||||
|
@ -750,6 +660,13 @@ final class UCharacterName
|
|||
}
|
||||
if (token == 0xFFFF) {
|
||||
if (b == ';') {
|
||||
// skip the semicolon if we are seeking extended
|
||||
// names and there was no 2.0 name but there
|
||||
// is a 1.0 name.
|
||||
if (s.length() == 0 && choice ==
|
||||
UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
s.append((char)(b & 0x00ff)); // explicit letter
|
||||
|
@ -766,6 +683,300 @@ final class UCharacterName
|
|||
}
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the extended name
|
||||
*/
|
||||
protected String getExtendedName(int ch)
|
||||
{
|
||||
String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
if (result == null) {
|
||||
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||
result = getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
if (result == null) {
|
||||
result = getExtendedOr10Name(ch);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the group index for the codepoint, or the group before it.
|
||||
* @param codepoint
|
||||
* @return group index containing codepoint or the group before it.
|
||||
*/
|
||||
protected int getGroup(int codepoint)
|
||||
{
|
||||
int endGroup = m_groupcount_;
|
||||
int msb = getCodepointMSB(codepoint);
|
||||
int result = 0;
|
||||
// binary search for the group of names that contains the one for
|
||||
// code
|
||||
// find the group that contains codepoint, or the highest before it
|
||||
while (result < endGroup - 1) {
|
||||
int gindex = (result + endGroup) >> 1;
|
||||
if (msb < getGroupMSB(gindex)) {
|
||||
endGroup = gindex;
|
||||
}
|
||||
else {
|
||||
result = gindex;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the extended and 1.0 name when the most current unicode names
|
||||
* fail
|
||||
* @param ch codepoint
|
||||
* @return name of codepoint extended or 1.0
|
||||
*/
|
||||
protected String getExtendedOr10Name(int ch)
|
||||
{
|
||||
String result = null;
|
||||
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||
result = getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
if (result == null) {
|
||||
int type = getType(ch);
|
||||
// Return unknown if the table of names above is not up to
|
||||
// date.
|
||||
if (type >= UCharacterCategory.TYPE_NAMES_.length) {
|
||||
result = UCharacterCategory.UNKNOWN_TYPE_NAME_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TYPE_NAMES_[type];
|
||||
}
|
||||
StringBuffer tempResult = new StringBuffer(result);
|
||||
tempResult.insert(0, '<');
|
||||
tempResult.append('-');
|
||||
String chStr = Integer.toHexString(ch).toUpperCase();
|
||||
int zeros = 4 - chStr.length();
|
||||
while (zeros > 0) {
|
||||
tempResult.append('0');
|
||||
zeros --;
|
||||
}
|
||||
tempResult.append(chStr);
|
||||
tempResult.append('>');
|
||||
result = tempResult.toString();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// these are all UCharacterNameIterator use methods -------------------
|
||||
|
||||
/**
|
||||
* Gets the MSB from the group index
|
||||
* @param gindex group index
|
||||
* @return the MSB of the group if gindex is valid, -1 otherwise
|
||||
*/
|
||||
protected int getGroupMSB(int gindex)
|
||||
{
|
||||
if (gindex >= m_groupcount_) {
|
||||
return -1;
|
||||
}
|
||||
return m_groupinfo_[gindex * m_groupsize_];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the MSB of the codepoint
|
||||
* @param codepoint
|
||||
* @return the MSB of the codepoint
|
||||
*/
|
||||
protected int getCodepointMSB(int codepoint)
|
||||
{
|
||||
return codepoint >> GROUP_SHIFT_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the maximum codepoint + 1 of the group
|
||||
* @param msb most significant byte of the group
|
||||
* @return limit codepoint of the group
|
||||
*/
|
||||
protected int getGroupLimit(int msb)
|
||||
{
|
||||
return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the minimum codepoint of the group
|
||||
* @param msb most significant byte of the group
|
||||
* @return minimum codepoint of the group
|
||||
*/
|
||||
protected int getGroupMin(int msb)
|
||||
{
|
||||
return msb << GROUP_SHIFT_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the offset to a group
|
||||
* @param codepoint
|
||||
* @return offset to a group
|
||||
*/
|
||||
protected int getGroupOffset(int codepoint)
|
||||
{
|
||||
return codepoint & GROUP_MASK_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the minimum codepoint of a group
|
||||
* @param codepoint
|
||||
* @return minimum codepoint in the group which codepoint belongs to
|
||||
*/
|
||||
protected int getGroupMinFromCodepoint(int codepoint)
|
||||
{
|
||||
return codepoint & ~GROUP_MASK_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Algorithm range length
|
||||
* @return Algorithm range length
|
||||
*/
|
||||
protected int getAlgorithmLength()
|
||||
{
|
||||
return m_algorithm_.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the start of the range
|
||||
* @param index algorithm index
|
||||
* @return algorithm range start
|
||||
*/
|
||||
protected int getAlgorithmStart(int index)
|
||||
{
|
||||
return m_algorithm_[index].m_rangestart_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the end of the range
|
||||
* @param index algorithm index
|
||||
* @return algorithm range end
|
||||
*/
|
||||
protected int getAlgorithmEnd(int index)
|
||||
{
|
||||
return m_algorithm_[index].m_rangeend_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Algorithmic name of the codepoint
|
||||
* @param index algorithmic range index
|
||||
* @param codepoint
|
||||
* @return algorithmic name of codepoint
|
||||
*/
|
||||
protected String getAlgorithmName(int index, int codepoint)
|
||||
{
|
||||
StringBuffer result = new StringBuffer();
|
||||
m_algorithm_[index].appendName(codepoint, result);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
|
||||
// private data members ----------------------------------------------
|
||||
|
||||
/**
|
||||
* Data used in unames.dat
|
||||
*/
|
||||
private char m_tokentable_[];
|
||||
private byte m_tokenstring_[];
|
||||
private char m_groupinfo_[];
|
||||
private byte m_groupstring_[];
|
||||
private AlgorithmName m_algorithm_[];
|
||||
|
||||
/**
|
||||
* Group use
|
||||
*/
|
||||
private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
|
||||
private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
|
||||
|
||||
/**
|
||||
* Default name of the name datafile
|
||||
*/
|
||||
private static final String NAME_FILE_NAME_ =
|
||||
"/com/ibm/icu/impl/data/unames.dat";
|
||||
/**
|
||||
* Shift count to retrieve group information
|
||||
*/
|
||||
private static final int GROUP_SHIFT_ = 5;
|
||||
/**
|
||||
* Mask to retrieve the offset for a particular character within a group
|
||||
*/
|
||||
private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
|
||||
/**
|
||||
* Default buffer size of datafile
|
||||
*/
|
||||
private static final int NAME_BUFFER_SIZE_ = 100000;
|
||||
|
||||
/**
|
||||
* Position of offsethigh in group information array
|
||||
*/
|
||||
private static final int OFFSET_HIGH_OFFSET_ = 1;
|
||||
|
||||
/**
|
||||
* Position of offsetlow in group information array
|
||||
*/
|
||||
private static final int OFFSET_LOW_OFFSET_ = 2;
|
||||
/**
|
||||
* Double nibble indicator, any nibble > this number has to be combined
|
||||
* with its following nibble
|
||||
*/
|
||||
private static final int SINGLE_NIBBLE_MAX_ = 11;
|
||||
|
||||
|
||||
// private methods ---------------------------------------------------
|
||||
|
||||
/**
|
||||
* Gets the algorithmic name for the argument character
|
||||
* @param ch character to determine name for
|
||||
* @param choice name choice
|
||||
* @return the algorithmic name or null if not found
|
||||
*/
|
||||
private String getAlgName(int ch, int choice)
|
||||
{
|
||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||
// the same as the modern ones, extension A was only introduced with
|
||||
// Unicode 3.0, and the Hangul syllable block was moved and changed
|
||||
// around Unicode 1.1.5.
|
||||
if (choice != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
|
||||
// index in terms integer index
|
||||
StringBuffer s = new StringBuffer();
|
||||
|
||||
for (int index = m_algorithm_.length - 1; index >= 0; index --) {
|
||||
if (m_algorithm_[index].contains(ch)) {
|
||||
m_algorithm_[index].appendName(ch, s);
|
||||
return s.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the character with the tokenized argument name
|
||||
* @param name of the character
|
||||
* @return character with the tokenized argument name or -1 if character
|
||||
* is not found
|
||||
*/
|
||||
private synchronized int getGroupChar(String name, int choice)
|
||||
{
|
||||
for (int i = 0; i < m_groupcount_; i ++) {
|
||||
// populating the data set of grouptable
|
||||
|
||||
int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
|
||||
m_grouplengths_);
|
||||
|
||||
// shift out to function
|
||||
int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
|
||||
choice);
|
||||
if (result != -1) {
|
||||
return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
|
||||
| result;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares and retrieve character if name is found within the argument
|
||||
|
@ -884,62 +1095,45 @@ final class UCharacterName
|
|||
* @param ch character to get the group name
|
||||
* @param choice name choice selector to choose a unicode 1.0 or newer name
|
||||
*/
|
||||
private String getGroupName(int ch, int choice)
|
||||
private synchronized String getGroupName(int ch, int choice)
|
||||
{
|
||||
// gets the msb
|
||||
int msb = ch >> GROUP_SHIFT_,
|
||||
end = m_groupcount_,
|
||||
start,
|
||||
gindex = 0;
|
||||
|
||||
// binary search for the group of names that contains the one for
|
||||
// code
|
||||
for (start = 0; start < end - 1;) {
|
||||
gindex = (start + end) >> 1;
|
||||
if (msb < m_groupinfo_[gindex * m_groupsize_]) {
|
||||
end = gindex;
|
||||
}
|
||||
else {
|
||||
start = gindex;
|
||||
}
|
||||
}
|
||||
int msb = getCodepointMSB(ch);
|
||||
int group = getGroup(ch);
|
||||
|
||||
// return this if it is an exact match
|
||||
if (msb == m_groupinfo_[start * m_groupsize_]) {
|
||||
char offsets[] = new char[LINES_PER_GROUP_ + 1];
|
||||
char lengths[] = new char[LINES_PER_GROUP_ + 1];
|
||||
|
||||
int index = getGroupLengths(start, offsets, lengths);
|
||||
if (msb == m_groupinfo_[group * m_groupsize_]) {
|
||||
int index = getGroupLengths(group, m_groupoffsets_,
|
||||
m_grouplengths_);
|
||||
int offset = ch & GROUP_MASK_;
|
||||
return getGroupName(index + offsets[offset], lengths[offset],
|
||||
choice);
|
||||
return getGroupName(index + m_groupoffsets_[offset],
|
||||
m_grouplengths_[offset], choice);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Getting the character with the tokenized argument name
|
||||
* @param index of the group to check
|
||||
* @param name of the character
|
||||
* @param choice of Unicode version used
|
||||
* @return character with the tokenized argument name or -1 if character
|
||||
* is not found
|
||||
* Gets the character extended type
|
||||
* @param ch character to be tested
|
||||
* @return extended type it is associated with
|
||||
*/
|
||||
private int getGroupChar(int index, String name, int choice)
|
||||
private int getType(int ch)
|
||||
{
|
||||
// populating the data set of grouptable
|
||||
char offsets[] = new char[LINES_PER_GROUP_ + 1];
|
||||
char lengths[] = new char[LINES_PER_GROUP_ + 1];
|
||||
int startgpstrindex = getGroupLengths(index, offsets, lengths);
|
||||
|
||||
// shift out to function
|
||||
int result = getGroupChar(startgpstrindex, lengths, name, choice);
|
||||
if (result != -1) {
|
||||
return (m_groupinfo_[index * m_groupsize_] << GROUP_SHIFT_) |
|
||||
result;
|
||||
}
|
||||
return -1;
|
||||
if (UCharacter.isNonCharacter(ch)) {
|
||||
// not a character we return a invalid category count
|
||||
return UCharacterCategory.NON_CHARACTER_;
|
||||
}
|
||||
int result = UCharacter.getType(ch);
|
||||
if (result == UCharacterCategory.SURROGATE) {
|
||||
if (ch <= UnicodeProperty.LEAD_SURROGATE_MAX_VALUE) {
|
||||
result = UCharacterCategory.LEAD_SURROGATE_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TRAIL_SURROGATE_;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -987,65 +1181,4 @@ final class UCharacterName
|
|||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character extended type
|
||||
* @param ch character to be tested
|
||||
* @return extended type it is associated with
|
||||
*/
|
||||
private int getType(int ch)
|
||||
{
|
||||
if (UCharacter.isNonCharacter(ch)) {
|
||||
// not a character we return a invalid category count
|
||||
return UCharacterCategory.NON_CHARACTER_;
|
||||
}
|
||||
int result = UCharacter.getType(ch);
|
||||
if (result == UCharacterCategory.SURROGATE) {
|
||||
if (ch <= UnicodeProperty.LEAD_SURROGATE_MAX_VALUE) {
|
||||
result = UCharacterCategory.LEAD_SURROGATE_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TRAIL_SURROGATE_;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the extended name
|
||||
*/
|
||||
private String getExtendedName(int ch)
|
||||
{
|
||||
String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
if (result == null) {
|
||||
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||
result = getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
if (result == null) {
|
||||
int type = getType(ch);
|
||||
// Return unknown if the table of names above is not up to
|
||||
// date.
|
||||
if (type >= UCharacterCategory.TYPE_NAMES_.length) {
|
||||
result = UCharacterCategory.UNKNOWN_TYPE_NAME_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TYPE_NAMES_[type];
|
||||
}
|
||||
StringBuffer tempResult = new StringBuffer(result);
|
||||
tempResult.insert(0, '<');
|
||||
tempResult.append('-');
|
||||
String chStr = Integer.toHexString(ch).toUpperCase();
|
||||
int zeros = 4 - chStr.length();
|
||||
while (zeros > 0) {
|
||||
tempResult.append('0');
|
||||
zeros --;
|
||||
}
|
||||
tempResult.append(chStr);
|
||||
tempResult.append('>');
|
||||
result = tempResult.toString();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
313
icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java
Normal file
313
icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java
Normal file
|
@ -0,0 +1,313 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java,v $
|
||||
* $Date: 2002/03/08 02:04:00 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.lang;
|
||||
|
||||
import com.ibm.icu.util.ValueIterator;
|
||||
|
||||
/**
|
||||
* Class enabling iteration of the codepoints and their names.
|
||||
* Result of each iteration contains a valid codepoints that have the result
|
||||
* name.
|
||||
* See UCharacter.getNameIterator() for an example of use.
|
||||
* @author synwee
|
||||
* @since release 2.1, March 5 2002
|
||||
*/
|
||||
class UCharacterNameIterator implements ValueIterator
|
||||
{
|
||||
// public methods ----------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Gets the next result for this iteration and returns
|
||||
* true if we are not at the end of the iteration, false otherwise.</p>
|
||||
* <p>If the return boolean is a false, the contents of elements will not
|
||||
* be updated.</p>
|
||||
* @param element for storing the result range and value
|
||||
* @return true if we are not at the end of the iteration, false otherwise.
|
||||
* @see Element
|
||||
* @draft 2.1
|
||||
*/
|
||||
public boolean next(ValueIterator.Element element)
|
||||
{
|
||||
if (m_current_ >= m_limit_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_choice_ != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
|
||||
int length = m_name_.getAlgorithmLength();
|
||||
if (m_algorithmIndex_ < length) {
|
||||
while (m_algorithmIndex_ < length) {
|
||||
// find the algorithm range that could contain m_current_
|
||||
if (m_algorithmIndex_ < 0 ||
|
||||
m_name_.getAlgorithmEnd(m_algorithmIndex_) <
|
||||
m_current_) {
|
||||
m_algorithmIndex_ ++;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_algorithmIndex_ < length) {
|
||||
// interleave the data-driven ones with the algorithmic ones
|
||||
// iterate over all algorithmic ranges; assume that they are
|
||||
// in ascending order
|
||||
int start = m_name_.getAlgorithmStart(m_algorithmIndex_);
|
||||
if (m_current_ < start) {
|
||||
// this should get rid of those codepoints that are not
|
||||
// in the algorithmic range
|
||||
int end = start;
|
||||
if (m_limit_ <= start) {
|
||||
end = m_limit_;
|
||||
}
|
||||
if (!iterateGroup(element, end)) {
|
||||
m_current_ ++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_current_ >= m_limit_) {
|
||||
// after iterateGroup fails, current codepoint may be
|
||||
// greater than limit
|
||||
return false;
|
||||
}
|
||||
|
||||
element.integer = m_current_;
|
||||
element.value = m_name_.getAlgorithmName(m_algorithmIndex_,
|
||||
m_current_);
|
||||
// reset the group index if we are in the algorithmic names
|
||||
m_groupIndex_ = -1;
|
||||
m_current_ ++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// enumerate the character names after the last algorithmic range
|
||||
if (!iterateGroup(element, m_limit_)) {
|
||||
m_current_ ++;
|
||||
return true;
|
||||
}
|
||||
else if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
if (!iterateExtended(element, m_limit_)) {
|
||||
m_current_ ++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the iterator to the beginning of the iteration.
|
||||
* @draft 2.1
|
||||
*/
|
||||
public void reset()
|
||||
{
|
||||
m_current_ = m_start_;
|
||||
m_groupIndex_ = -1;
|
||||
m_algorithmIndex_ = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the range for iteration
|
||||
* @param start first codepoint to iterate
|
||||
* @param limit one codepoint after the last codepoint to iterate
|
||||
* @exception IllegalArgumentException thrown when start or limit exceed
|
||||
* the Unicode codepoint bounds or when start > limit.
|
||||
*/
|
||||
public void setRange(int start, int limit)
|
||||
{
|
||||
if (start > limit || start < UCharacter.MIN_VALUE ||
|
||||
limit > UCharacter.MAX_VALUE + 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"start or limit has to be valid Unicode codepoints and start <= limit");
|
||||
}
|
||||
m_start_ = start;
|
||||
m_limit_ = limit;
|
||||
m_current_ = start;
|
||||
}
|
||||
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param name name data
|
||||
* @param choice name choice from the class
|
||||
* com.ibm.icu.lang.UCharacterNameChoice
|
||||
* @draft 2.1
|
||||
*/
|
||||
protected UCharacterNameIterator(UCharacterName name, int choice)
|
||||
{
|
||||
m_name_ = name;
|
||||
// no explicit choice in UCharacter so no checks on choice
|
||||
m_choice_ = choice;
|
||||
m_start_ = UCharacter.MIN_VALUE;
|
||||
m_limit_ = UCharacter.MAX_VALUE + 1;
|
||||
m_current_ = m_start_;
|
||||
}
|
||||
|
||||
// private data members ---------------------------------------------
|
||||
|
||||
/**
|
||||
* Name data
|
||||
*/
|
||||
private UCharacterName m_name_;
|
||||
/**
|
||||
* Name choice
|
||||
*/
|
||||
private int m_choice_;
|
||||
/**
|
||||
* Start iteration range
|
||||
*/
|
||||
private int m_start_;
|
||||
/**
|
||||
* End + 1 iteration range
|
||||
*/
|
||||
private int m_limit_;
|
||||
/**
|
||||
* Current codepoint
|
||||
*/
|
||||
private int m_current_;
|
||||
/**
|
||||
* Group index
|
||||
*/
|
||||
private int m_groupIndex_ = -1;
|
||||
/**
|
||||
* Algorithm index
|
||||
*/
|
||||
private int m_algorithmIndex_ = -1;
|
||||
/**
|
||||
* Group use
|
||||
*/
|
||||
private static char GROUP_OFFSETS_[] =
|
||||
new char[UCharacterName.LINES_PER_GROUP_ + 1];
|
||||
private static char GROUP_LENGTHS_[] =
|
||||
new char[UCharacterName.LINES_PER_GROUP_ + 1];
|
||||
|
||||
// private methods --------------------------------------------------
|
||||
|
||||
/**
|
||||
* Group name iteration, iterate all the names in the current 32-group and
|
||||
* returns the first codepoint that has a valid name.
|
||||
* @param result stores the result codepoint and name
|
||||
* @param limit last codepoint + 1 in range to search
|
||||
* @return false if a codepoint with a name is found in group and we can
|
||||
* bail from further iteration, true to continue on with the
|
||||
* iteration
|
||||
*/
|
||||
private boolean iterateSingleGroup(Element result, int limit)
|
||||
{
|
||||
synchronized(GROUP_OFFSETS_) {
|
||||
synchronized(GROUP_LENGTHS_) {
|
||||
int index = m_name_.getGroupLengths(m_groupIndex_, GROUP_OFFSETS_,
|
||||
GROUP_LENGTHS_);
|
||||
while (m_current_ < limit) {
|
||||
int offset = m_name_.getGroupOffset(m_current_);
|
||||
String name = m_name_.getGroupName(
|
||||
index + GROUP_OFFSETS_[offset],
|
||||
GROUP_LENGTHS_[offset], m_choice_);
|
||||
if ((name == null || name.length() == 0) &&
|
||||
m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
name = m_name_.getExtendedName(m_current_);
|
||||
}
|
||||
if (name != null && name.length() > 0) {
|
||||
result.integer = m_current_;
|
||||
result.value = name;
|
||||
return false;
|
||||
}
|
||||
++ m_current_;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Group name iteration, iterate all the names in the current 32-group and
|
||||
* returns the first codepoint that has a valid name.
|
||||
* @param result stores the result codepoint and name
|
||||
* @param limit last codepoint + 1 in range to search
|
||||
* @return false if a codepoint with a name is found in group and we can
|
||||
* bail from further iteration, true to continue on with the
|
||||
* iteration
|
||||
*/
|
||||
private boolean iterateGroup(Element result, int limit)
|
||||
{
|
||||
if (m_groupIndex_ < 0) {
|
||||
m_groupIndex_ = m_name_.getGroup(m_current_);
|
||||
}
|
||||
|
||||
while (m_groupIndex_ < m_name_.m_groupcount_ &&
|
||||
m_current_ < limit) {
|
||||
// iterate till the last group or the last codepoint
|
||||
int startMSB = m_name_.getCodepointMSB(m_current_);
|
||||
int gMSB = m_name_.getGroupMSB(m_groupIndex_); // can be -1
|
||||
if (startMSB == gMSB) {
|
||||
if (startMSB == m_name_.getCodepointMSB(limit - 1)) {
|
||||
// if start and limit - 1 are in the same group, then enumerate
|
||||
// only in that one
|
||||
return iterateSingleGroup(result, limit);
|
||||
}
|
||||
// enumerate characters in the partial start group
|
||||
// if (m_name_.getGroupOffset(m_current_) != 0) {
|
||||
if (!iterateSingleGroup(result,
|
||||
m_name_.getGroupLimit(gMSB))) {
|
||||
return false;
|
||||
}
|
||||
++ m_groupIndex_; // continue with the next group
|
||||
}
|
||||
else if (startMSB > gMSB) {
|
||||
// make sure that we start enumerating with the first group
|
||||
// after start
|
||||
m_groupIndex_ ++;
|
||||
}
|
||||
else {
|
||||
int gMIN = m_name_.getGroupMin(gMSB);
|
||||
if (gMIN > limit) {
|
||||
gMIN = limit;
|
||||
}
|
||||
if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
if (!iterateExtended(result, gMIN)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
m_current_ = gMIN;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate extended names.
|
||||
* @param result stores the result codepoint and name
|
||||
* @param limit last codepoint + 1 in range to search
|
||||
* @return false if a codepoint with a name is found and we can
|
||||
* bail from further iteration, true to continue on with the
|
||||
* iteration (this will always be false for valid codepoints)
|
||||
*/
|
||||
private boolean iterateExtended(UCharacterNameIterator.Element result,
|
||||
int limit)
|
||||
{
|
||||
while (m_current_ < limit) {
|
||||
String name = m_name_.getExtendedOr10Name(m_current_);
|
||||
if (name != null && name.length() > 0) {
|
||||
result.integer = m_current_;
|
||||
result.value = name;
|
||||
return false;
|
||||
}
|
||||
++ m_current_;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterNameReader.java,v $
|
||||
* $Date: 2002/02/28 23:42:04 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2002/03/08 02:04:00 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -92,7 +92,19 @@ final class UCharacterNameReader
|
|||
|
||||
size = m_algnamesindex_ - m_groupstringindex_;
|
||||
byte groupstring[] = new byte[size];
|
||||
System.out.println("size " + size);
|
||||
m_dataInputStream_.readFully(groupstring);
|
||||
for (int i = 0; i < size; i ++) {
|
||||
if (groupstring[i] == 0x14 &&
|
||||
groupstring[i + 1] == 0x12 &&
|
||||
groupstring[i + 2] == 0x3e &&
|
||||
groupstring[i + 3] == 0x01 &&
|
||||
groupstring[i + 4] == 0x39 &&
|
||||
groupstring[i + 5] == 0x4 &&
|
||||
groupstring[i + 6] == 0x1e)
|
||||
System.out.println("found at " + i);
|
||||
}
|
||||
|
||||
data.setGroup(group, groupstring);
|
||||
|
||||
count = m_dataInputStream_.readInt();
|
||||
|
|
Loading…
Add table
Reference in a new issue