* clean up code
* added iso comments
* added get max names length
* added get names set
* shifted UCharacterName and related code to impl

X-SVN-Rev: 9892
This commit is contained in:
Syn Wee Quek 2002-09-19 21:19:04 +00:00
parent f9008228cf
commit 5163258339
8 changed files with 1816 additions and 1325 deletions

File diff suppressed because it is too large Load diff

View file

@ -6,13 +6,13 @@
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
* $Date: 2002/02/16 03:05:57 $
* $Revision: 1.4 $
* $Date: 2002/09/19 21:19:04 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
package com.ibm.icu.impl;
/**
* Internal class containing selector constants for the unicode character names.
@ -24,12 +24,13 @@ package com.ibm.icu.lang;
* @since oct0600
*/
interface UCharacterNameChoice
public interface UCharacterNameChoice
{
// public variables =============================================
static final int U_UNICODE_CHAR_NAME = 0;
static final int U_UNICODE_10_CHAR_NAME = 1;
static final int U_EXTENDED_CHAR_NAME = 2;
static final int U_CHAR_NAME_CHOICE_COUNT = 3;
static final int UNICODE_CHAR_NAME = 0;
static final int UNICODE_10_CHAR_NAME = 1;
static final int EXTENDED_CHAR_NAME = 2;
static final int CHAR_NAME_CHOICE_COUNT = 3;
static final int ISO_COMMENT_ = CHAR_NAME_CHOICE_COUNT;
}

View file

@ -4,13 +4,13 @@
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterNameReader.java,v $
* $Date: 2002/08/01 19:50:26 $
* $Revision: 1.11 $
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java,v $
* $Date: 2002/09/19 21:19:04 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
package com.ibm.icu.impl;
import java.io.InputStream;
import java.io.DataInputStream;

View file

@ -4,13 +4,13 @@
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterUtil.java,v $
* $Date: 2002/07/22 23:28:21 $
* $Revision: 1.6 $
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterUtility.java,v $
* $Date: 2002/09/19 21:19:04 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
package com.ibm.icu.impl;
/**
* Internal character utility class for simple data type conversion and String
@ -19,18 +19,26 @@ package com.ibm.icu.lang;
* @since sep2900
*/
final class UCharacterUtil
public final class UCharacterUtility
{
// constructor =====================================================
// public methods -----------------------------------------------------
/**
* private constructor to avoid initialisation
* Determines whether the codepoint is a non character.
* @param ch codepoint
* @return true if the codepoint is a non character, false otherwise
*/
private UCharacterUtil()
public static boolean isNonCharacter(int ch)
{
if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) ==
NON_CHARACTER_SUFFIX_MIN_3_0_) {
return true;
}
return ch >= NON_CHARACTER_MIN_3_1_ && ch <= NON_CHARACTER_MAX_3_1_;
}
// protected methods ===============================================
// package private methods ---------------------------------------------
/**
* joining 2 chars to form an int
@ -38,7 +46,7 @@ final class UCharacterUtil
* @param lsc least significant char
* @return int form
*/
protected static int toInt(char msc, char lsc)
static int toInt(char msc, char lsc)
{
return ((msc << 16) | lsc);
}
@ -49,7 +57,7 @@ final class UCharacterUtil
* @param lsb the least significant byte
* @return char form
*/
protected static char toChar(byte msb, byte lsb)
static char toChar(byte msb, byte lsb)
{
return (char)((msb << 8) | (lsb & 0xFF));
}
@ -65,7 +73,7 @@ final class UCharacterUtil
* @param index to start substring in byte count
* @return the end position of the substring within the character array
*/
protected static int getNullTermByteSubString(StringBuffer str, byte[] array,
static int getNullTermByteSubString(StringBuffer str, byte[] array,
int index)
{
byte b = 1;
@ -93,7 +101,7 @@ final class UCharacterUtil
* @return the end position of the substring within str if matches otherwise
* a -1
*/
protected static int compareNullTermByteSubString(String str, byte[] array,
static int compareNullTermByteSubString(String str, byte[] array,
int strindex, int aindex)
{
byte b = 1;
@ -127,7 +135,7 @@ final class UCharacterUtil
* @param skipcount number of null terminated substrings to skip
* @return the end position of the substrings within the character array
*/
protected static int skipNullTermByteSubString(byte[] array, int index,
static int skipNullTermByteSubString(byte[] array, int index,
int skipcount)
{
byte b;
@ -154,7 +162,7 @@ final class UCharacterUtil
* @param skipend value of byte to skip to
* @return the number of bytes skipped
*/
protected static int skipByteSubString(byte[] array, int index, int length,
static int skipByteSubString(byte[] array, int index, int length,
byte skipend)
{
int result;
@ -172,5 +180,30 @@ final class UCharacterUtil
return result;
}
// private data member --------------------------------------------------
/**
* Minimum suffix value that indicates if a character is non character.
* Unicode 3.0 non characters
*/
private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
/**
* New minimum non character in Unicode 3.1
*/
private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
/**
* New maximum non character in Unicode 3.1
*/
private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;
// private constructor --------------------------------------------------
/**
* private constructor to avoid initialisation
*/
private UCharacterUtility()
{
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
* $Date: 2002/09/11 00:12:39 $
* $Revision: 1.46 $
* $Date: 2002/09/19 21:18:14 $
* $Revision: 1.47 $
*
*******************************************************************************
*/
@ -21,6 +21,9 @@ import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.UCharacterUtility;
import com.ibm.icu.impl.UCharacterName;
import com.ibm.icu.impl.UCharacterNameChoice;
/**
* <p>
@ -842,7 +845,7 @@ public final class UCharacter
if (ch <= UTF16.SURROGATE_MAX_VALUE) {
return false;
}
if (isNonCharacter(ch)) {
if (UCharacterUtility.isNonCharacter(ch)) {
return false;
}
return (ch <= MAX_VALUE);
@ -898,7 +901,7 @@ public final class UCharacter
*/
public static String getName(int ch)
{
return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
}
/**
@ -914,7 +917,7 @@ public final class UCharacter
public static String getName1_0(int ch)
{
return NAME_.getName(ch,
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
UCharacterNameChoice.UNICODE_10_CHAR_NAME);
}
/**
@ -937,7 +940,22 @@ public final class UCharacter
*/
public static String getExtendedName(int ch)
{
return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
}
/**
* Get the ISO 10646 comment for a character.
* The ISO 10646 comment is an informative field in the Unicode Character
* Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
* @param ch The code point for which to get the ISO comment.
* It must be <code>0<=ch<=0x10ffff</code>.
* @return The ISO comment, or null if there is no comment for this
* character.
* @draft ICU 2.4
*/
public static String getISOComment(int ch)
{
return NAME_.getName(ch, UCharacterNameChoice.ISO_COMMENT_);
}
/**
@ -952,7 +970,7 @@ public final class UCharacter
public static int getCharFromName(String name)
{
return NAME_.getCharFromName(
UCharacterNameChoice.U_UNICODE_CHAR_NAME, name);
UCharacterNameChoice.UNICODE_CHAR_NAME, name);
}
/**
@ -967,7 +985,7 @@ public final class UCharacter
public static int getCharFromName1_0(String name)
{
return NAME_.getCharFromName(
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
}
/**
@ -992,7 +1010,7 @@ public final class UCharacter
public static int getCharFromExtendedName(String name)
{
return NAME_.getCharFromName(
UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
}
/**
@ -1462,7 +1480,7 @@ public final class UCharacter
public static ValueIterator getNameIterator()
{
return new UCharacterNameIterator(NAME_,
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
UCharacterNameChoice.UNICODE_CHAR_NAME);
}
/**
@ -1487,7 +1505,7 @@ public final class UCharacter
public static ValueIterator getName1_0Iterator()
{
return new UCharacterNameIterator(NAME_,
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
UCharacterNameChoice.UNICODE_10_CHAR_NAME);
}
/**
@ -1512,7 +1530,7 @@ public final class UCharacter
public static ValueIterator getExtendedNameIterator()
{
return new UCharacterNameIterator(NAME_,
UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
UCharacterNameChoice.EXTENDED_CHAR_NAME);
}
/**
@ -1616,7 +1634,7 @@ public final class UCharacter
{
return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
}
// protected data members --------------------------------------------
/**
@ -1629,30 +1647,13 @@ public final class UCharacter
{
try
{
NAME_ = new UCharacterName();
NAME_ = UCharacterName.getInstance();
}
catch (Exception e)
{
throw new RuntimeException(e.getMessage());
}
}
// protected methods -------------------------------------------------
/**
* Determines if codepoint is a non character
* @param ch codepoint
* @return true if codepoint is a non character false otherwise
*/
static boolean isNonCharacter(int ch)
{
if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) ==
NON_CHARACTER_SUFFIX_MIN_3_0_) {
return true;
}
return ch >= NON_CHARACTER_MIN_3_1_ && ch <= NON_CHARACTER_MAX_3_1_;
}
// private variables -------------------------------------------------
@ -1692,24 +1693,8 @@ public final class UCharacter
/**
* Shift 24 bits
*/
private static final int SHIFT_24_ = 24;
/**
* Minimum suffix value that indicates if a character is non character.
* Unicode 3.0 non characters
*/
private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
private static final int SHIFT_24_ = 24;
/**
* New minimum non character in Unicode 3.1
*/
private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
/**
* New non character range in Unicode 3.1
*/
private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;
/**
* Decimal radix
*/

View file

@ -6,8 +6,8 @@
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
* $Date: 2002/09/11 00:12:39 $
* $Revision: 1.8 $
* $Date: 2002/09/19 21:18:14 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -250,63 +250,4 @@ public final class UCharacterCategory
private UCharacterCategory()
{
}
// package private data members --------------------------------------
/**
* Not a character type
*/
static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
/**
* Lead surrogate type
*/
static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
/**
* Trail surrogate type
*/
static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
/**
* Extended category count
*/
static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
/**
* Type names used for extended names
*/
static final String TYPE_NAMES_[] = {"unassigned",
"uppercase letter",
"lowercase letter",
"titlecase letter",
"modifier letter",
"other letter",
"non spacing mark",
"enclosing mark",
"combining spacing mark",
"decimal digit number",
"letter number",
"other number",
"space separator",
"line separator",
"paragraph separator",
"control",
"format",
"private use area",
"surrogate",
"dash punctuation",
"start punctuation",
"end punctuation",
"connector punctuation",
"other punctuation",
"math symbol",
"currency symbol",
"modifier symbol",
"other symbol",
"initial punctuation",
"final punctuation",
"noncharacter",
"lead surrogate",
"trail surrogate"};
/**
* Unknown type name
*/
static final String UNKNOWN_TYPE_NAME_ = "unknown";
}

File diff suppressed because it is too large Load diff

View file

@ -5,8 +5,8 @@
******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java,v $
* $Date: 2002/04/05 01:38:15 $
* $Revision: 1.3 $
* $Date: 2002/09/19 21:18:14 $
* $Revision: 1.4 $
*
******************************************************************************
*/
@ -14,6 +14,8 @@
package com.ibm.icu.lang;
import com.ibm.icu.util.ValueIterator;
import com.ibm.icu.impl.UCharacterName;
import com.ibm.icu.impl.UCharacterNameChoice;
/**
* <p>Class enabling iteration of the codepoints and their names.</p>
@ -43,7 +45,7 @@ class UCharacterNameIterator implements ValueIterator
return false;
}
if (m_choice_ != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
if (m_choice_ != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
int length = m_name_.getAlgorithmLength();
if (m_algorithmIndex_ < length) {
while (m_algorithmIndex_ < length) {
@ -97,7 +99,7 @@ class UCharacterNameIterator implements ValueIterator
m_current_ ++;
return true;
}
else if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
if (!iterateExtended(element, m_limit_)) {
m_current_ ++;
return true;
@ -238,7 +240,7 @@ class UCharacterNameIterator implements ValueIterator
index + GROUP_OFFSETS_[offset],
GROUP_LENGTHS_[offset], m_choice_);
if ((name == null || name.length() == 0) &&
m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
name = m_name_.getExtendedName(m_current_);
}
if (name != null && name.length() > 0) {
@ -297,7 +299,7 @@ class UCharacterNameIterator implements ValueIterator
if (gMIN > limit) {
gMIN = limit;
}
if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
if (!iterateExtended(result, gMIN)) {
return false;
}