mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-2285
* clean up code
* added iso comments
* added get max names length
* added get names set
* shifted UCharacterName and related code to impl

X-SVN-Rev: 9892
This commit is contained in:
parent
f9008228cf
commit
5163258339
8 changed files with 1816 additions and 1325 deletions
1710
icu4j/src/com/ibm/icu/impl/UCharacterName.java
Normal file
1710
icu4j/src/com/ibm/icu/impl/UCharacterName.java
Normal file
File diff suppressed because it is too large
Load diff
17
icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java → icu4j/src/com/ibm/icu/impl/UCharacterNameChoice.java
Executable file → Normal file
17
icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java → icu4j/src/com/ibm/icu/impl/UCharacterNameChoice.java
Executable file → Normal file
|
@ -6,13 +6,13 @@
|
|||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
||||
* $Date: 2002/02/16 03:05:57 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2002/09/19 21:19:04 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.lang;
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
/**
|
||||
* Internal class containing selector constants for the unicode character names.
|
||||
|
@ -24,12 +24,13 @@ package com.ibm.icu.lang;
|
|||
* @since oct0600
|
||||
*/
|
||||
|
||||
interface UCharacterNameChoice
|
||||
public interface UCharacterNameChoice
|
||||
{
|
||||
// public variables =============================================
|
||||
|
||||
static final int U_UNICODE_CHAR_NAME = 0;
|
||||
static final int U_UNICODE_10_CHAR_NAME = 1;
|
||||
static final int U_EXTENDED_CHAR_NAME = 2;
|
||||
static final int U_CHAR_NAME_CHOICE_COUNT = 3;
|
||||
static final int UNICODE_CHAR_NAME = 0;
|
||||
static final int UNICODE_10_CHAR_NAME = 1;
|
||||
static final int EXTENDED_CHAR_NAME = 2;
|
||||
static final int CHAR_NAME_CHOICE_COUNT = 3;
|
||||
static final int ISO_COMMENT_ = CHAR_NAME_CHOICE_COUNT;
|
||||
}
|
8
icu4j/src/com/ibm/icu/lang/UCharacterNameReader.java → icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java
Executable file → Normal file
8
icu4j/src/com/ibm/icu/lang/UCharacterNameReader.java → icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java
Executable file → Normal file
|
@ -4,13 +4,13 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterNameReader.java,v $
|
||||
* $Date: 2002/08/01 19:50:26 $
|
||||
* $Revision: 1.11 $
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java,v $
|
||||
* $Date: 2002/09/19 21:19:04 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.lang;
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.DataInputStream;
|
67
icu4j/src/com/ibm/icu/lang/UCharacterUtil.java → icu4j/src/com/ibm/icu/impl/UCharacterUtility.java
Executable file → Normal file
67
icu4j/src/com/ibm/icu/lang/UCharacterUtil.java → icu4j/src/com/ibm/icu/impl/UCharacterUtility.java
Executable file → Normal file
|
@ -4,13 +4,13 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterUtil.java,v $
|
||||
* $Date: 2002/07/22 23:28:21 $
|
||||
* $Revision: 1.6 $
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterUtility.java,v $
|
||||
* $Date: 2002/09/19 21:19:04 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.lang;
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
/**
|
||||
* Internal character utility class for simple data type conversion and String
|
||||
|
@ -19,18 +19,26 @@ package com.ibm.icu.lang;
|
|||
* @since sep2900
|
||||
*/
|
||||
|
||||
final class UCharacterUtil
|
||||
public final class UCharacterUtility
|
||||
{
|
||||
// constructor =====================================================
|
||||
|
||||
// public methods -----------------------------------------------------
|
||||
|
||||
/**
|
||||
* private constructor to avoid initialisation
|
||||
* Determines if codepoint is a non character
|
||||
* @param ch codepoint
|
||||
* @return true if codepoint is a non character, false otherwise
|
||||
*/
|
||||
private UCharacterUtil()
|
||||
public static boolean isNonCharacter(int ch)
|
||||
{
|
||||
if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) ==
|
||||
NON_CHARACTER_SUFFIX_MIN_3_0_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return ch >= NON_CHARACTER_MIN_3_1_ && ch <= NON_CHARACTER_MAX_3_1_;
|
||||
}
|
||||
|
||||
// protected methods ===============================================
|
||||
|
||||
// package private methods ---------------------------------------------
|
||||
|
||||
/**
|
||||
* joining 2 chars to form an int
|
||||
|
@ -38,7 +46,7 @@ final class UCharacterUtil
|
|||
* @param lsc least significant char
|
||||
* @return int form
|
||||
*/
|
||||
protected static int toInt(char msc, char lsc)
|
||||
static int toInt(char msc, char lsc)
|
||||
{
|
||||
return ((msc << 16) | lsc);
|
||||
}
|
||||
|
@ -49,7 +57,7 @@ final class UCharacterUtil
|
|||
* @param lsb the least significant byte
|
||||
* @return char form
|
||||
*/
|
||||
protected static char toChar(byte msb, byte lsb)
|
||||
static char toChar(byte msb, byte lsb)
|
||||
{
|
||||
return (char)((msb << 8) | (lsb & 0xFF));
|
||||
}
|
||||
|
@ -65,7 +73,7 @@ final class UCharacterUtil
|
|||
* @param index to start substring in byte count
|
||||
* @return the end position of the substring within the character array
|
||||
*/
|
||||
protected static int getNullTermByteSubString(StringBuffer str, byte[] array,
|
||||
static int getNullTermByteSubString(StringBuffer str, byte[] array,
|
||||
int index)
|
||||
{
|
||||
byte b = 1;
|
||||
|
@ -93,7 +101,7 @@ final class UCharacterUtil
|
|||
* @return the end position of the substring within str if matches otherwise
|
||||
* a -1
|
||||
*/
|
||||
protected static int compareNullTermByteSubString(String str, byte[] array,
|
||||
static int compareNullTermByteSubString(String str, byte[] array,
|
||||
int strindex, int aindex)
|
||||
{
|
||||
byte b = 1;
|
||||
|
@ -127,7 +135,7 @@ final class UCharacterUtil
|
|||
* @param skipcount number of null terminated substrings to skip
|
||||
* @return the end position of the substrings within the character array
|
||||
*/
|
||||
protected static int skipNullTermByteSubString(byte[] array, int index,
|
||||
static int skipNullTermByteSubString(byte[] array, int index,
|
||||
int skipcount)
|
||||
{
|
||||
byte b;
|
||||
|
@ -154,7 +162,7 @@ final class UCharacterUtil
|
|||
* @param skipend value of byte to skip to
|
||||
* @return the number of bytes skipped
|
||||
*/
|
||||
protected static int skipByteSubString(byte[] array, int index, int length,
|
||||
static int skipByteSubString(byte[] array, int index, int length,
|
||||
byte skipend)
|
||||
{
|
||||
int result;
|
||||
|
@ -172,5 +180,30 @@ final class UCharacterUtil
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
// private data member --------------------------------------------------
|
||||
|
||||
/**
|
||||
* Minimum suffix value that indicates if a character is a non character.
|
||||
* Unicode 3.0 non characters
|
||||
*/
|
||||
private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
|
||||
/**
|
||||
* New minimum non character in Unicode 3.1
|
||||
*/
|
||||
private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
|
||||
/**
|
||||
* New maximum non character in Unicode 3.1
|
||||
*/
|
||||
private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;
|
||||
|
||||
// private constructor --------------------------------------------------
|
||||
|
||||
/**
|
||||
* private constructor to prevent instantiation
|
||||
*/
|
||||
private UCharacterUtility()
|
||||
{
|
||||
}
|
||||
}
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
|
||||
* $Date: 2002/09/11 00:12:39 $
|
||||
* $Revision: 1.46 $
|
||||
* $Date: 2002/09/19 21:18:14 $
|
||||
* $Revision: 1.47 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -21,6 +21,9 @@ import com.ibm.icu.util.VersionInfo;
|
|||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.impl.NormalizerImpl;
|
||||
import com.ibm.icu.impl.UCharacterUtility;
|
||||
import com.ibm.icu.impl.UCharacterName;
|
||||
import com.ibm.icu.impl.UCharacterNameChoice;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
|
@ -842,7 +845,7 @@ public final class UCharacter
|
|||
if (ch <= UTF16.SURROGATE_MAX_VALUE) {
|
||||
return false;
|
||||
}
|
||||
if (isNonCharacter(ch)) {
|
||||
if (UCharacterUtility.isNonCharacter(ch)) {
|
||||
return false;
|
||||
}
|
||||
return (ch <= MAX_VALUE);
|
||||
|
@ -898,7 +901,7 @@ public final class UCharacter
|
|||
*/
|
||||
public static String getName(int ch)
|
||||
{
|
||||
return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -914,7 +917,7 @@ public final class UCharacter
|
|||
public static String getName1_0(int ch)
|
||||
{
|
||||
return NAME_.getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
UCharacterNameChoice.UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -937,7 +940,22 @@ public final class UCharacter
|
|||
*/
|
||||
public static String getExtendedName(int ch)
|
||||
{
|
||||
return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||
return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the ISO 10646 comment for a character.
|
||||
* The ISO 10646 comment is an informative field in the Unicode Character
|
||||
* Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
|
||||
* @param ch The code point for which to get the ISO comment.
|
||||
* It must be <code>0<=ch<=0x10ffff</code>.
|
||||
* @return The ISO comment, or null if there is no comment for this
|
||||
* character.
|
||||
* @draft ICU 2.4
|
||||
*/
|
||||
public static String getISOComment(int ch)
|
||||
{
|
||||
return NAME_.getName(ch, UCharacterNameChoice.ISO_COMMENT_);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -952,7 +970,7 @@ public final class UCharacter
|
|||
public static int getCharFromName(String name)
|
||||
{
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME, name);
|
||||
UCharacterNameChoice.UNICODE_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -967,7 +985,7 @@ public final class UCharacter
|
|||
public static int getCharFromName1_0(String name)
|
||||
{
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
||||
UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -992,7 +1010,7 @@ public final class UCharacter
|
|||
public static int getCharFromExtendedName(String name)
|
||||
{
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
|
||||
UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1462,7 +1480,7 @@ public final class UCharacter
|
|||
public static ValueIterator getNameIterator()
|
||||
{
|
||||
return new UCharacterNameIterator(NAME_,
|
||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
UCharacterNameChoice.UNICODE_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1487,7 +1505,7 @@ public final class UCharacter
|
|||
public static ValueIterator getName1_0Iterator()
|
||||
{
|
||||
return new UCharacterNameIterator(NAME_,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
UCharacterNameChoice.UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1512,7 +1530,7 @@ public final class UCharacter
|
|||
public static ValueIterator getExtendedNameIterator()
|
||||
{
|
||||
return new UCharacterNameIterator(NAME_,
|
||||
UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||
UCharacterNameChoice.EXTENDED_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1616,7 +1634,7 @@ public final class UCharacter
|
|||
{
|
||||
return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
|
||||
}
|
||||
|
||||
|
||||
// protected data members --------------------------------------------
|
||||
|
||||
/**
|
||||
|
@ -1629,30 +1647,13 @@ public final class UCharacter
|
|||
{
|
||||
try
|
||||
{
|
||||
NAME_ = new UCharacterName();
|
||||
NAME_ = UCharacterName.getInstance();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new RuntimeException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// protected methods -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Determines if codepoint is a non character
|
||||
* @param ch codepoint
|
||||
* @return true if codepoint is a non character false otherwise
|
||||
*/
|
||||
static boolean isNonCharacter(int ch)
|
||||
{
|
||||
if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) ==
|
||||
NON_CHARACTER_SUFFIX_MIN_3_0_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return ch >= NON_CHARACTER_MIN_3_1_ && ch <= NON_CHARACTER_MAX_3_1_;
|
||||
}
|
||||
|
||||
// private variables -------------------------------------------------
|
||||
|
||||
|
@ -1692,24 +1693,8 @@ public final class UCharacter
|
|||
/**
|
||||
* Shift 24 bits
|
||||
*/
|
||||
private static final int SHIFT_24_ = 24;
|
||||
|
||||
/**
|
||||
* Minimum suffix value that indicates if a character is non character.
|
||||
* Unicode 3.0 non characters
|
||||
*/
|
||||
private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
|
||||
private static final int SHIFT_24_ = 24;
|
||||
|
||||
/**
|
||||
* New minimum non character in Unicode 3.1
|
||||
*/
|
||||
private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
|
||||
|
||||
/**
|
||||
* New non character range in Unicode 3.1
|
||||
*/
|
||||
private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;
|
||||
|
||||
/**
|
||||
* Decimal radix
|
||||
*/
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
||||
* $Date: 2002/09/11 00:12:39 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2002/09/19 21:18:14 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -250,63 +250,4 @@ public final class UCharacterCategory
|
|||
private UCharacterCategory()
|
||||
{
|
||||
}
|
||||
|
||||
// package private data members --------------------------------------
|
||||
|
||||
/**
|
||||
* Not a character type
|
||||
*/
|
||||
static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
|
||||
/**
|
||||
* Lead surrogate type
|
||||
*/
|
||||
static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
|
||||
/**
|
||||
* Trail surrogate type
|
||||
*/
|
||||
static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
|
||||
/**
|
||||
* Extended category count
|
||||
*/
|
||||
static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
|
||||
/**
|
||||
* Type names used for extended names
|
||||
*/
|
||||
static final String TYPE_NAMES_[] = {"unassigned",
|
||||
"uppercase letter",
|
||||
"lowercase letter",
|
||||
"titlecase letter",
|
||||
"modifier letter",
|
||||
"other letter",
|
||||
"non spacing mark",
|
||||
"enclosing mark",
|
||||
"combining spacing mark",
|
||||
"decimal digit number",
|
||||
"letter number",
|
||||
"other number",
|
||||
"space separator",
|
||||
"line separator",
|
||||
"paragraph separator",
|
||||
"control",
|
||||
"format",
|
||||
"private use area",
|
||||
"surrogate",
|
||||
"dash punctuation",
|
||||
"start punctuation",
|
||||
"end punctuation",
|
||||
"connector punctuation",
|
||||
"other punctuation",
|
||||
"math symbol",
|
||||
"currency symbol",
|
||||
"modifier symbol",
|
||||
"other symbol",
|
||||
"initial punctuation",
|
||||
"final punctuation",
|
||||
"noncharacter",
|
||||
"lead surrogate",
|
||||
"trail surrogate"};
|
||||
/**
|
||||
* Unknown type name
|
||||
*/
|
||||
static final String UNKNOWN_TYPE_NAME_ = "unknown";
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,8 +5,8 @@
|
|||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java,v $
|
||||
* $Date: 2002/04/05 01:38:15 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2002/09/19 21:18:14 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -14,6 +14,8 @@
|
|||
package com.ibm.icu.lang;
|
||||
|
||||
import com.ibm.icu.util.ValueIterator;
|
||||
import com.ibm.icu.impl.UCharacterName;
|
||||
import com.ibm.icu.impl.UCharacterNameChoice;
|
||||
|
||||
/**
|
||||
* <p>Class enabling iteration of the codepoints and their names.</p>
|
||||
|
@ -43,7 +45,7 @@ class UCharacterNameIterator implements ValueIterator
|
|||
return false;
|
||||
}
|
||||
|
||||
if (m_choice_ != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
|
||||
if (m_choice_ != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
|
||||
int length = m_name_.getAlgorithmLength();
|
||||
if (m_algorithmIndex_ < length) {
|
||||
while (m_algorithmIndex_ < length) {
|
||||
|
@ -97,7 +99,7 @@ class UCharacterNameIterator implements ValueIterator
|
|||
m_current_ ++;
|
||||
return true;
|
||||
}
|
||||
else if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
|
||||
if (!iterateExtended(element, m_limit_)) {
|
||||
m_current_ ++;
|
||||
return true;
|
||||
|
@ -238,7 +240,7 @@ class UCharacterNameIterator implements ValueIterator
|
|||
index + GROUP_OFFSETS_[offset],
|
||||
GROUP_LENGTHS_[offset], m_choice_);
|
||||
if ((name == null || name.length() == 0) &&
|
||||
m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
|
||||
name = m_name_.getExtendedName(m_current_);
|
||||
}
|
||||
if (name != null && name.length() > 0) {
|
||||
|
@ -297,7 +299,7 @@ class UCharacterNameIterator implements ValueIterator
|
|||
if (gMIN > limit) {
|
||||
gMIN = limit;
|
||||
}
|
||||
if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
|
||||
if (!iterateExtended(result, gMIN)) {
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue