From ace86ed92be214e194338e34e4354d11ce8926cc Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Wed, 10 Mar 2004 02:21:38 +0000 Subject: [PATCH] ICU-3630 cover jsr 204 APIs where possible, also jb 3523 after a fashion X-SVN-Rev: 14666 --- icu4j/src/com/ibm/icu/lang/UCharacter.java | 741 +++++++++++++----- .../com/ibm/icu/lang/UCharacterCategory.java | 338 ++------ .../com/ibm/icu/lang/UCharacterDirection.java | 213 ++--- .../src/com/ibm/icu/lang/UCharacterEnums.java | 492 ++++++++++++ 4 files changed, 1157 insertions(+), 627 deletions(-) create mode 100644 icu4j/src/com/ibm/icu/lang/UCharacterEnums.java diff --git a/icu4j/src/com/ibm/icu/lang/UCharacter.java b/icu4j/src/com/ibm/icu/lang/UCharacter.java index 0ee39fbe19d..c888caaa933 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java @@ -1,19 +1,23 @@ /** ******************************************************************************* -* Copyright (C) 1996-2003, International Business Machines Corporation and * +* Copyright (C) 1996-2004, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $ -* $Date: 2004/02/06 21:54:00 $ -* $Revision: 1.86 $ +* $Date: 2004/03/10 02:21:38 $ +* $Revision: 1.87 $ * ******************************************************************************* */ package com.ibm.icu.lang; +import java.lang.ref.SoftReference; +import java.util.HashMap; import java.util.Locale; +import java.util.Map; + import com.ibm.icu.impl.UCharacterProperty; import com.ibm.icu.util.RangeValueIterator; import com.ibm.icu.util.ValueIterator; @@ -27,6 +31,8 @@ import com.ibm.icu.impl.UCharacterName; import com.ibm.icu.impl.UCharacterNameChoice; import com.ibm.icu.impl.UPropertyAliases; +import com.ibm.icu.lang.UCharacterEnums.*; + /** *

* The UCharacter class provides extensions to the @@ -83,17 +89,10 @@ import com.ibm.icu.impl.UPropertyAliases; *

* @author Syn Wee Quek * @stable ICU 2.1 - * @see com.ibm.icu.lang.UCharacterCategory - * @see com.ibm.icu.lang.UCharacterDirection + * @see com.ibm.icu.lang.UCharacterEnums */ -/* - * notes: - * 1) forDigit is not provided since there is no difference between the - * icu4c version and the jdk version - */ - -public final class UCharacter +public final class UCharacter implements ECharacterCategory, ECharacterDirection { // public inner classes ---------------------------------------------- @@ -1229,36 +1228,36 @@ public final class UCharacter */ public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; - /** @draft ICU 2.6 */ - public static final int LIMBU_ID = 111; /*[1900]*/ - /** @draft ICU 2.6 */ - public static final int TAI_LE_ID = 112; /*[1950]*/ - /** @draft ICU 2.6 */ - public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ - /** @draft ICU 2.6 */ - public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ - /** @draft ICU 2.6 */ - public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ - /** @draft ICU 2.6 */ - public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ - /** @draft ICU 2.6 */ - public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ - /** @draft ICU 2.6 */ - public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ - /** @draft ICU 2.6 */ - public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ - /** @draft ICU 2.6 */ - public static final int UGARITIC_ID = 120; /*[10380]*/ - /** @draft ICU 2.6 */ - public static final int SHAVIAN_ID = 121; /*[10450]*/ - /** @draft ICU 2.6 */ - public static final int OSMANYA_ID = 122; /*[10480]*/ - /** @draft ICU 2.6 */ - public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ - /** @draft ICU 2.6 */ - public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ - /** @draft ICU 2.6 */ - public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ + /** @draft ICU 2.6 */ + public static final int 
LIMBU_ID = 111; /*[1900]*/ + /** @draft ICU 2.6 */ + public static final int TAI_LE_ID = 112; /*[1950]*/ + /** @draft ICU 2.6 */ + public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ + /** @draft ICU 2.6 */ + public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ + /** @draft ICU 2.6 */ + public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ + /** @draft ICU 2.6 */ + public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ + /** @draft ICU 2.6 */ + public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ + /** @draft ICU 2.6 */ + public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ + /** @draft ICU 2.6 */ + public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ + /** @draft ICU 2.6 */ + public static final int UGARITIC_ID = 120; /*[10380]*/ + /** @draft ICU 2.6 */ + public static final int SHAVIAN_ID = 121; /*[10450]*/ + /** @draft ICU 2.6 */ + public static final int OSMANYA_ID = 122; /*[10480]*/ + /** @draft ICU 2.6 */ + public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ + /** @draft ICU 2.6 */ + public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ + /** @draft ICU 2.6 */ + public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ /** * @draft ICU 2.4 */ @@ -1300,6 +1299,40 @@ public final class UCharacter & BLOCK_MASK_) >> BLOCK_SHIFT_); } + /** + * Cover the JDK 1.5 API. Return the Unicode block with the + * given name.
Note: Unlike JDK 1.5, this only matches + * against the official UCD name and the Java block name + * (ignoring case; names are upper-cased with Locale.ENGLISH so matching does not depend on the default locale). + * @param blockName the name of the block to match + * @return the UnicodeBlock with that name + * @throws IllegalArgumentException if the blockName could not be matched + * @draft ICU 3.0 + */ + public static final UnicodeBlock forName(String blockName) { + Map m = null; + if (mref != null) { + m = (Map)mref.get(); + } + if (m == null) { + m = new HashMap(BLOCKS_.length); + for (int i = 0; i < BLOCKS_.length; ++i) { + UnicodeBlock b = BLOCKS_[i]; + String name = getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG); + m.put(name.toUpperCase(Locale.ENGLISH), b); + m.put(b.toString().toUpperCase(Locale.ENGLISH), b); + } + mref = new SoftReference(m); + } + UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase(Locale.ENGLISH)); + if (b == null) { + throw new IllegalArgumentException(); + } + return b; + } + private static SoftReference mref; + + /** * Returns the type ID of this Unicode block * @return integer type ID of this Unicode block @@ -1954,42 +1987,42 @@ public final class UCharacter }; /** - * Hangul Syllable Type constants. - * - * @see UProperty#HANGUL_SYLLABLE_TYPE - * @draft ICU 2.6 - */ - public static interface HangulSyllableType - { - /** + * Hangul Syllable Type constants.
+ * + * @see UProperty#HANGUL_SYLLABLE_TYPE * @draft ICU 2.6 */ - public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ - /** + public static interface HangulSyllableType + { + /** * @draft ICU 2.6 */ - public static final int LEADING_JAMO = 1; /*[L]*/ - /** + public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ + /** * @draft ICU 2.6 */ - public static final int VOWEL_JAMO = 2; /*[V]*/ - /** + public static final int LEADING_JAMO = 1; /*[L]*/ + /** * @draft ICU 2.6 */ - public static final int TRAILING_JAMO = 3; /*[T]*/ - /** + public static final int VOWEL_JAMO = 2; /*[V]*/ + /** * @draft ICU 2.6 */ - public static final int LV_SYLLABLE = 4; /*[LV]*/ - /** + public static final int TRAILING_JAMO = 3; /*[T]*/ + /** * @draft ICU 2.6 */ - public static final int LVT_SYLLABLE = 5; /*[LVT]*/ - /** + public static final int LV_SYLLABLE = 4; /*[LV]*/ + /** * @draft ICU 2.6 */ - public static final int COUNT = 6; - } + public static final int LVT_SYLLABLE = 5; /*[LVT]*/ + /** + * @draft ICU 2.6 + */ + public static final int COUNT = 6; + } // public data members ----------------------------------------------- @@ -2020,8 +2053,8 @@ public final class UCharacter * is no existing character. * @stable ICU 2.1 */ - public static final int REPLACEMENT_CHAR = '\uFFFD'; - + public static final int REPLACEMENT_CHAR = '\uFFFD'; + /** * Special value that is returned by getUnicodeNumericValue(int) when no * numeric value is defined for a code point. 
@@ -2064,7 +2097,7 @@ public final class UCharacter } // if props == 0, it will just fall through and return -1 if (isNotExceptionIndicator(props)) { - // not contained in exception data + // not contained in exception data // getSignedValue is just shifting so we can check for the sign // first // Optimization @@ -2078,7 +2111,7 @@ public final class UCharacter } else { int index = UCharacterProperty.getExceptionIndex(props); - if (PROPERTY_.hasExceptionValue(index, + if (PROPERTY_.hasExceptionValue(index, UCharacterProperty.EXC_NUMERIC_VALUE_)) { int result = PROPERTY_.getException(index, UCharacterProperty.EXC_NUMERIC_VALUE_); @@ -2865,10 +2898,10 @@ public final class UCharacter */ public static int getCombiningClass(int ch) { - if (ch < MIN_VALUE || ch > MAX_VALUE) { - throw new IllegalArgumentException("Codepoint out of bounds"); - } - return NormalizerImpl.getCombiningClass(ch); + if (ch < MIN_VALUE || ch > MAX_VALUE) { + throw new IllegalArgumentException("Codepoint out of bounds"); + } + return NormalizerImpl.getCombiningClass(ch); } /** @@ -2953,7 +2986,7 @@ public final class UCharacter */ public static String getName(int ch) { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); @@ -2993,7 +3026,7 @@ public final class UCharacter */ public static String getName1_0(int ch) { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return NAME_.getName(ch, @@ -3020,7 +3053,7 @@ public final class UCharacter */ public static String getExtendedName(int ch) { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); @@ -3080,7 +3113,7 @@ public final class UCharacter */ public static int getCharFromName1_0(String name) { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load 
unames.icu"); } return NAME_.getCharFromName( @@ -3108,7 +3141,7 @@ public final class UCharacter */ public static int getCharFromExtendedName(String name) { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return NAME_.getCharFromName( @@ -3298,9 +3331,9 @@ public final class UCharacter public static int getCodePoint(char lead, char trail) { if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && - lead <= UTF16.LEAD_SURROGATE_MAX_VALUE && + lead <= UTF16.LEAD_SURROGATE_MAX_VALUE && trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE && - trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) { + trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) { return UCharacterProperty.getRawSupplementary(lead, trail); } throw new IllegalArgumentException("Illegal surrogate characters"); @@ -3379,9 +3412,9 @@ public final class UCharacter */ public static String toUpperCase(Locale locale, String str) { - if (locale == null) { - locale = Locale.getDefault(); - } + if (locale == null) { + locale = Locale.getDefault(); + } return PROPERTY_.toUpperCase(locale, str, 0, str.length()); } @@ -3395,11 +3428,11 @@ public final class UCharacter */ public static String toLowerCase(Locale locale, String str) { - int length = str.length(); - StringBuffer result = new StringBuffer(length); - if (locale == null) { - locale = Locale.getDefault(); - } + int length = str.length(); + StringBuffer result = new StringBuffer(length); + if (locale == null) { + locale = Locale.getDefault(); + } PROPERTY_.toLowerCase(locale, str, 0, length, result); return result.toString(); } @@ -3427,9 +3460,9 @@ public final class UCharacter BreakIterator breakiter) { if (breakiter == null) { - if (locale == null) { - locale = Locale.getDefault(); - } + if (locale == null) { + locale = Locale.getDefault(); + } breakiter = BreakIterator.getWordInstance(locale); } return PROPERTY_.toTitleCase(locale, str, breakiter); @@ -3652,12 +3685,12 @@ public final class UCharacter return result.toString(); } - /** - * Bit 
mask for getting just the options from a string compare options word - * that are relevant for case folding (of a single string or code point). - * @internal - */ - private static final int FOLD_CASE_OPTIONS_MASK = 0xff; + /** + * Bit mask for getting just the options from a string compare options word + * that are relevant for case folding (of a single string or code point). + * @internal + */ + private static final int FOLD_CASE_OPTIONS_MASK = 0xff; /** * Option value for case folding: use default mappings defined in CaseFolding.txt. @@ -3686,21 +3719,21 @@ public final class UCharacter * @see #foldCase(String, boolean) * @draft ICU 2.6 */ - /* - * Issue for canonical caseless match (UAX #21): - * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve - * canonical equivalence, unlike default-option casefolding. - * For example, I-grave and I + grave fold to strings that are not canonically - * equivalent. - * For more details, see the comment in Normalizer.compare() - * and the intermediate prototype changes for Jitterbug 2021. - * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) - * - * This did not get fixed because it appears that it is not possible to fix - * it for uppercase and lowercase characters (I-grave vs. i-grave) - * together in a way that they still fold to common result strings. - */ - public static int foldCase(int ch, int options) + /* + * Issue for canonical caseless match (UAX #21): + * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve + * canonical equivalence, unlike default-option casefolding. + * For example, I-grave and I + grave fold to strings that are not canonically + * equivalent. + * For more details, see the comment in Normalizer.compare() + * and the intermediate prototype changes for Jitterbug 2021. + * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) 
+ * + * This did not get fixed because it appears that it is not possible to fix + * it for uppercase and lowercase characters (I-grave vs. i-grave) + * together in a way that they still fold to common result strings. + */ + public static int foldCase(int ch, int options) { int props = PROPERTY_.getProperty(ch); if (isNotExceptionIndicator(props)) { @@ -3776,7 +3809,7 @@ public final class UCharacter * @see #foldCase(int, boolean) * @draft ICU 2.6 */ - public static final String foldCase(String str, int options){ + public static final String foldCase(String str, int options){ int size = str.length(); StringBuffer result = new StringBuffer(size); int offset = 0; @@ -3859,7 +3892,7 @@ public final class UCharacter } return result.toString(); - } + } /** * Return numeric value of Han code points. *
This returns the value of Han 'numeric' code points, @@ -3946,7 +3979,7 @@ public final class UCharacter return new UCharacterTypeIterator(PROPERTY_); } - /** + /** *

Gets an iterator for character names, iterating over codepoints.

*

This API only gets the iterator for the modern, most up-to-date * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or @@ -3968,7 +4001,7 @@ public final class UCharacter */ public static ValueIterator getNameIterator() { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return new UCharacterNameIterator(NAME_, @@ -3996,7 +4029,7 @@ public final class UCharacter */ public static ValueIterator getName1_0Iterator() { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return new UCharacterNameIterator(NAME_, @@ -4024,7 +4057,7 @@ public final class UCharacter */ public static ValueIterator getExtendedNameIterator() { - if(NAME_==null){ + if(NAME_==null){ throw new RuntimeException("Could not load unames.icu"); } return new UCharacterNameIterator(NAME_, @@ -4045,93 +4078,93 @@ public final class UCharacter */ public static VersionInfo getAge(int ch) { - if (ch < MIN_VALUE || ch > MAX_VALUE) { - throw new IllegalArgumentException("Codepoint out of bounds"); - } - return PROPERTY_.getAge(ch); + if (ch < MIN_VALUE || ch > MAX_VALUE) { + throw new IllegalArgumentException("Codepoint out of bounds"); + } + return PROPERTY_.getAge(ch); } /** - *

Check a binary Unicode property for a code point.

- *

Unicode, especially in version 3.2, defines many more properties - * than the original set in UnicodeData.txt.

- *

This API is intended to reflect Unicode properties as defined in - * the Unicode Character Database (UCD) and Unicode Technical Reports - * (UTR).

- *

For details about the properties see - * http://www.unicode.org/.

- *

For names of Unicode properties see the UCD file - * PropertyAliases.txt.

- *

This API does not check the validity of the codepoint.

- *

Important: If ICU is built with UCD files from Unicode versions - * below 3.2, then properties marked with "new" are not or - * not fully available.

- * @param ch code point to test. - * @param property selector constant from com.ibm.icu.lang.UProperty, - * identifies which binary property to check. - * @return true or false according to the binary Unicode property value - * for ch. Also false if property is out of bounds or if the - * Unicode version does not have data for the property at all, or - * not for this code point. - * @see com.ibm.icu.lang.UProperty - * @stable ICU 2.6 - */ - public static boolean hasBinaryProperty(int ch, int property) - { - if (ch < MIN_VALUE || ch > MAX_VALUE) { - throw new IllegalArgumentException("Codepoint out of bounds"); - } - return PROPERTY_.hasBinaryProperty(ch, property); - } - - /** - *

Check if a code point has the Alphabetic Unicode property.

- *

Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).

- *

Different from UCharacter.isLetter(ch)!

- * @stable ICU 2.6 - * @param ch codepoint to be tested - */ - public static boolean isUAlphabetic(int ch) - { - return hasBinaryProperty(ch, UProperty.ALPHABETIC); - } + *

Check a binary Unicode property for a code point.

+ *

Unicode, especially in version 3.2, defines many more properties + * than the original set in UnicodeData.txt.

+ *

This API is intended to reflect Unicode properties as defined in + * the Unicode Character Database (UCD) and Unicode Technical Reports + * (UTR).

+ *

For details about the properties see + * http://www.unicode.org/.

+ *

For names of Unicode properties see the UCD file + * PropertyAliases.txt.

+ *

This API does not check the validity of the codepoint.

+ *

Important: If ICU is built with UCD files from Unicode versions + * below 3.2, then properties marked with "new" are not or + * not fully available.

+ * @param ch code point to test. + * @param property selector constant from com.ibm.icu.lang.UProperty, + * identifies which binary property to check. + * @return true or false according to the binary Unicode property value + * for ch. Also false if property is out of bounds or if the + * Unicode version does not have data for the property at all, or + * not for this code point. + * @see com.ibm.icu.lang.UProperty + * @stable ICU 2.6 + */ + public static boolean hasBinaryProperty(int ch, int property) + { + if (ch < MIN_VALUE || ch > MAX_VALUE) { + throw new IllegalArgumentException("Codepoint out of bounds"); + } + return PROPERTY_.hasBinaryProperty(ch, property); + } + + /** + *

Check if a code point has the Alphabetic Unicode property.

+ *

Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).

+ *

Different from UCharacter.isLetter(ch)!

+ * @stable ICU 2.6 + * @param ch codepoint to be tested + */ + public static boolean isUAlphabetic(int ch) + { + return hasBinaryProperty(ch, UProperty.ALPHABETIC); + } - /** - *

Check if a code point has the Lowercase Unicode property.

- *

Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).

- *

This is different from UCharacter.isLowerCase(ch)!

- * @param ch codepoint to be tested - * @stable ICU 2.6 - */ - public static boolean isULowercase(int ch) - { - return hasBinaryProperty(ch, UProperty.LOWERCASE); - } + /** + *

Check if a code point has the Lowercase Unicode property.

+ *

Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).

+ *

This is different from UCharacter.isLowerCase(ch)!

+ * @param ch codepoint to be tested + * @stable ICU 2.6 + */ + public static boolean isULowercase(int ch) + { + return hasBinaryProperty(ch, UProperty.LOWERCASE); + } - /** - *

Check if a code point has the Uppercase Unicode property.

- *

Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).

- *

This is different from UCharacter.isUpperCase(ch)!

- * @param ch codepoint to be tested - * @stable ICU 2.6 - */ - public static boolean isUUppercase(int ch) - { - return hasBinaryProperty(ch, UProperty.UPPERCASE); - } + /** + *

Check if a code point has the Uppercase Unicode property.

+ *

Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).

+ *

This is different from UCharacter.isUpperCase(ch)!

+ * @param ch codepoint to be tested + * @stable ICU 2.6 + */ + public static boolean isUUppercase(int ch) + { + return hasBinaryProperty(ch, UProperty.UPPERCASE); + } - /** - *

Check if a code point has the White_Space Unicode property.

- *

Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).

- *

This is different from both UCharacter.isSpace(ch) and - * UCharacter.isWhitespace(ch)!

- * @param ch codepoint to be tested - * @stable ICU 2.6 - */ - public static boolean isUWhiteSpace(int ch) - { - return hasBinaryProperty(ch, UProperty.WHITE_SPACE); - } + /** + *

Check if a code point has the White_Space Unicode property.

+ *

Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).

+ *

This is different from both UCharacter.isSpace(ch) and + * UCharacter.isWhitespace(ch)!

+ * @param ch codepoint to be tested + * @stable ICU 2.6 + */ + public static boolean isUWhiteSpace(int ch) + { + return hasBinaryProperty(ch, UProperty.WHITE_SPACE); + } /** @@ -4205,7 +4238,7 @@ public final class UCharacter return (PROPERTY_.getAdditional(ch, 2) & JOINING_GROUP_MASK_) >> JOINING_GROUP_SHIFT_; case UProperty.JOINING_TYPE: - return (int)(PROPERTY_.getAdditional(ch, 2)& JOINING_TYPE_MASK_)>> JOINING_TYPE_SHIFT_; + return (int)(PROPERTY_.getAdditional(ch, 2)& JOINING_TYPE_MASK_)>> JOINING_TYPE_SHIFT_; // ArabicShaping.txt: // Note: Characters of joining type T and most characters of // joining type U are not explicitly listed in this file. @@ -4225,7 +4258,7 @@ public final class UCharacter return result; */ case UProperty.LINE_BREAK: - return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_; + return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_; /* * LineBreak.txt: * - Assigned characters that are not listed explicitly are given the value @@ -4284,7 +4317,7 @@ public final class UCharacter default: - return 0; /* undefined */ + return 0; /* undefined */ } } else if (type == UProperty.GENERAL_CATEGORY_MASK) { return UCharacterProperty.getMask(getType(ch)); @@ -4395,11 +4428,309 @@ public final class UCharacter return -1; // undefined } + /** + * Provide the java.lang.Character forDigit API, for convenience. + * @draft ICU 3.0 + */ + public static char forDigit(int digit, int radix) { + return java.lang.Character.forDigit(digit, radix); + } + + // JDK 1.5 API coverage + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16.LEAD_SURROGATE_MIN_VALUE + * @draft ICU 3.0 + */ + public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. 
+ * @see UTF16.LEAD_SURROGATE_MAX_VALUE + * @draft ICU 3.0 + */ + public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#TRAIL_SURROGATE_MIN_VALUE + * @draft ICU 3.0 + */ + public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#TRAIL_SURROGATE_MAX_VALUE + * @draft ICU 3.0 + */ + public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#SURROGATE_MIN_VALUE + * @draft ICU 3.0 + */ + public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#SURROGATE_MAX_VALUE + * @draft ICU 3.0 + */ + public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#SUPPLEMENTARY_MIN_VALUE + * @draft ICU 3.0 + */ + public static final int MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#CODEPOINT_MAX_VALUE + * @draft ICU 3.0 + */ + public static final int MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @param cp the code point to check + * @return true if cp is in the range 0 through MAX_CODE_POINT, inclusive + * @draft ICU 3.0 + */ + public static final boolean isValidCodePoint(int cp) { + return cp >= 0 && cp <= MAX_CODE_POINT; + } + + /** + * Cover the JDK 1.5 API, for convenience. + * @param cp the code point to check + * @return true if cp is in the range MIN_SUPPLEMENTARY_CODE_POINT through MAX_CODE_POINT, inclusive + * @draft ICU 3.0 + */ + public static final boolean isSupplementaryCodePoint(int cp) { + return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE + && cp <= UTF16.CODEPOINT_MAX_VALUE; + } + + /** + * Cover the JDK 1.5 API, for convenience.
+ * @param ch the char to check + * @return true if ch is a high (lead) surrogate, i.e. in MIN_HIGH_SURROGATE..MAX_HIGH_SURROGATE inclusive + * @draft ICU 3.0 + */ + public static boolean isHighSurrogate(char ch) { + return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; + } + + /** + * Cover the JDK 1.5 API, for convenience. + * @param ch the char to check + * @return true if ch is a low (trail) surrogate, i.e. in MIN_LOW_SURROGATE..MAX_LOW_SURROGATE inclusive + * @draft ICU 3.0 + */ + public static boolean isLowSurrogate(char ch) { + return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return true if the chars + * form a valid surrogate pair. + * @param high the high (lead) char + * @param low the low (trail) char + * @return true if high is a high surrogate and low is a low surrogate + * @draft ICU 3.0 + */ + public static final boolean isSurrogatePair(char high, char low) { + return isHighSurrogate(high) && isLowSurrogate(low); + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the number of chars needed + * to represent the code point. This does not check the + * code point for validity. + * @param cp the code point to check + * @return the number of chars needed to represent the code point + * @see UTF16#getCharCount + * @draft ICU 3.0 + */ + public static int charCount(int cp) { + return UTF16.getCharCount(cp); + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point represented by + * the characters. This does not check the surrogate pair for validity. + * @param high the high (lead) surrogate + * @param low the low (trail) surrogate + * @return the code point formed by the surrogate pair + * @draft ICU 3.0 + */ + public static final int toCodePoint(char high, char low) { + return UCharacterProperty.getRawSupplementary(high, low); + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point at index. + *
Note: the semantics of this API differ from those of the related UTF16 + * API. This examines only the characters at index and index+1. + * @param seq the characters to check + * @param index the index of the first or only char forming the code point + * @return the code point at the index; the char at index itself unless it is a high surrogate followed by a low surrogate + * @draft ICU 3.0 + */ + public static final int codePointAt(CharSequence seq, int index) { + char c1 = seq.charAt(index++); + if (isHighSurrogate(c1)) { + if (index < seq.length()) { + char c2 = seq.charAt(index); + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); + } + } + } + return c1; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point at index. + *
Note: the semantics of this API differ from those of the related UTF16 + * API. This examines only the characters at index and index+1. + * @param text the characters to check + * @param index the index of the first or only char forming the code point + * @return the code point at the index; the char at index itself unless it is a high surrogate followed by a low surrogate + * @draft ICU 3.0 + */ + public static final int codePointAt(char[] text, int index) { + char c1 = text[index++]; + if (isHighSurrogate(c1)) { + if (index < text.length) { + char c2 = text[index]; + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); + } + } + } + return c1; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point before index. + *
Note: the semantics of this API differ from those of the related UTF16 + * API. This examines only the characters at index-1 and index-2. + * @param seq the characters to check + * @param index the index after the last or only char forming the code point + * @return the code point before the index; the char at index-1 itself unless it is a low surrogate preceded by a high surrogate + * @draft ICU 3.0 + */ + public static final int codePointBefore(CharSequence seq, int index) { + char c2 = seq.charAt(--index); + if (isLowSurrogate(c2)) { + if (index > 0) { + char c1 = seq.charAt(--index); + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); + } + } + } + return c2; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point before index. + *
Note: the semantics of this API differ from those of the related UTF16 + * API. This examines only the characters at index-1 and index-2. + * @param text the characters to check + * @param index the index after the last or only char forming the code point + * @return the code point before the index; the char at index-1 itself unless it is a low surrogate preceded by a high surrogate + * @draft ICU 3.0 + */ + public static final int codePointBefore(char[] text, int index) { + char c2 = text[--index]; + if (isLowSurrogate(c2)) { + if (index > 0) { + char c1 = text[--index]; + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); + } + } + } + return c2; + } + + /** + * Cover the JDK 1.5 API, for convenience. Writes the chars representing the + * code point into the destination at the given index. + * @param cp the code point to convert + * @param dst the destination array into which to put the char(s) representing the code point + * @param dstIndex the index at which to put the first (or only) char + * @return the number of chars written: 1 if cp is below MIN_SUPPLEMENTARY_CODE_POINT, otherwise 2 + * @throws IllegalArgumentException if cp is negative or greater than MAX_CODE_POINT + * @draft ICU 3.0 + */ + public static final int toChars(int cp, char[] dst, int dstIndex) { + if (cp >= 0) { + if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { + dst[dstIndex] = (char)cp; + return 1; + } + if (cp <= MAX_CODE_POINT) { + dst[dstIndex] = UTF16.getLeadSurrogate(cp); + dst[dstIndex+1] = UTF16.getTrailSurrogate(cp); + return 2; + } + } + throw new IllegalArgumentException(); + } + + /** + * Cover the JDK 1.5 API, for convenience. Returns a char array + * representing the code point.
+ * @param cp the code point to convert + * @return a new array of length 1 or 2 containing the char(s) representing the code point + * @throws IllegalArgumentException if cp is negative or greater than MAX_CODE_POINT + * @draft ICU 3.0 + */ + public static final char[] toChars(int cp) { + if (cp >= 0) { + if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { + return new char[] { (char)cp }; + } + if (cp <= MAX_CODE_POINT) { + return new char[] { + UTF16.getLeadSurrogate(cp), + UTF16.getTrailSurrogate(cp) + }; + } + } + throw new IllegalArgumentException(); + } + + /** + * Cover the JDK API, for convenience. Return a byte representing the directionality of + * the character. + *
Note: Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined or + * out-of-bounds characters.
Note: The return value must be + * tested using the constants defined in {@link UCharacterEnums.ECharacterDirection} + * since the values are different from the ones defined by java.lang.Character. + * @param cp the code point to check + * @return the directionality of the code point + * @see #getDirection + * @draft ICU 3.0 + */ + public static byte getDirectionality(int cp) + { + // when ch is out of bounds getProperty == 0 + return (byte)((getProperty(cp) >> BIDI_SHIFT_) & BIDI_MASK_AFTER_SHIFT_); + } + // protected data members -------------------------------------------- /** - * Database storing the sets of character name - */ + * Database storing the sets of character name + */ static UCharacterName NAME_ = null; /** @@ -4417,7 +4748,7 @@ public final class UCharacter } catch (Exception e) { - e.printStackTrace(); + e.printStackTrace(); //throw new RuntimeException(e.getMessage()); // DONOT throw an exception // we might be building ICU modularly wothout names.icu and pnames.icu @@ -4438,7 +4769,7 @@ public final class UCharacter private static final int[] PROPERTY_DATA_; private static final int PROPERTY_INITIAL_VALUE_; - // block to initialise character property database + // block to initialise character property database static { try diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java index 92e2accc717..8e474f090ee 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java @@ -1,19 +1,20 @@ /** -******************************************************************************* -* Copyright (C) 1996-2003, International Business Machines Corporation and * -* others. All Rights Reserved. 
* -******************************************************************************* -* -* $Source: -* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ -* $Date: 2004/01/07 20:06:24 $ -* $Revision: 1.14 $ -* -******************************************************************************* -*/ + ******************************************************************************* + * Copyright (C) 1996-2004, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java,v $ + * $Date: 2004/03/10 02:21:37 $ + * $Revision: 1.15 $ + * + ******************************************************************************* + */ package com.ibm.icu.lang; +import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; + /** * Enumerated Unicode category types from the UnicodeData.txt file. * Used as return results from UCharacter @@ -32,197 +33,8 @@ package com.ibm.icu.lang; * @stable ICU 2.1 */ -public final class UCharacterCategory +public final class UCharacterCategory implements ECharacterCategory { - // public variable ----------------------------------------------------- - - /** - * Unassigned character type - * @stable ICU 2.1 - */ - public static final int UNASSIGNED = 0; - /** - * Character type Cn - * Not Assigned (no characters in [UnicodeData.txt] have this property) - * @stable ICU 2.6 - */ - public static final int GENERAL_OTHER_TYPES = 0; - /** - * Character type Lu - * @stable ICU 2.1 - */ - public static final int UPPERCASE_LETTER = 1; - /** - * Character type Ll - * @stable ICU 2.1 - */ - public static final int LOWERCASE_LETTER = 2; - /** - * Character type Lt - * @stable ICU 2.1 - */ - public static final int TITLECASE_LETTER = 3; - /** - * Character type Lm - * @stable ICU 2.1 - */ - public static final int MODIFIER_LETTER = 4; - /** - * Character type Lo - * @stable 
ICU 2.1 - */ - public static final int OTHER_LETTER = 5; - /** - * Character type Mn - * @stable ICU 2.1 - */ - public static final int NON_SPACING_MARK = 6; - /** - * Character type Me - * @stable ICU 2.1 - */ - public static final int ENCLOSING_MARK = 7; - /** - * Character type Mc - * @stable ICU 2.1 - */ - public static final int COMBINING_SPACING_MARK = 8; - /** - * Character type Nd - * @stable ICU 2.1 - */ - public static final int DECIMAL_DIGIT_NUMBER = 9; - /** - * Character type Nl - * @stable ICU 2.1 - */ - public static final int LETTER_NUMBER = 10; - - // start of 11------------ - - /** - * Character type No - * @stable ICU 2.1 - */ - public static final int OTHER_NUMBER = 11; - /** - * Character type Zs - * @stable ICU 2.1 - */ - public static final int SPACE_SEPARATOR = 12; - /** - * Character type Zl - * @stable ICU 2.1 - */ - public static final int LINE_SEPARATOR = 13; - /** - * Character type Zp - * @stable ICU 2.1 - */ - public static final int PARAGRAPH_SEPARATOR = 14; - /** - * Character type Cc - * @stable ICU 2.1 - */ - public static final int CONTROL = 15; - /** - * Character type Cf - * @stable ICU 2.1 - */ - public static final int FORMAT = 16; - /** - * Character type Co - * @stable ICU 2.1 - */ - public static final int PRIVATE_USE = 17; - /** - * Character type Cs - * @stable ICU 2.1 - */ - public static final int SURROGATE = 18; - /** - * Character type Pd - * @stable ICU 2.1 - */ - public static final int DASH_PUNCTUATION = 19; - /** - * Character type Ps - * @stable ICU 2.1 - */ - public static final int START_PUNCTUATION = 20; - - // start of 21 ------------ - - /** - * Character type Pe - * @stable ICU 2.1 - */ - public static final int END_PUNCTUATION = 21; - /** - * Character type Pc - * @stable ICU 2.1 - */ - public static final int CONNECTOR_PUNCTUATION = 22; - /** - * Character type Po - * @stable ICU 2.1 - */ - public static final int OTHER_PUNCTUATION = 23; - /** - * Character type Sm - * @stable ICU 2.1 - */ - public 
static final int MATH_SYMBOL = 24; - /** - * Character type Sc - * @stable ICU 2.1 - */ - public static final int CURRENCY_SYMBOL = 25; - /** - * Character type Sk - * @stable ICU 2.1 - */ - public static final int MODIFIER_SYMBOL = 26; - /** - * Character type So - * @stable ICU 2.1 - */ - public static final int OTHER_SYMBOL = 27; - /** - * Character type Pi - * @see #INITIAL_QUOTE_PUNCTUATION - * @stable ICU 2.1 - */ - public static final int INITIAL_PUNCTUATION = 28; - /** - * Character type Pi - * This name is compatible with java.lang.Character's name for this type. - * @see #INITIAL_PUNCTUATION - * @draft ICU 2.8 - */ - public static final int INITIAL_QUOTE_PUNCTUATION = 28; - /** - * Character type Pf - * @see #FINAL_QUOTE_PUNCTUATION - * @stable ICU 2.1 - */ - public static final int FINAL_PUNCTUATION = 29; - /** - * Character type Pf - * This name is compatible with java.lang.Character's name for this type. - * @see #FINAL_PUNCTUATION - * @draft ICU 2.8 - */ - public static final int FINAL_QUOTE_PUNCTUATION = 29; - - // start of 31 ------------ - - /** - * Character type count - * @stable ICU 2.1 - */ - public static final int CHAR_CATEGORY_COUNT = 30; - /** * Gets the name of the argument category * @param category to retrieve name @@ -232,68 +44,68 @@ public final class UCharacterCategory public static String toString(int category) { switch (category) { - case UPPERCASE_LETTER : - return "Letter, Uppercase"; - case LOWERCASE_LETTER : - return "Letter, Lowercase"; - case TITLECASE_LETTER : - return "Letter, Titlecase"; - case MODIFIER_LETTER : - return "Letter, Modifier"; - case OTHER_LETTER : - return "Letter, Other"; - case NON_SPACING_MARK : - return "Mark, Non-Spacing"; - case ENCLOSING_MARK : - return "Mark, Enclosing"; - case COMBINING_SPACING_MARK : - return "Mark, Spacing Combining"; - case DECIMAL_DIGIT_NUMBER : - return "Number, Decimal Digit"; - case LETTER_NUMBER : - return "Number, Letter"; - case OTHER_NUMBER : - return "Number, Other"; - 
case SPACE_SEPARATOR : - return "Separator, Space"; - case LINE_SEPARATOR : - return "Separator, Line"; - case PARAGRAPH_SEPARATOR : - return "Separator, Paragraph"; - case CONTROL : - return "Other, Control"; - case FORMAT : - return "Other, Format"; - case PRIVATE_USE : - return "Other, Private Use"; - case SURROGATE : - return "Other, Surrogate"; - case DASH_PUNCTUATION : - return "Punctuation, Dash"; - case START_PUNCTUATION : - return "Punctuation, Open"; - case END_PUNCTUATION : - return "Punctuation, Close"; - case CONNECTOR_PUNCTUATION : - return "Punctuation, Connector"; - case OTHER_PUNCTUATION : - return "Punctuation, Other"; - case MATH_SYMBOL : - return "Symbol, Math"; - case CURRENCY_SYMBOL : - return "Symbol, Currency"; - case MODIFIER_SYMBOL : - return "Symbol, Modifier"; - case OTHER_SYMBOL : - return "Symbol, Other"; - case INITIAL_PUNCTUATION : - return "Punctuation, Initial quote"; - case FINAL_PUNCTUATION : - return "Punctuation, Final quote"; - } - return "Unassigned"; + case UPPERCASE_LETTER : + return "Letter, Uppercase"; + case LOWERCASE_LETTER : + return "Letter, Lowercase"; + case TITLECASE_LETTER : + return "Letter, Titlecase"; + case MODIFIER_LETTER : + return "Letter, Modifier"; + case OTHER_LETTER : + return "Letter, Other"; + case NON_SPACING_MARK : + return "Mark, Non-Spacing"; + case ENCLOSING_MARK : + return "Mark, Enclosing"; + case COMBINING_SPACING_MARK : + return "Mark, Spacing Combining"; + case DECIMAL_DIGIT_NUMBER : + return "Number, Decimal Digit"; + case LETTER_NUMBER : + return "Number, Letter"; + case OTHER_NUMBER : + return "Number, Other"; + case SPACE_SEPARATOR : + return "Separator, Space"; + case LINE_SEPARATOR : + return "Separator, Line"; + case PARAGRAPH_SEPARATOR : + return "Separator, Paragraph"; + case CONTROL : + return "Other, Control"; + case FORMAT : + return "Other, Format"; + case PRIVATE_USE : + return "Other, Private Use"; + case SURROGATE : + return "Other, Surrogate"; + case DASH_PUNCTUATION : + 
return "Punctuation, Dash"; + case START_PUNCTUATION : + return "Punctuation, Open"; + case END_PUNCTUATION : + return "Punctuation, Close"; + case CONNECTOR_PUNCTUATION : + return "Punctuation, Connector"; + case OTHER_PUNCTUATION : + return "Punctuation, Other"; + case MATH_SYMBOL : + return "Symbol, Math"; + case CURRENCY_SYMBOL : + return "Symbol, Currency"; + case MODIFIER_SYMBOL : + return "Symbol, Modifier"; + case OTHER_SYMBOL : + return "Symbol, Other"; + case INITIAL_PUNCTUATION : + return "Punctuation, Initial quote"; + case FINAL_PUNCTUATION : + return "Punctuation, Final quote"; + } + return "Unassigned"; } - + // private constructor ----------------------------------------------- ///CLOVER:OFF /** diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java b/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java index 8129625eec0..5001a5c9041 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java @@ -1,19 +1,21 @@ /** ******************************************************************************* -* Copyright (C) 1996-2001, International Business Machines Corporation and * +* Copyright (C) 1996-2004, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterDirection.java $ -* $Date: 2002/12/11 23:37:43 $ -* $Revision: 1.7 $ +* $Date: 2004/03/10 02:21:37 $ +* $Revision: 1.8 $ * ******************************************************************************* */ package com.ibm.icu.lang; +import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; + /** * Enumerated Unicode character linguistic direction constants. 
* Used as return results from UCharacter @@ -24,8 +26,8 @@ package com.ibm.icu.lang; * @stable ICU 2.1 */ -public final class UCharacterDirection -{ +public final class UCharacterDirection implements ECharacterDirection { + // private constructor ========================================= ///CLOVER:OFF /** @@ -36,161 +38,54 @@ public final class UCharacterDirection } ///CLOVER:ON - // public variable ============================================= - /** - * Directional type L + * Gets the name of the argument direction + * @param dir direction type to retrieve name + * @return directional name * @stable ICU 2.1 */ - public static final int LEFT_TO_RIGHT = 0; - /** - * Directional type R - * @stable ICU 2.1 - */ - public static final int RIGHT_TO_LEFT = 1; - /** - * Directional type EN - * @stable ICU 2.1 - */ - public static final int EUROPEAN_NUMBER = 2; - /** - * Directional type ES - * @stable ICU 2.1 - */ - public static final int EUROPEAN_NUMBER_SEPARATOR = 3; - /** - * Directional type ET - * @stable ICU 2.1 - */ - public static final int EUROPEAN_NUMBER_TERMINATOR = 4; - /** - * Directional type AN - * @stable ICU 2.1 - */ - public static final int ARABIC_NUMBER = 5; - /** - * Directional type CS - * @stable ICU 2.1 - */ - public static final int COMMON_NUMBER_SEPARATOR = 6; - /** - * Directional type B - * @stable ICU 2.1 - */ - public static final int BLOCK_SEPARATOR = 7; - /** - * Directional type S - * @stable ICU 2.1 - */ - public static final int SEGMENT_SEPARATOR = 8; - /** - * Directional type WS - * @stable ICU 2.1 - */ - public static final int WHITE_SPACE_NEUTRAL = 9; - - // start of 11 --------------- - - /** - * Directional type ON - * @stable ICU 2.1 - */ - public static final int OTHER_NEUTRAL = 10; - /** - * Directional type LRE - * @stable ICU 2.1 - */ - public static final int LEFT_TO_RIGHT_EMBEDDING = 11; - /** - * Directional type LRO - * @stable ICU 2.1 - */ - public static final int LEFT_TO_RIGHT_OVERRIDE = 12; - /** - * Directional type 
AL - * @stable ICU 2.1 - */ - public static final int RIGHT_TO_LEFT_ARABIC = 13; - /** - * Directional type RLE - * @stable ICU 2.1 - */ - public static final int RIGHT_TO_LEFT_EMBEDDING = 14; - /** - * Directional type RLO - * @stable ICU 2.1 - */ - public static final int RIGHT_TO_LEFT_OVERRIDE = 15; - /** - * Directional type PDF - * @stable ICU 2.1 - */ - public static final int POP_DIRECTIONAL_FORMAT = 16; - /** - * Directional type NSM - * @stable ICU 2.1 - */ - public static final int DIR_NON_SPACING_MARK = 17; - /** - * Directional type BN - * @stable ICU 2.1 - */ - public static final int BOUNDARY_NEUTRAL = 18; - /** - * Number of directional type - * @stable ICU 2.1 - */ - public static final int CHAR_DIRECTION_COUNT = 19; - - /** - * Gets the name of the argument direction - * @param dir direction type to retrieve name - * @return directional name - * @stable ICU 2.1 - */ - public static String toString(int dir) - { - switch(dir) - { - case LEFT_TO_RIGHT : - return "Left-to-Right"; - case RIGHT_TO_LEFT : - return "Right-to-Left"; - case EUROPEAN_NUMBER : - return "European Number"; - case EUROPEAN_NUMBER_SEPARATOR : - return "European Number Separator"; - case EUROPEAN_NUMBER_TERMINATOR : - return "European Number Terminator"; - case ARABIC_NUMBER : - return "Arabic Number"; - case COMMON_NUMBER_SEPARATOR : - return "Common Number Separator"; - case BLOCK_SEPARATOR : - return "Paragraph Separator"; - case SEGMENT_SEPARATOR : - return "Segment Separator"; - case WHITE_SPACE_NEUTRAL : - return "Whitespace"; - case OTHER_NEUTRAL : - return "Other Neutrals"; - case LEFT_TO_RIGHT_EMBEDDING : - return "Left-to-Right Embedding"; - case LEFT_TO_RIGHT_OVERRIDE : - return "Left-to-Right Override"; - case RIGHT_TO_LEFT_ARABIC : - return "Right-to-Left Arabic"; - case RIGHT_TO_LEFT_EMBEDDING : - return "Right-to-Left Embedding"; - case RIGHT_TO_LEFT_OVERRIDE : - return "Right-to-Left Override"; - case POP_DIRECTIONAL_FORMAT : - return "Pop Directional Format"; - 
case DIR_NON_SPACING_MARK : - return "Non-Spacing Mark"; - case BOUNDARY_NEUTRAL : - return "Boundary Neutral"; - } - return "Unassigned"; - } + public static String toString(int dir) { + switch(dir) + { + case LEFT_TO_RIGHT : + return "Left-to-Right"; + case RIGHT_TO_LEFT : + return "Right-to-Left"; + case EUROPEAN_NUMBER : + return "European Number"; + case EUROPEAN_NUMBER_SEPARATOR : + return "European Number Separator"; + case EUROPEAN_NUMBER_TERMINATOR : + return "European Number Terminator"; + case ARABIC_NUMBER : + return "Arabic Number"; + case COMMON_NUMBER_SEPARATOR : + return "Common Number Separator"; + case BLOCK_SEPARATOR : + return "Paragraph Separator"; + case SEGMENT_SEPARATOR : + return "Segment Separator"; + case WHITE_SPACE_NEUTRAL : + return "Whitespace"; + case OTHER_NEUTRAL : + return "Other Neutrals"; + case LEFT_TO_RIGHT_EMBEDDING : + return "Left-to-Right Embedding"; + case LEFT_TO_RIGHT_OVERRIDE : + return "Left-to-Right Override"; + case RIGHT_TO_LEFT_ARABIC : + return "Right-to-Left Arabic"; + case RIGHT_TO_LEFT_EMBEDDING : + return "Right-to-Left Embedding"; + case RIGHT_TO_LEFT_OVERRIDE : + return "Right-to-Left Override"; + case POP_DIRECTIONAL_FORMAT : + return "Pop Directional Format"; + case DIR_NON_SPACING_MARK : + return "Non-Spacing Mark"; + case BOUNDARY_NEUTRAL : + return "Boundary Neutral"; + } + return "Unassigned"; + } } diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterEnums.java b/icu4j/src/com/ibm/icu/lang/UCharacterEnums.java new file mode 100644 index 00000000000..6990947e635 --- /dev/null +++ b/icu4j/src/com/ibm/icu/lang/UCharacterEnums.java @@ -0,0 +1,492 @@ +/** + ******************************************************************************* + * Copyright (C) 2004, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + * + * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterEnums.java,v $ + * $Date: 2004/03/10 02:21:37 $ + * $Revision: 1.1 $ + * + ******************************************************************************* + */ + +package com.ibm.icu.lang; + +/** + * A container for the different 'enumerated types' used by UCharacter. + * @draft ICU 3.0 + */ +public class UCharacterEnums { + + /** + * 'Enum' for the CharacterCategory constants. These constants are + * compatible in name but not in value with those defined in + * java.lang.Character. + * @see UCharacterCategory + * @draft ICU 3.0 + */ + public static interface ECharacterCategory { + /** + * Unassigned character type + * @stable ICU 2.1 + */ + public static final int UNASSIGNED = 0; + + /** + * Character type Cn + * Not Assigned (no characters in [UnicodeData.txt] have this property) + * @stable ICU 2.6 + */ + public static final int GENERAL_OTHER_TYPES = 0; + + /** + * Character type Lu + * @stable ICU 2.1 + */ + public static final int UPPERCASE_LETTER = 1; + + /** + * Character type Ll + * @stable ICU 2.1 + */ + public static final int LOWERCASE_LETTER = 2; + + /** + * Character type Lt + * @stable ICU 2.1 + */ + + public static final int TITLECASE_LETTER = 3; + + /** + * Character type Lm + * @stable ICU 2.1 + */ + public static final int MODIFIER_LETTER = 4; + + /** + * Character type Lo + * @stable ICU 2.1 + */ + public static final int OTHER_LETTER = 5; + + /** + * Character type Mn + * @stable ICU 2.1 + */ + public static final int NON_SPACING_MARK = 6; + + /** + * Character type Me + * @stable ICU 2.1 + */ + public static final int ENCLOSING_MARK = 7; + + /** + * Character type Mc + * @stable ICU 2.1 + */ + public static final int COMBINING_SPACING_MARK = 8; + + /** + * Character type Nd + * @stable ICU 2.1 + */ + public static final int DECIMAL_DIGIT_NUMBER = 9; + + /** + * Character type Nl + * @stable ICU 
2.1 + */ + public static final int LETTER_NUMBER = 10; + + /** + * Character type No + * @stable ICU 2.1 + */ + public static final int OTHER_NUMBER = 11; + + /** + * Character type Zs + * @stable ICU 2.1 + */ + public static final int SPACE_SEPARATOR = 12; + + /** + * Character type Zl + * @stable ICU 2.1 + */ + public static final int LINE_SEPARATOR = 13; + + /** + * Character type Zp + * @stable ICU 2.1 + */ + public static final int PARAGRAPH_SEPARATOR = 14; + + /** + * Character type Cc + * @stable ICU 2.1 + */ + public static final int CONTROL = 15; + + /** + * Character type Cf + * @stable ICU 2.1 + */ + public static final int FORMAT = 16; + + /** + * Character type Co + * @stable ICU 2.1 + */ + public static final int PRIVATE_USE = 17; + + /** + * Character type Cs + * @stable ICU 2.1 + */ + public static final int SURROGATE = 18; + + /** + * Character type Pd + * @stable ICU 2.1 + */ + public static final int DASH_PUNCTUATION = 19; + + /** + * Character type Ps + * @stable ICU 2.1 + */ + public static final int START_PUNCTUATION = 20; + + /** + * Character type Pe + * @stable ICU 2.1 + */ + public static final int END_PUNCTUATION = 21; + + /** + * Character type Pc + * @stable ICU 2.1 + */ + public static final int CONNECTOR_PUNCTUATION = 22; + + /** + * Character type Po + * @stable ICU 2.1 + */ + public static final int OTHER_PUNCTUATION = 23; + + /** + * Character type Sm + * @stable ICU 2.1 + */ + public static final int MATH_SYMBOL = 24; + + /** + * Character type Sc + * @stable ICU 2.1 + */ + public static final int CURRENCY_SYMBOL = 25; + + /** + * Character type Sk + * @stable ICU 2.1 + */ + public static final int MODIFIER_SYMBOL = 26; + + /** + * Character type So + * @stable ICU 2.1 + */ + public static final int OTHER_SYMBOL = 27; + + /** + * Character type Pi + * @see #INITIAL_QUOTE_PUNCTUATION + * @stable ICU 2.1 + */ + public static final int INITIAL_PUNCTUATION = 28; + + /** + * Character type Pi + * This name is compatible with 
java.lang.Character's name for this type. + * @see #INITIAL_PUNCTUATION + * @draft ICU 2.8 + */ + public static final int INITIAL_QUOTE_PUNCTUATION = 28; + + /** + * Character type Pf + * @see #FINAL_QUOTE_PUNCTUATION + * @stable ICU 2.1 + */ + public static final int FINAL_PUNCTUATION = 29; + + /** + * Character type Pf + * This name is compatible with java.lang.Character's name for this type. + * @see #FINAL_PUNCTUATION + * @draft ICU 2.8 + */ + public static final int FINAL_QUOTE_PUNCTUATION = 29; + + /** + * Character type count + * @stable ICU 2.1 + */ + public static final int CHAR_CATEGORY_COUNT = 30; + } + + /** + * 'Enum' for the CharacterDirection constants. There are two sets + * of names, those used in ICU, and those used in the JDK. The + * JDK constants are compatible in name but not in value + * with those defined in java.lang.Character. + * @see UCharacterDirection + * @draft ICU 3.0 + */ + public static interface ECharacterDirection { + /** + * Directional type L + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT = 0; + + /** + * JDK-compatible synonym for LEFT_TO_RIGHT. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = (byte)LEFT_TO_RIGHT; + + /** + * Directional type R + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT = 1; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = (byte)RIGHT_TO_LEFT; + + /** + * Directional type EN + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER = 2; + + /** + * JDK-compatible synonym for EUROPEAN_NUMBER. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = (byte)EUROPEAN_NUMBER; + + /** + * Directional type ES + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_SEPARATOR = 3; + + /** + * JDK-compatible synonym for EUROPEAN_NUMBER_SEPARATOR.
+ * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = (byte)EUROPEAN_NUMBER_SEPARATOR; + + /** + * Directional type ET + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_TERMINATOR = 4; + + /** + * JDK-compatible synonym for EUROPEAN_NUMBER_TERMINATOR. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = (byte)EUROPEAN_NUMBER_TERMINATOR; + + /** + * Directional type AN + * @stable ICU 2.1 + */ + public static final int ARABIC_NUMBER = 5; + + /** + * JDK-compatible synonym for ARABIC_NUMBER. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_ARABIC_NUMBER = (byte)ARABIC_NUMBER; + + /** + * Directional type CS + * @stable ICU 2.1 + */ + public static final int COMMON_NUMBER_SEPARATOR = 6; + + /** + * JDK-compatible synonym for COMMON_NUMBER_SEPARATOR. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = (byte)COMMON_NUMBER_SEPARATOR; + + /** + * Directional type B + * @stable ICU 2.1 + */ + public static final int BLOCK_SEPARATOR = 7; + + /** + * JDK-compatible synonym for BLOCK_SEPARATOR. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = (byte)BLOCK_SEPARATOR; + + /** + * Directional type S + * @stable ICU 2.1 + */ + public static final int SEGMENT_SEPARATOR = 8; + + /** + * JDK-compatible synonym for SEGMENT_SEPARATOR. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = (byte)SEGMENT_SEPARATOR; + + /** + * Directional type WS + * @stable ICU 2.1 + */ + public static final int WHITE_SPACE_NEUTRAL = 9; + + /** + * JDK-compatible synonym for WHITE_SPACE_NEUTRAL. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_WHITESPACE = (byte)WHITE_SPACE_NEUTRAL; + + /** + * Directional type ON + * @stable ICU 2.1 + */ + public static final int OTHER_NEUTRAL = 10; + + /** + * JDK-compatible synonym for OTHER_NEUTRAL.
+ * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_OTHER_NEUTRALS = (byte)OTHER_NEUTRAL; + + /** + * Directional type LRE + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_EMBEDDING = 11; + + /** + * JDK-compatible synonym for LEFT_TO_RIGHT_EMBEDDING. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = (byte)LEFT_TO_RIGHT_EMBEDDING; + + /** + * Directional type LRO + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_OVERRIDE = 12; + + /** + * JDK-compatible synonym for LEFT_TO_RIGHT_OVERRIDE. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = (byte)LEFT_TO_RIGHT_OVERRIDE; + + /** + * Directional type AL + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_ARABIC = 13; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT_ARABIC. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = (byte)RIGHT_TO_LEFT_ARABIC; + + /** + * Directional type RLE + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_EMBEDDING = 14; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT_EMBEDDING. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = (byte)RIGHT_TO_LEFT_EMBEDDING; + + /** + * Directional type RLO + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_OVERRIDE = 15; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT_OVERRIDE. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = (byte)RIGHT_TO_LEFT_OVERRIDE; + + /** + * Directional type PDF + * @stable ICU 2.1 + */ + public static final int POP_DIRECTIONAL_FORMAT = 16; + + /** + * JDK-compatible synonym for POP_DIRECTIONAL_FORMAT.
+ * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = (byte)POP_DIRECTIONAL_FORMAT; + + /** + * Directional type NSM + * @stable ICU 2.1 + */ + public static final int DIR_NON_SPACING_MARK = 17; + + /** + * JDK-compatible synonym for DIR_NON_SPACING_MARK. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_NON_SPACING_MARK = (byte)DIR_NON_SPACING_MARK; + + /** + * Directional type BN + * @stable ICU 2.1 + */ + public static final int BOUNDARY_NEUTRAL = 18; + + /** + * JDK-compatible synonym for BOUNDARY_NEUTRAL. + * @draft ICU 3.0 + */ + public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = (byte)BOUNDARY_NEUTRAL; + + /** + * Number of directional types + * @stable ICU 2.1 + */ + public static final int CHAR_DIRECTION_COUNT = 19; + + /** + * Undefined bidirectional character type. Undefined char + * values have undefined directionality in the Unicode specification. + */ + public static final byte DIRECTIONALITY_UNDEFINED = -1; + } +}