no message

X-SVN-Rev: 14687
This commit is contained in:
Mark Davis 2004-03-11 19:04:00 +00:00
parent 77c134dc38
commit 5c397b73b3
19 changed files with 1269 additions and 342 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Data.java,v $
* $Date: 2002/07/14 22:07:00 $
* $Revision: 1.1 $
* $Date: 2004/03/11 19:03:19 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -27,7 +27,7 @@ import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
public class UCA_Data implements UCA_Types {
static final boolean DEBUG = true;
static final boolean DEBUG = false;
static final boolean DEBUG_SHOW_ADD = false;
private Normalizer toD;

View file

@ -1,7 +1,4 @@
# Correlated with Unicode 4.0
# Note: The casing of block names is not normative.
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
#
# Code points not explicitly listed in this file are given the value No_Block.
#
# Note: The casing of block names is not normative.
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
# Format:
# Start Code..End Code; Block Name

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
* $Date: 2004/02/07 01:01:17 $
* $Revision: 1.8 $
* $Date: 2004/03/11 19:03:18 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -26,7 +26,7 @@ import com.ibm.text.utility.*;
public class BuildNames implements UCD_Types {
static final boolean DEBUG = true;
static final boolean DEBUG = false;
public static void main(String[] args) throws IOException {
collectWords();

View file

@ -59,7 +59,7 @@ public class CheckICU {
toolFactory = ToolUnicodePropertySource.make("4.0.0");
String[] quickList = {
"Canonical_Combining_Class",
// "Canonical_Combining_Class",
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
//"Numeric_Value"
};

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
* $Date: 2004/02/12 08:23:17 $
* $Revision: 1.14 $
* $Date: 2004/03/11 19:03:18 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
@ -305,6 +305,7 @@ public final class ConvertUCD implements UCD_Types {
value.compact();
}
/*
UData ud;
ud = getEntry(0x5e);
System.out.println("SPOT-CHECK: 5e: " + ud);
@ -320,6 +321,7 @@ public final class ConvertUCD implements UCD_Types {
ud = getEntry(0xFFFF);
System.out.println("SPOT-CHECK: FFFF: " + ud);
*/
writeJavaData();
}
@ -410,7 +412,7 @@ public final class ConvertUCD implements UCD_Types {
int count = Utility.split(line,';',parts);
if (parts[0].equals("2801")) {
if (false && parts[0].equals("2801")) {
System.out.println("debug?");
}
@ -468,7 +470,7 @@ public final class ConvertUCD implements UCD_Types {
if (end == 0) end = cpStart;
for (int j = cpStart; j <= end; ++j) {
if (j != UCD.mapToRepresentative(j, false)) continue;
if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
if (skipLetters && getEntry(cpStart).isLetter()) continue;
appendCharProperties(j, prop);
}
@ -490,7 +492,7 @@ public final class ConvertUCD implements UCD_Types {
if (val.equals("")) continue; // skip empty values, they mean default
for (int cps = cpStart; cps <= cpTop; ++cps) {
if (UCD.mapToRepresentative(cps, false) != cps) continue; // skip condensed ranges
if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue; // skip condensed ranges
if (key.equals("binary")) {
appendCharProperties(cps, val);
@ -508,7 +510,7 @@ public final class ConvertUCD implements UCD_Types {
if (type.equals("I")) {
data.simpleCaseFolding = val;
setBinaryProperty(cps, CaseFoldTurkishI);
System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
+ Utility.hex(cps) + ": " + Utility.hex(val));
}
} else if (labels[0].equals("SpecialCasing") // special handling for special casing
@ -658,7 +660,7 @@ public final class ConvertUCD implements UCD_Types {
System.out.println("Warning: NULL name\r\n" + uData);
System.out.println();
}
if (uData.codePoint == 0x2801) {
if (false && uData.codePoint == 0x2801) {
System.out.println("SPOT-CHECK: " + uData);
}
uData.writeBytes(dataOut);

View file

@ -1,16 +1,29 @@
#
# Unicode Character Database: Derived Property Data
# This file shows when various code points were designated in Unicode
# This file shows when various code points were first assigned in Unicode.
#
# Caution: When using the Age *property*, all assigned code points
# in each version are included, not just the newly assigned code points.
# For more information, see http://www.unicode.org/reports/tr18/
#
# Notes:
# - The term 'designated' means that a previously reserved code point was specified
# to be a noncharacter or surrogate, or assigned as a character,
# control or format code.
#
# - The term 'assigned' means that a previously reserved code point was assigned
# to be a character (graphic, format, control, or private-use);
# a noncharacter code point; or a surrogate code point.
# For more information, see The Unicode Standard Section 2.4
#
# - Versions are only tracked from 1.1 onwards, since version 1.0
# predated changes required by the ISO 10646 merger.
#
# - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing.
#
# - The supplementary private use code points and the noncharacter code points
# were designated in version 2.0, but not specifically listed in the UCD
# were assigned in version 2.0, but not specifically listed in the UCD
# until versions 3.0 and 3.1 respectively.
#
# - Contiguous ranges are broken into separate lines where they would cross code point
# types: graphic, format, control, private-use, surrogate, noncharacter
#
# For details on the contents of each version, see
# http://www.unicode.org/versions/enumeratedversions.html.

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
* $Date: 2004/02/18 03:08:59 $
* $Revision: 1.25 $
* $Date: 2004/03/11 19:03:17 $
* $Revision: 1.26 $
*
*******************************************************************************
*/
@ -378,7 +378,7 @@ public final class DerivedProperty implements UCD_Types {
shortName = "IDC";
header = "# Derived Property: " + name
+ "\r\n# Characters that can continue an identifier."
+ "\r\n# Generated from: ID_Start + Mn+Mc+Nd+Pc"
+ "\r\n# Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue"
+ "\r\n# NOTE: Cf characters should be filtered out.";
}
public boolean hasValue(int cp) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2004/02/18 03:08:59 $
* $Revision: 1.34 $
* $Date: 2004/03/11 19:03:17 $
* $Revision: 1.35 $
*
*******************************************************************************
*/
@ -110,8 +110,8 @@ public class GenerateData implements UCD_Types {
output.println("# Generated algorithmically from the Unicode Character Database");
}
output.println("# For documentation, see UCD.html");
output.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
output.println("# if they have default property values.");
//output.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
//output.println("# if they have default property values.");
output.println(HORIZONTAL_LINE);
output.println();
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,355 @@
Generate: DerivedCoreProperties
DeltaVersion: 11
File: Blocks
Property: Block
Format: valueList
File: CaseFolding
Property: SPECIAL
File: DerivedAge
Property: Age
Format: nameStyle=none noLabel skipValue=unassigned
Value: 1.1
# Assigned as of Unicode 1.1.0 (June, 1993)
# [excluding removed Hangul Syllables]
Value: 2.0
# Newly assigned in Unicode 2.0.0 (July, 1996)
Value: 2.1
# Newly assigned in Unicode 2.1.2 (May, 1998)
Value: 3.0
# Newly assigned in Unicode 3.0.0 (September, 1999)
Value: 3.1
# Newly assigned in Unicode 3.1.0 (March, 2001)
Value: 3.2
# Newly assigned in Unicode 3.2.0 (March, 2002)
Value: 4.0
# Newly assigned in Unicode 4.0.0 (April, 2003)
File: extracted/DerivedBidiClass
Property: Bidi_Class
# Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
Format: valueStyle=short skipUnassigned=Left_To_Right
File: extracted/DerivedBinaryProperties
Property: Bidi_Mirrored
# Bidi_Mirrored (listing UnicodeData.txt, field 9: see UCD.html)
File: extracted/DerivedCombiningClass
Property: Canonical_Combining_Class
# Combining Class (listing UnicodeData.txt, field 3: see UCD.html)
# All code points not explicitly listed in this file have the property
# value: 0.
Format: nameStyle=none valueStyle=short skipUnassigned=Not_Reordered
File: DerivedCoreProperties
Property: Math
# Derived Property: Math
# Generated from: Sm + Other_Math
Property: Alphabetic
# Derived Property: Alphabetic
# Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic
Property: Lowercase
# Derived Property: Lowercase
# Generated from: Ll + Other_Lowercase
Property: Uppercase
# Derived Property: Uppercase
# Generated from: Lu + Other_Uppercase
Property: ID_Start
# Derived Property: ID_Start
# Characters that can start an identifier.
# Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start
Property: ID_Continue
# Derived Property: ID_Continue
# Characters that can continue an identifier.
# Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
# NOTE: Cf characters should be filtered out.
Property: XID_Start
# Derived Property: XID_Start
# ID_Start modified for closure under NFKx
# Modified as described in UAX #15
# NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
Property: XID_Continue
# Derived Property: XID_Continue
# Mod_ID_Continue modified for closure under NFKx
# Modified as described in UAX #15
# NOTE: Cf characters should be filtered out.
# NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
Property: Default_Ignorable_Code_Point
# Derived Property: Default_Ignorable_Code_Point
# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - White_Space
Property: Grapheme_Extend
# Derived Property: Grapheme_Extend
# Generated from: Me + Mn + Other_Grapheme_Extend
# Note: depending on an application's interpretation of Co (private use),
# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
Property: Grapheme_Base
# Derived Property: Grapheme_Base
# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend
# Note: depending on an application's interpretation of Co (private use),
# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
File: extracted/DerivedDecompositionType
Property: Decomposition_Type
Format: skipValue=None
# Decomposition_Type (from UnicodeData.txt, field 5: see UCD.html)
File: extracted/DerivedEastAsianWidth
Property: East_Asian_Width
Format: valueStyle=short skipUnassigned=Neutral
# East_Asian_Width (listing EastAsianWidth.txt, field 1)
File: extracted/DerivedGeneralCategory
Property: General_Category
Format: valueStyle=short noLabel
File: extracted/DerivedJoiningGroup
Property: Joining_Group
# Joining Group (listing ArabicShaping.txt, field 3)
Format: skipValue=No_Joining_Group
File: extracted/DerivedJoiningType
Property: Joining_Type
# Type T is derived, as described in ArabicShaping.txt
Format: valueStyle=short skipValue=Non_Joining
File: extracted/DerivedLineBreak
Property: Line_Break
Format: valueStyle=short skipUnassigned=Unknown
File: DerivedNormalizationProps
Property: FC_NFKC_Closure
# Derived Property: FC_NFKC_Closure
# Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));
# Then if (c != b) add the mapping from a to c to the set of
# mappings that constitute the FC_NFKC_Closure list
# Uses the full case folding from CaseFolding.txt, without the T option.
Format: nameStyle=short
Property: Full_Composition_Exclusion
# Derived Property: Full_Composition_Exclusion
# Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions
Property: NFD_QuickCheck
# Derived Property: NFD_QuickCheck
# Generated from computing decomposables
Format: nameStyle=short valueStyle=short skipValue=Yes
Property: NFC_QuickCheck
# Derived Property: NFC_QuickCheck
# Generated from computing decomposables (and characters that may compose with previous ones)
Format: nameStyle=short valueStyle=short skipValue=Yes
Property: NFKD_QuickCheck
# Derived Property: NFKD_QuickCheck
# Generated from computing decomposables
Format: nameStyle=short valueStyle=short skipValue=Yes
Property: NFKC_QuickCheck
# Derived Property: NFKC_QuickCheck
# Generated from computing decomposables (and characters that may compose with previous ones)
Format: nameStyle=short valueStyle=short skipValue=Yes
Property: Expands_On_NFD
# Derived Property: Expands_On_NFD
# Generated according to UAX #15.
# Characters whose normalized length is not one.
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
Property: Expands_On_NFC
# Derived Property: Expands_On_NFC
# Generated according to UAX #15.
# Characters whose normalized length is not one.
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
Property: Expands_On_NFKD
# Derived Property: Expands_On_NFKD
# Generated according to UAX #15.
# Characters whose normalized length is not one.
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
Property: Expands_On_NFKC
# Derived Property: Expands_On_NFKC
# Generated according to UAX #15.
# Characters whose normalized length is not one.
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
File: extracted/DerivedNumericType
Property: Numeric_Type
# Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan.txt: see UCD.html)
Format: skipValue=None
File: extracted/DerivedNumericValues
Property: Numeric_Value
# Numeric Values (from UnicodeData.txt, field 6/7/8)
# WARNING: Certain values, such as 0.16666667, are repeating fractions.
# Although they are only printed with a limited number of decimal places
# in this file, they should be expressed to the limits of the precision
# available when used.
Format: sortNumeric
File: HangulSyllableType
Property: Hangul_Syllable_Type
Format: valueStyle=short skipValue=Not_Applicable
File: NormalizationTest
Property: SPECIAL
File: PropList
Property: White_Space
Property: Bidi_Control
Property: Join_Control
Property: Dash
Property: Hyphen
Property: Quotation_Mark
Property: Terminal_Punctuation
Property: Other_Math
Property: Hex_Digit
Property: ASCII_Hex_Digit
Property: Other_Alphabetic
Property: Ideographic
Property: Diacritic
Property: Extender
Property: Other_Lowercase
Property: Other_Uppercase
Property: Noncharacter_Code_Point
Property: Other_Grapheme_Extend
Property: Grapheme_Link
Property: IDS_Binary_Operator
Property: IDS_Trinary_Operator
Property: Radical
Property: Unified_Ideograph
Property: Other_Default_Ignorable_Code_Point
Property: Deprecated
Property: Soft_Dotted
Property: Logical_Order_Exception
Property: Other_ID_Start
Property: Other_ID_Continue
Property: STerm
Property: Variation_Selector
File: PropertyAliases
Property: SPECIAL
File: PropertyValueAliases
Property: SPECIAL
File: Scripts
Property: Script
Format: nameStyle=none skipUnassigned=Common
File: SpecialCasing
Property: SPECIAL
File: StandardizedVariants
Property: SPECIAL
HackName: noBreak
HackName: Arabic_Presentation_Forms-A
HackName: Arabic_Presentation_Forms-B
HackName: CJK_Symbols_and_Punctuation
HackName: Combining_Diacritical_Marks_for_Symbols
HackName: Enclosed_CJK_Letters_and_Months
HackName: Greek_and_Coptic
HackName: Halfwidth_and_Fullwidth_Forms
HackName: Latin-1_Supplement
HackName: Latin_Extended-A
HackName: Latin_Extended-B
HackName: Miscellaneous_Mathematical_Symbols-A
HackName: Miscellaneous_Mathematical_Symbols-B
HackName: Miscellaneous_Symbols_and_Arrows
HackName: Superscripts_and_Subscripts
HackName: Supplemental_Arrows-A
HackName: Supplemental_Arrows-B
HackName: Supplementary_Private_Use_Area-A
HackName: Supplementary_Private_Use_Area-B
HackName: Canadian-Aboriginal
HackName: Old-Italic
FinalComments
Note that PropertyAliases sorts by the long name, while PropertyValueAliases
sorts by the short name
ArabicShaping
BidiMirroring
CompositionExclusions
EastAsianWidth
LineBreak
StandardizedVariants
UnicodeData

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyFloatLister.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.5 $
* $Date: 2004/03/11 19:03:17 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -40,7 +40,7 @@ class MyFloatLister extends PropertyLister {
public byte status(int cp) {
//if ((cp & 0xFFF) == 0) System.out.println("# " + Utility.hex(cp));
if (false && !ucdData.isRepresented(cp)) {
if (ucdData.mapToRepresentative(cp, false) != cp) return PropertyLister.CONTINUE;
if (ucdData.mapToRepresentative(cp, ucdData.getCompositeVersion()) != cp) return PropertyLister.CONTINUE;
return PropertyLister.CONTINUE;
}
if (ucdData.getCategory(cp) == Cn) return PropertyLister.CONTINUE;

View file

@ -0,0 +1,40 @@
# This file contains aliases for properties used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
# For information on which properties are normative, see UCD.html.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line has two or more fields, separated by semicolons.
#
# First Field: The first field is an abbreviated name for the property.
#
# Second Field: The second field is a long name
#
# The above are the preferred aliases. Other aliases may be listed in additional fields.
#
# Loose matching should be applied to all property names and property values, with
# the exception of String Property values. With loose matching of property names and
# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
#
# NOTE: Property value names are NOT unique across properties. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Above_Left for the Canonical_Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
# For example:
#
# sc means the Script property, and
# Sc means the General_Category property value Currency_Symbol (Sc)
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UTS #18: Regular Expression Guidelines
# ================================================

View file

@ -0,0 +1,48 @@
# This file contains aliases for property values used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
# For information on which properties are normative, see UCD.html.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line describes a property value name.
# This consists of three or more fields, separated by semicolons.
#
# First Field: The first field describes the property for which that
# property value name is used.
#
# Second Field: The second field is an abbreviated name.
# If there is no abbreviated name available, the field is marked with "n/a".
#
# Third Field: The third field is a long name.
#
# In the case of ccc, there are 4 fields. The second field is numeric, third
# is abbreviated, and fourth is long.
#
# The above are the preferred aliases. Other aliases may be listed in additional fields.
#
# Loose matching should be applied to all property names and property values, with
# the exception of String Property values. With loose matching of property names and
# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
#
# NOTE: Property value names are NOT unique across properties. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Above_Left for the Canonical_Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
# For example:
#
# sc means the Script property, and
# Sc means the General_Category property value Currency_Symbol (Sc)
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UTS #18: Regular Expression Guidelines
# ================================================

View file

@ -1,5 +1,6 @@
package com.ibm.text.UCD;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -53,7 +54,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if (codepoint == 0x1D100) {
if (DEBUG && codepoint == 0x1D100) {
System.out.println("here");
}
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
@ -82,10 +83,17 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
.setValues("<string>"));
add(new UnicodeProperty.SimpleProperty() {
NumberFormat nf = NumberFormat.getInstance();
{
nf.setGroupingUsed(false);
nf.setMaximumFractionDigits(8);
nf.setMinimumFractionDigits(1);
}
public String _getValue(int codepoint) {
double num = ucd.getNumericValue(codepoint);
if (Double.isNaN(num)) return null;
return Double.toString(num);
return nf.format(num);
}
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version));
@ -100,8 +108,9 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
public int getMaxWidth(boolean isShort) {
return 14;
}
}.setMain("FC_NFKC_Closure", "FNC", UnicodeProperty.STRING, version)
.addName("FC_NFKC"));
}.setMain("FC_NFKC_Closure", "FC_NFKC", UnicodeProperty.STRING, version)
//.addName("FNC")
);
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
@ -319,7 +328,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((short)i, style)); break;
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style));
check = temp != null;
//check = temp != null;
break;
case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
titlecase = true;
@ -389,7 +398,10 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
case UCD_Types.EAST_ASIAN_WIDTH>>8:
return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result);
case UCD_Types.LINE_BREAK>>8:
return lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
if (valueAlias.equals("Inseparable")) addUnique("Inseperable", result);
// Inseparable; Inseperable
return result;
case UCD_Types.JOINING_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result);
case UCD_Types.JOINING_GROUP>>8:
@ -445,10 +457,13 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
if (isType(BINARY_MASK)) {
return up.hasValue(codepoint) ? "True" : "False";
}
return "<unknown>";
throw new IllegalArgumentException("Failed to find value for " + Utility.hex(codepoint));
}
public String getAge(int codePoint) {
if (codePoint == 0xF0000) {
System.out.println("debug point");
}
if (needAgeCache) {
for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
ucdCache[i] = UCD.make(UCD_Names.AGE_VERSIONS[i]);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.32 $
* $Date: 2004/03/11 19:03:16 $
* $Revision: 1.33 $
*
*******************************************************************************
*/
@ -86,7 +86,6 @@ public final class UCD implements UCD_Types {
*/
public boolean isAllocated(int codePoint) {
if (getCategory(codePoint) != Cn) return true;
if (compositeVersion >= 0x20000 && codePoint >= 0xF0000 && codePoint <= 0x10FFFD) return true;
if (isNoncharacter(codePoint)) return true;
return false;
}
@ -94,11 +93,9 @@ public final class UCD implements UCD_Types {
public boolean isNoncharacter(int codePoint) {
if ((codePoint & 0xFFFE) == 0xFFFE) {
if (compositeVersion < 0x20000 && codePoint > 0xFFFF) return false;
// major < 2
return true;
}
if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF && compositeVersion >= 0x30100) return true;
// major >= 3 && minor >= 1
return false;
}
@ -113,8 +110,9 @@ public final class UCD implements UCD_Types {
* Is the code point a PUA character (fast check)
*/
public boolean isPUA(int codePoint) {
return (codePoint >= 0xE000 && codePoint < 0xF900
|| codePoint >= 0xF0000 && codePoint < 0xFFFFE
if (codePoint >= 0xE000 && codePoint < 0xF900) return true;
if (compositeVersion < 0x20000) return false;
return (codePoint >= 0xF0000 && codePoint < 0xFFFFE
|| codePoint >= 0x100000 && codePoint < 0x10FFFE);
}
@ -353,7 +351,7 @@ public final class UCD implements UCD_Types {
return combiningClassSet.get(0xFF & value);
}
static UnicodeSet BIDI_R_SET, BIDI_AL_SET;
static UnicodeSet BIDI_R_SET, BIDI_AL_SET, BIDI_BN_SET;
/**
* Get the bidi class
@ -424,10 +422,17 @@ public final class UCD implements UCD_Types {
BIDI_R_SET.removeAll(noncharacters);
BIDI_AL_SET.removeAll(noncharacters);
BIDI_BN_SET = new UnicodeSet();
if (compositeVersion >= 0x40001) {
BIDI_BN_SET.addAll(noncharacters);
UnicodeSet DefaultIg = DerivedProperty.make(DefaultIgnorable, this).getSet();
System.out.println("DefaultIg: " + DefaultIg);
BIDI_BN_SET.addAll(DefaultIg);
}
System.out.println("BIDI_R_SET: " + BIDI_R_SET);
System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
System.out.println("BIDI_BN_SET: " + BIDI_BN_SET);
if (BIDI_R_SET.containsSome(BIDI_AL_SET)) {
throw new ChainException("BIDI values for Cf characters overlap!!", null);
@ -435,6 +440,9 @@ public final class UCD implements UCD_Types {
}
if (BIDI_BN_SET.contains(codePoint)) {
return BIDI_BN;
}
if (BIDI_R_SET.contains(codePoint)) {
return BIDI_R;
}
@ -1012,7 +1020,7 @@ public final class UCD implements UCD_Types {
}
public static String getScriptID_fromIndex(byte prop, byte length) {
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
return prop < 0 || prop >= UCD_Names.SCRIPT.length ? null
: (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop];
}
@ -1043,7 +1051,7 @@ public final class UCD implements UCD_Types {
: style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
}
public static int mapToRepresentative(int ch, boolean lessThan20105) {
public static int mapToRepresentative(int ch, int rCompositeVersion) {
if (ch <= 0xFFFD) {
//if (ch <= 0x2800) return ch;
//if (ch <= 0x28FF) return 0x2800; // braille
@ -1061,7 +1069,7 @@ public final class UCD implements UCD_Types {
if (ch <= 0xDFFF) return 0xDC00;
if (ch <= 0xE000) return ch; // Private Use
if (ch <= 0xF8FF) return 0xE000;
if (lessThan20105) {
if (rCompositeVersion < 0x20105) {
if (ch <= 0xF900) return ch; // CJK Compatibility Ideograp
if (ch <= 0xFA2D) return 0xF900;
}
@ -1069,14 +1077,20 @@ public final class UCD implements UCD_Types {
if (ch <= 0xFDEF) return 0xFFFF;
} else {
if ((ch & 0xFFFE) == 0xFFFE) return 0xFFFF; // Noncharacter
if (ch <= 0x20000) return ch; // Extension B
if (ch <= 0x2A6D6) return 0x20000;
//if (ch <= 0x2F800) return ch;
//if (ch <= 0x2FA1D) return 0x2F800; // compat ideographs
if (ch <= 0xF0000) return ch; // Plane 15 Private Use
if (ch < 0xF0000) return ch; // Plane 15 Private Use
if (rCompositeVersion >= 0x20000) {
return 0xE000;
}
/*
if (ch <= 0xFFFFD) return 0xF0000; // Plane 16 Private Use
if (ch <= 0x100000) return ch; // Plane 15 Private Use
if (ch <= 0x10FFFD) return 0x100000; // Plane 16 Private Use
*/
}
return ch;
}
@ -1106,6 +1120,7 @@ public final class UCD implements UCD_Types {
byte cat = getCategory(cp);
if (cat == Mn || cat == Mc || cat == Nd || cat == Pc) return true;
if (getBinaryProperty(cp, Other_ID_Start)) return true;
if (getBinaryProperty(cp, Other_ID_Continue)) return true;
return false;
}
@ -1189,7 +1204,7 @@ to guarantee identifier closure.
if (codePoint >= 0x2800 && codePoint <= 0x28FF) return true;
if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) return true;
int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
int rangeStart = mapToRepresentative(codePoint, compositeVersion);
switch (rangeStart) {
default:
return getRaw(codePoint) == null;
@ -1247,7 +1262,7 @@ to guarantee identifier closure.
// do range stuff
String constructedName = null;
int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
int rangeStart = mapToRepresentative(codePoint, compositeVersion);
boolean isHangul = false;
boolean isRemapped = false;
switch (rangeStart) {
@ -1297,7 +1312,7 @@ to guarantee identifier closure.
case 0xE000: // Private Use
case 0xF0000: // Private Use
case 0x100000: // Private Use
if (fixStrings) constructedName = "<private use area-" + Utility.hex(codePoint, 4) + ">";
if (fixStrings) constructedName = "<private-use-" + Utility.hex(codePoint, 4) + ">";
isRemapped = true;
break;
case 0xD800: // Surrogate

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.26 $
* $Date: 2004/03/11 19:03:16 $
* $Revision: 1.27 $
*
*******************************************************************************
*/
@ -152,7 +152,8 @@ final class UCD_Names implements UCD_Types {
"Logical_Order_Exception",
"Other_ID_Start",
"STerm",
"Variation_Selector"
"Variation_Selector",
"Other_ID_Continue",
};
static final String[] SHORT_BP = {
@ -189,7 +190,8 @@ final class UCD_Names implements UCD_Types {
"LOE",
"OIDS",
"STerm",
"VS"
"VS",
"OIDC"
};
/*
@ -262,7 +264,7 @@ final class UCD_Names implements UCD_Types {
"Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation",
"Glue", "Nonstarter", "Exclamation", "BreakSymbols",
"InfixNumeric", "PrefixNumeric", "PostfixNumeric",
"Numeric", "Alphabetic", "Ideographic", "Inseperable", "Hyphen",
"Numeric", "Alphabetic", "Ideographic", "Inseparable", "Hyphen",
"CombiningMark", "BreakBefore", "BreakAfter", "Space",
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
@ -327,7 +329,8 @@ final class UCD_Names implements UCD_Types {
"SHAVIAN",
"OSMANYA",
"CYPRIOT",
"BRAILLE",
"BRAILLE",
"KATAKANA_OR_HIRAGANA",
};
@ -395,6 +398,7 @@ final class UCD_Names implements UCD_Types {
"Osma",
"Cprt",
"Brai",
"Hrkt",
};
@ -643,11 +647,13 @@ final class UCD_Names implements UCD_Types {
case 9: s = style < LONG ? "VR" : "Virama"; break;
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
/*
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
*/
case 216: s = style < LONG ? "ATAR" : "AttachedAboveRight"; break;
case 218: s = style < LONG ? "BL" : "BelowLeft"; break;
case 220: s = style < LONG ? "B" : "Below"; break;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.27 $
* $Date: 2004/03/11 19:03:16 $
* $Revision: 1.28 $
*
*******************************************************************************
*/
@ -15,9 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
public static final int dVersion = 6; // change to fix the generated file D version. If less than zero, no "d"
static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild
static final byte BINARY_FORMAT = 15; // bumped if binary format of UCD changes. Forces rebuild
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";
@ -213,9 +211,10 @@ public interface UCD_Types {
Soft_Dotted = 29,
Logical_Order_Exception = 30,
Other_ID_Start = 31,
Sentence_Terminal = 32,
Variation_Selector = 33,
LIMIT_BINARY_PROPERTIES = 34;
Sentence_Terminal = 32,
Variation_Selector = 33,
Other_ID_Continue = 34,
LIMIT_BINARY_PROPERTIES = 35;
/*
static final int
@ -383,7 +382,8 @@ public interface UCD_Types {
OSMANYA = 51,
CYPRIOT = 52,
BRAILLE = 53,
LIMIT_SCRIPT = 54;
KATAKANA_OR_HIRAGANA = 54,
LIMIT_SCRIPT = 55;
static final int
UNKNOWN = 0,

View file

@ -7,6 +7,7 @@ import java.io.PrintWriter;
import com.ibm.text.UCD.Default;
import com.ibm.text.UCD.GenerateData;
import com.ibm.text.UCD.MakeUnicodeFiles;
import com.ibm.text.UCD.UCD_Types;
public class UnicodeDataFile {
@ -26,16 +27,23 @@ public class UnicodeDataFile {
result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
result.out.println(generateDateLine());
result.out.println("#");
result.out.println("#");
result.out.println("# Unicode Character Database");
result.out.println("# Copyright (c) 1991-2004 Unicode, Inc.");
result.out.println(
"# For terms of use, see http://www.unicode.org/terms_of_use.html");
result.out.println("# For documentation, see UCD.html");
try {
Utility.appendFile(filename + "Header.txt", Utility.LATIN1, result.out);
} catch (FileNotFoundException e) {
/*
result.out.println("# Unicode Character Database: Derived Property Data");
result.out.println("# Generated algorithmically from the Unicode Character Database");
result.out.println("# For documentation, see UCD.html");
result.out.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
result.out.println("# if they have default property values.");
result.out.println("# ================================================");
*/
}
return result;
@ -51,14 +59,20 @@ public class UnicodeDataFile {
}
/**
 * Builds the version suffix used for generated HTML file names.
 * The result is "-" followed by Default.ucd().getVersion(), then "d" plus
 * the dVersion constant when withDVersion is true and dVersion is
 * non-negative, and finally ".html".
 *
 * NOTE(review): the body below carries two variants of the return expression,
 * one reading UCD_Types.dVersion and one reading MakeUnicodeFiles.dVersion, and
 * the first is not terminated; confirm which variant is the current one.
 *
 * @param withDVersion whether the "d" version marker may be appended
 * @return the suffix string, ending in ".html"
 */
public static String getHTMLFileSuffix(boolean withDVersion) {
return "-" + Default.ucd().getVersion()
+ ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
return "-"
+ Default.ucd().getVersion()
+ ((withDVersion && MakeUnicodeFiles.dVersion >= 0)
? ("d" + MakeUnicodeFiles.dVersion)
: "")
+ ".html";
}
/**
 * Builds the version suffix used for generated text file names.
 * The result is "-" followed by Default.ucd().getVersion(), then "d" plus
 * the dVersion constant when withDVersion is true and dVersion is
 * non-negative, and finally ".txt".
 *
 * NOTE(review): the body below carries two variants of the return expression,
 * one reading UCD_Types.dVersion and one reading MakeUnicodeFiles.dVersion, and
 * the first is not terminated; confirm which variant is the current one.
 *
 * @param withDVersion whether the "d" version marker may be appended
 * @return the suffix string, ending in ".txt"
 */
public static String getFileSuffix(boolean withDVersion) {
return "-" + Default.ucd().getVersion()
+ ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
return "-"
+ Default.ucd().getVersion()
+ ((withDVersion && MakeUnicodeFiles.dVersion >= 0)
? ("d" + MakeUnicodeFiles.dVersion)
: "")
+ ".txt";
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2004/02/18 03:09:02 $
* $Revision: 1.40 $
* $Date: 2004/03/11 19:03:16 $
* $Revision: 1.41 $
*
*******************************************************************************
*/
@ -725,8 +725,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException {
File file = new File(directory + filename);
Utility.fixDot();
System.out.print("Creating File: " + file);
System.out.println("\t" + file.getCanonicalPath());
System.out.println("Creating File: " + file.getCanonicalPath());
File parent = new File(file.getParent());
//System.out.println("Creating File: "+ parent);
parent.mkdirs();