no message

X-SVN-Rev: 14687
2025-04-05 21:45:37 +00:00 · 2004-03-11 19:04:00 +00:00 · 2004-03-11 19:04:00 +00:00 · 5c397b73b3
commit 5c397b73b3
parent 77c134dc38
19 changed files with 1269 additions and 342 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/UCA_Data.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA_Data.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Data.java,v $ 
-* $Date: 2002/07/14 22:07:00 $ 
-* $Revision: 1.1 $
+* $Date: 2004/03/11 19:03:19 $ 
+* $Revision: 1.2 $
 *
 *******************************************************************************
 */
@ -27,7 +27,7 @@ import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;

 public class UCA_Data implements UCA_Types {
-    static final boolean DEBUG = true;
+    static final boolean DEBUG = false;
    static final boolean DEBUG_SHOW_ADD = false;
    
    private Normalizer toD;
--- a/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt
@ -1,7 +1,4 @@
-# Correlated with Unicode 4.0
-# Note: The casing of block names is not normative.
-#       For example, "Basic Latin" and "BASIC LATIN" are equivalent.
-#
-# Code points not explicitly listed in this file are given the value No_Block.
-#
+# Note:   The casing of block names is not normative.
+#         For example, "Basic Latin" and "BASIC LATIN" are equivalent.
+# Format:
 # Start Code..End Code; Block Name
--- a/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
+++ b/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
-* $Date: 2004/02/07 01:01:17 $
-* $Revision: 1.8 $
+* $Date: 2004/03/11 19:03:18 $
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -26,7 +26,7 @@ import com.ibm.text.utility.*;

 public class BuildNames implements UCD_Types {

-    static final boolean DEBUG = true;
+    static final boolean DEBUG = false;

    public static void main(String[] args) throws IOException {
        collectWords();
--- a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java
@ -59,7 +59,7 @@ public class CheckICU {
        toolFactory = ToolUnicodePropertySource.make("4.0.0");

        String[] quickList = {
-            "Canonical_Combining_Class",
+            // "Canonical_Combining_Class",
            // "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
            //"Numeric_Value"
        };
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
-* $Date: 2004/02/12 08:23:17 $
-* $Revision: 1.14 $
+* $Date: 2004/03/11 19:03:18 $
+* $Revision: 1.15 $
 *
 *******************************************************************************
 */
@ -305,6 +305,7 @@ public final class ConvertUCD implements UCD_Types {
            value.compact();
        }
        
+        /*
        UData ud;
        ud = getEntry(0x5e);
        System.out.println("SPOT-CHECK: 5e: " + ud);
@ -320,6 +321,7 @@ public final class ConvertUCD implements UCD_Types {
        
        ud = getEntry(0xFFFF);
        System.out.println("SPOT-CHECK: FFFF: " + ud);
+        */

        writeJavaData();
    }
@ -410,7 +412,7 @@ public final class ConvertUCD implements UCD_Types {

                int count = Utility.split(line,';',parts);

-                if (parts[0].equals("2801")) {
+                if (false && parts[0].equals("2801")) {
                    System.out.println("debug?");
                }

@ -468,7 +470,7 @@ public final class ConvertUCD implements UCD_Types {
                        if (end == 0) end = cpStart;

                        for (int j = cpStart; j <= end; ++j) {
-                            if (j != UCD.mapToRepresentative(j, false)) continue;
+                            if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
                            if (skipLetters && getEntry(cpStart).isLetter()) continue;
                            appendCharProperties(j, prop);
                        }
@ -490,7 +492,7 @@ public final class ConvertUCD implements UCD_Types {
                        if (val.equals("")) continue; // skip empty values, they mean default

                        for (int cps = cpStart; cps <= cpTop; ++cps) {
-                            if (UCD.mapToRepresentative(cps, false) != cps) continue;    // skip condensed ranges
+                            if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue;    // skip condensed ranges

                            if (key.equals("binary")) {
                                appendCharProperties(cps, val);
@ -508,7 +510,7 @@ public final class ConvertUCD implements UCD_Types {
                                if (type.equals("I")) {
                                    data.simpleCaseFolding = val;
                                    setBinaryProperty(cps, CaseFoldTurkishI);
-                                    System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " 
+                                    if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " 
                                    	+ Utility.hex(cps) + ": " + Utility.hex(val));
                                }
                            } else if (labels[0].equals("SpecialCasing")   // special handling for special casing
@ -658,7 +660,7 @@ public final class ConvertUCD implements UCD_Types {
                    System.out.println("Warning: NULL name\r\n" + uData);
                    System.out.println();
                }
-                if (uData.codePoint == 0x2801) {
+                if (false && uData.codePoint == 0x2801) {
                    System.out.println("SPOT-CHECK: " + uData);
                }
                uData.writeBytes(dataOut);
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt
@ -1,16 +1,29 @@
 #
 # Unicode Character Database: Derived Property Data
-# This file shows when various code points were designated in Unicode
+# This file shows when various code points were first assigned in Unicode.
+#
+# Caution: When using the Age *property*, all assigned code points
+# in each version are included, not just the newly assigned code points.
+# For more information, see http://www.unicode.org/reports/tr18/
+#
 # Notes:
-# - The term 'designated' means that a previously reserved code point was specified
-#   to be a noncharacter or surrogate, or assigned as a character,
-#   control or format code.
+#
+# - The term 'assigned' means that a previously reserved code point was assigned
+#   to be a character (graphic, format, control, or private-use); 
+#   a noncharacter code point; or a surrogate code point.
+#   For more information, see The Unicode Standard Section 2.4
+#
 # - Versions are only tracked from 1.1 onwards, since version 1.0
 #   predated changes required by the ISO 10646 merger.
+#
 # - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing.
+#
 # - The supplementary private use code points and the non-character code points
-#   were designated in version 2.0, but not specifically listed in the UCD
+#   were assigned in version 2.0, but not specifically listed in the UCD
 #   until versions 3.0 and 3.1 respectively.
 #
+# - Contiguous ranges are broken into separate lines where they would cross code point
+#   types: graphic, format, control, private-use, surrogate, noncharacter
+#
 # For details on the contents of each version, see
 #   http://www.unicode.org/versions/enumeratedversions.html.
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
-* $Date: 2004/02/18 03:08:59 $
-* $Revision: 1.25 $
+* $Date: 2004/03/11 19:03:17 $
+* $Revision: 1.26 $
 *
 *******************************************************************************
 */
@ -378,7 +378,7 @@ public final class DerivedProperty implements UCD_Types {
                shortName = "IDC";
                header = "# Derived Property: " + name
                    + "\r\n#  Characters that can continue an identifier."
-                    + "\r\n#  Generated from: ID_Start + Mn+Mc+Nd+Pc"
+                    + "\r\n#  Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue"
                    + "\r\n#  NOTE: Cf characters should be filtered out.";
            }
            public boolean hasValue(int cp) {
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2004/02/18 03:08:59 $
-* $Revision: 1.34 $
+* $Date: 2004/03/11 19:03:17 $
+* $Revision: 1.35 $
 *
 *******************************************************************************
 */
@ -110,8 +110,8 @@ public class GenerateData implements UCD_Types {
            output.println("# Generated algorithmically from the Unicode Character Database");
        }
        output.println("# For documentation, see UCD.html");
-        output.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
-        output.println("#       if they have default property values.");
+        //output.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
+        //output.println("#       if they have default property values.");
        output.println(HORIZONTAL_LINE);
        output.println();
    }
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
@ -0,0 +1,355 @@
+Generate: DerivedCoreProperties
+DeltaVersion: 11
+
+File:	Blocks
+Property: Block
+Format:	valueList
+
+File:	CaseFolding
+Property: SPECIAL
+
+File:	DerivedAge
+Property:	Age
+Format:	nameStyle=none noLabel skipValue=unassigned
+
+Value:	1.1
+# Assigned as of Unicode 1.1.0 (June, 1993)
+# [excluding removed Hangul Syllables]
+
+Value:	2.0
+# Newly assigned in Unicode 2.0.0 (July, 1996)
+
+Value:	2.1
+# Newly assigned in Unicode 2.1.2 (May, 1998)
+
+Value:	3.0
+# Newly assigned in Unicode 3.0.0 (September, 1999)
+
+Value:	3.1
+# Newly assigned in Unicode 3.1.0 (March, 2001)
+
+Value:	3.2
+# Newly assigned in Unicode 3.2.0 (March, 2002)
+
+Value:	4.0
+# Newly assigned in Unicode 4.0.0 (April, 2003)
+
+File:	extracted/DerivedBidiClass
+Property:	Bidi_Class
+# Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
+Format:	valueStyle=short skipUnassigned=Left_To_Right
+
+File:	extracted/DerivedBinaryProperties
+Property:	Bidi_Mirrored
+# Bidi_Mirrored (listing UnicodeData.txt, field 9: see UCD.html)
+
+File:	extracted/DerivedCombiningClass
+Property:	Canonical_Combining_Class
+# Combining Class (listing UnicodeData.txt, field 3: see UCD.html)
+#	All code points not explicitly listed in this file have the property
+#	value:   0.
+Format: nameStyle=none valueStyle=short skipUnassigned=Not_Reordered
+
+File:	DerivedCoreProperties
+Property:	Math
+# Derived Property: Math
+#  Generated from: Sm + Other_Math
+
+Property:	Alphabetic
+# Derived Property: Alphabetic
+#  Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic
+
+
+Property:	Lowercase
+# Derived Property: Lowercase
+#  Generated from: Ll + Other_Lowercase
+
+
+Property:	Uppercase
+# Derived Property: Uppercase
+#  Generated from: Lu + Other_Uppercase
+
+
+Property:	ID_Start
+# Derived Property: ID_Start
+#  Characters that can start an identifier.
+#  Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start
+
+
+Property:	ID_Continue
+# Derived Property: ID_Continue
+#  Characters that can continue an identifier.
+#  Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
+#  NOTE: Cf characters should be filtered out.
+
+
+Property:	XID_Start
+# Derived Property: XID_Start
+#  ID_Start modified for closure under NFKx
+#  Modified as described in UAX #15
+#  NOTE: Does NOT remove the non-NFKx characters.
+#        Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
+
+
+Property:	XID_Continue
+# Derived Property: XID_Continue
+#  Mod_ID_Continue modified for closure under NFKx
+#  Modified as described in UAX #15
+#  NOTE: Cf characters should be filtered out.
+#  NOTE: Does NOT remove the non-NFKx characters.
+#        Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
+
+
+Property:	Default_Ignorable_Code_Point
+# Derived Property: Default_Ignorable_Code_Point
+#  Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - White_Space
+
+
+Property:	Grapheme_Extend
+# Derived Property: Grapheme_Extend
+#  Generated from: Me + Mn + Other_Grapheme_Extend
+#  Note: depending on an application's interpretation of Co (private use),
+#  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
+
+
+Property:	Grapheme_Base
+# Derived Property: Grapheme_Base
+#  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend
+#  Note: depending on an application's interpretation of Co (private use),
+#  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
+
+
+File:	extracted/DerivedDecompositionType
+Property:	Decomposition_Type
+Format:	skipValue=None
+# Decomposition_Type (from UnicodeData.txt, field 5: see UCD.html)
+
+File:	extracted/DerivedEastAsianWidth
+Property:	East_Asian_Width
+Format:	valueStyle=short skipUnassigned=Neutral
+# East_Asian_Width (listing EastAsianWidth.txt, field 1)
+
+File:	extracted/DerivedGeneralCategory
+Property:	General_Category
+Format:	valueStyle=short noLabel
+
+File:	extracted/DerivedJoiningGroup
+Property:	Joining_Group
+# Joining Group (listing ArabicShaping.txt, field 3)
+Format: skipValue=No_Joining_Group
+
+File:	extracted/DerivedJoiningType
+Property:	Joining_Type
+#	Type T is derived, as described in ArabicShaping.txt
+Format:	valueStyle=short skipValue=Non_Joining
+
+File:	extracted/DerivedLineBreak
+Property:	Line_Break
+Format:	valueStyle=short skipUnassigned=Unknown
+
+File:	DerivedNormalizationProps
+
+Property:	FC_NFKC_Closure
+# Derived Property: FC_NFKC_Closure
+#  Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));
+#  Then if (c != b) add the mapping from a to c to the set of
+#  mappings that constitute the FC_NFKC_Closure list
+#  Uses the full case folding from CaseFolding.txt, without the T option.
+Format:	nameStyle=short
+
+
+Property:	Full_Composition_Exclusion
+# Derived Property: Full_Composition_Exclusion
+#  Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions
+
+
+Property:	NFD_QuickCheck
+# Derived Property: NFD_QuickCheck
+#  Generated from computing decomposibles
+Format: nameStyle=short valueStyle=short skipValue=Yes
+
+
+Property:	NFC_QuickCheck
+# Derived Property: NFC_QuickCheck
+#  Generated from computing decomposibles (and characters that may compose with previous ones)
+Format: nameStyle=short valueStyle=short skipValue=Yes
+
+Property:	NFKD_QuickCheck
+# Derived Property: NFKD_QuickCheck
+#  Generated from computing decomposibles
+Format: nameStyle=short valueStyle=short skipValue=Yes
+
+
+Property:	NFKC_QuickCheck
+# Derived Property: NFKC_QuickCheck
+#  Generated from computing decomposibles (and characters that may compose with previous ones)
+Format: nameStyle=short valueStyle=short skipValue=Yes
+
+Property:	Expands_On_NFD
+# Derived Property: Expands_On_NFD
+#   Generated according to UAX #15.
+#   Characters whose normalized length is not one.
+#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
+#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
+
+
+Property:	Expands_On_NFC
+# Derived Property: Expands_On_NFC
+#   Generated according to UAX #15.
+#   Characters whose normalized length is not one.
+#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
+#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
+
+
+Property:	Expands_On_NFKD
+# Derived Property: Expands_On_NFKD
+#   Generated according to UAX #15.
+#   Characters whose normalized length is not one.
+#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
+#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
+
+
+Property:	Expands_On_NFKC
+# Derived Property: Expands_On_NFKC
+#   Generated according to UAX #15.
+#   Characters whose normalized length is not one.
+#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
+#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
+
+
+File:	extracted/DerivedNumericType
+Property:	Numeric_Type
+# Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan.txt: see UCD.html)
+Format: skipValue=None
+
+File:	extracted/DerivedNumericValues
+Property:	Numeric_Value
+# Numeric Values (from UnicodeData.txt, field 6/7/8)
+# WARNING: Certain values, such as 0.16666667, are repeating fractions.
+# Although they are only printed with a limited number of decimal places
+# in this file, they should be expressed to the limits of the precision
+# available when used.
+Format: sortNumeric
+
+File:	HangulSyllableType
+Property:	Hangul_Syllable_Type
+Format:	valueStyle=short skipValue=Not_Applicable
+
+File:	NormalizationTest
+Property: SPECIAL
+
+File:	PropList
+
+Property:	White_Space
+
+Property:	Bidi_Control
+
+Property:	Join_Control
+
+Property:	Dash
+
+Property:	Hyphen
+
+Property:	Quotation_Mark
+
+Property:	Terminal_Punctuation
+
+Property:	Other_Math
+
+Property:	Hex_Digit
+
+Property:	ASCII_Hex_Digit
+
+Property:	Other_Alphabetic
+
+Property:	Ideographic
+
+Property:	Diacritic
+
+Property:	Extender
+
+Property:	Other_Lowercase
+
+Property:	Other_Uppercase
+
+Property:	Noncharacter_Code_Point
+
+Property:	Other_Grapheme_Extend
+
+Property:	Grapheme_Link
+
+Property:	IDS_Binary_Operator
+
+Property:	IDS_Trinary_Operator
+
+Property:	Radical
+
+Property:	Unified_Ideograph
+
+Property:	Other_Default_Ignorable_Code_Point
+
+Property:	Deprecated
+
+Property:	Soft_Dotted
+
+Property:	Logical_Order_Exception
+
+Property:	Other_ID_Start
+
+Property:	Other_ID_Continue
+
+Property:	STerm
+
+Property:	Variation_Selector
+
+File:	PropertyAliases
+Property: SPECIAL
+
+File:	PropertyValueAliases
+Property: SPECIAL
+
+File:	Scripts
+
+Property:	Script
+Format:	nameStyle=none skipUnassigned=Common
+
+File:	SpecialCasing
+Property: SPECIAL
+
+File:	StandardizedVariants
+Property: SPECIAL
+
+HackName:	noBreak
+HackName:	Arabic_Presentation_Forms-A
+HackName:	Arabic_Presentation_Forms-B
+HackName:	CJK_Symbols_and_Punctuation
+HackName:	Combining_Diacritical_Marks_for_Symbols
+HackName:	Enclosed_CJK_Letters_and_Months
+HackName:	Greek_and_Coptic
+HackName:	Halfwidth_and_Fullwidth_Forms
+HackName:	Latin-1_Supplement
+HackName:	Latin_Extended-A
+HackName:	Latin_Extended-B
+HackName:	Miscellaneous_Mathematical_Symbols-A
+HackName:	Miscellaneous_Mathematical_Symbols-B
+HackName:	Miscellaneous_Symbols_and_Arrows
+HackName:	Superscripts_and_Subscripts
+HackName:	Supplemental_Arrows-A
+HackName:	Supplemental_Arrows-B
+HackName:	Supplementary_Private_Use_Area-A
+HackName:	Supplementary_Private_Use_Area-B
+HackName:	Canadian-Aboriginal
+HackName:	Old-Italic
+
+FinalComments
+Note that PropertyAliases sorts by the long name, while PropertyValueAliases
+sorts by the short name
+ArabicShaping
+BidiMirroring
+CompositionExclusions
+EastAsianWidth
+LineBreak
+StandardizedVariants
+UnicodeData
+
+
--- a/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyFloatLister.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.5 $
+* $Date: 2004/03/11 19:03:17 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -40,7 +40,7 @@ class MyFloatLister extends PropertyLister {
    public byte status(int cp) {
        //if ((cp & 0xFFF) == 0) System.out.println("# " + Utility.hex(cp));
        if (false && !ucdData.isRepresented(cp)) {
-            if (ucdData.mapToRepresentative(cp, false) != cp) return PropertyLister.CONTINUE;
+            if (ucdData.mapToRepresentative(cp, ucdData.getCompositeVersion()) != cp) return PropertyLister.CONTINUE;
            return PropertyLister.CONTINUE;
        }
        if (ucdData.getCategory(cp) == Cn) return PropertyLister.CONTINUE;
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasesHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasesHeader.txt
@ -0,0 +1,40 @@
+# This file contains aliases for properties used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+# For information on which properties are normative, see UCD.html.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line has two or more fields, separated by semicolons.
+#
+# First Field: The first field is an abbreviated name for the property.
+#
+# Second Field: The second field is a long name
+#
+# The above are the preferred aliases. Other aliases may be listed in additional fields.
+#
+# Loose matching should be applied to all property names and property values, with
+# the exception of String Property values. With loose matching of property names and
+# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
+# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
+#
+# NOTE: Property value names are NOT unique across properties. For example:
+#
+#   AL means Arabic Letter for the Bidi_Class property, and
+#   AL means Above_Left for the Canonical_Combining_Class property, and
+#   AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names.
+# For example:
+#
+#   sc means the Script property, and
+#   Sc means the General_Category property value Currency_Symbol (Sc)
+#
+# The combination of property value and property name is, however, unique.
+#
+# For more information, see UTS #18: Regular Expression Guidelines
+# ================================================
+
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasesHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasesHeader.txt
@ -0,0 +1,48 @@
+# This file contains aliases for property values used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+# For information on which properties are normative, see UCD.html.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line describes a property value name.
+# This consists of three or more fields, separated by semicolons.
+#
+# First Field: The first field describes the property for which that
+# property value name is used.
+#
+# Second Field: The second field is an abbreviated name.
+# If there is no abbreviated name available, the field is marked with "n/a".
+#
+# Third Field: The third field is a long name.
+#
+# In the case of ccc, there are 4 fields. The second field is numeric, third
+# is abbreviated, and fourth is long.
+#
+# The above are the preferred aliases. Other aliases may be listed in additional fields.
+#
+# Loose matching should be applied to all property names and property values, with
+# the exception of String Property values. With loose matching of property names and
+# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
+# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
+#
+# NOTE: Property value names are NOT unique across properties. For example:
+#
+#   AL means Arabic Letter for the Bidi_Class property, and
+#   AL means Above_Left for the Canonical_Combining_Class property, and
+#   AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names.
+# For example:
+#
+#   sc means the Script property, and
+#   Sc means the General_Category property value Currency_Symbol (Sc)
+#
+# The combination of property value and property name is, however, unique.
+#
+# For more information, see UTS #18: Regular Expression Guidelines
+# ================================================
+
--- a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
@ -1,5 +1,6 @@
 package com.ibm.text.UCD;

+import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@ -53,7 +54,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
-                if (codepoint == 0x1D100) {
+                if (DEBUG && codepoint == 0x1D100) {
                    System.out.println("here");
                }
                //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
@ -82,10 +83,17 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
        .setValues("<string>"));
        
        add(new UnicodeProperty.SimpleProperty() {
+            NumberFormat nf = NumberFormat.getInstance();
+            {
+                nf.setGroupingUsed(false);
+                nf.setMaximumFractionDigits(8);
+                nf.setMinimumFractionDigits(1);
+            }
            public String _getValue(int codepoint) {
+                
                double num = ucd.getNumericValue(codepoint);
                if (Double.isNaN(num)) return null;
-                return Double.toString(num);
+                return nf.format(num);
            }
        }.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version));
        
@ -100,8 +108,9 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            public int getMaxWidth(boolean isShort) {
                return 14;
            }
-        }.setMain("FC_NFKC_Closure", "FNC", UnicodeProperty.STRING, version)
-        .addName("FC_NFKC"));
+        }.setMain("FC_NFKC_Closure", "FC_NFKC", UnicodeProperty.STRING, version)
+        //.addName("FNC")
+        );

        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
@ -319,7 +328,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
                        case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((short)i, style)); break;
                        case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
                        case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style)); 
-                            check = temp != null;
+                            //check = temp != null;
                            break;
                        case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
                            titlecase = true;
@ -389,7 +398,10 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
                        case UCD_Types.EAST_ASIAN_WIDTH>>8:
                            return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result);
                        case UCD_Types.LINE_BREAK>>8:
-                            return lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
+                            lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
+                            if (valueAlias.equals("Inseparable")) addUnique("Inseperable", result);
+                            // Inseparable; Inseperable
+                            return result;
                        case UCD_Types.JOINING_TYPE>>8:
                            return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result);
                        case UCD_Types.JOINING_GROUP>>8:
@ -445,10 +457,13 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            if (isType(BINARY_MASK)) {
                return up.hasValue(codepoint) ? "True" : "False";
            }
-            return "<unknown>";
+            throw new IllegalArgumentException("Failed to find value for " + Utility.hex(codepoint));
        }
    
        public String getAge(int codePoint) {
+            if (codePoint == 0xF0000) {
+                System.out.println("debug point");
+            }
            if (needAgeCache) {
                for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
                    ucdCache[i] = UCD.make(UCD_Names.AGE_VERSIONS[i]);
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2004/02/18 03:09:01 $
-* $Revision: 1.32 $
+* $Date: 2004/03/11 19:03:16 $
+* $Revision: 1.33 $
 *
 *******************************************************************************
 */
@ -86,7 +86,6 @@ public final class UCD implements UCD_Types {
     */
    public boolean isAllocated(int codePoint) {
        if (getCategory(codePoint) != Cn) return true;
-        if (compositeVersion >= 0x20000 && codePoint >= 0xF0000 && codePoint <= 0x10FFFD) return true;
        if (isNoncharacter(codePoint)) return true;
        return false;
    }
@ -94,11 +93,9 @@ public final class UCD implements UCD_Types {
    public boolean isNoncharacter(int codePoint) {
        if ((codePoint & 0xFFFE) == 0xFFFE) {
            if (compositeVersion < 0x20000 && codePoint > 0xFFFF) return false;
-            // major < 2
            return true;
        }
        if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF && compositeVersion >= 0x30100) return true;
-        // major >= 3 && minor >= 1
        return false;
    }

@ -113,8 +110,9 @@ public final class UCD implements UCD_Types {
     * Is the code point a PUA character (fast check)
     */
    public boolean isPUA(int codePoint) {
-        return (codePoint >= 0xE000 && codePoint < 0xF900
-             || codePoint >= 0xF0000 && codePoint < 0xFFFFE
+        if (codePoint >= 0xE000 && codePoint < 0xF900) return true;
+        if (compositeVersion < 0x20000) return false;
+        return (codePoint >= 0xF0000 && codePoint < 0xFFFFE
             || codePoint >= 0x100000 && codePoint < 0x10FFFE);
    }

@ -353,7 +351,7 @@ public final class UCD implements UCD_Types {
        return combiningClassSet.get(0xFF & value);
    }

-    static UnicodeSet BIDI_R_SET, BIDI_AL_SET;
+    static UnicodeSet BIDI_R_SET, BIDI_AL_SET, BIDI_BN_SET;
    
    /**
     * Get the bidi class
@ -424,10 +422,17 @@ public final class UCD implements UCD_Types {
            BIDI_R_SET.removeAll(noncharacters);
            BIDI_AL_SET.removeAll(noncharacters);
            
-            
+            BIDI_BN_SET = new UnicodeSet();
+            if (compositeVersion >= 0x40001) {
+                BIDI_BN_SET.addAll(noncharacters);
+                UnicodeSet DefaultIg = DerivedProperty.make(DefaultIgnorable, this).getSet();
+                System.out.println("DefaultIg: " + DefaultIg);
+                BIDI_BN_SET.addAll(DefaultIg);
+            }                       
            
            System.out.println("BIDI_R_SET: " + BIDI_R_SET);
            System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
+            System.out.println("BIDI_BN_SET: " + BIDI_BN_SET);
            
            if (BIDI_R_SET.containsSome(BIDI_AL_SET)) {
                throw new ChainException("BIDI values for Cf characters overlap!!", null);
@ -435,6 +440,9 @@ public final class UCD implements UCD_Types {
            
        }

+        if (BIDI_BN_SET.contains(codePoint)) {
+            return BIDI_BN;
+        }
        if (BIDI_R_SET.contains(codePoint)) {
            return BIDI_R;
        }
@ -1012,7 +1020,7 @@ public final class UCD implements UCD_Types {
    }

    public static String getScriptID_fromIndex(byte prop, byte length) {
-        return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
+        return prop < 0 || prop >= UCD_Names.SCRIPT.length ? null
        : (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop];
    }

@ -1043,7 +1051,7 @@ public final class UCD implements UCD_Types {
        : style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
    }

-    public static int mapToRepresentative(int ch, boolean lessThan20105) {
+    public static int mapToRepresentative(int ch, int rCompositeVersion) {
        if (ch <= 0xFFFD) {
            //if (ch <= 0x2800) return ch;
            //if (ch <= 0x28FF) return 0x2800;    // braille
@ -1061,7 +1069,7 @@ public final class UCD implements UCD_Types {
            if (ch <= 0xDFFF) return 0xDC00;
            if (ch <= 0xE000) return ch;         // Private Use
            if (ch <= 0xF8FF) return 0xE000;
-            if (lessThan20105) {
+            if (rCompositeVersion < 0x20105) {
                if (ch <= 0xF900) return ch;         // CJK Compatibility Ideograp
                if (ch <= 0xFA2D) return 0xF900;
            }
@ -1069,14 +1077,20 @@ public final class UCD implements UCD_Types {
            if (ch <= 0xFDEF) return 0xFFFF;
        } else {
            if ((ch & 0xFFFE) == 0xFFFE) return 0xFFFF;         // Noncharacter
+            
            if (ch <= 0x20000) return ch;         // Extension B
            if (ch <= 0x2A6D6) return 0x20000;
            //if (ch <= 0x2F800) return ch;
            //if (ch <= 0x2FA1D) return 0x2F800;      // compat ideographs
-            if (ch <= 0xF0000) return ch;       // Plane 15 Private Use
+            if (ch < 0xF0000) return ch;       // Plane 15 Private Use
+            if (rCompositeVersion >= 0x20000) {
+                return 0xE000;
+            }
+            /*
            if (ch <= 0xFFFFD) return 0xF0000;       // Plane 16 Private Use
            if (ch <= 0x100000) return ch;       // Plane 15 Private Use
            if (ch <= 0x10FFFD) return 0x100000;       // Plane 16 Private Use
+            */
        }
        return ch;
    }
@ -1106,6 +1120,7 @@ public final class UCD implements UCD_Types {
        byte cat = getCategory(cp);
        if (cat == Mn || cat == Mc || cat == Nd || cat == Pc) return true;
        if (getBinaryProperty(cp, Other_ID_Start)) return true;
+        if (getBinaryProperty(cp, Other_ID_Continue)) return true;
        return false;
    }

@ -1189,7 +1204,7 @@ to guarantee identifier closure.
        if (codePoint >= 0x2800 && codePoint <= 0x28FF) return true; 
        if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) return true;
        
-        int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
+        int rangeStart = mapToRepresentative(codePoint, compositeVersion);
        switch (rangeStart) {
          default:
            return getRaw(codePoint) == null;
@ -1247,7 +1262,7 @@ to guarantee identifier closure.

        // do range stuff
        String constructedName = null;
-        int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
+        int rangeStart = mapToRepresentative(codePoint, compositeVersion);
        boolean isHangul = false;
        boolean isRemapped = false;
        switch (rangeStart) {
@ -1297,7 +1312,7 @@ to guarantee identifier closure.
          case   0xE000: // Private Use
          case  0xF0000: // Private Use
          case 0x100000: // Private Use
-            if (fixStrings) constructedName = "<private use area-" + Utility.hex(codePoint, 4) + ">";
+            if (fixStrings) constructedName = "<private-use-" + Utility.hex(codePoint, 4) + ">";
            isRemapped = true;
            break;
          case 0xD800: // Surrogate
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2004/02/18 03:09:01 $
-* $Revision: 1.26 $
+* $Date: 2004/03/11 19:03:16 $
+* $Revision: 1.27 $
 *
 *******************************************************************************
 */
@ -152,7 +152,8 @@ final class UCD_Names implements UCD_Types {
        "Logical_Order_Exception",
        "Other_ID_Start",
        "STerm",
-        "Variation_Selector"
+        "Variation_Selector",
+        "Other_ID_Continue",
    };

    static final String[] SHORT_BP = {
@ -189,7 +190,8 @@ final class UCD_Names implements UCD_Types {
        "LOE",
        "OIDS",
        "STerm",
-        "VS"
+        "VS",
+        "OIDC"
    };

    /*
@ -262,7 +264,7 @@ final class UCD_Names implements UCD_Types {
        "Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation",
        "Glue", "Nonstarter", "Exclamation", "BreakSymbols",
        "InfixNumeric", "PrefixNumeric", "PostfixNumeric",
-        "Numeric", "Alphabetic", "Ideographic", "Inseperable", "Hyphen",
+        "Numeric", "Alphabetic", "Ideographic", "Inseparable", "Hyphen",
        "CombiningMark", "BreakBefore", "BreakAfter", "Space",
        "MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
        "ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
@ -327,7 +329,8 @@ final class UCD_Names implements UCD_Types {
    "SHAVIAN",
    "OSMANYA",
    "CYPRIOT", 
-    "BRAILLE", 
+    "BRAILLE",
+    "KATAKANA_OR_HIRAGANA",
    
  };

@ -395,6 +398,7 @@ final class UCD_Names implements UCD_Types {
    "Osma",
    "Cprt",
    "Brai",
+    "Hrkt",

  };

@ -643,11 +647,13 @@ final class UCD_Names implements UCD_Types {
                case 9: s = style < LONG ? "VR" :  "Virama"; break;
                case 200: s = style < LONG ? "ATBL" :  "AttachedBelowLeft"; break;
                case 202: s = style < LONG ? "ATB" :  "AttachedBelow"; break;
+                /*
                case 204: s = style < LONG ? "ATBR" :  "AttachedBelowRight"; break;
                case 208: s = style < LONG ? "ATL" :  "AttachedLeft"; break;
                case 210: s = style < LONG ? "ATR" :  "AttachedRight"; break;
                case 212: s = style < LONG ? "ATAL" :  "AttachedAboveLeft"; break;
-                case 214: s = style < LONG ? "ATA" :  "AttachedAbove"; break;
+                case 214: s = style < LONG ? "ATA" :  "AttachedAbove"; break;                
+                */
                case 216: s = style < LONG ? "ATAR" :   "AttachedAboveRight"; break;
                case 218: s = style < LONG ? "BL" :   "BelowLeft"; break;
                case 220: s = style < LONG ? "B" :   "Below"; break;
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2004/02/18 03:09:01 $
-* $Revision: 1.27 $
+* $Date: 2004/03/11 19:03:16 $
+* $Revision: 1.28 $
 *
 *******************************************************************************
 */
@ -15,9 +15,7 @@ package com.ibm.text.UCD;

 public interface UCD_Types {
    
-    public static final int dVersion = 6; // change to fix the generated file D version. If less than zero, no "d"
-    static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild
-    
+    static final byte BINARY_FORMAT = 15; // bumped if binary format of UCD changes. Forces rebuild   
    
    public static final String BASE_DIR = "C:\\DATA\\";
    public static final String UCD_DIR = BASE_DIR + "UCD\\";
@ -213,9 +211,10 @@ public interface UCD_Types {
        Soft_Dotted = 29,
        Logical_Order_Exception = 30,
        Other_ID_Start = 31,
-    Sentence_Terminal = 32,
-    Variation_Selector = 33,
-	    LIMIT_BINARY_PROPERTIES = 34;
+        Sentence_Terminal = 32,
+        Variation_Selector = 33,
+        Other_ID_Continue = 34,
+	    LIMIT_BINARY_PROPERTIES = 35;

 	/*
    static final int
@ -383,7 +382,8 @@ public interface UCD_Types {
        OSMANYA = 51,
        CYPRIOT = 52,
        BRAILLE = 53,
-        LIMIT_SCRIPT = 54;
+        KATAKANA_OR_HIRAGANA = 54,
+        LIMIT_SCRIPT = 55;

  static final int
    UNKNOWN = 0,
--- a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
+++ b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
@ -7,6 +7,7 @@ import java.io.PrintWriter;

 import com.ibm.text.UCD.Default;
 import com.ibm.text.UCD.GenerateData;
+import com.ibm.text.UCD.MakeUnicodeFiles;
 import com.ibm.text.UCD.UCD_Types;

 public class UnicodeDataFile {
@ -26,16 +27,23 @@ public class UnicodeDataFile {
        
        result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
        result.out.println(generateDateLine());
-        result.out.println("#");
+        result.out.println("#");        
+        result.out.println("# Unicode Character Database");        
+        result.out.println("# Copyright (c) 1991-2004 Unicode, Inc.");
+        result.out.println(
+            "# For terms of use, see http://www.unicode.org/terms_of_use.html");
+        result.out.println("# For documentation, see UCD.html");
        try {
            Utility.appendFile(filename + "Header.txt", Utility.LATIN1, result.out);
        } catch (FileNotFoundException e) {
+            /*
            result.out.println("# Unicode Character Database: Derived Property Data");
            result.out.println("# Generated algorithmically from the Unicode Character Database");
            result.out.println("# For documentation, see UCD.html");
            result.out.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
            result.out.println("#       if they have default property values.");
            result.out.println("# ================================================");
+            */
        }
        
        return result;
@ -51,14 +59,20 @@ public class UnicodeDataFile {
    }

    public static String getHTMLFileSuffix(boolean withDVersion) {
-        return "-" + Default.ucd().getVersion() 
-            + ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "") 
+        return "-"
+            + Default.ucd().getVersion()
+            + ((withDVersion && MakeUnicodeFiles.dVersion >= 0)
+                ? ("d" + MakeUnicodeFiles.dVersion)
+                : "")
            + ".html";
    }

    public static String getFileSuffix(boolean withDVersion) {
-        return "-" + Default.ucd().getVersion() 
-            + ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "") 
+        return "-"
+            + Default.ucd().getVersion()
+            + ((withDVersion && MakeUnicodeFiles.dVersion >= 0)
+                ? ("d" + MakeUnicodeFiles.dVersion)
+                : "")
            + ".txt";
    }

--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2004/02/18 03:09:02 $
-* $Revision: 1.40 $
+* $Date: 2004/03/11 19:03:16 $
+* $Revision: 1.41 $
 *
 *******************************************************************************
 */
@ -725,8 +725,7 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
    public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException {
        File file = new File(directory + filename);
        Utility.fixDot();
-        System.out.print("Creating File: " + file);
-        System.out.println("\t" + file.getCanonicalPath());
+        System.out.println("Creating File: " + file.getCanonicalPath());
        File parent = new File(file.getParent());
        //System.out.println("Creating File: "+ parent);
        parent.mkdirs();