From 2ddcba4a16cdcb148b1a71468833993695597070 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Wed, 19 Mar 2003 17:30:58 +0000 Subject: [PATCH] Latest updates for UCD, default values. Fixed UTF-8 output for UCA, Logical_Order_Exceptions X-SVN-Rev: 11358 --- tools/unicodetools/com/ibm/text/UCA/UCA.java | 15 ++-- .../com/ibm/text/UCA/WriteCollationData.java | 16 ++-- .../com/ibm/text/UCD/GenerateData.java | 10 +-- tools/unicodetools/com/ibm/text/UCD/Main.java | 27 ++++++- .../com/ibm/text/UCD/MyPropertyLister.java | 19 +++-- .../com/ibm/text/UCD/PropertyLister.java | 13 +++- tools/unicodetools/com/ibm/text/UCD/UCD.java | 73 ++++++++++++++++++- .../com/ibm/text/UCD/UCD_Names.java | 51 +++++++++---- .../com/ibm/text/UCD/UCD_Types.java | 6 +- .../com/ibm/text/UCD/UnicodeProperty.java | 1 + .../ibm/text/UCD/UnifiedBinaryProperty.java | 29 ++++++-- .../ibm/text/utility/DirectoryIterator.java | 2 + .../com/ibm/text/utility/Utility.java | 5 +- 13 files changed, 207 insertions(+), 60 deletions(-) diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java index c63560b47c4..c1856fcf23a 100644 --- a/tools/unicodetools/com/ibm/text/UCA/UCA.java +++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ -* $Date: 2003/03/18 00:28:18 $ -* $Revision: 1.19 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.20 $ * ******************************************************************************* */ @@ -926,7 +926,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.] */ static final char EMPTY = '\uFFFF'; char rearrangeBuffer = EMPTY; - UnicodeSet rearrangeList = new UnicodeSet(); + UnicodeSet rearrangeList = null; int hangulBufferPosition = 0; StringBuffer hangulBuffer = new StringBuffer(); @@ -1308,6 +1308,12 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.] 
StringBuffer multiChars = new StringBuffer(); // used for contracting chars String inputLine = ""; boolean[] wasImplicitLeadPrimary = new boolean[1]; + + // In UAX 3.1, the rearrange list is moved to UCD. + + rearrangeList = UnifiedBinaryProperty.make(UCD.BINARY_PROPERTIES + UCD.Logical_Order_Exception, ucd) + .getSet(); + while (true) try { inputLine = in.readLine(); @@ -1422,11 +1428,10 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.] UCD.BINARY_PROPERTIES + UCD.Logical_Order_Exception, ucd); UnicodeSet desiredSet = ubp.getSet(); - /*if (!rearrangeList.equals(desiredSet)) { + if (!rearrangeList.equals(desiredSet)) { throw new IllegalArgumentException("Rearrangement should be " + desiredSet.toPattern(true) + ", but is " + rearrangeList.toPattern(true)); } - */ ucaData.checkConsistency(); diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java index 7216f538e82..543bda4f2cf 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ -* $Date: 2002/10/05 02:16:17 $ -* $Revision: 1.28 $ +* $Date: 2003/03/19 17:30:55 $ +* $Revision: 1.29 $ * ******************************************************************************* */ @@ -1234,7 +1234,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON "UTF8"), 32*1024)); */ - PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.LATIN1_WINDOWS); + PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.UTF8_WINDOWS); diLog.write('\uFEFF'); @@ -1413,7 +1413,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON "UTF8"), 32*1024)); */ - PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.LATIN1_WINDOWS); + 
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.UTF8_WINDOWS); diLog.write('\uFEFF'); @@ -1660,7 +1660,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON int[] lenArray = new int[1]; Set alreadyDone = new HashSet(); - PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.LATIN1_WINDOWS); + PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.UTF8_WINDOWS); while (true) { String s = cc.next(ces, lenArray); @@ -1784,7 +1784,7 @@ F900..FAFF; CJK Compatibility Ideographs if (shortPrint) filename += "_SHORT"; if (option == IN_XML) filename += ".xml"; else filename += ".txt"; - log = Utility.openPrintWriter(filename, Utility.LATIN1_WINDOWS); + log = Utility.openPrintWriter(filename, Utility.UTF8_WINDOWS); String[] commentText = { "UCA Rules", @@ -3102,8 +3102,8 @@ F900..FAFF; CJK Compatibility Ideographs log.println("[first variable " + firstVariable.formatFCE() + "]"); log.println("[last variable " + lastVariable.formatFCE() + "]"); - log.println("[first non-ignorable " + firstNonIgnorable.formatFCE() + "]"); - log.println("[last non-ignorable " + lastNonIgnorable.formatFCE() + "]"); + log.println("[first regular " + firstNonIgnorable.formatFCE() + "]"); + log.println("[last regular " + lastNonIgnorable.formatFCE() + "]"); log.println("[first implicit " + (new FCE(false,firstImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]"); diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index a1344dd7002..597d9ae098b 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2003/03/15 02:36:48 $ -* $Revision: 1.26 $ +* $Date: 2003/03/19 17:30:58 $ +* $Revision: 1.27 $ * 
******************************************************************************* */ @@ -123,8 +123,8 @@ public class GenerateData implements UCD_Types { output.println("# Generated algorithmically from the Unicode Character Database"); } output.println("# For documentation, see UCD.html"); - output.println("# Note: Unassigned and Noncharacter codepoints are omitted,"); - output.println("# except when listing Noncharacter or Cn."); + output.println("# Note: Unassigned and Noncharacter codepoints may be omitted"); + output.println("# if they have default property values."); output.println(HORIZONTAL_LINE); output.println(); } @@ -916,7 +916,7 @@ public class GenerateData implements UCD_Types { for (int i = startEnum; i < endEnum; ++i) { UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd); if (up == null) continue; - if (up.isDefaultValue()) continue; + if (up.skipInDerivedListing()) continue; /* if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java index b8b0d2e32a6..c176a2b4685 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Main.java +++ b/tools/unicodetools/com/ibm/text/UCD/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $ -* $Date: 2003/03/15 02:36:48 $ -* $Revision: 1.28 $ +* $Date: 2003/03/19 17:30:57 $ +* $Revision: 1.29 $ * ******************************************************************************* */ @@ -40,10 +40,24 @@ public final class Main implements UCD_Types { "HangulSyllableType", "DerivedAge", "StandardizedVariants", - //"HangulSyllable", + "HangulSyllable", //"OtherDerivedProperties", }; + static final String[] EXTRACTED_FILES = { + "DerivedBidiClass", + "DerivedBinaryProperties", + "DerivedCombiningClass", + "DerivedDecompositionType", + "DerivedEastAsianWidth", + "DerivedGeneralCategory", + 
"DerivedJoiningGroup", + "DerivedJoiningType", + "DerivedLineBreak", + "DerivedNumericType", + "DerivedNumericValues", + }; + public static void main (String[] args) throws Exception { for (int i = 0; i < args.length; ++i) { @@ -60,6 +74,13 @@ public final class Main implements UCD_Types { if (arg.equalsIgnoreCase("All")) { args = Utility.append(ALL_FILES, Utility.subarray(args, i+1)); + i = -1; + continue; + } + + if (arg.equalsIgnoreCase("EXTRACTED")) { + args = Utility.append(EXTRACTED_FILES, Utility.subarray(args, i+1)); + i = -1; continue; } diff --git a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java index 17249bcbd0e..63929642ef4 100644 --- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java +++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $ -* $Date: 2002/03/20 00:21:42 $ -* $Revision: 1.8 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.9 $ * ******************************************************************************* */ @@ -22,7 +22,9 @@ final class MyPropertyLister extends PropertyLister { private int propMask; - UnicodeProperty up; + private boolean isDefaultValue = false; + + private UnicodeProperty up; public MyPropertyLister(UCD ucd, int propMask, PrintWriter output) { this.propMask = propMask; @@ -30,6 +32,7 @@ final class MyPropertyLister extends PropertyLister { this.ucdData = ucd; up = UnifiedBinaryProperty.make(propMask, ucd); if (propMask < COMBINING_CLASS) usePropertyComment = false; // skip gen cat + isDefaultValue = up.isDefaultValue(); } public String headerString() { @@ -57,6 +60,10 @@ final class MyPropertyLister extends PropertyLister { return up.getValue(cp); } + public String missingValueName() { + return up.getValue(NORMAL); + } + public String optionalComment(int 
cp) { if (propMask < COMBINING_CLASS) return ""; // skip gen cat int cat = ucdData.getCategory(cp); @@ -83,15 +90,17 @@ final class MyPropertyLister extends PropertyLister { // System.out.println(Utility.hex(firstRealCp)); //} - if (cat == Cn + if (isDefaultValue + && cat == Cn && propMask != (BINARY_PROPERTIES | Noncharacter_Code_Point) && propMask != (BINARY_PROPERTIES | Other_Default_Ignorable_Code_Point) && propMask != (CATEGORY | Cn)) { if (BRIDGE) return CONTINUE; else return EXCLUDE; } - + boolean inSet = up.hasValue(cp); + /* if (cp >= 0x1D400 && cp <= 0x1D7C9 && cat != Cn) { if (propMask == (SCRIPT | LATIN_SCRIPT)) inSet = cp <= 0x1D6A3; diff --git a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java index e0aec3559c7..70c65085623 100644 --- a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java +++ b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $ -* $Date: 2003/02/25 23:38:22 $ -* $Revision: 1.10 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.11 $ * ******************************************************************************* */ @@ -52,6 +52,10 @@ abstract public class PropertyLister implements UCD_Types { return ""; } + public String missingValueName() { + return ""; + } + public String optionalName(int cp) { return ""; } @@ -226,7 +230,10 @@ abstract public class PropertyLister implements UCD_Types { format(firstRealCp, lastRealCp, realRangeCount); } - if (count == 0) System.out.println("WARNING -- ZERO COUNT FOR " + header); + if (count == 0) { + output.println("# No values for " + missingValueName()); + System.out.println("ZERO COUNT for " + missingValueName()); + } NumberFormat nf = NumberFormat.getInstance(); nf.setMaximumFractionDigits(0); nf.setGroupingUsed(false); diff --git 
a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java index 6bced563b63..ebdf052c808 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $ -* $Date: 2003/03/15 02:36:48 $ -* $Revision: 1.22 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.23 $ * ******************************************************************************* */ @@ -26,7 +26,7 @@ import java.io.FileInputStream; import java.io.BufferedReader; import com.ibm.text.utility.*; - +import com.ibm.icu.text.UnicodeSet; public final class UCD implements UCD_Types { @@ -346,11 +346,76 @@ public final class UCD implements UCD_Types { return combiningClassSet.get(0xFF & value); } + static UnicodeSet BIDI_R_SET, BIDI_AL_SET; + /** * Get the bidi class */ public byte getBidiClass(int codePoint) { - return get(codePoint, false).bidiClass; + if (getCategory(codePoint) != Cn) return get(codePoint, false).bidiClass; + + if (BIDI_R_SET == null) { // build it + + BIDI_R_SET = new UnicodeSet(); + BIDI_AL_SET = new UnicodeSet(); + + int blockId = 0; + BlockData blockData = new BlockData(); + while (Default.ucd.getBlockData(blockId++, blockData)) { + if (blockData.name.equals("Hebrew") + || blockData.name.equals("Cypriot_Syllabary") + ) { + System.out.println("R: Adding " + blockData.name + ": " + + Utility.hex(blockData.start) + + ".." + Utility.hex(blockData.end)); + BIDI_R_SET.add(blockData.start, blockData.end); + } else if (blockData.name.equals("Arabic") + || blockData.name.equals("Syriac") + || blockData.name.equals("Thaana") + || blockData.name.equals("Arabic_Presentation_Forms-A") + || blockData.name.equals("Arabic_Presentation_Forms-B") + ) { + System.out.println("AL: Adding " + blockData.name + ": " + + Utility.hex(blockData.start) + + ".." 
+ Utility.hex(blockData.end)); + BIDI_AL_SET.add(blockData.start, blockData.end); + } else { + if (false) System.out.println("SKIPPING: " + blockData.name + ": " + + Utility.hex(blockData.start) + + ".." + Utility.hex(blockData.end)); + } + } + + System.out.println("BIDI_R_SET: " + BIDI_R_SET); + System.out.println("BIDI_AL_SET: " + BIDI_AL_SET); + + UnicodeSet BIDI_R_Delta = new UnicodeSet(0xFB1D, 0xFB4F).add(0x10800, 0x10FFF).add(0x07C0,0x8FF); // starts at 07C0: 07BF is the last Thaana code point (AL) and must not also be in the R set + BIDI_R_Delta.removeAll(BIDI_R_SET); + System.out.println("R: Adding " + BIDI_R_Delta); + BIDI_R_SET.addAll(BIDI_R_Delta); + + UnicodeSet BIDI_AL_Delta = new UnicodeSet(0x0750, 0x077F); + BIDI_AL_Delta.removeAll(BIDI_AL_SET); + System.out.println("AL: Adding " + BIDI_AL_Delta); + BIDI_AL_SET.addAll(BIDI_AL_Delta); + + UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point).getSet(); + + System.out.println("Removing Noncharacters! " + noncharacters); + BIDI_R_SET.removeAll(noncharacters); + BIDI_AL_SET.removeAll(noncharacters); + + System.out.println("BIDI_R_SET: " + BIDI_R_SET); + System.out.println("BIDI_AL_SET: " + BIDI_AL_SET); + } + + if (BIDI_R_SET.contains(codePoint)) { + return BIDI_R; + } + if (BIDI_AL_SET.contains(codePoint)) { + return BIDI_AL; + } + return BIDI_L; } /** diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java index a389cd385d2..c55dd460bb4 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $ -* $Date: 2003/03/15 02:36:48 $ -* $Revision: 1.18 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.19 $ * ******************************************************************************* */ @@ -39,23 +39,44 @@ final class UCD_Names implements UCD_Types { }; static 
final String[] UNIFIED_PROPERTY_HEADERS = { - "General Category (listing UnicodeData.txt, field 2: see UnicodeData.html)", - "Combining Class (listing UnicodeData.txt, field 3: see UnicodeData.html)", - "Bidi Class (listing UnicodeData.txt, field 4: see UnicodeData.html)", - "Decomposition Type (from UnicodeData.txt, field 5: see UnicodeData.html)", - "Numeric Type (from UnicodeData.txt, field 6/7/8: see UnicodeData.html)", - "East Asian Width (listing EastAsianWidth.txt, field 1)", - "Line Break (listing LineBreak.txt, field 1)", + "General Category (listing UnicodeData.txt, field 2: see UCD.html)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: Cn.", + "Combining Class (listing UnicodeData.txt, field 3: see UCD.html)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: 0.", + "Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: L.", + "Decomposition Type (from UnicodeData.txt, field 5: see UCD.html)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: None.", + "Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan.txt: see UCD.html)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: None.", + "East Asian Width (listing EastAsianWidth.txt, field 1)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: N.", + "Line Break (listing LineBreak.txt, field 1)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: XX.", "Joining Type (listing ArabicShaping.txt, field 1).\r\n" + "#\tType T is derived, as described in ArabicShaping.txt\r\n" - + "#\tAll code points not listed here have the type U", - "Joining Group (listing ArabicShaping.txt, field 2)", - "BidiMirrored 
(listing UnicodeData.txt, field 9: see UnicodeData.html)", + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: U.", + "Joining Group (listing ArabicShaping.txt, field 2)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: NO_JOINING_GROUP.", + "BidiMirrored (listing UnicodeData.txt, field 9: see UCD.html)\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: N.", "Script\r\n" - + "#\tThe value for all code points not explicitly listed in this file is COMMON." - , + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: COMMON.", "Age (from a comparison of UCD versions 1.1 [minus Hangul], 2.0, 2.1, 3.0, 3.1)", - "Hangul Syllable Type\r\n# All codepoints not explicitly listed here have the value NA", + "Hangul Syllable Type\r\n" + + "#\tAll code points not explicitly listed in this file have the property\r\n" + + "#\tvalue: NA.", "Derived" }; diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java index c4a614271f4..600ad1d9c26 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $ -* $Date: 2003/03/15 02:36:48 $ -* $Revision: 1.19 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.20 $ * ******************************************************************************* */ @@ -15,7 +15,7 @@ package com.ibm.text.UCD; public interface UCD_Types { - public static final int dVersion = 18; // change to fix the generated file D version. If less than zero, no "d" + public static final int dVersion = 19; // change to fix the generated file D version. 
If less than zero, no "d" public static final String BASE_DIR = "C:\\DATA\\"; public static final String UCD_DIR = BASE_DIR + "UCD\\"; diff --git a/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java index 2595da194fb..d3ee3d69bdf 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java +++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java @@ -35,6 +35,7 @@ public abstract class UnicodeProperty implements UCD_Types { public boolean isStandard() { return isStandard; } public void setStandard(boolean in) { isStandard = in; } + public boolean skipInDerivedListing() {return false;} public boolean isDefaultValue() {return false;} /** diff --git a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java index b61ea3ebfc4..7f635d8cf57 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java +++ b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $ -* $Date: 2003/02/25 23:38:22 $ -* $Revision: 1.11 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.12 $ * ******************************************************************************* */ @@ -233,7 +233,7 @@ public final class UnifiedBinaryProperty extends UnicodeProperty { return false; } - public boolean isDefaultValue() { + public boolean skipInDerivedListing() { switch ((majorProp<<8) | propValue) { //case CATEGORY | Cn: //case COMBINING_CLASS | 0: @@ -244,17 +244,32 @@ public final class UnifiedBinaryProperty extends UnicodeProperty { // case LINE_BREAK | LB_XX: case JOINING_TYPE | JT_U: case JOINING_GROUP | NO_SHAPING: - case BINARY_PROPERTIES | Non_break: - case BINARY_PROPERTIES | CaseFoldTurkishI: case SCRIPT | COMMON_SCRIPT: case HANGUL_SYLLABLE_TYPE 
| NA: + case BINARY_PROPERTIES | Non_break: + case BINARY_PROPERTIES | CaseFoldTurkishI: return true; } return false; } - - + public boolean isDefaultValue() { + switch (majorProp) { + case CATEGORY>>8: return propValue == Cn; + case COMBINING_CLASS>>8: return propValue == 0; + case BIDI_CLASS>>8: return propValue == BIDI_L; + case DECOMPOSITION_TYPE>>8: return propValue == NONE; + case NUMERIC_TYPE>>8: return propValue == NUMERIC_NONE; + case EAST_ASIAN_WIDTH>>8: return propValue == EAN; + case LINE_BREAK>>8: return propValue == LB_XX; + case JOINING_TYPE>>8: return propValue == JT_U; + case JOINING_GROUP>>8: return propValue == NO_SHAPING; + case SCRIPT>>8: return propValue == COMMON_SCRIPT; + case HANGUL_SYLLABLE_TYPE>>8: return propValue == NA; + } + return false; + } + public boolean hasValue(int cp) { try { switch (majorProp) { diff --git a/tools/unicodetools/com/ibm/text/utility/DirectoryIterator.java b/tools/unicodetools/com/ibm/text/utility/DirectoryIterator.java index 3069165ed4d..69632e9eb18 100644 --- a/tools/unicodetools/com/ibm/text/utility/DirectoryIterator.java +++ b/tools/unicodetools/com/ibm/text/utility/DirectoryIterator.java @@ -156,6 +156,7 @@ public class DirectoryIterator { if (line1 == null) { if (line2 == null) return true; if (show) { + Utility.fixDot(); System.out.println("Found difference in : " + file1 + ", " + file2); System.out.println(" Line1: " + line1); System.out.println(" Line2: " + line2); @@ -167,6 +168,7 @@ public class DirectoryIterator { if (line1.startsWith("# Date") && line2.startsWith("# Date")) continue; if (lineCount == 0 && line1.startsWith("#") && line2.startsWith("#")) continue; if (show) { + Utility.fixDot(); System.out.println("Found difference in : " + file1 + ", " + file2); System.out.println(" Line1: " + line1); System.out.println(" Line2: " + line2); diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index e3669605a1e..21d3b589d8a 100644 --- 
a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2003/03/15 02:36:47 $ -* $Revision: 1.29 $ +* $Date: 2003/03/19 17:30:56 $ +* $Revision: 1.30 $ * ******************************************************************************* */ @@ -906,6 +906,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES } else { if (line1 == null) line1 = ""; if (line2 == null) line2 = ""; + fixDot(); System.out.println("Found difference in : " + file1 + ", " + file2); int diff = compare(line1, line2); System.out.println(" Line1: '" + line1.substring(0,diff) + "', '" + line1.substring(diff));