" + pp.toPattern(set) + "
"); + pw.println(); + } + + static UnicodeSet getIDNInput() { + if (!initialized) initialize(); + return IDNInputOnly; + } + + static UnicodeSet getIDNOutput() { + if (!initialized) initialize(); + return IDNInputOnly; + } + + private static void initialize() { + UnicodeSet oddballs = new UnicodeSet("[\u034F \u180B-\u180D \uFE00-\uFE0F _]"); + UCD U32 = UCD.make("3.2.0"); + Normalizer nfkc32 = new Normalizer(Normalizer.NFKC, "3.2.0"); + UCDProperty xid32 = DerivedProperty.make(UCD.Mod_ID_Continue_NO_Cf,U32); + UnicodeSet IDInput32 = xid32.getSet(); + IDInput32.add('-').removeAll(oddballs); + + UCD U50 = UCD.make("5.0.0"); + Normalizer nfkc50 = new Normalizer(Normalizer.NFKC, "5.0.0"); + UCDProperty xid50 = DerivedProperty.make(UCD.Mod_ID_Continue_NO_Cf,U50); + UnicodeSet IDInput50 = xid50.getSet(); + IDInput50.add('-').removeAll(oddballs); + + for (int i = 0; i < 0x10FFFF; ++i) { + if ((i & 0xFFF) == 0) { + System.out.println(i); + System.out.flush(); + } + int type = getIDNAType(i); + if (type == OK) { + IDNOutput.add(i); + } else if (type != ILLEGAL) { + IDNInputOnly.add(i); + } + if (IDInput32.contains(i)) { + splitSet(IDInputOnly32, IDOutput32, U32, nfkc32, i); + } + if (IDInput50.contains(i)) { + splitSet(IDInputOnly50, IDOutput50, U50, nfkc50, i); + } + } + initialized = true; + } + + private static void splitSet(UnicodeSet inputOnlySet, UnicodeSet outputSet, UCD ucd, Normalizer nfkc, int i) { + if (i < 0x7F) { + outputSet.add(i); + return; + } + String v = UTF16.valueOf(i); + String s = ucd.getCase(i, UCD.FULL, UCD.FOLD); + if (s.equals(v)) { + s = nfkc.normalize(s); + if (s.equals(v)) { + s = ucd.getCase(s, UCD.FULL, UCD.FOLD); + if (s.equals(v)) { + outputSet.add(i); + return; + } + } + } + inputOnlySet.add(i); + } + + static public int getIDNAType(int cp) { + if (cp == '-') return OK; + inbuffer.setLength(0); + UTF16.append(inbuffer, cp); + try { + intermediate = IDNA.convertToASCII(inbuffer, + IDNA.DEFAULT); // USE_STD3_RULES + if (intermediate.length() 
== 0) + return DELETED; + outbuffer = IDNA.convertToUnicode(intermediate, + IDNA.USE_STD3_RULES); + } catch (StringPrepParseException e) { + return ILLEGAL; + } catch (Exception e) { + System.out.println("Failure at: " + Utility.hex(cp)); + return ILLEGAL; + } + if (!TestData.equals(inbuffer, outbuffer)) + return REMAPPED; + return OK; + } + +} \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt b/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt new file mode 100644 index 00000000000..29e73dd2ae8 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt @@ -0,0 +1,75 @@ +Let $letter = [$gc:Lu $gc:Ll $gc:Lt $gc:Lo $gc:Lm]; +Let $number = [$gc:Nd $gc:Nl $gc:No] +Let $mark = [$gc:mn $gc:me $gc:mc] +Let $LMN = [$letter $number $mark] +Let $gcAllPunctuation = [$gc:Open_Punctuation $gc:Close_Punctuation $gc:Dash_Punctuation $gc:Connector_Punctuation $gc:Other_Punctuation $gc:Initial_Punctuation $gc:Final_Punctuation] +Let $gcAllSymbols = [$gc:Currency_Symbol $gc:Modifier_Symbol $gc:Math_Symbol $gc:Other_Symbol] +Let $nfc = [^$NFC_Quick_Check:No] + +Show $nfc + +Show [$alphabetic - [$mark $letter $number]] + + +Let $oldCJK = [\u1100-\u11FF \u3040-\u30FF \u3130-\u318F \u31F0-\u31FF \u3400-\u4DBF \u4E00-\u9FFF \uAC00-\uD7AF \uF900-\uFAFF \uFF65-\uFFDC] + +Show [$oldCJK & $gc:cn] + +Let $fixedOld = [$oldCJK-$gc:cn] + + +#List the non-alphabetic old items +#Show [$oldCJK-$gc:cn-$alphabetic] + +#Check for differences +#Test $fixedOld = $trialNew + +#ShowEach $mark + +Let $uax29_outliers = [\u3031-\u3035 \u309B-\u309C \u30A0 \u30FC \uFF70 \uFF9E-\uFF9F] +Let $other_outliers = [\u3099-\u309A \u3006 \u303C \u302A-\u302E \u302F \U000E0100-\U000E01EF] + +# ========================================== + +# Outliers from UAX29 +Show $uax29_outliers + +# Additional outliers +Show $other_outliers + +# Take the 5 CJK scripts +Let $trialScripts = [$script:hani $script:hang $script:kana $script:hira $script:bopo] + +# Remove 
the non-LMN +Let $trialNewBase = [$trialScripts & $LMN] + +# Add the outliers +Let $trialNew = [$trialNewBase $uax29_outliers $other_outliers] + +# Show our result +Show $trialNew + +# As a double-check, show script characters we're tossing +Show [$trialScripts - $trialNew] + +# Compare snippets stuff +Let $guessClose = [$lb:QU $lb:Close_Punctuation] +Let $__closing_punc = ["')>\]`\}\u00AB\u00BB\u2018\u2019\u201C\u201D\u2039\u203A\u207E\u208E\u27E7\u27E9\u27EB\u2984\u2986\u2988\u298A\u298C\u298E\u2990\u2992\u2994\u2996\u2998\u29D9\u29DB\u29FD\u3009\u300B\u300D\u300F\u3011\u3015\u3017\u3019\u301B\u301E\u301F\uFD3F\uFE42\uFE44\uFE5A\uFE5C\uFF02\uFF07\uFF09\uFF3D\uFF5D\uFF63] + +$guessClose = $__closing_punc + +Let $guessClose = [$gc:pf $gc:pe $gc:pi] +$guessClose = $__closing_punc + +Let $guessTerm = [$sb:aterm $sb:sterm] +$guessTerm = [? ? !?? ? ? ? ? ??? ? ? ? ? ? ? ? .?? … ? ? ? ? ? ? ? ?? ? ? ? ? ? ? ?] + +Let $__issymotherr = [\u00A6\u00A7\u06FD\u06FE\u0F01-\u0F03\u0F13-\u0F17\u0F1A-\u0F1F\u0FBE-\u0FC5\u0FC7-\u0FCC\u2100\u2101\u2104-\u2106\u2108\u2109\u2117\u2118\u211E-\u2121\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u2400-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u2613\u2619-\u266E\u2670\u2671\u2701-\u2704\u2706-\u2709\u270C-\u2727\u2729-\u274B\u274F-\u2752\u2758-\u275E\u2761-\u2794\u2798-\u27AF\u27B1-\u27BE\u2800-\u28FF\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3012\u3013\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u3200-\u321C\u322A-\u3243\u3260-\u327B\u328A-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uA490-\uA4A1\uA4A4-\uA4B3\uA4B5-\uA4C0\uA4C2-\uA4C4\uFFED\uFFEE\uFFFC\uFFFD] +Let $__issymothers = 
[\u00B6\u0482\u06E9\u09FA\u0B70\u0F34\u0F36\u0F38\u0FCF\u2114\u2123\u2125\u2127\u2129\u212E\u2132\u213A\u21D3\u220E\u2617\u274D\u2756\u3004\u3020\u327F\uA4C6\uFFE4\uFFE8] + +Let $symOther = [$__issymotherr $__issymothers] + +$symOther = $gcAllSymbols + + +[$symOther & $nfc] = [$gcAllSymbols & $nfc] diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java index c059d8d1c74..f0b273399f3 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Main.java +++ b/tools/unicodetools/com/ibm/text/UCD/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $ -* $Date: 2005/10/11 19:39:15 $ -* $Revision: 1.36 $ +* $Date: 2006/04/05 22:12:44 $ +* $Revision: 1.37 $ * ******************************************************************************* */ @@ -160,8 +160,9 @@ public final class Main implements UCD_Types { //else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros(); else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null); - else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]}); - + else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]}); + else if (arg.equalsIgnoreCase("MakeUnicodeFiles")) MakeUnicodeFiles.main(new String[]{}); + //else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo(); else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts(); else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest(); diff --git a/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java b/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java index e56e21967c2..3895442b7c3 100644 --- a/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java +++ b/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java @@ -16,6 +16,7 @@ import java.util.regex.Matcher; import 
java.util.regex.Pattern; import com.ibm.icu.dev.test.util.BagFormatter; +import com.ibm.icu.dev.test.util.TransliteratorUtilities; import com.ibm.icu.dev.test.util.UnicodeMap; import com.ibm.icu.dev.test.util.UnicodePropertySource; import com.ibm.icu.text.Collator; @@ -71,7 +72,7 @@ public class MakeNamesChart { System.out.println("file: " + chartPrefix + fileName); PrintWriter out = BagFormatter.openUTF8Writer("C:/DATA/GEN/charts/namelist/", chartPrefix + fileName); out.println("![]() |
+
+ ![]() |
|||||||||||||||||||||||||
@@ -29,105 +27,78 @@ |
Revision | -@revision@ | -
Authors | -Members of the Editorial Committee | -
Date | -@date@ | -
This Version | -http://www.unicode.org/Public/@updateDirectory@/StandardizedVariants-@revision@.html | -
Previous Version | -http://www.unicode.org/Public/3.2-Update/StandardizedVariants-3.2.0.html | -
Latest Version | -http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html | -
Revision | +@revision@ | +
Authors | +Members of the Editorial Committee | +
Date | +@date@ | +
This Version | ++ http://www.unicode.org/Public/@updateDirectory@/@filename@.html | +
Previous Version | ++ http://www.unicode.org/Public/4.1.0/ucd/StandardizedVariants.html | +
Latest Version | ++ http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html | +
-This file provides a visual display of the standard variant sequences - derived from StandardizedVariants.txt.
+This file provides a visual display of the standard variant sequences derived from + StandardizedVariants.txt.
-The file and the files described herein are part of the Unicode - Character Database (UCD) and are governed by the UCD - Terms of Use stated at the end.
+This file and the files described herein are part of the Unicode Character Database and + are governed by the terms of use at + http://www.unicode.org/terms_of_use.html.
The tables here exhaustively lists the valid, registered - combinations of base character plus variation indicator. All combinations not - listed in StandardizedVariants.txt are unspecified and are reserved for future - standardization; no conformant process may interpret them as standardized - variants. Variation selectors and their use are described in The Unicode - Standard.
-These mathematical variants are all produced with the addition of Variation - Selector 1 (VS1 or U+FE00) to mathematical operator base characters. There is - no variation according to context. The Mongolian variants use the Mongolian - Variant Selectors, and may vary according to context. That is, if a contextual - shape is not listed below, then the variation sequence has an unmodified +
The tables here exhaustively list the valid, registered combinations of base character + plus variation indicator. All combinations not listed in StandardizedVariants.txt are unspecified + and are reserved for future standardization; no conformant process may interpret them as + standardized variants. Variation selectors and their use are described in The Unicode Standard.
+These mathematical variants are all produced with the addition of Variation Selector 1 (VS1 or + U+FE00) to mathematical operator base characters. There is no variation according to context. The + Mongolian variants use the Mongolian Variant Selectors, and may vary according to context. That + is, if a contextual shape is not listed below, then the variation sequence has an unmodified appearance. At this time no Han variants exist.
-Note: The glyphs used to show the variations - are often derived from different physical fonts than the representative - glyphs in the standard. They may therefore exhibit minor differences in - size, proportion, or weight unrelated to the intentional difference - in feature that is the defining element of the variation. Such minor - differences should be ignored. Likewise, in some cases the existing - representative fonts may not yet contain newly encoded characters and hence - some representative glyphs shown in these tables may have a slightly - different style than others.
+Note: The glyphs used to show the variations are often derived + from different physical fonts than the representative glyphs in the standard. They may therefore + exhibit minor differences in size, proportion, or weight unrelated to the intentional + difference in feature that is the defining element of the variation. Such minor differences + should be ignored. Likewise, in some cases the existing representative fonts may not yet contain + newly encoded characters and hence some representative glyphs shown in these tables may have a + slightly different style than others.
@table@
--The Unicode Character Database is provided as is by Unicode, Inc. No - claims are made as to fitness for any particular purpose. No warranties of - any kind are expressed or implied. The recipient agrees to determine - applicability of information provided. If this file has been purchased on - magnetic or optical media from Unicode, Inc., the sole remedy for any claim - will be exchange of defective media within 90 days of receipt.
-This disclaimer is applicable for all other data files accompanying - the Unicode Character Database, some of which have been compiled by the - Unicode Consortium, and some of which have been supplied by other sources.
-
--Recipient is granted the right to make copies in any form for internal - distribution and to freely use the information supplied in the creation of - products supporting the UnicodeTM Standard. The files in the - Unicode Character Database can be redistributed to third parties or other - organizations (whether for profit or not) as long as this notice and the - disclaimer notice are retained. Information can be extracted from these - files and used in documentation or programs, as long as there is an - accompanying notice indicating the source.
-
![]() |
+
+ ![]() |
()?
#Note: the 'code' field is currently discarded; only the char is important.
#Note: if there is conflict, the FIRST char wins.
diff --git a/tools/unicodetools/com/ibm/text/utility/Counter.java b/tools/unicodetools/com/ibm/text/utility/Counter.java
index f866b3a8996..57aa6193b46 100644
--- a/tools/unicodetools/com/ibm/text/utility/Counter.java
+++ b/tools/unicodetools/com/ibm/text/utility/Counter.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Counter.java,v $
-* $Date: 2005/10/11 19:39:15 $
-* $Revision: 1.3 $
+* $Date: 2006/04/05 22:12:45 $
+* $Revision: 1.4 $
*
*******************************************************************************
*/
@@ -21,7 +21,7 @@ import java.io.*;
import java.text.*;
public final class Counter {
- Map map = new HashMap();
+ private Map map = new HashMap();
static public final class RWInteger implements Comparable {
static int uniqueCount;
@@ -92,5 +92,11 @@ public final class Counter {
return result;
}
-
+ public Set keySet() {
+ return map.keySet();
+ }
+
+ public Map getMap() {
+ return Collections.unmodifiableMap(map);
+ }
}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
index 881197e45f0..e840c30c5dd 100644
--- a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
+++ b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
@@ -48,7 +48,7 @@ public class UnicodeDataFile {
out.println("# For documentation, see UCD.html");
}
try {
- Utility.appendFile(filename + "Header" + fileType, Utility.UTF8_UNIX, out);
+ Utility.appendFile("com/ibm/text/UCD/" + filename + "Header" + fileType, Utility.UTF8_UNIX, out);
} catch (FileNotFoundException e) {
/*
out.println("# Unicode Character Database: Derived Property Data");
diff --git a/tools/unicodetools/readme.html b/tools/unicodetools/readme.html
index 7e844f18886..aef2727248b 100644
--- a/tools/unicodetools/readme.html
+++ b/tools/unicodetools/readme.html
@@ -77,7 +77,10 @@ exist:
<UCD_DIR>/EXTRAS-Update
2. Download all of the UnicodeData files for each version into UCD_DIR.
The folder names must be of the form: "3.2.0-Update", so rename the folders on the
-Unicode site to this format.
+Unicode site to this format. If the
+folder contains ucd, then make the contents of that directory be the contents of
+the x.x.x-Update directory. That is, each directory will directly contain files
+like PropList....txt
2a Ensure Complete Release
If you are downloading any "incomplete" release (one that does not contain a complete set of data
files for that release, you need to also download the previous complete release). Most of the N.M-Update
@@ -87,6 +90,7 @@ directoriess are complete, *except*:
Also, make the following changes to UnicodeData for 1.1.5:
Delete
3400;HANGUL SYLLABLE KIYEOK A;Lo;0;L;1100 1161;;;;N;;;;;
+...
4DFF;HANGUL SYLLABLE MIEUM WEO RIEUL-THIEUTH;Lo;0;L;1106 116F 11B4;;;;N;;;;;
4E00;;Lo;0;L;;;;;N;;;;;
Add:
@@ -106,18 +110,19 @@ BASE_DIR + "Collation\allkeys" + VERSION + ".txt".
If you have it in a different location, change that value for KEYS in UCA.java, and
the value for BASE_DIR
-2c. Here is an example of the default directory structure with files:
+2c. Here is an example of the default directory structure with files. All of
+the yellow ones should exist
C://DATA/
BIN/
- Collation/
+ Collation/
allkeys-3.1.1.txt
-
+
GEN/
DerivedData/
ExtractedProperties
- UCD/
+ UCD/
3.0.0-Update/
Unihan-3.2.0.txt
...
@@ -133,69 +138,145 @@ the value for BASE_DIR
ArabicShaping-4.0.0d14b.txt
BidiMirroring-4.0.0d1b.txt
...
- EXTRAS-Update/
+ EXTRAS-Update/
3. Versions
All of the following have "version X" in the options you give to Java (either on the
command line, or in the Eclipse 'run' options. If you want a specific version like 3.1.0, then you
would write "version 3.1.1". If you want the latest version (4.1.0), you can omit the "version X".
-4. Running UCD, you will use com.ibm.text.UCD.Main as your main class.
-The Working directory has to be C:\ICU4J\unicodetools\com\ibm\text\UCD
-(In Eclipse you can also use ${workspace_loc:UnicodeTools/com/ibm/text/UCD}, which abstracts away
-the location.)
-
-The same for UCA:
-main: com.ibm.text.UCD.Main
-directory:
-C:\ICU4J\unicodetools\com\ibm\text\UCA
-4a. BIN
-For each version, the tools build a set of binary data in BIN that contain the information for
-that release. This is done automatically, or you can manually do it with the options
-
-version X build
-
-This builds an compressed format of all the UCD data (except blocks and Unihan) into the BIN
-directory. Don't worry about the voluminous console messages, unless one says "FAIL".
-
-You have to manually do this if you change any of the data files in that
-version!!
-Note: if for any reason you modify the binary format of the BIN files, you also have to bump the
-value in that file:
-
-static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes
-4b. To build the Unicode files for a particular version X, run the Main with the following
-argument:
-MakeUnicodeFiles.generateFile
-This will execute the commands in the file MakeUnicodeFiles.txt.
-You will edit that file if you want a different 'd' version for the files, OR if you want to
-change which files are built. At the top of the file you will see the following text:
-Generate:
-DeltaVersion: 7
-4c. To change which files are built, put any number of regular expressions separated by spaces
-after Generate. Eg,
-Generate: .*line.* prop.*
-The matching is case-insensitive.
-4d. To change the 'd' number that is appended to the generated files names, change the
-DeltaVersion.
-4e. To run basic consistency checking, run:
-version X verify
-
-Don't worry about any console messages except those that say FAIL.
-4f. Output
-The files will be generated in the GEN directories.
-
- - If they are the same as previous files (except for the first line and Date), they will be
- renamed to UNCHANGED...
- - If they are not, then a bat file will be generated in the DIFF directory. Double-clicking on
- this file will launch CompareIt, which is a nice diff program. Get compareIt from
- http://www.grigsoft.com/files.htm (be
- sure to get the Unicode version),then you can also set it as the diff program in CVS with
- Admin/Preferences/WinCVS, External Diff = C:\Program Files\Compare It!\wincmp3.exe (or equiv).
-
-5. Running UCA, you will use com.ibm.text.UCA.Main as your main class.
-5a. To build all the UCA files used by ICU, use the option:
-java <UCA>Main ICU
-6. To build all the charts, use the UCA project, with options: normalizationChart caseChart
-scriptChart indexChart
+4. Building Files
+
+ - Setup
+ - In Eclipse, open the Package Explorer (Use Window>Show View if you
+ don't see it)
+ - Open UnicodeTools
+ - com.ibm.text.UCD
+ - MakeUnicodeFiles.txt
This file drives the production of
+ the derived Unicode files. The first three lines contain
+ parameters that you may want to modify from time to time:
+ Generate: .*script.* // this is a regular expression. Use .* for all files
+DeltaVersion: 10 // This gets appended to the file name. Pick 1+ the highest value in Public
+CopyrightYear: 2006 // Pick the current year
+
+
+
+
+
+ - Open in Package Explorer
+
+ - com.ibm.text.UCD
+ - Main
+
+
+
+
+ - Run>Run As...
+ - Choose Java Application
+ - it will fail, don't worry; you need to set some parameters
+
+
+
+
+ - Run>Run...
+ - Select the Arguments tab, and fill in the following
+ - Program arguments:
build 5.0 MakeUnicodeFiles
+
+ - VM arguments:
+
-Xms512m -Xmx512m
+
+
+
+ - Close and Save
+
+
+
+
+ - Run
+ - You'll see it build the 5.0 files, with something like the following
+ results:
Writing UCD_Data5.0.0
+Data Size: 109,802
+Wrote Data 109802
+
+ - For each version, the tools build a set of binary data in BIN that
+ contain the information for that release. This is done automatically, or
+ you can manually do it with the Program Arguments
version X build
+ This builds a compressed format of all the UCD data (except blocks
+ and Unihan) into the BIN directory. Don't worry about the voluminous
+ console messages, unless one says "FAIL".
+ You have to manually do this if you change
+ any of the data files in that version!
+ Note: if for any reason you modify the binary format of the BIN files, you also have to bump the
+value in that file:
+ static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes
+
+
+
+ - Results in
+ C:\DATA\GEN\DerivedData
+ - The files will be in this directory.
+ - There are also DIFF folders, that contain BAT files that you can run
+ on Windows with CompareIt. (You can modify the code to build BATs with
+ another Diff program if you want).
+ - For any file with a significant difference, it will build two
+ BAT files, such as the first two below.
Diff_PropList-5.0.0d10.txt.bat
+OLDER-Diff_PropList-5.0.0d10.txt.bat
+
+UNCHANGED-Diff_PropertyValueAliases-5.0.0d10.txt.bat
+
+
+
+ - Any files without significant changes will have "UNCHANGED" as a
+ prefix: ignore them. The OLDER prefix is the comparison to the
+ last version of Unicode.
+ - On Windows you can run these BATs to compare files:
+
+
+
+5. Invariant Checking
+
+ - Setup
+ - Open in Package Explorer
+ - com.ibm.text.UCD
+ - TestUnicodeInvariants.java
+
+
+
+
+ - Run>Run As... Java Application
+ Will create the following file of results: C:\DATA\GEN\UnicodeInvariantResults.txt
+
+ - Open that file and search for "**** START Error Info ****". Each such
+ point provides a dump of comparison information.
+
+
+
+6. Options
+
+ - If you want to see files that are opened while processing, do the
+ following:
+ - Run>Run
+ - Select the Arguments tab, and add the following
+ - VM arguments:
+
-DSHOW_FILES
+
+
+
+
+
+
+7. UCA
+
+ -
+
You will use com.ibm.text.UCA.Main as your main class, setting it up along
+ the same lines as above.
+ -
+
To build all the UCA files used by ICU, use the Program arguments:
+ Main ICU
+
+ -
+
To build all the charts, use the UCA project, with options:
+ normalizationChart caseChart scriptChart indexChart
+
+