mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 01:11:02 +00:00
ICU-5222 updates for UCD/A 5.0.0
X-SVN-Rev: 19697
This commit is contained in:
parent
18f81012d0
commit
1d7d7f00ba
5 changed files with 153 additions and 54 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
|
||||
* $Date: 2005/05/02 15:39:54 $
|
||||
* $Revision: 1.25 $
|
||||
* $Date: 2006/06/08 18:16:40 $
|
||||
* $Revision: 1.26 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1128,7 +1128,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
Normalizer skipDecomps;
|
||||
Normalizer nfd;
|
||||
Normalizer nfkd;
|
||||
Iterator enum = null;
|
||||
Iterator enum1 = null;
|
||||
byte ceLimit;
|
||||
int currentRange = SAMPLE_RANGES.length; // set to ZERO to enable
|
||||
int startOfRange = SAMPLE_RANGES[0][0];
|
||||
|
@ -1197,9 +1197,9 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
}
|
||||
|
||||
// contractions
|
||||
if (enum == null) enum = ucaData.getContractions();
|
||||
while (enum.hasNext()) {
|
||||
result = (String)enum.next();
|
||||
if (enum1 == null) enum1 = ucaData.getContractions();
|
||||
while (enum1.hasNext()) {
|
||||
result = (String)enum1.next();
|
||||
if (result.length() == 1 && UTF16.isLeadSurrogate(result.charAt(0))) {
|
||||
//System.out.println("Skipping " + ucd.getCodeAndName(result));
|
||||
continue; // try again
|
||||
|
@ -1500,9 +1500,9 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
Map missingStrings = new HashMap();
|
||||
Map tempMap = new HashMap();
|
||||
|
||||
Iterator enum = ucaData.getContractions();
|
||||
while (enum.hasNext()) {
|
||||
String sequence = (String)enum.next();
|
||||
Iterator enum1 = ucaData.getContractions();
|
||||
while (enum1.hasNext()) {
|
||||
String sequence = (String)enum1.next();
|
||||
//System.out.println("Contraction: " + Utility.hex(sequence));
|
||||
for (int i = sequence.length()-1; i > 0; --i) {
|
||||
String shorter = sequence.substring(0,i);
|
||||
|
@ -1520,26 +1520,26 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
|
||||
// now add them. We couldn't before because we were iterating over it.
|
||||
|
||||
enum = tempMap.keySet().iterator();
|
||||
while (enum.hasNext()) {
|
||||
String shorter = (String) enum.next();
|
||||
enum1 = tempMap.keySet().iterator();
|
||||
while (enum1.hasNext()) {
|
||||
String shorter = (String) enum1.next();
|
||||
IntStack tempStack = (IntStack) tempMap.get(shorter);
|
||||
ucaData.add(shorter, tempStack);
|
||||
}
|
||||
|
||||
|
||||
enum = missingStrings.keySet().iterator();
|
||||
enum1 = missingStrings.keySet().iterator();
|
||||
if (missingStrings.size() != 0) {
|
||||
/**
|
||||
while (enum.hasMoreElements()) {
|
||||
String sequence = (String)enum.nextElement();
|
||||
while (enum1.hasMoreElements()) {
|
||||
String sequence = (String)enum1.nextElement();
|
||||
getCE(sequence);
|
||||
FIX LATER;
|
||||
}
|
||||
*/
|
||||
String errorMessage = "";
|
||||
while (enum.hasNext()) {
|
||||
String missing = (String)enum.next();
|
||||
while (enum1.hasNext()) {
|
||||
String missing = (String)enum1.next();
|
||||
if (errorMessage.length() != 0) errorMessage += ", ";
|
||||
errorMessage += "\"" + missing + "\"";
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Data.java,v $
|
||||
* $Date: 2005/05/02 15:39:54 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2006/06/08 18:16:40 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -294,9 +294,9 @@ public class UCA_Data implements UCA_Types {
|
|||
if (collationElements[i] == CONTRACTING) ceSet.add(i);
|
||||
}
|
||||
UnicodeSet ceSet2 = new UnicodeSet();
|
||||
Iterator enum = contractingTable.keySet().iterator();
|
||||
while (enum.hasNext()) {
|
||||
String sequence = (String)enum.next();
|
||||
Iterator enum1 = contractingTable.keySet().iterator();
|
||||
while (enum1.hasNext()) {
|
||||
String sequence = (String)enum1.next();
|
||||
ceSet2.add(sequence.charAt(0));
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2005/04/06 08:48:17 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2006/06/08 18:16:40 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -266,10 +266,12 @@ public class WriteCharts implements UCD_Types {
|
|||
|
||||
String[] replacement = new String[] {"%%%", "Normalization Charts"};
|
||||
String folder = "charts\\normalization\\";
|
||||
|
||||
//System.out.println("File: " + new File(".").getCanonicalPath());
|
||||
|
||||
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("norm_help.html", Utility.UTF8, folder + "help.html");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("com/ibm/text/UCA/charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/norm_help.html", Utility.UTF8, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile(WORKING_DIR + "index_header.html", Utility.UTF8, indexFile, replacement);
|
||||
|
@ -375,9 +377,9 @@ public class WriteCharts implements UCD_Types {
|
|||
String[] replacement = new String[] {"%%%", "Case Charts"};
|
||||
String folder = "charts\\case\\";
|
||||
|
||||
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("case_help.html", Utility.UTF8, folder + "help.html");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("com/ibm/text/UCA/charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/case_help.html", Utility.UTF8, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile(WORKING_DIR + "index_header.html", Utility.UTF8, indexFile, replacement);
|
||||
|
@ -487,9 +489,9 @@ public class WriteCharts implements UCD_Types {
|
|||
String[] replacement = new String[] {"%%%", "Script Charts"};
|
||||
String folder = "charts\\script\\";
|
||||
|
||||
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("script_help.html", Utility.UTF8, folder + "help.html");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("com/ibm/text/UCA/charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/script_help.html", Utility.UTF8, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile(WORKING_DIR + "script_index_header.html", Utility.UTF8, indexFile, replacement);
|
||||
|
@ -609,9 +611,9 @@ public class WriteCharts implements UCD_Types {
|
|||
String[] replacement = new String[] {"%%%", "Name Charts"};
|
||||
String folder = "charts\\name\\";
|
||||
|
||||
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("name_help.html", Utility.UTF8, folder + "help.html");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("com/ibm/text/UCA/charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("com/ibm/text/UCA/name_help.html", Utility.UTF8, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile(WORKING_DIR + "index_header.html", Utility.UTF8, indexFile, replacement);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2006/04/05 22:12:46 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2006/06/08 18:16:40 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -2301,7 +2301,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
// NOTE: we add the back map based on the string value; the smallest (UTF-16 order) string wins
|
||||
Object key = new ArrayWrapper((int[])(ces.clone()),0, len);
|
||||
if (false) {
|
||||
Object value = backMap.get(key);
|
||||
String value = (String)backMap.get(key);
|
||||
if (value == null) return;
|
||||
if (s.compareTo(value) >= 0) return;
|
||||
}
|
||||
|
@ -4099,23 +4099,23 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
writeDuplicates();
|
||||
writeOverlap();
|
||||
|
||||
log.println("<h2>Coverage</h2>");
|
||||
log.println("<h2>11. Coverage</h2>");
|
||||
BagFormatter bf = new BagFormatter();
|
||||
bf.setLineSeparator("<br>\r\n");
|
||||
ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
|
||||
ToolUnicodePropertySource ups = ToolUnicodePropertySource.make(Default.ucdVersion());
|
||||
bf.setUnicodePropertyFactory(ups);
|
||||
bf.setShowLiteral(TransliteratorUtilities.toHTML);
|
||||
bf.setFixName(TransliteratorUtilities.toHTML);
|
||||
UCD ucd = Default.ucd();
|
||||
UnicodeProperty cat = ups.getProperty("gc");
|
||||
UnicodeSet ucd410 = cat.getSet("Cn")
|
||||
UnicodeSet ucdCharacters = cat.getSet("Cn")
|
||||
.addAll(cat.getSet("Co"))
|
||||
.addAll(cat.getSet("Cs"))
|
||||
.complement()
|
||||
//.addAll(ups.getSet("Noncharactercodepoint=true"))
|
||||
//.addAll(ups.getSet("Default_Ignorable_Code_Point=true"))
|
||||
;
|
||||
bf.showSetDifferences(log, "UCD4.1.0", ucd410, "UCA4.1.0", coverage, 3);
|
||||
bf.showSetDifferences(log, "UCD" + Default.ucdVersion(), ucdCharacters, collator.getFileVersion(), coverage, 3);
|
||||
|
||||
log.println("</body></html>");
|
||||
log.close();
|
||||
|
|
|
@ -4,6 +4,11 @@
|
|||
<meta http-equiv="Content-Language" content="en-us">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
|
||||
<title>New Page 18</title>
|
||||
<style>
|
||||
<!--
|
||||
li { margin-top: 0.5em; margin-bottom: 0.5em }
|
||||
-->
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
@ -170,14 +175,14 @@ CopyrightYear: <b>2006</b> <i> // Pick the current year</i></pre>
|
|||
</li>
|
||||
<li>Run>Run As...<ol>
|
||||
<li>Choose Java Application<ul>
|
||||
<li>it will fail, don't worry; you need to set some parameters</li>
|
||||
<li>it will fail, don't worry; you need to set some parameters.</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>Run>Run...<ul>
|
||||
<li>Select the Arguments tab, and fill in the following<ul>
|
||||
<li>Program arguments:<pre>build 5.0 MakeUnicodeFiles</pre>
|
||||
<li>Program arguments:<pre>build 5.0<span style="background-color: #FFFF00">.0</span> MakeUnicodeFiles</pre>
|
||||
</li>
|
||||
<li>VM arguments:
|
||||
<pre>-Xms512m -Xmx512m</pre>
|
||||
|
@ -229,6 +234,14 @@ UNCHANGED-Diff_PropertyValueAliases-5.0.0d10.txt.bat</pre>
|
|||
<li>On Windows you can run these BATs to compare files:</li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><span style="background-color: #FFFF00">NFSkippable</span><ol>
|
||||
<li><span style="background-color: #FFFF00">A file is needed by ICU that is
|
||||
generated with the same tool. Just use the input parameter "NFSkippable" to
|
||||
generate the file NFSafeSets.txt, also in </span>
|
||||
<a href="file:///C:/DATA/GEN"><span style="background-color: #FFFF00">
|
||||
file:///C:/DATA/GEN</span></a></li>
|
||||
</ol>
|
||||
</li>
|
||||
</ol>
|
||||
<h3>5. Invariant Checking</h3>
|
||||
<ol>
|
||||
|
@ -242,9 +255,63 @@ UNCHANGED-Diff_PropertyValueAliases-5.0.0d10.txt.bat</pre>
|
|||
</li>
|
||||
<li>Run>Run As... Java Application<br>
|
||||
Will create the following file of results:<pre><a href="file:///C:/DATA/GEN/UnicodeInvariantResults.txt/">C:\DATA\GEN\UnicodeInvariantResults.txt\</a></pre>
|
||||
<p>The console will also report whether any problems were found. Thus, in
|
||||
the following case there was one failure:</p>
|
||||
<pre>ParseErrorCount=0
|
||||
TestFailureCount=1</pre>
|
||||
</li>
|
||||
<li>The header of the result file explains the syntax of the tests.</li>
|
||||
<li>Open that file and search for "**** START Error Info ****". Each such
|
||||
point provides a dump of comparison information.<ol>
|
||||
<li>Failures print a list of differences between two sets being
|
||||
compared. So if A and B are being compared, it prints all the items in
|
||||
A-B, then in B-A, then in A&B.</li>
|
||||
<li>For example, here is a listing of a problem that must be corrected.
|
||||
Note that usually there is a comment that explains what the following
|
||||
line or lines are supposed to test. Next comes FALSE (indicating
|
||||
that the test failed), then the detailed error report.<pre><span style="font-size: 9pt"># Canonical decompositions (minus exclusions) must be identical across releases
|
||||
[$Decomposition_Type:Canonical - $Full_Composition_Exclusion] = [$×Decomposition_Type:Canonical - $×Full_Composition_Exclusion]
|
||||
|
||||
FALSE
|
||||
**** START Error Info ****
|
||||
|
||||
In [$×Decomposition_Type:Canonical - $×Full_Composition_Exclusion], but not in [$Decomposition_Type:Canonical - $Full_Composition_Exclusion] :
|
||||
|
||||
# Total code points: 0
|
||||
|
||||
Not in [$×Decomposition_Type:Canonical - $×Full_Composition_Exclusion], but in [$Decomposition_Type:Canonical - $Full_Composition_Exclusion] :
|
||||
1B06 # Lo BALINESE LETTER AKARA TEDUNG
|
||||
1B08 # Lo BALINESE LETTER IKARA TEDUNG
|
||||
1B0A # Lo BALINESE LETTER UKARA TEDUNG
|
||||
1B0C # Lo BALINESE LETTER RA REPA TEDUNG
|
||||
1B0E # Lo BALINESE LETTER LA LENGA TEDUNG
|
||||
1B12 # Lo BALINESE LETTER OKARA TEDUNG
|
||||
1B3B # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
|
||||
1B3D # Mc BALINESE VOWEL SIGN LA LENGA TEDUNG
|
||||
1B40..1B41 # Mc [2] BALINESE VOWEL SIGN TALING TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
|
||||
1B43 # Mc BALINESE VOWEL SIGN PEPET TEDUNG
|
||||
|
||||
# Total code points: 11
|
||||
|
||||
In both [$×Decomposition_Type:Canonical - $×Full_Composition_Exclusion], and in [$Decomposition_Type:Canonical - $Full_Composition_Exclusion] :
|
||||
00C0..00C5 # L& [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
00C7..00CF # L& [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
00D1..00D6 # L& [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
...
|
||||
30F7..30FA # Lo [4] KATAKANA LETTER VA..KATAKANA LETTER VO
|
||||
30FE # Lm KATAKANA VOICED ITERATION MARK
|
||||
AC00..D7A3 # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
|
||||
|
||||
# Total code points: 12089
|
||||
**** END Error Info ****</span></pre>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>Options:<ol>
|
||||
<li>-r Print the failures as a range list.</li>
|
||||
<li>-fxxx Use a different input file, such as -fInvariantTest.txt</li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>Open that file and search for "**** START Error Info ****" Each such
|
||||
point provides a dump of comparison information.</li>
|
||||
</ol>
|
||||
</li>
|
||||
</ol>
|
||||
|
@ -265,18 +332,48 @@ UNCHANGED-Diff_PropertyValueAliases-5.0.0d10.txt.bat</pre>
|
|||
<h3>6. UCA</h3>
|
||||
<ol>
|
||||
<li>
|
||||
<h3>You will use com.ibm.text.UCA.Main as your main class, creating along
|
||||
the same lines as above.</h3></li>
|
||||
You will use com.ibm.text.UCA.Main as your main class, setting it up along
|
||||
the same lines as above.</li>
|
||||
<li>To test whether the UCA files are valid, use the
|
||||
<span style="font-weight: 400">options (<i>note: you should also build the ICU
|
||||
files below, since they test other aspects</i>).</span><pre>writeCollationValidityLog</pre>
|
||||
<p>It will create a file:</p>
|
||||
<pre><a href="file:///C:/DATA/GEN/collation/5.0.0/CheckCollationValidity.html">C:\DATA\GEN\collation\5.0.0\CheckCollationValidity.html</a></pre>
|
||||
<ol>
|
||||
<li>Review this file. It will list errors. Some of those are actually
|
||||
warnings, and indicate possible problems (this is indicated in the text,
|
||||
such as by: "These are not necessarily errors, but should be examined for
|
||||
<i>possible</i> errors"). In those cases, the items should be reviewed to make
|
||||
sure that there are no inadvertent problems.</li>
|
||||
<li>If it is not so marked, it is a true error, and must be fixed.</li>
|
||||
<li>At the end, there is a section <b>11. Coverage</b>. It has two parts:<ol>
|
||||
<li>In UCDxxx, but not in allkeys. Check this over to make sure that these
|
||||
are all the characters that should get <b><i>implicit</i></b> weights.</li>
|
||||
<li>In allkeys, but not in UCD. These should be <b><i>only</i></b>
|
||||
contractions. Check them over to make sure they look right also.</li>
|
||||
</ol></li>
|
||||
</ol></li>
|
||||
<li>
|
||||
<h4>To build all the UCA files used by ICU, use the Program arguments:</h4>
|
||||
<pre>Main ICU</pre>
|
||||
<h4><span style="font-weight: 400">To build all the charts, use the options:
|
||||
</span> </h4>
|
||||
<pre>normalizationChart caseChart scriptChart indexChart</pre>
|
||||
</li>
|
||||
<li>
|
||||
<h4>To build all the charts, use the UCA project, with options: </h4>
|
||||
<pre>normalizationChart caseChart scriptChart indexChart</pre>
|
||||
<h4><span style="font-weight: 400">To build all the UCA files used by ICU, use the
|
||||
option:</span></h4>
|
||||
<pre>ICU</pre>
|
||||
</li>
|
||||
<li>You should then build a set of the ICU files for the previous version,
|
||||
if you don't have them. The key file is UCA_Rules_NoCE.txt. It contains the
|
||||
rules expressed in ICU format, which allows for comparison across versions
|
||||
of UCA.<ol>
|
||||
<li>Do a Diff, and verify that all the differences are either new
|
||||
characters, or changes that were authorized by the UTC.</li>
|
||||
</ol>
|
||||
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
||||
</html>
|
Loading…
Add table
Reference in a new issue