mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-0 update for U4.1.0
X-SVN-Rev: 17400
This commit is contained in:
parent
599dbb508c
commit
641a6d6d79
12 changed files with 118 additions and 54 deletions
|
@ -1,4 +1,6 @@
|
|||
#
|
||||
# Note: The casing of block names is not normative.
|
||||
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
|
||||
#
|
||||
# Format:
|
||||
# Start Code..End Code; Block Name
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#
|
||||
# Case Folding Properties
|
||||
#
|
||||
# This file is a supplement to the UnicodeData file.
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2005/03/26 05:40:04 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -574,14 +574,19 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
log.close();
|
||||
|
||||
System.out.println("Writing");
|
||||
String newFile = "DerivedData/SpecialCasing" + suffix2 + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
//String newFile = "DerivedData/SpecialCasing" + suffix2 + UnicodeDataFile.getFileSuffix(true);
|
||||
//PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
|
||||
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedData/", "SpecialCasing" + suffix2);
|
||||
PrintWriter out = udf.out;
|
||||
|
||||
/* String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "SpecialCasing", suffix2 + UnicodeDataFile.getFileSuffix(true), batName);
|
||||
out.println("# SpecialCasing" + UnicodeDataFile.getFileSuffix(false));
|
||||
out.println(UnicodeDataFile.generateDateLine());
|
||||
out.println("#");
|
||||
Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);
|
||||
*/
|
||||
|
||||
Iterator it = sorted.keySet().iterator();
|
||||
int lastOrder = -1;
|
||||
|
@ -612,8 +617,8 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
}
|
||||
out.println(line);
|
||||
}
|
||||
Utility.appendFile("SpecialCasingFooter.txt", Utility.UTF8, out);
|
||||
out.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
//Utility.appendFile("SpecialCasingFooter.txt", Utility.UTF8, out);
|
||||
udf.close();
|
||||
//Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2005/03/10 02:37:19 $
|
||||
* $Revision: 1.37 $
|
||||
* $Date: 2005/03/26 05:40:04 $
|
||||
* $Revision: 1.38 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -744,16 +744,19 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
|
||||
|
||||
UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, fileName);
|
||||
PrintWriter log = fc.out;
|
||||
|
||||
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
//PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
|
||||
//String[] batName = {""};
|
||||
//String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
String[] example = new String[256];
|
||||
|
||||
log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
//log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
|
||||
//log.println(UnicodeDataFile.generateDateLine());
|
||||
/*log.println("#");
|
||||
log.println("# Normalization Test Suite");
|
||||
log.println("# Format:");
|
||||
log.println("#");
|
||||
|
@ -787,7 +790,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
log.println("#");
|
||||
log.println("@Part0 # Specific cases");
|
||||
log.println("#");
|
||||
log.println("#");*/
|
||||
|
||||
for (int j = 0; j < testSuiteCases.length; ++j) {
|
||||
writeLine(testSuiteCases[j], log, false);
|
||||
|
@ -891,8 +894,8 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.fixDot();
|
||||
log.println("#");
|
||||
log.println("# END OF FILE");
|
||||
log.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
fc.close();
|
||||
//Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
}
|
||||
|
||||
static void handleIdentical() throws IOException {
|
||||
|
@ -942,12 +945,13 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
// not recursive!!!
|
||||
static final String comma(String s) {
|
||||
//if (true) return s;
|
||||
commaResult.setLength(0);
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF32.count16(i)) {
|
||||
cp = UTF32.char32At(s, i);
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
if (Default.ucd().getCategory(cp) == Mn) commaResult.append('\u25CC');
|
||||
UTF32.append32(commaResult, cp);
|
||||
UTF16.append(commaResult, cp);
|
||||
}
|
||||
return commaResult.toString();
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
Generate: Derived.*
|
||||
DeltaVersion: 12
|
||||
Generate:
|
||||
DeltaVersion: 13
|
||||
CopyrightYear: 2005
|
||||
|
||||
File: auxiliary/GraphemeBreakProperty
|
||||
|
@ -58,6 +58,13 @@ Value: 4.1
|
|||
File: extracted/DerivedBidiClass
|
||||
Property: Bidi_Class
|
||||
# Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
|
||||
# Unlike other properties, unassigned code points in blocks reserved for right-to-left scripts are given either types R or AL.
|
||||
# The unassigned characters that default to R are:
|
||||
# Hebrew, Cypriot_Syllabary, Kharoshthi, and the ranges \u07C0-\u08FF \uFB1D-\uFB4F \U00010840-\U00010FFF
|
||||
# The unassigned characters that default to AL are:
|
||||
# Arabic, Syriac, Thaana, Arabic_Presentation_Forms_A, Arabic_Presentation_Forms_B, Arabic_Supplement,
|
||||
# and the range \u0750-\u077F, minus the Noncharacter_Code_Points
|
||||
# For all other cases:
|
||||
Format: valueStyle=short skipUnassigned=Left_To_Right
|
||||
|
||||
File: extracted/DerivedBinaryProperties
|
||||
|
@ -67,8 +74,6 @@ Property: Bidi_Mirrored
|
|||
File: extracted/DerivedCombiningClass
|
||||
Property: Canonical_Combining_Class
|
||||
# Combining Class (listing UnicodeData.txt, field 3: see UCD.html)
|
||||
# All code points not explicitly listed in this file have the property
|
||||
# value: 0.
|
||||
Format: nameStyle=none valueStyle=short skipUnassigned=Not_Reordered
|
||||
|
||||
File: DerivedCoreProperties
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
#
|
||||
# Normalization Test Suite
|
||||
# Format:
|
||||
#
|
||||
# Columns (c1, c2,...) are separated by semicolons
|
||||
# Comments are indicated with hash marks
|
||||
#
|
||||
# CONFORMANCE:
|
||||
# 1. The following invariants must be true for all conformant implementations
|
||||
#
|
||||
# NFC
|
||||
# c2 == NFC(c1) == NFC(c2) == NFC(c3)
|
||||
# c4 == NFC(c4) == NFC(c5)
|
||||
#
|
||||
# NFD
|
||||
# c3 == NFD(c1) == NFD(c2) == NFD(c3)
|
||||
# c5 == NFD(c4) == NFD(c5)
|
||||
#
|
||||
# NFKC
|
||||
# c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
|
||||
#
|
||||
# NFKD
|
||||
# c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
|
||||
#
|
||||
# 2. For every code point X assigned in this version of Unicode that is not specifically
|
||||
# listed in Part 1, the following invariants must be true for all conformant
|
||||
# implementations:
|
||||
#
|
||||
# X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
|
||||
#
|
||||
@Part0 # Specific cases
|
||||
#
|
|
@ -1,3 +1,4 @@
|
|||
#
|
||||
# This file contains aliases for properties used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#
|
||||
# This file contains aliases for property values used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#
|
||||
# Special Casing Properties
|
||||
#
|
||||
# This file is a supplement to the UnicodeData file.
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
||||
* $Date: 2005/03/10 02:37:19 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2005/03/26 05:40:05 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -151,7 +151,12 @@ public class TestData implements UCD_Types {
|
|||
|
||||
static class GenStringPrep {
|
||||
UnicodeSet[] coreChars = new UnicodeSet[100];
|
||||
UnicodeSet[] decompChars = new UnicodeSet[100];
|
||||
UnicodeSet decomposable = new UnicodeSet();
|
||||
UnicodeSet pattern = new UnicodeSet();
|
||||
ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
|
||||
//UnicodeSet id_continue = ups.getSet("ID_Continue=true");
|
||||
UnicodeSet xid_continue = ups.getSet("XID_Continue=true");
|
||||
//UnicodeSet[] decompChars = new UnicodeSet[100];
|
||||
UCD ucd = Default.ucd();
|
||||
|
||||
Collator uca = Collator.getInstance(ULocale.ENGLISH);
|
||||
|
@ -167,10 +172,13 @@ public class TestData implements UCD_Types {
|
|||
|
||||
|
||||
void genStringPrep() throws IOException {
|
||||
//BagFormatter bf = new BagFormatter();
|
||||
//System.out.println(bf.showSetDifferences("ID_Continue", id_continue, "XID_Continue", xid_continue));
|
||||
StringBuffer inbuffer = new StringBuffer();
|
||||
StringBuffer intermediate, outbuffer;
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
if (!Default.nfd().isNormalized(cp)) decomposable.add(cp);
|
||||
inbuffer.setLength(0);
|
||||
UTF16.append(inbuffer, cp);
|
||||
try {
|
||||
|
@ -189,15 +197,9 @@ public class TestData implements UCD_Types {
|
|||
if (!TestData.equals(inbuffer, outbuffer))
|
||||
continue;
|
||||
int script = ucd.getScript(cp);
|
||||
if (!Default.nfd().isNormalized(cp)) {
|
||||
if (decompChars[script] == null)
|
||||
decompChars[script] = new UnicodeSet();
|
||||
decompChars[script].add(cp);
|
||||
} else {
|
||||
if (coreChars[script] == null)
|
||||
coreChars[script] = new UnicodeSet();
|
||||
coreChars[script].add(cp);
|
||||
}
|
||||
if (coreChars[script] == null)
|
||||
coreChars[script] = new UnicodeSet();
|
||||
coreChars[script].add(cp);
|
||||
}
|
||||
// find characters with no uppercase
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(lowercase); it.next();) {
|
||||
|
@ -212,8 +214,11 @@ public class TestData implements UCD_Types {
|
|||
.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
out.println("<title>IDN Characters</title><style>");
|
||||
out.println("<!--");
|
||||
out
|
||||
.println(".script { font-size: 150%; background-color: #C0C0C0 }");
|
||||
out.println(".script { font-size: 150%; background-color: #CCCCCC }");
|
||||
out.println(".Atomic { background-color: #CCCCFF }");
|
||||
out.println(".Atomic-no-uppercase { background-color: #CCFFCC }");
|
||||
out.println(".Non-ID { background-color: #FFCCCC }");
|
||||
out.println(".Decomposable { background-color: #FFFFCC }");
|
||||
out.println("th { text-align: left }");
|
||||
out.println("-->");
|
||||
out.println("</style></head><body><table>");
|
||||
|
@ -240,15 +245,16 @@ public class TestData implements UCD_Types {
|
|||
* @param scriptCode
|
||||
*/
|
||||
private void showCodes(PrintWriter out, int scriptCode) {
|
||||
if (coreChars[scriptCode] == null
|
||||
&& decompChars[scriptCode] == null)
|
||||
return;
|
||||
if (coreChars[scriptCode] == null) return;
|
||||
System.out.println(ucd.getScriptID_fromIndex((byte) scriptCode));
|
||||
String script = Default.ucd().getScriptID_fromIndex(
|
||||
(byte) scriptCode);
|
||||
String script = Default.ucd().getScriptID_fromIndex((byte) scriptCode);
|
||||
out.println();
|
||||
out.println("<tr><th class='script'>Script: " + script + "</th></tr>");
|
||||
UnicodeSet core = new UnicodeSet(coreChars[scriptCode]);
|
||||
UnicodeSet decomp = new UnicodeSet(core).retainAll(decomposable);
|
||||
core.removeAll(decomp);
|
||||
UnicodeSet non_id = new UnicodeSet(core).removeAll(xid_continue);
|
||||
core.removeAll(non_id);
|
||||
UnicodeSet otherCore = new UnicodeSet(core).removeAll(hasUpper);
|
||||
core.removeAll(otherCore);
|
||||
if (core.size() == 0) {
|
||||
|
@ -257,9 +263,9 @@ public class TestData implements UCD_Types {
|
|||
otherCore = temp;
|
||||
}
|
||||
printlnSet(out, "Atomic", core, scriptCode);
|
||||
if (otherCore.size() != 0) printlnSet(out, "Atomic [noUpper]", otherCore, scriptCode);
|
||||
UnicodeSet decomp = decompChars[scriptCode];
|
||||
if (decomp != null && decomp.size() != 0) printlnSet(out, "Decomposable", decomp, scriptCode);
|
||||
if (otherCore.size() != 0) printlnSet(out, "Atomic-no-uppercase", otherCore, scriptCode);
|
||||
if (non_id.size() != 0) printlnSet(out, "Non-ID", non_id, scriptCode);
|
||||
if (decomp.size() != 0) printlnSet(out, "Decomposable", decomp, scriptCode);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -277,7 +283,7 @@ public class TestData implements UCD_Types {
|
|||
&& unicodeset.containsNone(bidiL) ? " dir='rtl'" : "";
|
||||
out.println("<tr><th class='" + title + "'>" + title + " ("
|
||||
+ nf.format(size) + ")</th></tr>");
|
||||
out.print("<tr><td" + dir + ">");
|
||||
out.print("<tr><td class='" + title + "'" + dir + ">");
|
||||
UnicodeSetIterator usi = new UnicodeSetIterator();
|
||||
if (scriptCode == HAN_SCRIPT || scriptCode == HANGUL_SCRIPT) {
|
||||
usi.reset(unicodeset);
|
||||
|
|
|
@ -264,7 +264,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
unicodeMap.putAll(lineBreak.getSet("Infix_Numeric")
|
||||
.remove(0x003A), "MidNum");
|
||||
unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
|
||||
unicodeMap.putAll(cat.getSet("Connector_Punctuation").remove(0x30FB).remove(0xFF65), "Numeric");
|
||||
unicodeMap.putAll(cat.getSet("Connector_Punctuation").remove(0x30FB).remove(0xFF65), "ExtendNumLet");
|
||||
unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
|
||||
unicodeMap.setMissing("Other");
|
||||
}
|
||||
|
@ -479,9 +479,10 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
int type = getType() & CORE_MASK;
|
||||
if (type == STRING || type == MISC) return result;
|
||||
else if (type == NUMERIC) return result;
|
||||
else if (type == BINARY) {
|
||||
if (type == STRING || type == MISC || type == NUMERIC) {
|
||||
UnicodeProperty.addUnique(valueAlias, result);
|
||||
return result;
|
||||
} else if (type == BINARY) {
|
||||
UnicodeProperty.addUnique(valueAlias, result);
|
||||
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, null, result);
|
||||
} else if (type == ENUMERATED || type == CATALOG) {
|
||||
|
|
|
@ -15,15 +15,17 @@ public class UnicodeDataFile {
|
|||
private String newFile;
|
||||
private String batName;
|
||||
private String mostRecent;
|
||||
private String filename;
|
||||
private UnicodeDataFile(){};
|
||||
|
||||
public static UnicodeDataFile openAndWriteHeader(String directory, String filename) throws IOException {
|
||||
UnicodeDataFile result = new UnicodeDataFile();
|
||||
result.newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
result.out = Utility.openPrintWriter(result.newFile, Utility.LATIN1_UNIX);
|
||||
result.out = Utility.openPrintWriter(result.newFile, Utility.UTF8_UNIX);
|
||||
String[] batName = {""};
|
||||
result.mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
result.batName = batName[0];
|
||||
result.filename = filename;
|
||||
|
||||
result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
result.out.println(generateDateLine());
|
||||
|
@ -50,6 +52,9 @@ public class UnicodeDataFile {
|
|||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
Utility.appendFile(filename + "Footer.txt", Utility.LATIN1, out);
|
||||
} catch (FileNotFoundException e) {}
|
||||
out.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue