ICU-0 updated for 4.1

X-SVN-Rev: 16940
This commit is contained in:
Mark Davis 2004-12-11 06:03:10 +00:00
parent 3e4d1a861a
commit 3daf3898fb
10 changed files with 358 additions and 214 deletions

View file

@ -15,6 +15,7 @@ public final class Default implements UCD_Types {
private static Normalizer nfkc;
private static Normalizer nfkd;
private static Normalizer[] nf = new Normalizer[4];
private static String year;
public static void setUCD(String version) {
ucdVersion = version;
@ -37,14 +38,21 @@ public final class Default implements UCD_Types {
}
static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'");
static DateFormat yearFormat = new SimpleDateFormat("yyyy");
static {
myDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
year = yearFormat.format(new Date());
}
/**
 * Returns the current time rendered by {@code myDateFormat}, i.e. with the
 * pattern {@code yyyy-MM-dd', 'HH:mm:ss' GMT'} in the GMT time zone.
 *
 * @return the formatted timestamp for "now"
 */
public static String getDate() {
    Date now = new Date();
    return myDateFormat.format(now);
}
/**
 * Returns the year string held by this class. It is initialized in the
 * static initializer to the current year (formatted with {@code yearFormat})
 * and can be replaced through {@link #setYear(String)}.
 *
 * @return the current value of the year field
 */
public static String getYear() {
    return Default.year;
}
public static String ucdVersion() {
if (ucd == null) setUCD();
return ucdVersion;
@ -75,4 +83,11 @@ public final class Default implements UCD_Types {
return nf[index];
}
/**
 * Replaces the year string returned by {@link #getYear()}. The value is
 * stored as given, without validation.
 *
 * @param lineValue the new year string
 */
public static void setYear(String lineValue) {
    Default.year = lineValue;
}
}

View file

@ -34,12 +34,13 @@ import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.UCD.MakeUnicodeFiles.Format.PrintStyle;
import com.ibm.text.utility.UnicodeDataFile;
import com.ibm.text.utility.Utility;
import com.ibm.icu.text.Collator;
public class MakeUnicodeFiles {
public static int dVersion = 6; // change to fix the generated file D version. If less than zero, no "d"
public static int dVersion = -1; // change to fix the generated file D version. If less than zero, no "d"
/*static String[] hackNameList = {
"noBreak", "Arabic_Presentation_Forms-A", "Arabic_Presentation_Forms-B",
@ -62,8 +63,7 @@ public class MakeUnicodeFiles {
static boolean DEBUG = false;
public static void main(String[] args) throws IOException {
//generateFile();
testInvariants();
generateFile();
}
static class Format {
@ -321,6 +321,8 @@ public class MakeUnicodeFiles {
}
} else if (line.startsWith("DeltaVersion:")) {
dVersion = Integer.parseInt(lineValue);
} else if (line.startsWith("CopyrightYear:")) {
Default.setYear(lineValue);
} else if (line.startsWith("File:")) {
int p2 = lineValue.lastIndexOf('/');
file = lineValue.substring(p2+1);
@ -758,7 +760,7 @@ public class MakeUnicodeFiles {
else bf.setPropName(name);
if (ps.interleaveValues) {
writeInterleavedValues(pw, bf, prop);
writeInterleavedValues(pw, bf, prop, ps);
} else if (prop.isType(UnicodeProperty.STRING_OR_MISC_MASK)) {
writeStringValues(pw, bf, prop);
//} else if (prop.isType(UnicodeProperty.BINARY_MASK)) {
@ -904,10 +906,10 @@ public class MakeUnicodeFiles {
private static void writeInterleavedValues(
PrintWriter pw,
BagFormatter bf,
UnicodeProperty prop) {
UnicodeProperty prop, PrintStyle ps) {
if (DEBUG) System.out.println("Writing Interleaved Values: " + prop.getName());
pw.println();
bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, new RestoreSpacesFilter()))
bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, new RestoreSpacesFilter(ps)))
.setNameSource(null)
.setLabelSource(null)
.setRangeBreakSource(null)
@ -945,10 +947,20 @@ public class MakeUnicodeFiles {
}
static class RestoreSpacesFilter extends UnicodeProperty.StringFilter {
public String remap(String original) {
String skipValue;
/**
* @param ps
*/
public RestoreSpacesFilter(PrintStyle ps) {
skipValue = ps.skipValue;
if (skipValue == null) skipValue = ps.skipUnassigned;
}
public String remap(String original) {
// ok, because doesn't change length
String mod = (String) Format.theFormat.hackMap.get(original);
if (mod != null) original = mod;
if (original.equals(skipValue)) return null;
return original.replace('_',' ');
}
}
@ -1147,185 +1159,7 @@ public class MakeUnicodeFiles {
}
}
/**
* Chain together several SymbolTables.
* @author Davis
*/
static class ChainedSymbolTable implements SymbolTable {
// TODO: add accessors?
private List symbolTables;
/**
* Each SymbolTable is each accessed in order by the other methods,
* so the first in the list is accessed first, etc.
* @param symbolTables
*/
ChainedSymbolTable(SymbolTable[] symbolTables) {
this.symbolTables = Arrays.asList(symbolTables);
}
public char[] lookup(String s) {
for (Iterator it = symbolTables.iterator(); it.hasNext();) {
SymbolTable st = (SymbolTable) it.next();
char[] result = st.lookup(s);
if (result != null) return result;
}
return null;
}
public UnicodeMatcher lookupMatcher(int ch) {
for (Iterator it = symbolTables.iterator(); it.hasNext();) {
SymbolTable st = (SymbolTable) it.next();
UnicodeMatcher result = st.lookupMatcher(ch);
if (result != null) return result;
}
return null;
}
// Warning: this depends on pos being left alone unless a string is returned!!
public String parseReference(String text, ParsePosition pos, int limit) {
for (Iterator it = symbolTables.iterator(); it.hasNext();) {
SymbolTable st = (SymbolTable) it.next();
String result = st.parseReference(text, pos, limit);
if (result != null) return result;
}
return null;
}
}
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
public static void testInvariants() throws IOException {
String[][] variables = new String[100][2];
int variableCount = 0;
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
out.write('\uFEFF'); // BOM
BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
BagFormatter bf = new BagFormatter();
ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"),
ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
ParsePosition pp = new ParsePosition(0);
int parseErrorCount = 0;
int testFailureCount = 0;
while (true) {
String line = in.readLine();
if (line == null) break;
if (line.startsWith("\uFEFF")) line = line.substring(1);
out.println(line);
line = line.trim();
int pos = line.indexOf('#');
if (pos >= 0) line = line.substring(0,pos).trim();
if (line.length() == 0) continue;
// fix all the variables
String oldLine = line;
line = Utility.replace(line, variables, variableCount);
// detect variables
if (line.startsWith("Let")) {
int x = line.indexOf('=');
variables[variableCount][0] = line.substring(3,x).trim();
variables[variableCount][1] = line.substring(x+1).trim();
variableCount++;
System.out.println("Added variable: <" + variables[variableCount-1][0] + "><"
+ variables[variableCount-1][1] + ">");
continue;
}
char relation = 0;
String rightSide = null;
String leftSide = null;
UnicodeSet leftSet = null;
UnicodeSet rightSet = null;
try {
pp.setIndex(0);
leftSet = new UnicodeSet(line, pp, st);
leftSide = line.substring(0,pp.getIndex());
eatWhitespace(line, pp);
relation = line.charAt(pp.getIndex());
if (!INVARIANT_RELATIONS.contains(relation)) {
throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
pp.getIndex());
}
pp.setIndex(pp.getIndex()+1); // skip char
eatWhitespace(line, pp);
int start = pp.getIndex();
rightSet = new UnicodeSet(line, pp, st);
rightSide = line.substring(start,pp.getIndex());
eatWhitespace(line, pp);
if (line.length() != pp.getIndex()) {
throw new ParseException("Extra characters at end", pp.getIndex());
}
} catch (ParseException e) {
out.println("PARSE ERROR:\t" + line.substring(0,e.getErrorOffset())
+ "<@>" + line.substring(e.getErrorOffset()));
out.println();
out.println("**** START Error Info ****");
out.println(e.getMessage());
out.println("**** END Error Info ****");
out.println();
parseErrorCount++;
continue;
} catch (IllegalArgumentException e) {
out.println("PARSE ERROR:\t" + line);
out.println();
out.println("**** START Error Info ****");
out.println(e.getMessage());
out.println("**** END Error Info ****");
out.println();
parseErrorCount++;
continue;
}
boolean ok = true;
switch(relation) {
case '=': ok = leftSet.equals(rightSet); break;
case '<': case '\u2282': ok = rightSet.containsAll(leftSet) && !leftSet.equals(rightSet); break;
case '>': case '\u2283': ok = leftSet.containsAll(rightSet) && !leftSet.equals(rightSet); break;
case '\u2264': case '\u2286': ok = rightSet.containsAll(leftSet); break;
case '\u2265': case '\u2287': ok = leftSet.containsAll(rightSet); break;
case '!': ok = leftSet.containsNone(rightSet); break;
case '?': ok = !leftSet.equals(rightSet)
&& !leftSet.containsAll(rightSet)
&& !rightSet.containsAll(leftSet)
&& !leftSet.containsNone(rightSet);
break;
default: throw new IllegalArgumentException("Internal Error");
}
if (ok) continue;
out.println();
out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH));
out.println("**** START Error Info ****");
bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
out.println("**** END Error Info ****");
out.println();
testFailureCount++;
}
out.println();
out.println("**** SUMMARY ****");
out.println();
out.println("ParseErrorCount=" + parseErrorCount);
out.println("TestFailureCount=" + testFailureCount);
out.close();
System.out.println("ParseErrorCount=" + parseErrorCount);
System.out.println("TestFailureCount=" + testFailureCount);
}
/**
* @param line
* @param pp
*/
private static void eatWhitespace(String line, ParsePosition pp) {
int cp = 0;
int i;
for (i = pp.getIndex(); i < line.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(line, i);
if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(cp)) {
break;
}
}
pp.setIndex(i);
}
/*
static class PropertySymbolTable implements SymbolTable {
static boolean DEBUG = false;

View file

@ -1,15 +1,16 @@
Generate:
DeltaVersion: 7
DeltaVersion: 8
CopyrightYear: 2005
File: GraphemeClusterBreakProperty
File: uax29/GraphemeBreakProperty
Property: Grapheme_Cluster_Break
Format: skipValue=Other
File: WordBreakProperty
File: uax29/WordBreakProperty
Property: Word_Break
Format: skipValue=Other
File: SentenceBreakProperty
File: uax29/SentenceBreakProperty
Property: Sentence_Break
Format: skipValue=Other
@ -20,9 +21,7 @@ Property: Block
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
# For more information on the comparison of property values,
# see UCD.html.
#
# Code points not explicitly listed in this file are given the value No_Block.
Format: valueList
Format: valueList skipUnassigned=No_Block
File: CaseFolding
Property: SPECIAL
@ -363,7 +362,7 @@ HackName: Supplemental_Arrows-B
HackName: Supplementary_Private_Use_Area-A
HackName: Supplementary_Private_Use_Area-B
HackName: Canadian-Aboriginal
HackName: Old-Italic
#HackName: Old-Italic
FinalComments
Note that PropertyAliases sorts by the long name, while PropertyValueAliases

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
* $Date: 2004/02/18 03:09:00 $
* $Revision: 1.15 $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -137,8 +137,71 @@ public class TestData implements UCD_Types {
log.close();
}
}
static PrintWriter log;
/**
 * Runs a fixed series of {@code checkProperty} comparisons and writes their
 * findings to "checklog.txt" in {@code UCD_Types.GEN_DIR}.
 *
 * The log writer is closed even when one of the checks throws, so that
 * whatever was written before the failure is not lost and the file handle
 * is released.
 *
 * @throws IOException if the log file cannot be opened
 */
public static void checkShaping() throws IOException {
    log = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "checklog.txt");
    try {
        checkProperty("Joining_Type", "Non_Joining", "Joining_Type", "Transparent");
        checkProperty("Joining_Group", "No_Joining_Group", "Joining_Type", "Transparent");
        checkProperty("Line_Break", "Unknown", "Line_Break", "Combining_Mark");
        checkProperty("East_Asian_Width", null, "Line_Break", "Combining_Mark");
        checkProperty("Bidi_Class", null, "Line_Break", "Combining_Mark");
        checkProperty("Script", null, "Script", new String[]{"Common", "Inherited"});
        checkProperty("General_Category", null, "General_Category", new String[]{"Spacing_Mark",
            "Enclosing_Mark", "Nonspacing_Mark"});
    } finally {
        log.close();
    }
}
public static class RegexMatcher implements UnicodeProperty.Matcher {
/**
 * Logs every code point whose property value differs from the value of some
 * code point in its NFD decomposition.
 *
 * For each examined code point that is not already NFD-normalized, each code
 * point of its NFD form is compared against the original's value of
 * {@code propertyName}. A "CONFLICT" header line is logged once per original
 * code point, followed by one detail line per differing decomposition code
 * point.
 *
 * @param propertyName name of the property whose values are compared
 * @param exclusion value of {@code propertyName} whose code points are not
 *        examined at all, or null to examine every code point
 * @param ignorePropertyName name of the property consulted for each
 *        decomposition code point to decide whether it is skipped
 * @param ignoreValueList either a single String or an array of values of
 *        {@code ignorePropertyName}; decomposition code points having one of
 *        these values are skipped
 */
private static void checkProperty(String propertyName, String exclusion, String ignorePropertyName, Object ignoreValueList) {
    final String unicodeVersion = "4.0.1";

    log.println();
    log.println(propertyName + " Check");
    log.println();

    Set ignoreValueSet = new HashSet();
    if (ignoreValueList instanceof String) ignoreValueSet.add(ignoreValueList);
    else ignoreValueSet.addAll(Arrays.asList((Object[])ignoreValueList));

    ToolUnicodePropertySource ups = ToolUnicodePropertySource.make(unicodeVersion);
    UnicodeProperty up = ups.getProperty(propertyName);
    UnicodeProperty ignProp = ups.getProperty(ignorePropertyName);
    UnicodeProperty name = ups.getProperty("Name");

    // Everything except the excluded value's code points is examined.
    UnicodeSet significant = (exclusion != null ? up.getSet(exclusion) : new UnicodeSet()).complement();
    UnicodeSetIterator it = new UnicodeSetIterator(significant);
    Normalizer n = new Normalizer(Normalizer.NFD, unicodeVersion);
    int counter = 0;
    while (it.next()) {
        String baseValue = up.getValue(it.codepoint);
        String nfd = n.normalize(it.codepoint);
        if (n.isNormalized(it.codepoint)) continue;
        //if (nfd.equals(it.getString())) continue;

        // Tracks whether the CONFLICT header for this code point has been
        // written. It must live outside the decomposition loop; otherwise it
        // is reset for every decomposition code point and the header (and
        // counter) repeat for each conflict.
        boolean shown = false;
        int cp;
        for (int i = 0; i < nfd.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(nfd, i);
            String newValue = up.getValue(cp);
            String possIgnValue = ignProp.getValue(cp);
            if (ignoreValueSet.contains(possIgnValue)) {
                //log.println("--- " + newValue + "\t" + Utility.hex(cp) + " " + name.getValue(cp));
                continue;
            }
            //log.println("*** " + newValue + "\t" + Utility.hex(cp) + " " + name.getValue(cp));
            if (!baseValue.equals(newValue)) {
                if (!shown) {
                    log.println((++counter) + "\tCONFLICT\t" + baseValue + "\t" + Utility.hex(it.codepoint) + " " + name.getValue(it.codepoint));
                    shown = true;
                }
                log.println("\tNFD(" + Utility.hex(it.codepoint) + ") contains:\t" + newValue + "\t" + Utility.hex(cp) + " " + name.getValue(cp));
            }
        }
    }
}
public static class RegexMatcher implements UnicodeProperty.Matcher {
private Matcher matcher;
public UnicodeProperty.Matcher set(String pattern) {

View file

@ -0,0 +1,203 @@
package com.ibm.text.UCD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.Utility;
public class TestUnicodeInvariants {
/**
 * Command-line entry point: runs the invariant tests.
 *
 * @param args ignored
 * @throws IOException propagated from {@code testInvariants}
 */
public static void main(String[] args) throws IOException {
    TestUnicodeInvariants.testInvariants();
}
/**
 * A SymbolTable that delegates to a sequence of other SymbolTables and
 * answers with the first non-null result.
 * @author Davis
 */
static class ChainedSymbolTable implements SymbolTable {
    // TODO: add accessors?
    private List symbolTables;

    /**
     * The tables are consulted in array order by every method, so the first
     * table in the array takes precedence over the later ones.
     * @param symbolTables the tables to chain, highest priority first
     */
    ChainedSymbolTable(SymbolTable[] symbolTables) {
        this.symbolTables = Arrays.asList(symbolTables);
    }

    /** Returns the table at the given position in the chain. */
    private SymbolTable tableAt(int index) {
        return (SymbolTable) symbolTables.get(index);
    }

    /** Returns the first non-null lookup result from the chain, or null. */
    public char[] lookup(String s) {
        for (int i = 0; i < symbolTables.size(); ++i) {
            char[] found = tableAt(i).lookup(s);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /** Returns the first non-null matcher from the chain, or null. */
    public UnicodeMatcher lookupMatcher(int ch) {
        for (int i = 0; i < symbolTables.size(); ++i) {
            UnicodeMatcher found = tableAt(i).lookupMatcher(ch);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Returns the first non-null parsed reference from the chain, or null.
     * Warning: this depends on pos being left alone by a table unless that
     * table returns a string!!
     */
    public String parseReference(String text, ParsePosition pos, int limit) {
        for (int i = 0; i < symbolTables.size(); ++i) {
            String found = tableAt(i).parseReference(text, pos, limit);
            if (found != null) {
                return found;
            }
        }
        return null;
    }
}
/** Characters accepted as the relation between the two sets of an invariant line. */
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");

/** Set to true to echo every "Let" variable definition to System.out. */
private static final boolean SHOW_VARIABLES = false;

/**
 * Reads "UnicodeInvariants.txt", checks every invariant in it, and writes an
 * annotated copy to "UnicodeInvariantResults.txt" in {@code UCD_Types.GEN_DIR}.
 *
 * Each input line is echoed to the output. After removing a trailing
 * '#' comment and substituting previously defined variables, a line is either
 * <ul>
 * <li>empty (skipped),</li>
 * <li>a variable definition of the form {@code Let name = value}, or</li>
 * <li>an invariant: a UnicodeSet pattern, one relation character from
 *     {@link #INVARIANT_RELATIONS}, and a second UnicodeSet pattern.</li>
 * </ul>
 * Lines that cannot be parsed are reported as parse errors; invariants that
 * do not hold are reported together with the set differences. Both counts
 * are written to the output file and to System.out.
 *
 * Both files are closed even if processing fails part-way.
 *
 * @throws IOException if a file cannot be opened, read, or closed
 */
public static void testInvariants() throws IOException {
    String[][] variables = new String[100][2];
    int variableCount = 0;
    int parseErrorCount = 0;
    int testFailureCount = 0;

    PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
    BufferedReader in = null;
    try {
        out.write('\uFEFF'); // BOM
        in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
        BagFormatter bf = new BagFormatter();
        ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
            ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"),
            ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
        ParsePosition pp = new ParsePosition(0);

        while (true) {
            String line = in.readLine();
            if (line == null) break;
            if (line.startsWith("\uFEFF")) line = line.substring(1);
            out.println(line);
            line = line.trim();
            int pos = line.indexOf('#');
            if (pos >= 0) line = line.substring(0,pos).trim();
            if (line.length() == 0) continue;

            // fix all the variables
            line = Utility.replace(line, variables, variableCount);

            // detect variables
            if (line.startsWith("Let")) {
                int x = line.indexOf('=');
                if (x < 0) {
                    // Without this check substring(3, -1) would abort the whole run.
                    reportParseError(out, line, "Variable definition is missing '='");
                    parseErrorCount++;
                    continue;
                }
                if (variableCount == variables.length) {
                    // Grow the table instead of overflowing it.
                    String[][] grown = new String[variables.length * 2][2];
                    System.arraycopy(variables, 0, grown, 0, variableCount);
                    variables = grown;
                }
                variables[variableCount][0] = line.substring(3,x).trim();
                variables[variableCount][1] = line.substring(x+1).trim();
                variableCount++;
                if (SHOW_VARIABLES) System.out.println("Added variable: <" + variables[variableCount-1][0] + "><"
                    + variables[variableCount-1][1] + ">");
                continue;
            }

            char relation = 0;
            String rightSide = null;
            String leftSide = null;
            UnicodeSet leftSet = null;
            UnicodeSet rightSet = null;
            try {
                pp.setIndex(0);
                leftSet = new UnicodeSet(line, pp, st);
                leftSide = line.substring(0,pp.getIndex());
                eatWhitespace(line, pp);
                if (pp.getIndex() >= line.length()) {
                    // The line ended after the left set; charAt would throw an
                    // exception that neither catch clause below handles.
                    throw new ParseException("Missing relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
                        pp.getIndex());
                }
                relation = line.charAt(pp.getIndex());
                if (!INVARIANT_RELATIONS.contains(relation)) {
                    throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
                        pp.getIndex());
                }
                pp.setIndex(pp.getIndex()+1); // skip char
                eatWhitespace(line, pp);
                int start = pp.getIndex();
                rightSet = new UnicodeSet(line, pp, st);
                rightSide = line.substring(start,pp.getIndex());
                eatWhitespace(line, pp);
                if (line.length() != pp.getIndex()) {
                    throw new ParseException("Extra characters at end", pp.getIndex());
                }
            } catch (ParseException e) {
                // Mark the error position inside the echoed line.
                reportParseError(out,
                    line.substring(0,e.getErrorOffset()) + "<@>" + line.substring(e.getErrorOffset()),
                    e.getMessage());
                parseErrorCount++;
                continue;
            } catch (IllegalArgumentException e) {
                reportParseError(out, line, e.getMessage());
                parseErrorCount++;
                continue;
            }

            if (relationHolds(relation, leftSet, rightSet)) continue;

            out.println();
            out.println("FALSE"); // the invariant did not hold
            out.println("**** START Error Info ****");
            bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
            out.println("**** END Error Info ****");
            out.println();
            testFailureCount++;
        }

        out.println();
        out.println("**** SUMMARY ****");
        out.println();
        out.println("ParseErrorCount=" + parseErrorCount);
        out.println("TestFailureCount=" + testFailureCount);
    } finally {
        out.close();
        if (in != null) in.close();
    }
    System.out.println("ParseErrorCount=" + parseErrorCount);
    System.out.println("TestFailureCount=" + testFailureCount);
}

/** Writes one parse-error report (the offending line plus the message) to out. */
private static void reportParseError(PrintWriter out, String shownLine, String message) {
    out.println("PARSE ERROR:\t" + shownLine);
    out.println();
    out.println("**** START Error Info ****");
    out.println(message);
    out.println("**** END Error Info ****");
    out.println();
}

/**
 * Evaluates one relation between two sets.
 * NOTE(review): '~' is accepted by INVARIANT_RELATIONS but has no case here,
 * so it still ends in the "Internal Error" exception; its intended meaning
 * is not visible in this file.
 */
private static boolean relationHolds(char relation, UnicodeSet leftSet, UnicodeSet rightSet) {
    switch(relation) {
    case '=': return leftSet.equals(rightSet);
    case '<': case '\u2282': return rightSet.containsAll(leftSet) && !leftSet.equals(rightSet);
    case '>': case '\u2283': return leftSet.containsAll(rightSet) && !leftSet.equals(rightSet);
    case '\u2264': case '\u2286': return rightSet.containsAll(leftSet);
    case '\u2265': case '\u2287': return leftSet.containsAll(rightSet);
    case '!': return leftSet.containsNone(rightSet);
    case '?': return !leftSet.equals(rightSet)
        && !leftSet.containsAll(rightSet)
        && !rightSet.containsAll(leftSet)
        && !leftSet.containsNone(rightSet);
    default: throw new IllegalArgumentException("Internal Error");
    }
}
/**
 * Advances pp past any run of white space (as defined by
 * {@code UCharacter.isUWhiteSpace}) that starts at its current index.
 * The index is left at the first non-white-space code point, or at the end
 * of the line if only white space remains.
 * @param line the text being scanned
 * @param pp position to advance; updated in place
 */
private static void eatWhitespace(String line, ParsePosition pp) {
    int index = pp.getIndex();
    final int end = line.length();
    while (index < end) {
        int codePoint = UTF16.charAt(line, index);
        if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(codePoint)) {
            break;
        }
        // Step by whole code points so supplementary characters are not split.
        index += UTF16.getCharCount(codePoint);
    }
    pp.setIndex(index);
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2004/11/13 23:10:32 $
* $Revision: 1.36 $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.37 $
*
*******************************************************************************
*/
@ -1094,6 +1094,7 @@ public final class UCD implements UCD_Types {
if (ch <= 0x4DB5) return 0x3400;
if (ch <= 0x4E00) return ch; // CJK Ideograph
if (ch <= 0x9FA5) return 0x4E00;
if (ch <= 0x9FBB && rCompositeVersion >= 0x40100) return 0x4E00;
if (ch <= 0xAC00) return ch; // Hangul Syllable
if (ch <= 0xD7A3) return 0xAC00;
if (ch <= 0xD800) return ch; // Non Private Use High Surrogate

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2004/11/13 23:10:32 $
* $Revision: 1.29 $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.30 $
*
*******************************************************************************
*/
@ -318,7 +318,7 @@ final class UCD_Names implements UCD_Types {
"BOPOMOFO", // BOPOMOFO
"HAN", // HAN
"YI", // YI
"OLD-ITALIC",
"OLD_ITALIC",
"GOTHIC",
"DESERET",
"INHERITED", // nonspacing marks
@ -335,6 +335,14 @@ final class UCD_Names implements UCD_Types {
"CYPRIOT",
"BRAILLE",
"KATAKANA_OR_HIRAGANA",
"BUGINESE",
"COPTIC",
"NEW_TAI_LUE",
"GLAGOLITIC",
"TIFINAGH",
"SYLOTI_NAGRI",
"OLD_PERSIAN",
"KHAROSHTHI",
};
@ -403,6 +411,14 @@ final class UCD_Names implements UCD_Types {
"Cprt",
"Brai",
"Hrkt",
"Bugi",
"Copt",
"Talu",
"Glag",
"Tfng",
"Sylo",
"Xpeo",
"Khar",
};

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2004/11/13 23:10:32 $
* $Revision: 1.29 $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.30 $
*
*******************************************************************************
*/
@ -390,7 +390,16 @@ public interface UCD_Types {
CYPRIOT = 52,
BRAILLE = 53,
KATAKANA_OR_HIRAGANA = 54,
LIMIT_SCRIPT = 55;
BUGINESE = 55,
COPTIC = 56,
NEW_TAI_LUE = 57,
GLAGOLITIC = 58,
TIFINAGH = 59,
SYLOTI_NAGRI = 60,
OLD_PERSIAN = 61,
KHAROSHTHI = 62,
LIMIT_SCRIPT = 63;
static final int
UNKNOWN = 0,

View file

@ -41,7 +41,7 @@
#$East_Asian_Width:Neutral ? $GC:Uppercase_Letter
$GC:Zs ? $Name:«.*SPACE.*»
[$script:greek&$gc:«.*letter.*»] = [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126]
# [$script:greek&$gc:«.*letter.*»] = [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126]
# Examples of parsing errors
@ -56,6 +56,10 @@ $Line_Break:Unknown = [$General_Category:Unassigned $GeneralCategory:PrivateUse]
$LB:OP = $GC:Ps
$General_Category:Decimal_Number = $Numeric_Type:Decimal
$Whitespace ⊃ [$GC:Zs $GC:Zp $GC:Zl]
$Dash ⊃ [$GC:Pd]
$Script:Common ! [$GC:Mn $GC:Me]
$Script:Common ! [$Alphabetic - $Math]
$Alphabetic ⊃ [$Uppercase $Lowercase]
# Comparisons across versions
@ -71,7 +75,7 @@ $Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
$Lowercase = [$GC:Ll $Other_Lowercase]
$Uppercase = [$GC:Lu $Other_Uppercase]
$ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc] $Other_ID_Continue
$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc $Other_ID_Continue]
$Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
$Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
$Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]
@ -87,8 +91,8 @@ $Other_Default_Ignorable_Code_Point = [$Default_Ignorable_Code_Point - [[$GC:Cf
$Other_Grapheme_Extend = [$Grapheme_Extend - [$GC:Me $GC:Mn]]
# Testing
$script:greek = $×script:greek
$gc:lm = $script:inherited
# $script:greek = $×script:greek
# $gc:lm = $script:inherited
# ===========================
@ -110,7 +114,7 @@ Let $ZWJ = [\u200D] # [\N{ZERO WIDTH JOINER}]
Let $gcAllPunctuation = [$gc:Open_Punctuation $gc:Close_Punctuation $gc:Dash_Punctuation $gc:Connector_Punctuation $gc:Other_Punctuation $gc:Initial_Punctuation $gc:Final_Punctuation]
Let $gcAllSymbols = [$gc:Currency_Symbol $gc:Modifier_Symbol $gc:Math_Symbol $gc:Other_Symbol]
Let $gcAllMarks = [$gc:Nonspacing_Mark $gc:Enclosing_Mark $gc:Spacing_Mark]
Let $strange = [\u24B6-\u24CF]
Let $strange = [\u24B6-\u24E9]
# Unassigned, Control, Format, Private_Use, Surrogate,
# Uppercase_Letter, Lowercase_Letter, Titlecase_Letter, Modifier_Letter, Other_Letter,
@ -122,9 +126,9 @@ Let $strange = [\u24B6-\u24CF]
# UTS Rules
Let $alpha = [$Alphabetic $Lowercase] # $Uppercase $ZWNJ $ZWJ]
Let $alpha = [$Alphabetic $strange] # $Uppercase $ZWNJ $ZWJ]
Let $lower = $Lowercase
Let $upper = [$Uppercase - $strange]
Let $upper = [$Uppercase]
Let $punct = [$gcAllPunctuation $gcAllSymbols - $alpha]
Let $digit = $gc:Decimal_Number
Let $xdigit = [$gc:Decimal_Number $Hex_Digit] # in both!
@ -132,7 +136,7 @@ Let $alnum = [$alpha $digit]
Let $space = $Whitespace
Let $blank = [$Whitespace - [$LF $VTAB $FF $CR $NEL $gc:Line_Separator $gc:Paragraph_Separator]]
Let $cntrl = $gc:Control
Let $graph = [^$space $gc:Control $gc:Format $gc:Surrogate $gc:Unassigned] # $ZWNJ $ZWJ]
Let $graph = [^$space $gc:Control $gc:Surrogate $gc:Unassigned] # $ZWNJ $ZWJ]
Let $print = [$graph $blank - $cntrl]
Let $word = [$alpha $gcAllMarks $digit $gc:Connector_Punctuation]

View file

@ -29,7 +29,7 @@ public class UnicodeDataFile {
result.out.println(generateDateLine());
result.out.println("#");
result.out.println("# Unicode Character Database");
result.out.println("# Copyright (c) 1991-2004 Unicode, Inc.");
result.out.println("# Copyright (c) 1991-" + Default.getYear() + " Unicode, Inc.");
result.out.println(
"# For terms of use, see http://www.unicode.org/terms_of_use.html");
result.out.println("# For documentation, see UCD.html");