mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
minor additions to change PropertyAlias.txt
X-SVN-Rev: 6812
This commit is contained in:
parent
9565246f34
commit
3405bab3d1
6 changed files with 190 additions and 65 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2001/10/31 00:02:27 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2001/11/13 02:31:55 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -424,6 +424,7 @@ public class GenerateData implements UCD_Types {
|
|||
Set accumulation = new TreeSet(java.text.Collator.getInstance());
|
||||
String spacing;
|
||||
|
||||
/*
|
||||
BufferedReader blocks = Utility.openUnicodeFile("Blocks", ucd.getVersion());
|
||||
String[] parts = new String[10];
|
||||
while (true) {
|
||||
|
@ -442,6 +443,7 @@ public class GenerateData implements UCD_Types {
|
|||
checkDuplicate(duplicates, accumulation, value, "Block=" + value);
|
||||
}
|
||||
blocks.close();
|
||||
*/
|
||||
|
||||
for (int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) {
|
||||
propAbb = fixGaps(UCD_Names.NON_ENUMERATED[k][0], false);
|
||||
|
@ -456,15 +458,19 @@ public class GenerateData implements UCD_Types {
|
|||
valueAbb = fixGaps(UCD_Names.SUPER_CATEGORIES[k][0], false);
|
||||
value = fixGaps(UCD_Names.SUPER_CATEGORIES[k][1], true);
|
||||
spacing = Utility.repeat(" ", 10-valueAbb.length());
|
||||
sorted.add("gc; " + valueAbb + spacing + "; " + value);
|
||||
String baseLine = "gc; " + valueAbb + spacing + "; " + value;
|
||||
spacing = Utility.repeat(" ", 50-baseLine.length());
|
||||
sorted.add(baseLine + spacing + "# " + UCD_Names.SUPER_CATEGORIES[k][2]);
|
||||
checkDuplicate(duplicates, accumulation, value, "General_Category=" + value);
|
||||
if (!value.equals(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, "General_Category=" + value);
|
||||
}
|
||||
|
||||
/*
|
||||
sorted.add("xx; T ; True");
|
||||
checkDuplicate(duplicates, accumulation, "T", "xx=True");
|
||||
sorted.add("xx; F ; False");
|
||||
checkDuplicate(duplicates, accumulation, "F", "xx=False");
|
||||
*/
|
||||
sorted.add("qc; Y ; Yes");
|
||||
checkDuplicate(duplicates, accumulation, "Y", "qc=Yes");
|
||||
sorted.add("qc; N ; No");
|
||||
|
@ -507,6 +513,10 @@ public class GenerateData implements UCD_Types {
|
|||
if (value.startsWith("Fixed_")) { continue; }
|
||||
}
|
||||
|
||||
if (type == JOINING_GROUP) {
|
||||
valueAbb = "n/a";
|
||||
}
|
||||
|
||||
/*
|
||||
String elide = "";
|
||||
if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{"
|
||||
|
@ -546,7 +556,18 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# Generated: " + new Date() + ", MD");
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
Utility.print(log, sorted, "\r\n", new MyBreaker());
|
||||
Utility.print(log, sorted, "\r\n", new MyBreaker(true));
|
||||
log.close();
|
||||
|
||||
log = Utility.openPrintWriter("PropertyValueAliases-" + ucd.getVersion() + "dX.txt");
|
||||
Utility.appendFile("PropertyValueAliasHeader.txt", false, log);
|
||||
log.println("# Generated: " + new Date() + ", MD");
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
Utility.print(log, sorted, "\r\n", new MyBreaker(false));
|
||||
log.close();
|
||||
|
||||
log = Utility.openPrintWriter("PropertyAliasSummary-" + ucd.getVersion() + "dX.txt");
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
|
@ -555,20 +576,43 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# Note: no two property names can be the same,");
|
||||
log.println("# nor can two property value names for the same property be the same.");
|
||||
log.println();
|
||||
Utility.print(log, accumulation, "\r\n", new MyBreaker());
|
||||
Utility.print(log, accumulation, "\r\n", new MyBreaker(false));
|
||||
log.println();
|
||||
log.close();
|
||||
}
|
||||
|
||||
static class MyBreaker implements Utility.Breaker {
|
||||
boolean status;
|
||||
|
||||
public MyBreaker(boolean status) {
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
public boolean filter(Object current) {
|
||||
String c = current.toString();
|
||||
if (c.startsWith("AA") || c.startsWith("BB") || c.startsWith("ZZ")) return status;
|
||||
return !status;
|
||||
}
|
||||
|
||||
public String get(Object current, Object old) {
|
||||
if (old == null) return "";
|
||||
if (old == null) {
|
||||
old = " ";
|
||||
}
|
||||
String c = current.toString();
|
||||
String o = old.toString();
|
||||
if (c.length() >= 2 && o.length() >= 0 && !c.substring(0,2).equals(o.substring(0,2))) {
|
||||
return "\r\n";
|
||||
String sep = "";
|
||||
if (!c.substring(0,2).equals(o.substring(0,2))) {
|
||||
sep = "\r\n";
|
||||
if (status) {
|
||||
if (c.startsWith("AA")) sep = sep + HORIZONTAL_LINE + sep + "# Non-enumerated Properties" + sep + HORIZONTAL_LINE + sep;
|
||||
if (c.startsWith("BB")) sep = sep + HORIZONTAL_LINE + sep + "# Enumerated Non-Binary Properties" + sep + HORIZONTAL_LINE + sep;
|
||||
if (c.startsWith("ZZ")) sep = sep + HORIZONTAL_LINE + sep + "# Binary Properties" + sep + HORIZONTAL_LINE + sep;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
if (status) {
|
||||
c = c.substring(4);
|
||||
}
|
||||
return sep + c;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2001/10/25 20:33:46 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2001/11/13 02:31:55 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -41,7 +41,8 @@ public final class Main {
|
|||
|
||||
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
|
||||
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
|
||||
else if (arg.equalsIgnoreCase("checkCase2")) VerifyUCD.checkCase2();
|
||||
else if (arg.equalsIgnoreCase("checkCaseLong")) VerifyUCD.checkCase2(true);
|
||||
else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
|
||||
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
|
||||
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
|
||||
else if (arg.equalsIgnoreCase("idn")) VerifyUCD.VerifyIDN();
|
||||
|
|
|
@ -1,46 +1,30 @@
|
|||
# DRAFT
|
||||
# PropertyAliases-3.2.0.txt
|
||||
#
|
||||
# This file contains aliases for properties and property values used in the UCD.
|
||||
# This file contains aliases for properties used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
# The names are not normative, except where they correspond to normative values
|
||||
# in the UCD.
|
||||
# The names are not normative, except where they correspond to normative
|
||||
# properties in the UCD. For information on which properties are normative,
|
||||
# see UnicodeCharacterDatabase.html.
|
||||
#
|
||||
# The names may be translated in appropriate environments, and additional
|
||||
# aliases may be useful.
|
||||
#
|
||||
# FORMAT
|
||||
#
|
||||
# Each line has three fields, separated by semicolons.
|
||||
# Each line has two fields, separated by semicolons.
|
||||
#
|
||||
# First Field: Where the first field is AA, BB, or ZZ, then the line describes a property name:
|
||||
# First Field: The first field is an abbreviated name for the property.
|
||||
#
|
||||
# AA - non-enumerated properties
|
||||
# BB - enumerated, non-binary properties
|
||||
# ZZ - binary properties and quick-check properties
|
||||
#
|
||||
# (The values AA, BB, and ZZ are arbitrary -- they were simply chosen to distinguish
|
||||
# the different types.)
|
||||
#
|
||||
# Where the first field is not one of the above, the line describes a
|
||||
# property value name. The first field describes the property for which that
|
||||
# property value name is used. There are two special properties:
|
||||
#
|
||||
# xx stands for any binary property
|
||||
# qc stands for any quick-check property
|
||||
#
|
||||
# Second Field: The second field is an abbreviated name.
|
||||
# If there is no abbreviated name available, the field is marked with "n/a".
|
||||
#
|
||||
# Third Field: The third field is a long name.
|
||||
# Second Field: The second field is a long name.
|
||||
#
|
||||
# With loose matching of property names, the case distinctions, whitespace,
|
||||
# and '_' are ignored.
|
||||
#
|
||||
# NOTE: Currently there is at most one abbreviated name and one long name for
|
||||
# each property and property value. However, in the future additional aliases
|
||||
# may be added. In such a case, the first line for the property or property value
|
||||
# each property. However, in the future additional aliases
|
||||
# may be added. In such a case, the first line for the property
|
||||
# would have the preferred alias for output.
|
||||
#
|
||||
# NOTE: The property value names are NOT unique across properties, especially
|
||||
|
@ -53,7 +37,5 @@
|
|||
# cc means Combining_Class property, and
|
||||
# cc means the General_Category property value Control (cc)
|
||||
#
|
||||
# Comments at the end of the file show cases of non-unique names.
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
# For more information, see UTR #24: Regular Expression Guidelines
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2001/10/31 00:02:27 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2001/11/13 02:31:55 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -32,7 +32,10 @@ final class UCD_Names implements UCD_Types {
|
|||
{"stc", "Simple_Titlecase_Mapping"},
|
||||
{"sfc", "Simple_Case_Folding"},
|
||||
{"scc", "Special_Case_Condition"},
|
||||
{"blk", "Block"}
|
||||
{"blk", "Block"},
|
||||
{"na1", "Unicode_1_Name"},
|
||||
{"isc", "ISO_Comment"},
|
||||
{"age", "Age"},
|
||||
};
|
||||
|
||||
static final String[] UNIFIED_PROPERTIES = {
|
||||
|
@ -406,13 +409,14 @@ final class UCD_Names implements UCD_Types {
|
|||
};
|
||||
|
||||
static final String[][] SUPER_CATEGORIES = {
|
||||
{"L", "Letter"},
|
||||
{"M", "Mark"},
|
||||
{"N", "Number"},
|
||||
{"Z", "Separator"},
|
||||
{"C", "Other"},
|
||||
{"S", "Symbol"},
|
||||
{"P", "Punctuation"},
|
||||
{"L", "Letter", "Ll | Lm | Lo | Lt | Lu"},
|
||||
{"M", "Mark", "Mc | Me | Mn"},
|
||||
{"N", "Number", "Nd | Nl | No"},
|
||||
{"Z", "Separator", "Zl | Zp | Zs"},
|
||||
{"C", "Other", "Cc | Cf | Cn | Co | Cs"},
|
||||
{"S", "Symbol", "Sc | Sk | Sm | So"},
|
||||
{"P", "Punctuation", "Pc | Pd | Pe | Pf | Pi | Po | Ps"},
|
||||
{"Lc", "Cased Letter", "Ll | Lt | Lu"},
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||
* $Date: 2001/10/25 20:33:46 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2001/11/13 02:31:55 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -141,43 +141,109 @@ public class VerifyUCD implements UCD_Types {
|
|||
log.close();
|
||||
}
|
||||
|
||||
public static void checkCase2() throws IOException {
|
||||
public static void checkCase2(boolean longForm) throws IOException {
|
||||
Utility.fixDot();
|
||||
System.out.println("checkCase");
|
||||
ucd = UCD.make(Main.ucdVersion);
|
||||
initNormalizers();
|
||||
System.out.println(ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE));
|
||||
|
||||
/*String tx1 = "\u0391\u0342\u0345";
|
||||
String ux1 = "\u0391\u0342\u0399";
|
||||
String ctx1 = nfc.normalize(tx1);
|
||||
String ctx2 = nfc.normalize(ux1); // wrong??
|
||||
|
||||
//System.out.println(ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE));
|
||||
*/
|
||||
|
||||
|
||||
String fileName = "CaseNormalizationDifferences.txt";
|
||||
PrintWriter log = Utility.openPrintWriter(fileName);
|
||||
|
||||
log.println("Differences between case(normalize(cp)) and normalize(case(cp))");
|
||||
log.println("u, l, t - upper, lower, title");
|
||||
log.println("c, d - nfc, nfd");
|
||||
|
||||
//Utility.DOTMASK = 0x7F;
|
||||
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
if (!ucd.isRepresented(cp) || ucd.isPUA(cp)) continue;
|
||||
if (cp == '\u3371') {
|
||||
if (cp == '\u0130') {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
String x = UTF32.valueOf32(cp);
|
||||
String dx = nfd.normalize(cp);
|
||||
String cx = nfc.normalize(cp);
|
||||
|
||||
String ux = ucd.getCase(x, FULL, UPPER);
|
||||
String lx = ucd.getCase(x, FULL, LOWER);
|
||||
String tx = ucd.getCase(x, FULL, TITLE);
|
||||
|
||||
String dux = nfd.normalize(ux);
|
||||
String dlx = nfd.normalize(lx);
|
||||
String dtx = nfd.normalize(tx);
|
||||
|
||||
if (x.equals(dx) && dx.equals(cx) && cx.equals(ux) && ux.equals(lx) && lx.equals(tx)) continue;
|
||||
|
||||
String cux = nfc.normalize(ux);
|
||||
String clx = nfc.normalize(lx);
|
||||
String ctx = nfc.normalize(tx);
|
||||
|
||||
if (x.equals(cx)) {
|
||||
boolean needBreak = false;
|
||||
if (!clx.equals(lx)) needBreak = true;
|
||||
if (!ctx.equals(tx)) needBreak = true;
|
||||
if (!cux.equals(ux)) needBreak = true;
|
||||
|
||||
if (needBreak) {
|
||||
log.println("# Was not NFC:");
|
||||
log.println(
|
||||
"## " + Utility.hex(x) + "; "
|
||||
+ Utility.hex(lx) + "; "
|
||||
+ Utility.hex(tx) + "; "
|
||||
+ Utility.hex(ux) + "; # "
|
||||
+ ucd.getName(x));
|
||||
log.println("# should be:");
|
||||
log.println(
|
||||
Utility.hex(x) + "; "
|
||||
+ Utility.hex(clx) + "; "
|
||||
+ Utility.hex(ctx) + "; "
|
||||
+ Utility.hex(cux) + "; # "
|
||||
+ ucd.getName(x));
|
||||
log.println();
|
||||
}
|
||||
}
|
||||
|
||||
String dux = nfd.normalize(ux);
|
||||
String dlx = nfd.normalize(lx);
|
||||
String dtx = nfd.normalize(tx);
|
||||
|
||||
|
||||
|
||||
String startdx = getMarks(dx, false);
|
||||
String enddx = getMarks(dx, true);
|
||||
|
||||
String dx = nfd.normalize(cp);
|
||||
String cx = nfc.normalize(cp);
|
||||
String startdux = getMarks(dux, false);
|
||||
String enddux = getMarks(dux, true);
|
||||
|
||||
String startdtx = getMarks(dtx, false);
|
||||
String enddtx = getMarks(dtx, true);
|
||||
|
||||
String startdlx = getMarks(dlx, false);
|
||||
String enddlx = getMarks(dlx, true);
|
||||
|
||||
// If the new marks don't occur in the old decomposition, we got a problem!
|
||||
|
||||
if (!startdx.startsWith(startdux) || !startdx.startsWith(startdtx) || !startdx.startsWith(startdlx)
|
||||
|| !enddx.endsWith(enddux) || !enddx.endsWith(enddtx) || !enddx.endsWith(enddlx)) {
|
||||
log.println("Combining Class Difference for " + ucd.getCodeAndName(x));
|
||||
log.println("x: " + ucd.getCodeAndName(dx) + ", " + Utility.hex(startdx) + ", " + Utility.hex(enddx));
|
||||
log.println("ux: " + ucd.getCodeAndName(dux) + ", " + Utility.hex(startdux) + ", " + Utility.hex(enddux));
|
||||
log.println("tx: " + ucd.getCodeAndName(dtx) + ", " + Utility.hex(startdtx) + ", " + Utility.hex(enddtx));
|
||||
log.println("lx: " + ucd.getCodeAndName(dlx) + ", " + Utility.hex(startdlx) + ", " + Utility.hex(enddlx));
|
||||
log.println();
|
||||
}
|
||||
|
||||
|
||||
if (!longForm) continue;
|
||||
|
||||
String udx = ucd.getCase(dx, FULL, UPPER);
|
||||
String ldx = ucd.getCase(dx, FULL, LOWER);
|
||||
String tdx = ucd.getCase(dx, FULL, TITLE);
|
||||
|
@ -286,6 +352,28 @@ public class VerifyUCD implements UCD_Types {
|
|||
|
||||
log.close();
|
||||
}
|
||||
|
||||
public static String getMarks(String s, boolean doEnd) {
|
||||
int cp;
|
||||
if (!doEnd) {
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
int cc = ucd.getCombiningClass(cp);
|
||||
if (cc == 0) {
|
||||
return s.substring(0, i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = s.length(); i > 0; i -= UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i-1); // will go 2 before if necessary
|
||||
int cc = ucd.getCombiningClass(cp);
|
||||
if (cc == 0) {
|
||||
return s.substring(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
static final String names[] = {"LOWER", "TITLE", "UPPER", "(UNC)", "MIXED"};
|
||||
static final String lowerNames[] = {"", "Other_Lower"};
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2001/10/31 00:02:54 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2001/11/13 02:31:34 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -30,9 +30,11 @@ public final class Utility { // COMMON UTILITIES
|
|||
}
|
||||
|
||||
private static boolean needCRLF = false;
|
||||
|
||||
public static int DOTMASK = 0x7FF;
|
||||
|
||||
public static void dot(int i) {
|
||||
if ((i % 0x7FF) == 0) {
|
||||
if ((i % DOTMASK) == 0) {
|
||||
needCRLF = true;
|
||||
System.out.print('.');
|
||||
}
|
||||
|
@ -458,6 +460,7 @@ public final class Utility { // COMMON UTILITIES
|
|||
|
||||
/**
 * Callback pair consulted for each element of a collection: one method
 * decides whether the element is kept, the other supplies a string for it.
 */
public interface Breaker {

    /**
     * Produces the text associated with {@code current}.
     *
     * @param current the element being processed
     * @param old the element handled before {@code current}; callers may
     *     pass {@code null} when there is none -- TODO confirm against callers
     * @return the string for this element
     */
    String get(Object current, Object old);

    /**
     * Decides whether an element is retained.
     *
     * @param current the element being considered
     * @return {@code true} to keep the element
     */
    boolean filter(Object current);
}
|
||||
|
||||
public static void print(PrintWriter pw, Collection c, String separator, Breaker b) {
|
||||
|
@ -466,14 +469,17 @@ public final class Utility { // COMMON UTILITIES
|
|||
Object last = null;
|
||||
while (it.hasNext()) {
|
||||
Object obj = it.next();
|
||||
if (b != null && !b.filter(obj)) continue;
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
pw.print(separator);
|
||||
}
|
||||
else pw.print(separator);
|
||||
if (b != null) {
|
||||
pw.print(b.get(obj, last));
|
||||
} else {
|
||||
pw.print(obj);
|
||||
}
|
||||
pw.print(obj);
|
||||
last = obj;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue