various changes for new properties

X-SVN-Rev: 14494
This commit is contained in:
Mark Davis 2004-02-12 08:23:19 +00:00
parent 73529e8ad9
commit 459c96f0b1
22 changed files with 907 additions and 434 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2004/02/07 01:01:12 $
* $Revision: 1.19 $
* $Date: 2004/02/12 08:23:19 $
* $Revision: 1.20 $
*
*******************************************************************************
*/
@ -1033,19 +1033,28 @@ public class WriteCharts implements UCD_Types {
int[] starts = new int[names.length];
int[] ends = new int[names.length];
UCD.BlockData blockData = new UCD.BlockData();
Iterator blockIterator = Default.ucd().getBlockNames().iterator();
//UCD.BlockData blockData = new UCD.BlockData();
int counter = 0;
int blockId = 0;
while (Default.ucd().getBlockData(blockId++, blockData)) {
names[counter] = blockData.name;
starts[counter] = blockData.start;
ends[counter] = blockData.end;
String currentName;
//int blockId = 0;
while (blockIterator.hasNext()) {
//while (Default.ucd().getBlockData(blockId++, blockData)) {
names[counter] = currentName = (String) blockIterator.next();
if (currentName.equals("No_Block")) continue;
UnicodeSet s = Default.ucd().getBlockSet(currentName, null);
if (s.getRangeCount() != 1) {
throw new IllegalArgumentException("Failure with block set: " + currentName);
}
starts[counter] = s.getRangeStart(0);
ends[counter] = s.getRangeEnd(0);
//System.out.println(names[counter] + ", " + values[counter]);
++counter;
// HACK
if (blockData.name.equals("Tags")) {
if (currentName.equals("Tags")) {
names[counter] = "<i>reserved default ignorable</i>";
starts[counter] = 0xE0080;
ends[counter] = 0xE0FFF;

View file

@ -0,0 +1,7 @@
# Correlated with Unicode 4.0
# Note: The casing of block names is not normative.
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
#
# Code points not explicitly listed in this file are given the value No_Block.
#
# Start Code..End Code; Block Name

View file

@ -4,9 +4,13 @@ import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
@ -20,7 +24,6 @@ import com.ibm.text.utility.Utility;
public class CheckICU {
static final BagFormatter bf = new BagFormatter();
static final BagFormatter bf2 = new BagFormatter();
public static void main(String[] args) throws IOException {
System.out.println("Start");
@ -45,25 +48,24 @@ public class CheckICU {
return p.getMaxWidth(v);
}
}
public static void test() throws IOException {
//generateFile("4.0.0", "DerivedCombiningClass");
//generateFile("4.0.0", "DerivedCoreProperties");
if (true) return;
checkUCD();
itemFailures = new UnicodeSet();
icuFactory = ICUPropertyFactory.make();
toolFactory = ToolUnicodePropertySource.make("4.0.0");
String[] quickList = {
"Block",
"Math",
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
//"Numeric_Value"
};
for (int i = 0; i < quickList.length; ++i) {
//testProperty(quickList[i], -1);
bf2.setValueSource(new ReplaceLabel(toolFactory.getProperty(quickList[i])))
.setLabelSource(null)
.setNameSource(null)
.setShowCount(false);
bf2.showSetNames(bf2.CONSOLE, quickList[i], new UnicodeSet(0,0x10FFFF));
testProperty(quickList[i], -1);
}
if (quickList.length > 0) return;
@ -97,11 +99,16 @@ public class CheckICU {
if (nfc.isLeading(i)) leading.add(i);
}
PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt");
bf.showSetNames(pw, "+Trailing+Starter", new UnicodeSet(trailing).retainAll(starter));
bf.showSetNames(pw, "+Trailing-Starter", new UnicodeSet(trailing).removeAll(starter));
bf.showSetNames(pw, "-Trailing-Starter", new UnicodeSet(trailing).complement().removeAll(starter));
bf.showSetNames(pw, "+Trailing+Leading", new UnicodeSet(trailing).retainAll(leading));
bf.showSetNames(pw, "+Trailing-Leading", new UnicodeSet(trailing).removeAll(leading));
pw.println("+Trailing+Starter");
bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(starter));
pw.println("+Trailing-Starter");
bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(starter));
pw.println("-Trailing-Starter");
bf.showSetNames(pw, new UnicodeSet(trailing).complement().removeAll(starter));
pw.println("+Trailing+Leading");
bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(leading));
pw.println("+Trailing-Leading");
bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(leading));
pw.close();
}
/*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.4 $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.5 $
*
*******************************************************************************
*/
@ -92,30 +92,26 @@ public class CompareProperties implements UCD_Types {
public final static class UnicodeSetComparator implements Comparator {
/**
* Compares two UnicodeSets, producing a transitive ordering.
* @return -1 if first is smaller (in size) than second,
* 1 if first is greater (in size) than second,
* Otherwise (since they are equal in size)
* returns a comparison based on the first range that differs.
* The ordering is based on the first codepoint that differs between them.
* @return -1 if first set contains the first different code point
* 1 if the second set does.
* 0 if there is no difference.
* If compareTo were added to UnicodeSet, this can be optimized to use list[i].
* @author Davis
*
*/
public int compare(Object o1, Object o2) {
UnicodeSet bs1 = (UnicodeSet) o1;
UnicodeSet bs2 = (UnicodeSet) o2;
if (bs1.size() < bs2.size()) return -1;
if (bs1.size() > bs2.size()) return 1;
UnicodeSetIterator it1 = new UnicodeSetIterator(bs1);
UnicodeSetIterator it2 = new UnicodeSetIterator(bs2);
// Note: because they are the same size, and we stop if any ranges
// are different, it is safe to test for both at the same time
while (it1.nextRange() && it2.nextRange()) {
if (it1.codepoint < it2.codepoint) return -1;
UnicodeSetIterator it1 = new UnicodeSetIterator((UnicodeSet) o1);
UnicodeSetIterator it2 = new UnicodeSetIterator((UnicodeSet) o2);
while (it1.nextRange()) {
if (!it2.nextRange()) return -1; // first has range while second exhausted
if (it1.codepoint < it2.codepoint) return -1; // first has code point not in second
if (it1.codepoint > it2.codepoint) return 1;
if (it1.codepointEnd < it2.codepointEnd) return -1;
if (it1.codepointEnd > it2.codepointEnd) return 1;
if (it1.codepointEnd < it2.codepointEnd) return 1; // second has codepoint not in first
if (it1.codepointEnd > it2.codepointEnd) return -1;
}
return 0;
if (it2.nextRange()) return 1; // second has range while first is exhausted
return 0; // otherwise we ran out in both of them, so equal
}
}
@ -210,7 +206,7 @@ public class CompareProperties implements UCD_Types {
public void printPartition() throws IOException {
System.out.println("Set Size: " + map.size());
PrintWriter output = Utility.openPrintWriter("Partition"
+ GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS);
+ UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS);
Iterator it = map.keySet().iterator();
while (it.hasNext()) {
@ -234,7 +230,7 @@ public class CompareProperties implements UCD_Types {
public void printStatistics() throws IOException {
System.out.println("Set Size: " + map.size());
PrintWriter output = Utility.openPrintWriter("Statistics"
+ GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS);
+ UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS);
System.out.println("Finding disjoints/contains");
for (int i = 0; i < count; ++i) {
@ -383,10 +379,10 @@ public class CompareProperties implements UCD_Types {
public static void listDifferences() throws IOException {
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet");
output.println("# Generated for " + Default.ucd().getVersion());
output.println(GenerateData.generateDateLine());
output.println(UnicodeDataFile.generateDateLine());
output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)");

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
* $Date: 2004/02/06 18:30:23 $
* $Revision: 1.13 $
* $Date: 2004/02/12 08:23:17 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -834,9 +834,9 @@ public final class ConvertUCD implements UCD_Types {
//UCD_Names.BP_OLD
} else if (fieldName.equals("gc")) {
uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GC, true);
uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true);
} else if (fieldName.equals("bc")) {
uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BC, true);
uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true);
} else if (fieldName.equals("dt")) {
if (major < 2) {
if (fieldValue.equals("no-break")) fieldValue = "noBreak";
@ -847,17 +847,17 @@ public final class ConvertUCD implements UCD_Types {
else if (fieldValue.equals("no-join")) fieldValue = "compat";
else if (fieldValue.equals("join")) fieldValue = "compat";
}
uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.DT, true);
uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.LONG_DECOMPOSITION_TYPE, true);
} else if (fieldName.equals("nt")) {
uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true);
uData.numericType = Utility.lookup(fieldValue, UCD_Names.LONG_NUMERIC_TYPE, true);
} else if (fieldName.equals("ea")) {
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true);
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EAST_ASIAN_WIDTH, true);
} else if (fieldName.equals("lb")) {
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true);
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LINE_BREAK, true);
} else if (fieldName.equals("sn")) {
uData.script = Utility.lookup(fieldValue, UCD_Names.SCRIPT, true);
uData.script = Utility.lookup(fieldValue, UCD_Names.LONG_SCRIPT, true);
} else if (fieldName.equals("jt")) {
uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE, true);

View file

@ -21,13 +21,19 @@ public final class Default implements UCD_Types {
setUCD();
}
private static boolean inRecursiveCall = false;
private static void setUCD() {
ucd = UCD.make(ucdVersion());
if (inRecursiveCall) {
throw new IllegalArgumentException("Recursive call to setUCD");
}
inRecursiveCall = true;
ucd = UCD.make(ucdVersion);
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion());
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion());
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion());
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion());
System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate())));
inRecursiveCall = false;
}
static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'");
@ -40,32 +46,32 @@ public final class Default implements UCD_Types {
}
public static String ucdVersion() {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return ucdVersion;
}
public static UCD ucd() {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return ucd;
}
public static Normalizer nfc() {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return nfc;
}
public static Normalizer nfd() {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return nfd;
}
public static Normalizer nfkc() {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return nfkc;
}
public static Normalizer nfkd() {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return nfkd;
}
public static Normalizer nf(int index) {
if (ucd() == null) setUCD();
if (ucd == null) setUCD();
return nf[index];
}

View file

@ -0,0 +1,16 @@
#
# Unicode Character Database: Derived Property Data
# This file shows when various code points were designated in Unicode
# Notes:
# - The term 'designated' means that a previously reserved code point was specified
# to be a noncharacter or surrogate, or assigned as a character,
# control or format code.
# - Versions are only tracked from 1.1 onwards, since version 1.0
# predated changes required by the ISO 10646 merger.
# - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing.
# - The supplementary private use code points and the non-character code points
# were designated in version 2.0, but not specifically listed in the UCD
# until versions 3.0 and 3.1 respectively.
#
# For details on the contents of each version, see
# http://www.unicode.org/versions/enumeratedversions.html.

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.9 $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -28,8 +28,8 @@ abstract public class GenerateBreakTest implements UCD_Types {
Normalizer nfd;
Normalizer nfkd;
UnicodeMap sampleMap = null;
UnicodeMap map = new UnicodeMap();
OldUnicodeMap sampleMap = null;
OldUnicodeMap map = new OldUnicodeMap();
// ====================== Main ===========================

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.15 $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -15,6 +15,7 @@ package com.ibm.text.UCD;
import java.util.*;
import java.io.*;
import com.ibm.icu.text.UTF16;
import com.ibm.text.utility.*;
@ -37,11 +38,12 @@ public class GenerateCaseFolding implements UCD_Types {
static PrintWriter log;
public static void makeCaseFold(boolean normalized) throws java.io.IOException {
PICK_SHORT = NF_CLOSURE = normalized;
log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true));
log = Utility.openPrintWriter("CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
System.out.println("Writing Log: " + "CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true));
System.out.println("Making Full Data");
Map fullData = getCaseFolding(true, NF_CLOSURE, "");
@ -64,15 +66,8 @@ public class GenerateCaseFolding implements UCD_Types {
String filename = "CaseFolding";
if (normalized) filename += "-Normalized";
String directory = "DerivedData/";
String newFile = directory + filename + GenerateData.getFileSuffix(true);
PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName);
out.println("# CaseFolding" + GenerateData.getFileSuffix(false));
out.println(GenerateData.generateDateLine());
out.println("#");
Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out);
UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, filename);
PrintWriter out = fc.out;
/*
PrintWriter out = new PrintWriter(
@ -124,9 +119,8 @@ public class GenerateCaseFolding implements UCD_Types {
drawLine(out, ch, "t", rSimpleTurkish);
}
}
out.close();
fc.close();
log.close();
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
/* Goal is following (with no entries for 0131 or 0069)
@ -470,7 +464,7 @@ public class GenerateCaseFolding implements UCD_Types {
if (normalize) suffix2 = "-Normalized";
PrintWriter log = Utility.openPrintWriter("SpecialCasingExceptions"
+ suffix2 + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
+ suffix2 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
Utility.dot(ch);
@ -580,12 +574,12 @@ public class GenerateCaseFolding implements UCD_Types {
log.close();
System.out.println("Writing");
String newFile = "DerivedData/SpecialCasing" + suffix2 + GenerateData.getFileSuffix(true);
String newFile = "DerivedData/SpecialCasing" + suffix2 + UnicodeDataFile.getFileSuffix(true);
PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = GenerateData.generateBat("DerivedData/", "SpecialCasing", suffix2 + GenerateData.getFileSuffix(true), batName);
out.println("# SpecialCasing" + GenerateData.getFileSuffix(false));
out.println(GenerateData.generateDateLine());
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "SpecialCasing", suffix2 + UnicodeDataFile.getFileSuffix(true), batName);
out.println("# SpecialCasing" + UnicodeDataFile.getFileSuffix(false));
out.println(UnicodeDataFile.generateDateLine());
out.println("#");
Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.32 $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.33 $
*
*******************************************************************************
*/
@ -95,24 +95,11 @@ public class GenerateData implements UCD_Types {
}
//Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names
/**
 * Strips a trailing "d" + single character marker that sits immediately before
 * the ".txt" extension, e.g. "DerivedJoiningGroup-3.1.0d1.txt" becomes
 * "DerivedJoiningGroup-3.1.0.txt".
 *
 * The name is returned unchanged when it does not end in ".txt", when it is too
 * short to hold the two-character marker, when the character six positions from
 * the end is not 'd', or when the character after that 'd' is neither a decimal
 * digit nor 'X'.
 *
 * @param s file name to examine
 * @return the name without the marker, or {@code s} itself if nothing was removed
 */
public static String fixFile(String s) {
    final String extension = ".txt";
    if (!s.endsWith(extension)) return s;

    int len = s.length();
    // Marker (2 chars) + extension (4 chars): anything shorter cannot contain a
    // marker, and indexing it below would throw StringIndexOutOfBoundsException.
    if (len < 6) return s;

    if (s.charAt(len - 6) != 'd') return s;
    char c = s.charAt(len - 5);
    boolean isDigit = '0' <= c && c <= '9';
    if (c != 'X' && !isDigit) return s;

    s = s.substring(0, len - 6) + s.substring(len - 4);
    System.out.println("Fixing File Name: " + s);
    return s;
}
static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
output.println("# " + fileName);
output.println(generateDateLine());
output.println(UnicodeDataFile.generateDateLine());
output.println("#");
if (headerChoice == HEADER_SCRIPTS) {
} else if (headerChoice == HEADER_EXTEND) {
@ -128,18 +115,6 @@ public class GenerateData implements UCD_Types {
output.println();
}
/**
 * Builds the suffix appended to generated text file names:
 * "-" + the default UCD version + an optional "d" + dVersion + ".txt".
 *
 * @param withDVersion when true and dVersion is non-negative, the
 *        "d" + dVersion part is included; otherwise it is omitted
 * @return the assembled suffix
 */
public static String getFileSuffix(boolean withDVersion) {
    String version = Default.ucd().getVersion();
    StringBuffer suffix = new StringBuffer("-").append(version);
    if (withDVersion && dVersion >= 0) {
        suffix.append("d" + dVersion);
    }
    return suffix.append(".txt").toString();
}
/**
 * Builds the suffix appended to generated HTML file names:
 * "-" + the default UCD version + an optional "d" + dVersion + ".html".
 *
 * @param withDVersion when true and dVersion is non-negative, the
 *        "d" + dVersion part is included; otherwise it is omitted
 * @return the assembled suffix
 */
public static String getHTMLFileSuffix(boolean withDVersion) {
    String version = Default.ucd().getVersion();
    StringBuffer suffix = new StringBuffer("-").append(version);
    if (withDVersion && dVersion >= 0) {
        suffix.append("d" + dVersion);
    }
    return suffix.append(".html").toString();
}
public static void checkDifferences (String targetVersion) throws IOException {
System.out.println("Checking Differences");
UCD target = UCD.make(targetVersion);
@ -176,14 +151,14 @@ public class GenerateData implements UCD_Types {
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
String newFile = directory + fileName + getFileSuffix(true);
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
System.out.println("New File: " + newFile);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
System.out.println("Most recent: " + mostRecent);
doHeader(fileName + getFileSuffix(false), output, headerChoice);
doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
UCDProperty up = DerivedProperty.make(i, Default.ucd());
if (up == null) continue;
@ -227,13 +202,13 @@ public class GenerateData implements UCD_Types {
public static void generateCompExclusions() throws IOException {
String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true);
String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = generateBat("DerivedData/", "CompositionExclusions", getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
output.println("# CompositionExclusions" + getFileSuffix(false));
output.println(generateDateLine());
output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false));
output.println(UnicodeDataFile.generateDateLine());
output.println("#");
output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table.");
output.println("#");
@ -289,10 +264,6 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
/**
 * Produces the "# Date: ..." comment line written into generated file headers.
 *
 * @return "# Date: " followed by the value of Default.getDate() and " [MD]"
 */
static String generateDateLine() {
    final String prefix = "# Date: ";
    final String marker = " [MD]";
    return prefix + Default.getDate() + marker;
}
static class CompLister extends PropertyLister {
UCD oldUCD;
int type;
@ -509,21 +480,27 @@ public class GenerateData implements UCD_Types {
}
}
Iterator blockIterator = Default.ucd().getBlockNames().iterator();
while (blockIterator.hasNext()) {
addLine(sorted, "blk", "n/a", (String)blockIterator.next());
}
/*
UCD.BlockData blockData = new UCD.BlockData();
int blockId = 0;
while (Default.ucd().getBlockData(blockId++, blockData)) {
addLine(sorted, "blk", "n/a", blockData.name);
}
*/
String filename = "PropertyAliases";
String newFile = "DerivedData/" + filename + getFileSuffix(true);
String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
log.println("#");
Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
@ -538,12 +515,12 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
filename = "PropertyValueAliases";
newFile = "DerivedData/" + filename + getFileSuffix(true);
newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName);
mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
log.println("#");
Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
@ -554,9 +531,9 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
filename = "PropertyAliasSummary";
newFile = "OtherData/" + filename + getFileSuffix(true);
newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true);
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
mostRecent = generateBat("OtherData/", filename, getFileSuffix(true), batName);
mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println();
log.println(HORIZONTAL_LINE);
@ -682,66 +659,16 @@ public class GenerateData implements UCD_Types {
}
}
/*
static String skeleton(String source) {
StringBuffer result = new StringBuffer();
source = source.toLowerCase();
for (int i = 0; i < source.length(); ++i) {
char c = source.charAt(i);
if (c == ' ' || c == '_' || c == '-') continue;
result.append(c);
}
return result.toString();
}
*/
// static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
/**
 * Writes comparison .bat files for a newly generated data file.
 *
 * Looks up an existing Unicode data file for {@code fileRoot} through
 * Utility.getMostRecentUnicodeDataFile. If none is found, a message is printed and
 * null is returned without touching {@code batName}. Otherwise a "Diff_" bat file
 * is written under {@code directory + "DIFF/"} and its name is stored in
 * {@code batName[0]}. A second lookup (same call, third argument false) is then
 * made; when it yields a different file, an additional "OLDER-Diff_" bat file is
 * written as well.
 *
 * @param directory output directory prefix, concatenated directly with the names
 * @param fileRoot  base name of the data file, without version suffix
 * @param suffix    suffix appended to {@code fileRoot} for the new file
 * @param batName   out-parameter; element 0 receives the name of the main bat file
 * @return the file found by the first lookup, or null when there is none
 * @throws IOException if writing a bat file fails
 */
public static String generateBat(String directory, String fileRoot, String suffix, String[] batName) throws IOException {
    String newest = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), true, true);
    if (newest == null) {
        System.out.println("No previous version of: " + fileRoot + ".txt");
        return null;
    }

    final String target = directory + fileRoot + suffix;
    final String diffPrefix = directory + "DIFF/";
    batName[0] = generateBatAux(diffPrefix + "Diff_" + fileRoot + suffix, newest, target);

    String older = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), false, true);
    boolean hasDistinctOlder = older != null && !newest.equals(older);
    if (hasDistinctOlder) {
        generateBatAux(diffPrefix + "OLDER-Diff_" + fileRoot + suffix, older, target);
    }
    return newest;
}
/**
 * Writes a one-line .bat file that launches "C:\Program Files\wincmp.exe" on the
 * canonical paths of an old and a new file.
 *
 * @param batName name of the bat file to create, without the ".bat" extension
 * @param oldName path of the existing file to compare against
 * @param newName name of the new file; mapped through Utility.getOutputName before use
 * @return canonical path of the bat file, resolved through Utility.getOutputName
 * @throws IOException if the bat file cannot be opened or a canonical path cannot
 *         be resolved
 */
public static String generateBatAux(String batName, String oldName, String newName) throws IOException {
    String fullBatName = batName + ".bat";
    PrintWriter output = Utility.openPrintWriter(fullBatName, Utility.LATIN1_UNIX);
    try {
        newName = Utility.getOutputName(newName);
        System.out.println("Writing BAT to compare " + oldName + " and " + newName);

        File newFile = new File(newName);
        File oldFile = new File(oldName);
        output.println("\"C:\\Program Files\\wincmp.exe\" "
            + oldFile.getCanonicalFile()
            + " "
            + newFile.getCanonicalFile());
    } finally {
        // Close even when path resolution throws, so the writer is not leaked.
        output.close();
    }
    return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString();
}
public static void generateVerticalSlice(int startEnum, int endEnum,
int headerChoice, String directory, String file) throws IOException {
String newFile = directory + file + getFileSuffix(true);
String newFile = directory + file + UnicodeDataFile.getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = generateBat(directory, file, getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
doHeader(file + getFileSuffix(false), output, headerChoice);
doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
int last = -1;
for (int i = startEnum; i < endEnum; ++i) {
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd());
@ -810,15 +737,15 @@ public class GenerateData implements UCD_Types {
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
String newFile = directory + fileName + getFileSuffix(true);
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
String[] batName = {""};
String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
String[] example = new String[256];
log.println("# " + fileName + getFileSuffix(false));
log.println(generateDateLine());
log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
log.println("#");
log.println("# Normalization Test Suite");
log.println("# Format:");
@ -1012,10 +939,10 @@ public class GenerateData implements UCD_Types {
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
String newFile = directory + filename + getFileSuffix(true);
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
DiffPropertyLister dpl;
UnicodeSet cummulative = new UnicodeSet();
@ -1095,13 +1022,13 @@ public class GenerateData implements UCD_Types {
static final void generateAge(String directory, String filename) throws IOException {
String newFile = directory + filename + getFileSuffix(true);
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
try {
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
log.println("#");
log.println("# Unicode Character Database: Derived Property Data");
log.println("# This file shows when various code points were designated in Unicode");
@ -1195,7 +1122,7 @@ public class GenerateData implements UCD_Types {
public static void listCombiningAccents() throws IOException {
PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true), Utility.LATIN1_UNIX);
PrintWriter log = Utility.openPrintWriter("ListAccents" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
Set set = new TreeSet();
Set set2 = new TreeSet();
@ -1232,7 +1159,7 @@ public class GenerateData implements UCD_Types {
public static void listGreekVowels() throws IOException {
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true), Utility.LATIN1_UNIX);
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
Set set = new TreeSet();
Set set2 = new TreeSet();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.4 $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.5 $
*
*******************************************************************************
*/
@ -97,10 +97,10 @@ public final class GenerateStandardizedVariants implements UCD_Types {
// now write out the results
String directory = "DerivedData/";
String filename = directory + "StandardizedVariants" + GenerateData.getHTMLFileSuffix(true);
String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true);
PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName);
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
String version = Default.ucd().getVersion();
int lastDot = version.lastIndexOf('.');

View file

@ -0,0 +1,280 @@
package com.ibm.text.UCD;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.UnicodeDataFile;
public class MakeUnicodeFiles {
static boolean DEBUG = true;
/**
 * Convenience driver: regenerates every registered data file whose name
 * falls between "Scripts" and "z" (both bounds inclusive). Takes no
 * command-line arguments.
 *
 * @throws IOException if generating one of the files fails
 */
public static void main() throws IOException {
    final String firstFile = "Scripts";
    final String lastFile = "z";
    generateFile(firstFile, lastFile);
}
/**
 * Small map wrapper that also remembers the order in which keys were first
 * inserted. Values live in {@code map}; insertion order lives in {@code keys}.
 */
static class OrderedMap {
    HashMap map = new HashMap();
    ArrayList keys = new ArrayList();

    /**
     * Associates {@code t} with the key {@code o}. Re-putting an existing key
     * replaces its value but keeps the key's original position.
     *
     * @param o the key
     * @param t the value to store for the key
     */
    void put(Object o, Object t) {
        // Record the key only the first time it is seen; otherwise the key
        // list would accumulate duplicates while the map holds one entry.
        if (!map.containsKey(o)) {
            keys.add(o);
        }
        map.put(o, t);
    }

    /**
     * @return the live list of keys, in first-insertion order
     */
    List keyset() {
        return keys;
    }
}
/**
 * Formatting options that control how one property is written by
 * {@code generateFile}. Every setter returns {@code this}, so a style can be
 * configured in a single chained expression.
 */
static class PrintStyle {
    /** When false, each value is printed using its shortest alias. */
    boolean longForm = false;
    /** When true, the formatter's label source is cleared before printing. */
    boolean noLabel = false;
    /** When true, the printed value name is upper-cased. */
    boolean makeUppercase = false;
    /** When true, the first letter of the printed value name is lower-cased. */
    boolean makeFirstLetterLowercase = false;
    /** Value whose section is omitted entirely; null by default. */
    String skipValue = null;
    /** Value from whose set the unassigned code points are removed; null by default. */
    String skipUnassigned = null;
    /** When true, value aliases are re-sorted with a RangeStartComparator. */
    boolean orderByRangeStart = false;
    /** When true, the property is printed as a single list over all code points. */
    boolean valueList = false;

    PrintStyle setLongForm(boolean flag) {
        this.longForm = flag;
        return this;
    }

    PrintStyle setSkipUnassigned(String valueName) {
        this.skipUnassigned = valueName;
        return this;
    }

    PrintStyle setNoLabel(boolean flag) {
        this.noLabel = flag;
        return this;
    }

    PrintStyle setMakeUppercase(boolean flag) {
        this.makeUppercase = flag;
        return this;
    }

    PrintStyle setMakeFirstLetterLowercase(boolean flag) {
        this.makeFirstLetterLowercase = flag;
        return this;
    }

    PrintStyle setSkipValue(String valueName) {
        this.skipValue = valueName;
        return this;
    }

    PrintStyle setOrderByRangeStart(boolean flag) {
        this.orderByRangeStart = flag;
        return this;
    }

    PrintStyle setValueList(boolean flag) {
        this.valueList = flag;
        return this;
    }
}
/** Style used for any property that has no entry in {@code printStyles}. */
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
/** Comparator used to match value names against a style's skip values. */
static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator();
/**
 * Per-property print styles, keyed by property name. Keys are compared with
 * natural String ordering (the skeleton comparator is deliberately commented
 * out), so a key must match the looked-up property name exactly.
 */
static Map printStyles = new TreeMap(/*skeletonComparator*/);
static {
    printStyles.put("Script", new PrintStyle().setLongForm(true)
        .setMakeUppercase(true).setSkipUnassigned("Common"));
    // "Age" must be registered exactly once: Map.put replaces the previous
    // value, so registering the no-label style and the skip-value style
    // separately would silently drop whichever was registered first.
    printStyles.put("Age", new PrintStyle().setNoLabel(true)
        .setSkipValue("unassigned"));
    printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true)
        .setMakeFirstLetterLowercase(true).setSkipUnassigned("none"));
    printStyles.put("General_Category", new PrintStyle().setNoLabel(true)
        //.setSkipUnassigned(true)
        );
    printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown"));
    printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining"));
    printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group")
        .setMakeUppercase(true));
    printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral"));
    printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true)
        .setSkipValue("None").setMakeFirstLetterLowercase(true));
    printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right"));
    printStyles.put("Block", new PrintStyle().setNoLabel(true)
        .setValueList(true));
    printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0"));
    printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA"));
}
//PropertyAliases
//PropertyValueAliases
//CompositionExclusions
//SpecialCasing
//NormalizationTest
//add("CaseFolding", new String[] {"CaseFolding"});
/** Registry of output files: file name mapped to the String[] of properties it lists. */
static Map contents = new TreeMap();

/**
 * Registers an output file.
 *
 * @param name       file name (without suffix) used as the registry key
 * @param properties names of the properties written into that file
 */
static void add(String name, String[] properties) {
    contents.put(name, properties);
}

static {
    // Files that carry exactly one property: { file name, property name }.
    final String[][] singlePropertyFiles = {
        {"Blocks", "Block"},
        {"DerivedAge", "Age"},
        {"Scripts", "Script"},
        {"HangulSyllableType", "HangulSyllableType"},
        {"DerivedBidiClass", "BidiClass"},
        {"DerivedBinaryProperties", "BidiMirrored"},
        {"DerivedCombiningClass", "CanonicalCombiningClass"},
        {"DerivedDecompositionType", "DecompositionType"},
        {"DerivedEastAsianWidth", "EastAsianWidth"},
        {"DerivedGeneralCategory", "GeneralCategory"},
        {"DerivedJoiningGroup", "JoiningGroup"},
        {"DerivedJoiningType", "JoiningType"},
        {"DerivedLineBreak", "LineBreak"},
        {"DerivedNumericType", "NumericType"},
        {"DerivedNumericValues", "NumericValue"},
    };
    for (int row = 0; row < singlePropertyFiles.length; ++row) {
        String fileName = singlePropertyFiles[row][0];
        String propertyName = singlePropertyFiles[row][1];
        add(fileName, new String[] {propertyName});
    }

    // Currently switched off; kept so the property list is not lost.
    if (false) add("DerivedNormalizationProps", new String[] {
        "FNC", "Full_Composition_Exclusion",
        "NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
        "Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"});

    final String[] propListProperties = {
        "White_Space", "Bidi_Control", "Join_Control",
        "Dash", "Hyphen", "Quotation_Mark",
        "Terminal_Punctuation", "Other_Math",
        "Hex_Digit", "ASCII_Hex_Digit",
        "Other_Alphabetic",
        "Ideographic",
        "Diacritic", "Extender",
        "Other_Lowercase", "Other_Uppercase",
        "Noncharacter_Code_Point",
        "Other_Grapheme_Extend",
        "Grapheme_Link",
        "IDS_Binary_Operator", "IDS_Trinary_Operator",
        "Radical", "Unified_Ideograph",
        "Other_Default_Ignorable_Code_Point",
        "Deprecated", "Soft_Dotted",
        "Logical_Order_Exception",
        "Other_ID_Start"
    };
    add("PropList", propListProperties);

    final String[] derivedCoreProperties = {
        "Math", "Alphabetic", "Lowercase", "Uppercase",
        "ID_Start", "ID_Continue",
        "XID_Start", "XID_Continue",
        "Default_Ignorable_Code_Point",
        "Grapheme_Extend", "Grapheme_Base"
    };
    add("DerivedCoreProperties", derivedCoreProperties);
}
/**
 * Generates every registered file whose name lies in the closed range
 * [{@code atOrAfter}, {@code atOrBefore}] under String ordering.
 *
 * @param atOrAfter  lowest file name to generate (inclusive)
 * @param atOrBefore highest file name to generate (inclusive)
 * @throws IOException if generating one of the files fails
 */
public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
    for (Iterator names = contents.keySet().iterator(); names.hasNext();) {
        String candidate = (String) names.next();
        boolean withinRange = candidate.compareTo(atOrAfter) >= 0
            && candidate.compareTo(atOrBefore) <= 0;
        if (withinRange) {
            generateFile(candidate);
        }
    }
}
/**
 * Writes the data file registered under {@code filename} in {@code contents}.
 * The file is opened through
 * {@code UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename)} and
 * each listed property is printed according to its entry in
 * {@code printStyles} (or {@code DEFAULT_PRINT_STYLE} if it has none).
 *
 * @param filename key of the file in {@code contents}
 * @throws IllegalArgumentException if no file is registered under that name
 * @throws IOException if opening, writing or closing the output fails
 */
public static void generateFile(String filename) throws IOException {
    String[] propList = (String[]) contents.get(filename);
    if (propList == null) {
        // Reject unknown names before an output file is created, rather than
        // failing with a NullPointerException after the header is written.
        throw new IllegalArgumentException("No property list registered for file: " + filename);
    }
    UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
    try {
        PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");
        UnicodeProperty.Factory toolFactory
            = ToolUnicodePropertySource.make(Default.ucdVersion());
        BagFormatter bf2 = new BagFormatter(toolFactory);
        UnicodeSet unassigned = toolFactory.getSet("gc=cn")
            .addAll(toolFactory.getSet("gc=cs"));
        //System.out.println(unassigned.toPattern(true));
        // .removeAll(toolFactory.getSet("noncharactercodepoint=true"));
        String separator = bf2.getLineSeparator()
            + "# ================================================"
            + bf2.getLineSeparator() + bf2.getLineSeparator();
        for (int i = 0; i < propList.length; ++i) {
            UnicodeProperty prop = toolFactory.getProperty(propList[i]);
            System.out.println(prop.getName());
            pw.print(separator);
            PrintStyle ps = (PrintStyle) printStyles.get(prop.getName());
            if (ps == null) {
                ps = DEFAULT_PRINT_STYLE;
                System.out.println("Using default style!");
            }
            // Note: once cleared, the label source stays cleared for the
            // remaining properties of this file (bf2 is shared across the loop).
            if (ps.noLabel) bf2.setLabelSource(null);
            if (ps.valueList) {
                // One list over the whole code point range, with underscores
                // in value names shown as spaces.
                bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter()))
                    .setNameSource(null)
                    .setShowCount(false)
                    .showSetNames(pw,new UnicodeSet(0,0x10FFFF));
            } else if (prop.getType() <= prop.EXTENDED_BINARY) {
                // Binary property: list only the code points whose value is True.
                UnicodeSet s = prop.getSet("True");
                bf2.setValueSource(prop.getName());
                bf2.showSetNames(pw, s);
            } else {
                bf2.setValueSource(prop);
                Collection aliases = prop.getAvailableValueAliases();
                if (ps.orderByRangeStart) {
                    if (DEBUG) System.out.println("Reordering");
                    TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
                    temp2.addAll(aliases);
                    aliases = temp2;
                }
                Iterator it = aliases.iterator();
                while (it.hasNext()) {
                    String value = (String)it.next();
                    UnicodeSet s = prop.getSet(value);
                    if (DEBUG) {
                        System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue);
                        System.out.println(s.toPattern(true));
                    }
                    if (skeletonComparator.compare(value, ps.skipValue) == 0) continue;
                    if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) {
                        s.removeAll(unassigned);
                    }
                    if (s.size() == 0) continue;
                    //if (unassigned.containsAll(s)) continue; // skip if all unassigned
                    //if (s.contains(0xD0000)) continue; // skip unassigned
                    pw.print(separator);
                    if (!ps.longForm) value = prop.getShortestValueAlias(value);
                    if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH);
                    if (ps.makeFirstLetterLowercase) {
                        // NOTE: this is ok since we are only working in ASCII
                        value = value.substring(0,1).toLowerCase(Locale.ENGLISH)
                            + value.substring(1);
                    }
                    bf2.setValueSource(value);
                    bf2.showSetNames(pw, s);
                }
            }
        }
    } finally {
        // Always release the output file, even when generation fails part-way.
        udf.close();
    }
}
/**
 * Orders property value names by comparing the code point sets that
 * {@code prop} assigns to them, using a
 * {@code CompareProperties.UnicodeSetComparator}.
 * NOTE(review): when used as a TreeSet comparator, names whose sets compare
 * as equal are treated as duplicates and only one survives - confirm that is
 * acceptable for every property this is applied to.
 */
static class RangeStartComparator implements Comparator {
    UnicodeProperty prop;
    CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();

    RangeStartComparator(UnicodeProperty prop) {
        this.prop = prop;
    }

    /**
     * @param o1 first value name (a String)
     * @param o2 second value name (a String)
     * @return the result of comparing the two values' code point sets
     */
    public int compare(Object o1, Object o2) {
        UnicodeSet s1 = prop.getSet((String)o1);
        UnicodeSet s2 = prop.getSet((String)o2);
        // Run the set comparison once and reuse it for both the trace line
        // and the return value instead of computing it twice per call.
        int order = comp.compare(s1, s2);
        System.out.println("comparing " + o1 + ", " + o2
            + s1.toPattern(true) + "?" + s2.toPattern(true)
            + ", " + order);
        return order;
    }
}
/** String filter that maps every underscore in a value to a space. */
public static class ReplaceFilter extends UnicodeProperty.StringFilter {
    /**
     * @param original the value to rewrite
     * @return {@code original} with each '_' replaced by ' '
     */
    public String remap(String original) {
        final char underscore = '_';
        final char space = ' ';
        return original.replace(underscore, space);
    }
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.7 $
* $Date: 2004/02/12 08:23:16 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -237,9 +237,9 @@ public final class TestNormalization {
UnicodeSet t = (UnicodeSet) it.next();
UnicodeSet l = (UnicodeSet) map.get(t);
System.out.println("<tr><td>"
+ bf.showSetNames("",l)
+ bf.showSetNames(l)
+ "</td><td>"
+ bf.showSetNames("",t)
+ bf.showSetNames(t)
+ "</td></tr>");
}
}

View file

@ -1,11 +1,13 @@
package com.ibm.text.UCD;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.lang.UCharacter;
import com.ibm.text.utility.Utility;
@ -15,62 +17,74 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
private static boolean needAgeCache = true;
private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];
private static HashMap cache = new HashMap();
private static HashMap factoryCache = new HashMap();
public static synchronized ToolUnicodePropertySource make(String version) {
ToolUnicodePropertySource result = (ToolUnicodePropertySource)cache.get(version);
ToolUnicodePropertySource result = (ToolUnicodePropertySource)factoryCache.get(version);
if (result != null) return result;
result = new ToolUnicodePropertySource(version);
cache.put(version, result);
factoryCache.put(version, result);
return result;
}
private ToolUnicodePropertySource(String version) {
ucd = UCD.make(version);
version = ucd.getVersion();
TreeSet names = new TreeSet();
UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
Iterator it = names.iterator();
while (it.hasNext()) {
String name = (String) it.next();
//System.out.println("Name: " + name);
add(new ToolUnicodeProperty(name));
}
add(new UnicodeProperty.SimpleProperty() {
{set("Name", "na", UnicodeProperty.STRING, "<string>");}
public String getValue(int codepoint) {
public String _getValue(int codepoint) {
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getName(codepoint);
}
});
}.setMain("Name", "na", UnicodeProperty.STRING, version)
.setValues("<string>"));
add(new UnicodeProperty.SimpleProperty() {
{set("Block", "blk", UnicodeProperty.ENUMERATED,
ucd.getBlockNames(null));}
public String getValue(int codepoint) {
public String _getValue(int codepoint) {
if (codepoint == 0x1D100) {
System.out.println("here");
}
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getBlock(codepoint);
}
});
protected UnicodeMap _getUnicodeMap() {
return ucd.blockData;
}
}.setMain("Block", "blk", UnicodeProperty.ENUMERATED, version)
.setValues(ucd.getBlockNames(null)));
add(new UnicodeProperty.SimpleProperty() {
{set("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, "<string>");}
public String getValue(int codepoint) {
public String _getValue(int codepoint) {
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getBidiMirror(codepoint);
}
});
}.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version)
.setValues("<string>"));
add(new UnicodeProperty.SimpleProperty() {
{set("Case_Folding", "cf", UnicodeProperty.STRING, "<string>");}
public String getValue(int codepoint) {
public String _getValue(int codepoint) {
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD);
}
});
}.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version)
.setValues("<string>"));
add(new UnicodeProperty.SimpleProperty() {
{set("Numeric_Value", "nv", UnicodeProperty.NUMERIC, "<number>");}
public String getValue(int codepoint) {
public String _getValue(int codepoint) {
double num = ucd.getNumericValue(codepoint);
if (Double.isNaN(num)) return null;
return Double.toString(num);
}
});
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version)
.setValues("<number>"));
}
/*
"Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
@ -109,7 +123,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
setName(propertyAlias);
}
public Collection getAvailableValueAliases(Collection result) {
public Collection _getAvailableValueAliases(Collection result) {
if (result == null) result = new ArrayList();
int type = getType() & ~EXTENDED_BIT;
if (type == STRING) result.add("<string>");
else if (type == NUMERIC) result.add("<string>");
@ -149,34 +164,80 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
}
if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase));
}
if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
//if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
//if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
//if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
}
return result;
}
public Collection getAliases(Collection result) {
String longName = up.getName(UCD_Types.LONG);
addUnique(Utility.getUnskeleton(longName, true), result);
String shortName = up.getName(UCD_Types.SHORT);
addUnique(Utility.getUnskeleton(shortName, false), result);
return result;
}
public Collection getValueAliases(String valueAlias, Collection result) {
// TODO Auto-generated method stub
public Collection _getAliases(Collection result) {
if (result == null) result = new ArrayList();
String longName = up.getName(UCD_Types.LONG);
addUnique(Utility.getUnskeleton(longName, true), result);
String shortName = up.getName(UCD_Types.SHORT);
addUnique(Utility.getUnskeleton(shortName, false), result);
return result;
}
public Collection _getValueAliases(String valueAlias, Collection result) {
if (result == null) result = new ArrayList();
int type = getType() & ~EXTENDED_BIT;
if (type == STRING) return result;
else if (type == NUMERIC) return result;
else if (type == BINARY) {
UnicodeProperty.addUnique(valueAlias, result);
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, result);
} else if (type == ENUMERATED) {
byte style = UCD_Types.LONG;
int prop = propMask>>8;
boolean titlecase = false;
for (int i = 0; i < 256; ++i) {
try {
switch (prop) {
case UCD_Types.CATEGORY>>8:
return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, result);
case UCD_Types.COMBINING_CLASS>>8:
addUnique(""+i, result);
return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, result);
case UCD_Types.BIDI_CLASS>>8:
return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, result);
case UCD_Types.DECOMPOSITION_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, UCD_Names.DECOMPOSITION_TYPE, result);
case UCD_Types.NUMERIC_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_NUMERIC_TYPE, UCD_Names.NUMERIC_TYPE, result);
case UCD_Types.EAST_ASIAN_WIDTH>>8:
return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result);
case UCD_Types.LINE_BREAK>>8:
return lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
case UCD_Types.JOINING_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result);
case UCD_Types.JOINING_GROUP>>8:
return lookup(valueAlias, UCD_Names.JOINING_GROUP, null, result);
case UCD_Types.SCRIPT>>8:
return lookup(valueAlias, UCD_Names.LONG_SCRIPT, UCD_Names.SCRIPT, result);
case UCD_Types.AGE>>8:
return lookup(valueAlias, UCD_Names.AGE, null, result);
case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_HANGUL_SYLLABLE_TYPE, UCD_Names.HANGUL_SYLLABLE_TYPE, result);
default: throw new IllegalArgumentException("Internal Error: " + prop);
}
} catch (ArrayIndexOutOfBoundsException e) {
continue;
}
}
}
throw new ArrayIndexOutOfBoundsException("not supported yet");
}
public String getValue(int codepoint) {
public String _getValue(int codepoint) {
byte style = UCD_Types.LONG;
String temp = null;
boolean titlecase = false;
switch (propMask>>8) {
case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex(ucd.getCategory(codepoint), style)); break;
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex(ucd.getCombiningClass(codepoint), style));
if (temp.startsWith("Fixed_")) temp = temp.substring(6);
//if (temp.startsWith("Fixed_")) temp = temp.substring(6);
break;
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex(ucd.getBidiClass(codepoint), style)); break;
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(codepoint), style));
@ -226,7 +287,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
private int getPropertyTypeInternal() {
int result = 0;
String name = up.getName(UCD_Types.LONG);
if ("Age".equals(name)) return STRING;
if ("Age".equals(name)) return ENUMERATED;
switch (up.getValueType()) {
case UCD_Types.NUMERIC_PROP: result = NUMERIC; break;
case UCD_Types.STRING_PROP: result = STRING; break;
@ -243,5 +304,18 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
return result;
}
public String _getVersion() {
return up.ucd.getVersion();
}
}
/**
 * Adds {@code valueAlias} to {@code result} and, when an auxiliary table is
 * supplied, also adds the auxiliary name stored at the same index.
 *
 * @param valueAlias the alias to look up in {@code main}
 * @param main       table searched for {@code valueAlias} via Utility.lookup
 * @param aux        parallel table of alternate names, or null for none
 * @param result     collection that receives the aliases
 * @return the collection holding the collected aliases
 */
static Collection lookup(String valueAlias, String[] main, String[] aux, Collection result) {
    // The lookup result is masked to an unsigned 0..255 index.
    final int index = Utility.lookup(valueAlias, main, true) & 0xFF;
    UnicodeProperty.addUnique(valueAlias, result);
    if (aux != null) {
        return UnicodeProperty.addUnique(aux[index], result);
    }
    return result;
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.30 $
* $Date: 2004/02/12 08:23:16 $
* $Revision: 1.31 $
*
*******************************************************************************
*/
@ -29,6 +29,7 @@ import java.io.BufferedReader;
import com.ibm.text.utility.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UnicodeSet;
@ -364,9 +365,19 @@ public final class UCD implements UCD_Types {
BIDI_R_SET = new UnicodeSet();
BIDI_AL_SET = new UnicodeSet();
blockData.getSet("Hebrew",BIDI_R_SET);
blockData.getSet("Cypriot_Syllabary",BIDI_R_SET);
blockData.getSet("Arabic",BIDI_AL_SET);
blockData.getSet("Syriac",BIDI_AL_SET);
blockData.getSet("Thaana",BIDI_AL_SET);
blockData.getSet("Arabic_Presentation_Forms-A",BIDI_AL_SET);
blockData.getSet("Arabic_Presentation_Forms-B",BIDI_AL_SET);
/*
int blockId = 0;
BlockData blockData = new BlockData();
UnicodeSet s = blockData.get
while (getBlockData(blockId++, blockData)) {
if (blockData.name.equals("Hebrew")
|| blockData.name.equals("Cypriot_Syllabary")
@ -391,6 +402,7 @@ public final class UCD implements UCD_Types {
+ ".." + Utility.hex(blockData.end));
}
}
*/
System.out.println("BIDI_R_SET: " + BIDI_R_SET);
System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
@ -835,8 +847,8 @@ public final class UCD implements UCD_Types {
}
public static String getCategoryID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.GC.length ? null
: (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop];
return prop < 0 || prop >= UCD_Names.GENERAL_CATEGORY.length ? null
: (style != LONG) ? UCD_Names.GENERAL_CATEGORY[prop] : UCD_Names.LONG_GENERAL_CATEGORY[prop];
}
@ -898,11 +910,11 @@ public final class UCD implements UCD_Types {
public static String getBidiClassID_fromIndex(byte prop, byte style) {
return prop < 0
|| prop >= UCD_Names.BC.length
|| prop >= UCD_Names.BIDI_CLASS.length
? null
: style == SHORT
? UCD_Names.BC[prop]
: UCD_Names.LONG_BC[prop];
? UCD_Names.BIDI_CLASS[prop]
: UCD_Names.LONG_BIDI_CLASS[prop];
}
public String getDecompositionTypeID(int codePoint) {
@ -913,8 +925,8 @@ public final class UCD implements UCD_Types {
return getDecompositionTypeID_fromIndex(prop, NORMAL);
}
public static String getDecompositionTypeID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.DT.length ? null
: style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
return prop < 0 || prop >= UCD_Names.LONG_DECOMPOSITION_TYPE.length ? null
: style == SHORT ? UCD_Names.DECOMPOSITION_TYPE[prop] : UCD_Names.LONG_DECOMPOSITION_TYPE[prop];
}
public String getNumericTypeID(int codePoint) {
@ -926,8 +938,8 @@ public final class UCD implements UCD_Types {
}
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.NT.length ? null
: style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop];
return prop < 0 || prop >= UCD_Names.LONG_NUMERIC_TYPE.length ? null
: style == SHORT ? UCD_Names.NUMERIC_TYPE[prop] : UCD_Names.LONG_NUMERIC_TYPE[prop];
}
public String getEastAsianWidthID(int codePoint) {
@ -939,8 +951,8 @@ public final class UCD implements UCD_Types {
}
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.EA.length ? null
: style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop];
return prop < 0 || prop >= UCD_Names.LONG_EAST_ASIAN_WIDTH.length ? null
: style != LONG ? UCD_Names.EAST_ASIAN_WIDTH[prop] : UCD_Names.LONG_EAST_ASIAN_WIDTH[prop];
}
public String getLineBreakID(int codePoint) {
@ -952,8 +964,8 @@ public final class UCD implements UCD_Types {
}
public static String getLineBreakID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.LB.length ? null
: style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop];
return prop < 0 || prop >= UCD_Names.LINE_BREAK.length ? null
: style != LONG ? UCD_Names.LINE_BREAK[prop] : UCD_Names.LONG_LINE_BREAK[prop];
}
public String getJoiningTypeID(int codePoint) {
@ -993,7 +1005,7 @@ public final class UCD implements UCD_Types {
public static String getScriptID_fromIndex(byte prop, byte length) {
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
: (length == SHORT) ? UCD_Names.ABB_SCRIPT[prop] : UCD_Names.SCRIPT[prop];
: (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop];
}
public String getAgeID(int codePoint) {
@ -1553,6 +1565,54 @@ to guarantee identifier closure.
}
}
UnicodeMap blockData;
/**
 * Returns the value that the block map stores for a code point, loading the
 * block data first if it has not been loaded yet.
 *
 * @param codePoint the code point to look up
 * @return the block name recorded for {@code codePoint}
 */
public String getBlock(int codePoint) {
    if (blockData == null) {
        loadBlocks();
    }
    Object blockName = blockData.getValue(codePoint);
    return (String) blockName;
}
/**
 * Convenience overload of {@code getBlockNames(Collection)} that passes no
 * target collection.
 *
 * @return the collection of block names
 */
public Collection getBlockNames() {
    final Collection noTarget = null;
    return getBlockNames(noTarget);
}
/**
 * Collects the values available in the block map, loading the block data
 * first if necessary.
 *
 * @param result collection handed to the block map; a new ArrayList is used
 *               when this is null
 * @return whatever {@code blockData.getAvailableValues} returns for that collection
 */
public Collection getBlockNames(Collection result) {
    Collection target = result;
    if (target == null) {
        target = new ArrayList();
    }
    if (blockData == null) {
        loadBlocks();
    }
    return blockData.getAvailableValues(target);
}
/**
 * Fetches the set of code points that the block map associates with a block
 * name, loading the block data first if necessary.
 *
 * @param value  the block name to look up
 * @param result set handed to the block map; a new UnicodeSet is used when
 *               this is null
 * @return whatever {@code blockData.getSet} returns for that name and set
 */
public UnicodeSet getBlockSet(String value, UnicodeSet result) {
    UnicodeSet target = (result != null) ? result : new UnicodeSet();
    if (blockData == null) {
        loadBlocks();
    }
    return blockData.getSet(value, target);
}
/**
 * Reads the "Blocks" data file for this UCD version and stores the result in
 * {@code blockData}. Each data line has the form
 * {@code 0000..007F; Basic Latin}; spaces in the block name are replaced by
 * underscores, and code points outside every listed range are given the
 * value "No_Block".
 *
 * @throws IllegalArgumentException if the file cannot be read (the
 *         IOException is attached as the cause) or a line is malformed
 */
private void loadBlocks() {
    // Build into a local map and publish it only on success; otherwise a
    // failed load would leave a partial, non-null blockData behind that later
    // callers would treat as fully loaded.
    UnicodeMap loaded = new UnicodeMap();
    try {
        BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
        try {
            while (true) {
                // 0000..007F; Basic Latin
                String line = Utility.readDataLine(in);
                if (line == null) break;
                if (line.length() == 0) continue;
                int pos1 = line.indexOf('.');
                int pos2 = line.indexOf(';', pos1);
                if (pos1 < 0 || pos2 < pos1 + 2) {
                    // Name the offending line instead of failing with a bare
                    // StringIndexOutOfBoundsException from substring().
                    throw new IllegalArgumentException("Malformed block line: " + line);
                }
                //lastBlock = new BlockData();
                int start = Integer.parseInt(line.substring(0, pos1).trim(), 16);
                int end = Integer.parseInt(line.substring(pos1+2, pos2).trim(), 16);
                String name = line.substring(pos2+1).trim().replace(' ', '_');
                loaded.putAll(start,end, name);
            }
            loaded.setMissing("No_Block");
        } finally {
            in.close();
        }
    } catch (IOException e) {
        // initCause keeps the underlying I/O failure visible in stack traces.
        IllegalArgumentException failure = new IllegalArgumentException("Can't read block file");
        failure.initCause(e);
        throw failure;
    }
    blockData = loaded;
}
/*
public static class BlockData {
public int start;
public int end;
@ -1560,13 +1620,17 @@ to guarantee identifier closure.
}
public String NOBLOCK = Utility.getUnskeleton("no block", true);
private BlockData lastBlock;
public String getBlock(int codePoint) {
if (blocks == null) loadBlocks();
if (codePoint >= lastBlock.start && codePoint <= lastBlock.end) return lastBlock.name;
Iterator it = blocks.iterator();
while (it.hasNext()) {
BlockData data = (BlockData) it.next();
if (codePoint >= data.start && codePoint <= data.end) return data.name;
lastBlock = (BlockData) it.next();
if (codePoint < lastBlock.start) continue;
if (codePoint > lastBlock.end) break;
return lastBlock.name;
}
return NOBLOCK;
}
@ -1612,11 +1676,11 @@ to guarantee identifier closure.
int pos1 = line.indexOf('.');
int pos2 = line.indexOf(';', pos1);
BlockData blockData = new BlockData();
blockData.start = Integer.parseInt(line.substring(0, pos1), 16);
blockData.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
blockData.name = line.substring(pos2+1).trim().replace(' ', '_');
blocks.add(blockData);
lastBlock = new BlockData();
lastBlock.start = Integer.parseInt(line.substring(0, pos1), 16);
lastBlock.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
lastBlock.name = line.substring(pos2+1).trim().replace(' ', '_');
blocks.add(lastBlock);
}
} finally {
in.close();
@ -1625,6 +1689,7 @@ to guarantee identifier closure.
throw new IllegalArgumentException("Can't read block file");
}
}
*/
/**
* @return
*/

View file

@ -5,14 +5,16 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2004/02/06 18:30:19 $
* $Revision: 1.24 $
* $Date: 2004/02/12 08:23:17 $
* $Revision: 1.25 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.Locale;
import com.ibm.text.utility.*;
@ -234,15 +236,15 @@ final class UCD_Names implements UCD_Types {
static final String[] YN_TABLE = {"F", "T"};
static final String[] YN_TABLE_LONG = {"False", "True"};
static String[] SHORT_EA = {
static String[] EAST_ASIAN_WIDTH = {
"N", "A", "H", "W", "F", "Na"
};
static String[] EA = {
static String[] LONG_EAST_ASIAN_WIDTH = {
"Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
};
static final String[] LB = {
static final String[] LINE_BREAK = {
"XX", "OP", "CL", "QU", "GL", "NS", "EX", "SY",
"IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
@ -255,7 +257,7 @@ final class UCD_Names implements UCD_Types {
};
static final String[] LONG_LB = {
static final String[] LONG_LINE_BREAK = {
"Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation",
"Glue", "Nonstarter", "Exclamation", "BreakSymbols",
"InfixNumeric", "PrefixNumeric", "PostfixNumeric",
@ -270,7 +272,7 @@ final class UCD_Names implements UCD_Types {
//"Trailing_Jamo",
};
public static final String[] SCRIPT = {
public static final String[] LONG_SCRIPT = {
"COMMON", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
"LATIN", // LATIN
"GREEK", // GREEK
@ -328,7 +330,7 @@ final class UCD_Names implements UCD_Types {
};
public static final String[] ABB_SCRIPT = {
public static final String[] SCRIPT = {
"Zyyy", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
"Latn", // LATIN
"Grek", // GREEK
@ -398,15 +400,18 @@ final class UCD_Names implements UCD_Types {
static final String[] AGE = {
"UNSPECIFIED",
"unassigned",
"1.1",
"2.0", "2.1",
"3.0", "3.1", "3.2",
"2.0",
"2.1",
"3.0",
"3.1",
"3.2",
"4.0"
};
static final String[] GC = {
static final String[] GENERAL_CATEGORY = {
"Cn", // = Other, Not Assigned 0
"Lu", // = Letter, Uppercase 1
@ -449,7 +454,7 @@ final class UCD_Names implements UCD_Types {
"Pf" // = Punctuation, Final quote 30 (may behave like Ps or Pe depending on usage)
};
static final String[] LONG_GC = {
static final String[] LONG_GENERAL_CATEGORY = {
"Unassigned", // = Other, Not Assigned 0
"UppercaseLetter", // = Letter, Uppercase 1
@ -505,7 +510,7 @@ final class UCD_Names implements UCD_Types {
static final String[] BC = {
static final String[] BIDI_CLASS = {
"L", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
"R", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
"EN", // European Number
@ -520,7 +525,7 @@ final class UCD_Names implements UCD_Types {
"<unused>", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
};
static String[] LONG_BC = {
static String[] LONG_BIDI_CLASS = {
"LeftToRight", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
"RightToLeft", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
"EuropeanNumber", // European Number
@ -543,8 +548,8 @@ final class UCD_Names implements UCD_Types {
"LOWER", "TITLE", "UPPER", "UNCASED"
};
static String[] DT = {
"", // NONE
static String[] LONG_DECOMPOSITION_TYPE = {
"none", // NONE
"canonical", // CANONICAL
"compat", // Otherwise unspecified compatibility character.
"font", // A font variant (e.g. a blackletter form).
@ -563,9 +568,8 @@ final class UCD_Names implements UCD_Types {
"square", // A CJK squared font variant.
"fraction", // A vulgar fraction form.
};
static String[] SHORT_DT = {
"", // NONE
static String[] DECOMPOSITION_TYPE = {
"none", // NONE
"can", // CANONICAL
"com", // Otherwise unspecified compatibility character.
"font", // A font variant (e.g. a blackletter form).
@ -584,14 +588,19 @@ final class UCD_Names implements UCD_Types {
"sqr", // A CJK squared font variant.
"fra", // A vulgar fraction form.
};
static {
fixArray(LONG_DECOMPOSITION_TYPE);
fixArray(DECOMPOSITION_TYPE);
}
static private String[] MIRRORED_TABLE = {
"N",
"Y"
};
static String[] NT = {
"",
static String[] LONG_NUMERIC_TYPE = {
"none",
"numeric",
"digit",
"decimal",
@ -602,8 +611,8 @@ final class UCD_Names implements UCD_Types {
*/
};
static String[] SHORT_NT = {
"",
static String[] NUMERIC_TYPE = {
"none",
"nu",
"di",
"de",
@ -613,18 +622,66 @@ final class UCD_Names implements UCD_Types {
"ho"
*/
};
static {
if (LIMIT_CATEGORY != GC.length || LIMIT_CATEGORY != LONG_GC.length) {
fixArray(LONG_NUMERIC_TYPE);
fixArray(NUMERIC_TYPE);
}
// Name tables for combining class values, indexed by the numeric class 0..255.
// COMBINING_CLASS receives the abbreviated names and LONG_COMBINING_CLASS the
// long names; both are filled by the static initializer below.
static String[] COMBINING_CLASS = new String[256];
static String[] LONG_COMBINING_CLASS = new String[256];
// TODO clean this up, just a quick copy of code
static {
// NOTE(review): this outer loop only runs if SHORT <= LONG in UCD_Types, and
// every style value below LONG writes the short table - confirm the constants.
for (int style = SHORT; style <= LONG; ++style)
for (int index = 0; index < 256; ++index) {
String s = null;
// Classes with a symbolic name are listed explicitly; each case picks the
// abbreviated or the long spelling depending on the style being filled.
switch (index) {
case 0: s = style < LONG ? "NR" : "NotReordered"; break;
case 1: s = style < LONG ? "OV" : "Overlay"; break;
case 7: s = style < LONG ? "NK" : "Nukta"; break;
case 8: s = style < LONG ? "KV" : "KanaVoicing"; break;
case 9: s = style < LONG ? "VR" : "Virama"; break;
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
case 216: s = style < LONG ? "ATAR" : "AttachedAboveRight"; break;
case 218: s = style < LONG ? "BL" : "BelowLeft"; break;
case 220: s = style < LONG ? "B" : "Below"; break;
case 222: s = style < LONG ? "BR" : "BelowRight"; break;
case 224: s = style < LONG ? "L" : "Left"; break;
case 226: s = style < LONG ? "R" : "Right"; break;
case 228: s = style < LONG ? "AL" : "AboveLeft"; break;
case 230: s = style < LONG ? "A" : "Above"; break;
case 232: s = style < LONG ? "AR" : "AboveRight"; break;
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
// Every other class falls back to its decimal number (abbreviated form) or
// "Fixed_" followed by the number (long form).
default: s = style < LONG ? "" + index : "Fixed_" + index;
}
if (style < LONG) COMBINING_CLASS[index] = s;
else LONG_COMBINING_CLASS[index] = s;
}
// Disabled debugging dump of both tables.
if (false) for (int i = 0; i < 256; ++i) {
System.out.println(i
+ "\t" + COMBINING_CLASS[i]
+ "\t" + LONG_COMBINING_CLASS[i]);
}
}
static {
if (LIMIT_CATEGORY != GENERAL_CATEGORY.length || LIMIT_CATEGORY != LONG_GENERAL_CATEGORY.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: category");
}
if (LIMIT_BIDI_CLASS != BC.length) {
if (LIMIT_BIDI_CLASS != BIDI_CLASS.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: bidi");
}
if (LIMIT_LINE_BREAK != LB.length || LIMIT_LINE_BREAK != LONG_LB.length) {
if (LIMIT_LINE_BREAK != LINE_BREAK.length || LIMIT_LINE_BREAK != LONG_LINE_BREAK.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: linebreak");
}
if (LIMIT_DECOMPOSITION_TYPE != DT.length || LIMIT_DECOMPOSITION_TYPE != SHORT_DT.length) {
if (LIMIT_DECOMPOSITION_TYPE != LONG_DECOMPOSITION_TYPE.length || LIMIT_DECOMPOSITION_TYPE != DECOMPOSITION_TYPE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: decomp type");
}
if (LIMIT_MIRRORED != MIRRORED_TABLE.length) {
@ -633,16 +690,16 @@ final class UCD_Names implements UCD_Types {
if (LIMIT_CASE != CASE_TABLE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: case");
}
if (LIMIT_NUMERIC_TYPE != NT.length) {
if (LIMIT_NUMERIC_TYPE != LONG_NUMERIC_TYPE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: numeric type");
}
if (LIMIT_EAST_ASIAN_WIDTH != EA.length) {
if (LIMIT_EAST_ASIAN_WIDTH != LONG_EAST_ASIAN_WIDTH.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: east Asian Width");
}
if (LIMIT_BINARY_PROPERTIES != BP.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: binary properties");
}
if (LIMIT_SCRIPT != SCRIPT.length) {
if (LIMIT_SCRIPT != LONG_SCRIPT.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: script");
}
if (LIMIT_AGE != AGE.length) {
@ -650,7 +707,7 @@ final class UCD_Names implements UCD_Types {
}
}
public static byte ON = Utility.lookup("ON", BC, true);
public static byte ON = Utility.lookup("ON", BIDI_CLASS, true);
public static String[] HANGUL_SYLLABLE_TYPE = {
"NA",
@ -744,6 +801,16 @@ final class UCD_Names implements UCD_Types {
"KHAPH",
"FE",
};
// Rewrites the JOINING_GROUP names in place via fixArray.
static {
fixArray(JOINING_GROUP);
}
/**
 * Rewrites every element of {@code array} in place: the element is lowercased
 * (English locale rules) and the result is passed through
 * {@code Utility.getUnskeleton(..., true)}.
 * NOTE(review): getUnskeleton is defined elsewhere; presumably it restores a
 * canonical display form of the name -- confirm against Utility.
 *
 * @param array the name table to rewrite; elements must be non-null
 */
static void fixArray (String[] array) {
    int index = 0;
    while (index < array.length) {
        String lowered = array[index].toLowerCase(Locale.ENGLISH);
        array[index] = Utility.getUnskeleton(lowered, true);
        ++index;
    }
}
public static String[] OLD_JOINING_GROUP = {
"<no shaping>",

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.10 $
* $Date: 2004/02/12 08:23:16 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -201,21 +201,21 @@ class UData implements UCD_Types {
int lastPos = result.length();
if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GC[generalCategory]).append('\'');
if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GENERAL_CATEGORY[generalCategory]).append('\'');
if (full || combiningClass != 0) result.append(" cc='").append(combiningClass & 0xFF).append('\'');
if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.DT[decompositionType]).append('\'');
if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.LONG_DECOMPOSITION_TYPE[decompositionType]).append('\'');
if (full || !s.equals(decompositionMapping)) result.append(" dm='").append(Utility.quoteXML(decompositionMapping)).append('\'');
if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.NT[numericType]).append('\'');
if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.LONG_NUMERIC_TYPE[numericType]).append('\'');
if (full || !Double.isNaN(numericValue)) result.append(" nv='").append(numericValue).append('\'');
if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.EA[eastAsianWidth]).append('\'');
if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LB[lineBreak]).append('\'');
if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.LONG_EAST_ASIAN_WIDTH[eastAsianWidth]).append('\'');
if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LINE_BREAK[lineBreak]).append('\'');
if (joiningType != -1 && (full || joiningType != JT_U)) result.append(" jt='").append(UCD_Names.JOINING_TYPE[joiningType]).append('\'');
if (full || joiningGroup != NO_SHAPING) result.append(" jg='").append(UCD_Names.JOINING_GROUP[joiningGroup]).append('\'');
if (full || age != 0) result.append(" ag='").append(UCD_Names.AGE[age]).append('\'');
if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BC[bidiClass]).append('\'');
if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BIDI_CLASS[bidiClass]).append('\'');
if (full || !bidiMirror.equals(s)) result.append(" bmg='").append(Utility.quoteXML(bidiMirror)).append('\'');
if (lastPos != result.length()) {

View file

@ -1,109 +0,0 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Attic/UnicodeMap.java,v $
* $Date: 2003/04/02 05:16:44 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.*;
import java.io.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
/**
 * Class that maps from codepoints to an index, and optionally a label.
 * <p>
 * Each index owns one {@link UnicodeSet} and one label. Lookups scan from the
 * most recently added index downwards, so when sets overlap the highest index
 * containing the code point wins.
 * <p>
 * Storage grows on demand. Slots that were skipped over by {@link #put} are
 * left {@code null} and are ignored by the lookup methods.
 */
public class UnicodeMap {
    /** Initial number of slots; the arrays are enlarged when more are needed. */
    private static final int INITIAL_CAPACITY = 50;

    UnicodeSet[] sets = new UnicodeSet[INITIAL_CAPACITY];
    String[] labels = new String[INITIAL_CAPACITY];
    int count = 0;

    /**
     * Adds a labeled set, removing from it anything already claimed by an
     * earlier set and printing a warning for every overlap.
     *
     * @return the index assigned to the new set
     */
    public int add(String label, UnicodeSet set) {
        return add(label, set, false, true);
    }

    /**
     * Add set
     *@param removeOld true: remove any collisions from sets already in the map
     * if false, remove any collisions from this set
     *@param signal: print a warning when collisions occur
     *@return the index assigned to the new set
     */
    public int add(String label, UnicodeSet set, boolean removeOld, boolean signal) {
        // remove from any preceding!!
        // Note: removeAll is applied directly to the stored set or to the
        // caller's set; no defensive copy is made, and the caller's set
        // instance is what ends up stored.
        for (int i = 0; i < count; ++i) {
            if (sets[i] == null) continue; // slot skipped over by put()
            if (!set.containsSome(sets[i])) continue;
            if (signal) showOverlap(label, set, i);
            if (removeOld) {
                sets[i] = sets[i].removeAll(set);
            } else {
                set = set.removeAll(sets[i]);
            }
        }
        ensureCapacity(count + 1);
        sets[count] = set;
        labels[count] = label;
        return count++;
    }

    /**
     * Prints a warning listing the code points shared by {@code set} and the
     * set stored at index {@code i}.
     */
    public void showOverlap(String label, UnicodeSet set, int i) {
        UnicodeSet delta = new UnicodeSet(set).retainAll(sets[i]);
        System.out.println("Warning! Overlap with " + label + " and " + labels[i]
            + ": " + delta);
    }

    /**
     * @return the highest index whose set contains {@code codepoint}, or -1
     */
    public int getIndex(int codepoint) {
        for (int i = count - 1; i >= 0; --i) {
            if (sets[i] != null && sets[i].contains(codepoint)) return i;
        }
        return -1;
    }

    /**
     * @return the highest index whose label equals {@code label} ignoring
     * case, or -1
     */
    public int getIndexFromLabel(String label) {
        for (int i = count - 1; i >= 0; --i) {
            if (labels[i] != null && labels[i].equalsIgnoreCase(label)) return i;
        }
        return -1;
    }

    /**
     * @return the label of the index that {@link #getIndex} reports for
     * {@code codepoint}, or null if there is none
     */
    public String getLabel(int codepoint) {
        return getLabelFromIndex(getIndex(codepoint));
    }

    /**
     * @return the label stored at {@code index}, or null when the index is out
     * of range (the stored label itself may also be null)
     */
    public String getLabelFromIndex(int index) {
        if (index < 0 || index >= count) return null;
        return labels[index];
    }

    /**
     * @return a copy of the set stored at {@code index}, or null when the
     * index is out of range or the slot was never populated
     */
    public UnicodeSet getSetFromIndex(int index) {
        if (index < 0 || index >= count) return null;
        if (sets[index] == null) return null;
        return new UnicodeSet(sets[index]); // protect from changes
    }

    /** @return one more than the highest index in use */
    public int size() {
        return count;
    }

    /**
     * Replaces the label stored at {@code index}. Does not change
     * {@link #size()}.
     *
     * @return {@code index}
     */
    public int setLabel(int index, String label) {
        ensureCapacity(index + 1);
        labels[index] = label;
        return index;
    }

    /**
     * Adds {@code codepoint} to the set at {@code index}, creating that set
     * (and extending {@link #size()}) if the slot is empty.
     *
     * @return {@code index}
     */
    public int put(int codepoint, int index) {
        ensureCapacity(index + 1);
        if (sets[index] == null) {
            sets[index] = new UnicodeSet();
            if (index >= count) count = index + 1;
        }
        sets[index].add(codepoint);
        return index;
    }

    /**
     * Enlarges the backing arrays so that at least {@code needed} slots exist.
     * Each array at least doubles when it grows, to keep repeated adds cheap.
     */
    private void ensureCapacity(int needed) {
        if (needed > sets.length) {
            UnicodeSet[] grown = new UnicodeSet[Math.max(needed, sets.length * 2)];
            System.arraycopy(sets, 0, grown, 0, sets.length);
            sets = grown;
        }
        if (needed > labels.length) {
            String[] grown = new String[Math.max(needed, labels.length * 2)];
            System.arraycopy(labels, 0, grown, 0, labels.length);
            labels = grown;
        }
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
* $Date: 2004/02/07 01:01:12 $
* $Revision: 1.24 $
* $Date: 2004/02/12 08:23:16 $
* $Revision: 1.25 $
*
*******************************************************************************
*/
@ -2239,11 +2239,11 @@ E0020-E007F; [TAGGING CHARACTERS]
int j = UTF32.char32At(s, 0);
try {
if (q == 0) {
check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GC, "GeneralCategory");
check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GENERAL_CATEGORY, "GeneralCategory");
check(i, Default.ucd().getCombiningClass(i), Default.ucd().getCombiningClass(j), "CanonicalClass");
check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BC, "BidiClass");
check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BIDI_CLASS, "BidiClass");
check(i, Default.ucd().getNumericValue(i), Default.ucd().getNumericValue(j), "NumericValue");
check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.NT, "NumericType");
check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.LONG_NUMERIC_TYPE, "NumericType");
if (false) {
for (byte k = LOWER; k < LIMIT_CASE; ++k) {

View file

@ -28,18 +28,22 @@ public class CallArgs {
}
int pos = arg.indexOf('.');
Method method = null;
String className = "Main";
String methodName = "";
if (pos >= 0) {
String className = prefix + arg.substring(0,pos);
String methodName = arg.substring(pos+1);
className = prefix + arg.substring(0,pos);
methodName = arg.substring(pos+1);
method = tryMethod(className, methodName, methodArgs);
} else {
method = tryMethod("Main", arg, methodArgs);
method = tryMethod(className, arg, methodArgs);
if (method == null) {
method = tryMethod(arg, "main", methodArgs);
className = arg;
methodName = "main";
method = tryMethod(className, methodName, methodArgs);
}
}
if (method == null) throw new IllegalArgumentException("Bad parameter: " + arg);
if (method == null) throw new IllegalArgumentException("Bad parameter: " + className + ", " + methodName);
System.out.println(method.getName() + "\t" + bf.join(methodArgs));
method.invoke(null,methodArgs);
}

View file

@ -0,0 +1,127 @@
package com.ibm.text.utility;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import com.ibm.text.UCD.Default;
import com.ibm.text.UCD.GenerateData;
import com.ibm.text.UCD.UCD_Types;
/**
 * A versioned Unicode data output file: opens the writer, emits the standard
 * header, and generates .bat files that diff the new file against earlier
 * versions. Obtain instances through {@link #openAndWriteHeader}.
 */
public class UnicodeDataFile {
    /** Writer for the file body; callers print their data here. */
    public PrintWriter out;
    private String newFile;
    private String batName;
    private String mostRecent;

    private UnicodeDataFile() {}

    /**
     * Opens {@code directory + filename + suffix} for writing and emits the
     * header: the file name, a date line, and either the contents of
     * {@code filename + "Header.txt"} or, if that file does not exist, a
     * generic derived-data header.
     *
     * @throws IOException if the file or the comparison .bat cannot be
     * written; the writer is closed before the exception propagates
     */
    public static UnicodeDataFile openAndWriteHeader(String directory, String filename) throws IOException {
        UnicodeDataFile result = new UnicodeDataFile();
        result.newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
        result.out = Utility.openPrintWriter(result.newFile, Utility.LATIN1_UNIX);
        boolean succeeded = false;
        try {
            // generateBat hands the .bat path back through a one-element array.
            String[] batName = {""};
            result.mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
            result.batName = batName[0];
            result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
            result.out.println(generateDateLine());
            result.out.println("#");
            try {
                Utility.appendFile(filename + "Header.txt", Utility.LATIN1, result.out);
            } catch (FileNotFoundException e) {
                // No file-specific header: fall back to the generic one.
                result.out.println("# Unicode Character Database: Derived Property Data");
                result.out.println("# Generated algorithmically from the Unicode Character Database");
                result.out.println("# For documentation, see UCD.html");
                result.out.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
                result.out.println("# if they have default property values.");
                result.out.println("# ================================================");
            }
            succeeded = true;
            return result;
        } finally {
            // Do not leak the open writer when header generation fails.
            if (!succeeded) result.out.close();
        }
    }

    /**
     * Closes the writer, then hands the finished file to
     * {@code Utility.renameIdentical} together with the most recent previous
     * version and the .bat name.
     * NOTE(review): renameIdentical is defined elsewhere; presumably it
     * compares the two files' contents -- confirm. The writer is closed first
     * so that call sees the complete, flushed file, not one still open.
     */
    public void close() throws IOException {
        out.close();
        Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName);
    }

    /** @return the "# Date: ..." header line */
    public static String generateDateLine() {
        return "# Date: " + Default.getDate() + " [MD]";
    }

    /**
     * @param withDVersion append "d" + dVersion when dVersion is non-negative
     * @return "-" + UCD version (+ optional d-version) + ".html"
     */
    public static String getHTMLFileSuffix(boolean withDVersion) {
        return "-" + Default.ucd().getVersion()
            + ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
            + ".html";
    }

    /**
     * @param withDVersion append "d" + dVersion when dVersion is non-negative
     * @return "-" + UCD version (+ optional d-version) + ".txt"
     */
    public static String getFileSuffix(boolean withDVersion) {
        return "-" + Default.ucd().getVersion()
            + ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
            + ".txt";
    }

    //Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names
    /**
     * Strips a trailing d-version ("d" followed by one digit or 'X') that
     * immediately precedes ".txt". Names that do not match are returned
     * unchanged, including names too short to carry such a marker.
     */
    public static String fixFile(String s) {
        int len = s.length();
        if (!s.endsWith(".txt")) return s;
        if (len < 6) return s; // too short to hold "d?" before ".txt"
        if (s.charAt(len-6) != 'd') return s;
        char c = s.charAt(len-5);
        if (c != 'X' && (c < '0' || '9' < c)) return s;
        s = s.substring(0,len-6) + s.substring(len-4);
        System.out.println("Fixing File Name: " + s);
        return s;
    }

    /**
     * Writes {@code batName + ".bat"}, a one-line script that launches the
     * comparison tool on {@code oldName} and the output location of
     * {@code newName}.
     *
     * @return the canonical path of the .bat file's output location
     */
    private static String generateBatAux(String batName, String oldName, String newName) throws IOException {
        String fullBatName = batName + ".bat";
        PrintWriter output = Utility.openPrintWriter(fullBatName, Utility.LATIN1_UNIX);
        try {
            newName = Utility.getOutputName(newName);
            System.out.println("Writing BAT to compare " + oldName + " and " + newName);

            File newFile = new File(newName);
            File oldFile = new File(oldName);
            output.println("\"C:\\Program Files\\Compare It!\\wincmp3.exe\" "
                // "\"C:\\Program Files\\wincmp.exe\" "
                + oldFile.getCanonicalFile()
                + " "
                + newFile.getCanonicalFile());
        } finally {
            // getCanonicalFile can throw; make sure the writer is still released.
            output.close();
        }
        return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString();
    }

    /*
    static String skeleton(String source) {
        StringBuffer result = new StringBuffer();
        source = source.toLowerCase();
        for (int i = 0; i < source.length(); ++i) {
            char c = source.charAt(i);
            if (c == ' ' || c == '_' || c == '-') continue;
            result.append(c);
        }
        return result.toString();
    }
    */
    // static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;

    /**
     * Generates the diff .bat files for {@code fileRoot}: one against the most
     * recent previous data file and, when a different older one exists, an
     * "OLDER-Diff_" one against that.
     *
     * @param outputBatName element 0 receives the path of the primary .bat;
     * left untouched when there is no previous version
     * @return the most recent previous data file, or null if none was found
     */
    public static String generateBat(String directory, String fileRoot, String suffix, String[] outputBatName) throws IOException {
        String mostRecent = Utility.getMostRecentUnicodeDataFile(UnicodeDataFile.fixFile(fileRoot), Default.ucd().getVersion(), true, true);
        if (mostRecent != null) {
            outputBatName[0] = UnicodeDataFile.generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix,
                mostRecent, directory + fileRoot + suffix);
        } else {
            System.out.println("No previous version of: " + fileRoot + ".txt");
            return null;
        }

        String lessRecent = Utility.getMostRecentUnicodeDataFile(UnicodeDataFile.fixFile(fileRoot), Default.ucd().getVersion(), false, true);
        if (lessRecent != null && !mostRecent.equals(lessRecent)) {
            UnicodeDataFile.generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix,
                lessRecent, directory + fileRoot + suffix);
        }
        return mostRecent;
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2004/02/07 01:01:17 $
* $Revision: 1.38 $
* $Date: 2004/02/12 08:23:14 $
* $Revision: 1.39 $
*
*******************************************************************************
*/
@ -16,6 +16,7 @@ package com.ibm.text.utility;
import java.util.*;
import java.text.*;
import java.io.*;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.Replaceable;
@ -717,7 +718,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException {
File file = new File(directory + filename);
Utility.fixDot();
System.out.println("Creating File: " + file.getCanonicalPath());
System.out.print("Creating File: " + file);
System.out.println("\t" + file.getCanonicalPath());
File parent = new File(file.getParent());
//System.out.println("Creating File: "+ parent);
parent.mkdirs();
@ -1095,7 +1097,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
public static void showSetDifferences(PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2,
boolean separateLines, boolean withChar, UnicodeMap names, UCD ucd) {
boolean separateLines, boolean withChar, OldUnicodeMap names, UCD ucd) {
UnicodeSet temp = new UnicodeSet(set1).removeAll(set2);
pw.println();
@ -1135,7 +1137,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
static java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN,
boolean withChar, UnicodeMap names, UCD ucd) {
boolean withChar, OldUnicodeMap names, UCD ucd) {
if (set.size() == 0) {
pw.println(prefix + "<none>");
pw.flush();
@ -1196,4 +1198,5 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
private static boolean isSeparateLineIDN(int start, int end, UCD ucd) {
return (isSeparateLineIDN(start, ucd) || isSeparateLineIDN(end, ucd));
}
}