mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
various changes for new properties
X-SVN-Rev: 14494
This commit is contained in:
parent
73529e8ad9
commit
459c96f0b1
22 changed files with 907 additions and 434 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2004/02/07 01:01:12 $
|
||||
* $Revision: 1.19 $
|
||||
* $Date: 2004/02/12 08:23:19 $
|
||||
* $Revision: 1.20 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1033,19 +1033,28 @@ public class WriteCharts implements UCD_Types {
|
|||
int[] starts = new int[names.length];
|
||||
int[] ends = new int[names.length];
|
||||
|
||||
UCD.BlockData blockData = new UCD.BlockData();
|
||||
Iterator blockIterator = Default.ucd().getBlockNames().iterator();
|
||||
|
||||
//UCD.BlockData blockData = new UCD.BlockData();
|
||||
|
||||
int counter = 0;
|
||||
int blockId = 0;
|
||||
while (Default.ucd().getBlockData(blockId++, blockData)) {
|
||||
names[counter] = blockData.name;
|
||||
starts[counter] = blockData.start;
|
||||
ends[counter] = blockData.end;
|
||||
String currentName;
|
||||
//int blockId = 0;
|
||||
while (blockIterator.hasNext()) {
|
||||
//while (Default.ucd().getBlockData(blockId++, blockData)) {
|
||||
names[counter] = currentName = (String) blockIterator.next();
|
||||
if (currentName.equals("No_Block")) continue;
|
||||
UnicodeSet s = Default.ucd().getBlockSet(currentName, null);
|
||||
if (s.getRangeCount() != 1) {
|
||||
throw new IllegalArgumentException("Failure with block set: " + currentName);
|
||||
}
|
||||
starts[counter] = s.getRangeStart(0);
|
||||
ends[counter] = s.getRangeEnd(0);
|
||||
//System.out.println(names[counter] + ", " + values[counter]);
|
||||
++counter;
|
||||
|
||||
// HACK
|
||||
if (blockData.name.equals("Tags")) {
|
||||
if (currentName.equals("Tags")) {
|
||||
names[counter] = "<i>reserved default ignorable</i>";
|
||||
starts[counter] = 0xE0080;
|
||||
ends[counter] = 0xE0FFF;
|
||||
|
|
7
tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt
Normal file
7
tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Correlated with Unicode 4.0
|
||||
# Note: The casing of block names is not normative.
|
||||
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
|
||||
#
|
||||
# Code points not explicitly listed in this file are given the value No_Block.
|
||||
#
|
||||
# Start Code..End Code; Block Name
|
|
@ -4,9 +4,13 @@ import java.io.IOException;
|
|||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
@ -20,7 +24,6 @@ import com.ibm.text.utility.Utility;
|
|||
|
||||
public class CheckICU {
|
||||
static final BagFormatter bf = new BagFormatter();
|
||||
static final BagFormatter bf2 = new BagFormatter();
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
System.out.println("Start");
|
||||
|
@ -45,25 +48,24 @@ public class CheckICU {
|
|||
return p.getMaxWidth(v);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static void test() throws IOException {
|
||||
//generateFile("4.0.0", "DerivedCombiningClass");
|
||||
//generateFile("4.0.0", "DerivedCoreProperties");
|
||||
if (true) return;
|
||||
checkUCD();
|
||||
itemFailures = new UnicodeSet();
|
||||
icuFactory = ICUPropertyFactory.make();
|
||||
toolFactory = ToolUnicodePropertySource.make("4.0.0");
|
||||
|
||||
String[] quickList = {
|
||||
"Block",
|
||||
"Math",
|
||||
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
|
||||
//"Numeric_Value"
|
||||
};
|
||||
for (int i = 0; i < quickList.length; ++i) {
|
||||
//testProperty(quickList[i], -1);
|
||||
bf2.setValueSource(new ReplaceLabel(toolFactory.getProperty(quickList[i])))
|
||||
.setLabelSource(null)
|
||||
.setNameSource(null)
|
||||
.setShowCount(false);
|
||||
bf2.showSetNames(bf2.CONSOLE, quickList[i], new UnicodeSet(0,0x10FFFF));
|
||||
testProperty(quickList[i], -1);
|
||||
}
|
||||
if (quickList.length > 0) return;
|
||||
|
||||
|
@ -97,11 +99,16 @@ public class CheckICU {
|
|||
if (nfc.isLeading(i)) leading.add(i);
|
||||
}
|
||||
PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt");
|
||||
bf.showSetNames(pw, "+Trailing+Starter", new UnicodeSet(trailing).retainAll(starter));
|
||||
bf.showSetNames(pw, "+Trailing-Starter", new UnicodeSet(trailing).removeAll(starter));
|
||||
bf.showSetNames(pw, "-Trailing-Starter", new UnicodeSet(trailing).complement().removeAll(starter));
|
||||
bf.showSetNames(pw, "+Trailing+Leading", new UnicodeSet(trailing).retainAll(leading));
|
||||
bf.showSetNames(pw, "+Trailing-Leading", new UnicodeSet(trailing).removeAll(leading));
|
||||
pw.println("+Trailing+Starter");
|
||||
bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(starter));
|
||||
pw.println("+Trailing-Starter");
|
||||
bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(starter));
|
||||
pw.println("-Trailing-Starter");
|
||||
bf.showSetNames(pw, new UnicodeSet(trailing).complement().removeAll(starter));
|
||||
pw.println("+Trailing+Leading");
|
||||
bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(leading));
|
||||
pw.println("+Trailing-Leading");
|
||||
bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(leading));
|
||||
pw.close();
|
||||
}
|
||||
/*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $
|
||||
* $Date: 2004/02/07 01:01:16 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -92,30 +92,26 @@ public class CompareProperties implements UCD_Types {
|
|||
public final static class UnicodeSetComparator implements Comparator {
|
||||
/**
|
||||
* Compares two UnicodeSets, producing a transitive ordering.
|
||||
* @return -1 if first is smaller (in size) than second,
|
||||
* 1 if first is greater (in size) than second,
|
||||
* Otherwise (since they are equal in size)
|
||||
* returns a comparison based on the first range that differs.
|
||||
* The ordering is based on the first codepoint that differs between them.
|
||||
* @return -1 if first set contains the first different code point
|
||||
* 1 if the second set does.
|
||||
* 0 if there is no difference.
|
||||
* If compareTo were added to UnicodeSet, this can be optimized to use list[i].
|
||||
* @author Davis
|
||||
*
|
||||
*/
|
||||
public int compare(Object o1, Object o2) {
|
||||
UnicodeSet bs1 = (UnicodeSet) o1;
|
||||
UnicodeSet bs2 = (UnicodeSet) o2;
|
||||
if (bs1.size() < bs2.size()) return -1;
|
||||
if (bs1.size() > bs2.size()) return 1;
|
||||
UnicodeSetIterator it1 = new UnicodeSetIterator(bs1);
|
||||
UnicodeSetIterator it2 = new UnicodeSetIterator(bs2);
|
||||
// Note: because they are the same size, and we stop if any ranges
|
||||
// are different, it is safe to test for both at the same time
|
||||
while (it1.nextRange() && it2.nextRange()) {
|
||||
if (it1.codepoint < it2.codepoint) return -1;
|
||||
UnicodeSetIterator it1 = new UnicodeSetIterator((UnicodeSet) o1);
|
||||
UnicodeSetIterator it2 = new UnicodeSetIterator((UnicodeSet) o2);
|
||||
while (it1.nextRange()) {
|
||||
if (!it2.nextRange()) return -1; // first has range while second exhausted
|
||||
if (it1.codepoint < it2.codepoint) return -1; // first has code point not in second
|
||||
if (it1.codepoint > it2.codepoint) return 1;
|
||||
if (it1.codepointEnd < it2.codepointEnd) return -1;
|
||||
if (it1.codepointEnd > it2.codepointEnd) return 1;
|
||||
if (it1.codepointEnd < it2.codepointEnd) return 1; // second has codepoint not in first
|
||||
if (it1.codepointEnd > it2.codepointEnd) return -1;
|
||||
}
|
||||
return 0;
|
||||
if (it2.nextRange()) return 1; // second has range while first is exhausted
|
||||
return 0; // otherwise we ran out in both of them, so equal
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -210,7 +206,7 @@ public class CompareProperties implements UCD_Types {
|
|||
public void printPartition() throws IOException {
|
||||
System.out.println("Set Size: " + map.size());
|
||||
PrintWriter output = Utility.openPrintWriter("Partition"
|
||||
+ GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS);
|
||||
+ UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS);
|
||||
|
||||
Iterator it = map.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
|
@ -234,7 +230,7 @@ public class CompareProperties implements UCD_Types {
|
|||
public void printStatistics() throws IOException {
|
||||
System.out.println("Set Size: " + map.size());
|
||||
PrintWriter output = Utility.openPrintWriter("Statistics"
|
||||
+ GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS);
|
||||
+ UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS);
|
||||
|
||||
System.out.println("Finding disjoints/contains");
|
||||
for (int i = 0; i < count; ++i) {
|
||||
|
@ -383,10 +379,10 @@ public class CompareProperties implements UCD_Types {
|
|||
|
||||
public static void listDifferences() throws IOException {
|
||||
|
||||
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet");
|
||||
output.println("# Generated for " + Default.ucd().getVersion());
|
||||
output.println(GenerateData.generateDateLine());
|
||||
output.println(UnicodeDataFile.generateDateLine());
|
||||
output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)");
|
||||
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
|
||||
* $Date: 2004/02/06 18:30:23 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2004/02/12 08:23:17 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -834,9 +834,9 @@ public final class ConvertUCD implements UCD_Types {
|
|||
//UCD_Names.BP_OLD
|
||||
|
||||
} else if (fieldName.equals("gc")) {
|
||||
uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GC, true);
|
||||
uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true);
|
||||
} else if (fieldName.equals("bc")) {
|
||||
uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BC, true);
|
||||
uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true);
|
||||
} else if (fieldName.equals("dt")) {
|
||||
if (major < 2) {
|
||||
if (fieldValue.equals("no-break")) fieldValue = "noBreak";
|
||||
|
@ -847,17 +847,17 @@ public final class ConvertUCD implements UCD_Types {
|
|||
else if (fieldValue.equals("no-join")) fieldValue = "compat";
|
||||
else if (fieldValue.equals("join")) fieldValue = "compat";
|
||||
}
|
||||
uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.DT, true);
|
||||
uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.LONG_DECOMPOSITION_TYPE, true);
|
||||
} else if (fieldName.equals("nt")) {
|
||||
uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true);
|
||||
uData.numericType = Utility.lookup(fieldValue, UCD_Names.LONG_NUMERIC_TYPE, true);
|
||||
|
||||
} else if (fieldName.equals("ea")) {
|
||||
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true);
|
||||
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EAST_ASIAN_WIDTH, true);
|
||||
} else if (fieldName.equals("lb")) {
|
||||
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true);
|
||||
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LINE_BREAK, true);
|
||||
|
||||
} else if (fieldName.equals("sn")) {
|
||||
uData.script = Utility.lookup(fieldValue, UCD_Names.SCRIPT, true);
|
||||
uData.script = Utility.lookup(fieldValue, UCD_Names.LONG_SCRIPT, true);
|
||||
|
||||
} else if (fieldName.equals("jt")) {
|
||||
uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE, true);
|
||||
|
|
|
@ -21,13 +21,19 @@ public final class Default implements UCD_Types {
|
|||
setUCD();
|
||||
}
|
||||
|
||||
private static boolean inRecursiveCall = false;
|
||||
private static void setUCD() {
|
||||
ucd = UCD.make(ucdVersion());
|
||||
if (inRecursiveCall) {
|
||||
throw new IllegalArgumentException("Recursive call to setUCD");
|
||||
}
|
||||
inRecursiveCall = true;
|
||||
ucd = UCD.make(ucdVersion);
|
||||
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion());
|
||||
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion());
|
||||
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion());
|
||||
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion());
|
||||
System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate())));
|
||||
inRecursiveCall = false;
|
||||
}
|
||||
|
||||
static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'");
|
||||
|
@ -40,32 +46,32 @@ public final class Default implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String ucdVersion() {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return ucdVersion;
|
||||
}
|
||||
|
||||
public static UCD ucd() {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return ucd;
|
||||
}
|
||||
public static Normalizer nfc() {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return nfc;
|
||||
}
|
||||
public static Normalizer nfd() {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return nfd;
|
||||
}
|
||||
public static Normalizer nfkc() {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return nfkc;
|
||||
}
|
||||
public static Normalizer nfkd() {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return nfkd;
|
||||
}
|
||||
public static Normalizer nf(int index) {
|
||||
if (ucd() == null) setUCD();
|
||||
if (ucd == null) setUCD();
|
||||
return nf[index];
|
||||
}
|
||||
|
||||
|
|
16
tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt
Normal file
16
tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
#
|
||||
# Unicode Character Database: Derived Property Data
|
||||
# This file shows when various code points were designated in Unicode
|
||||
# Notes:
|
||||
# - The term 'designated' means that a previously reserved code point was specified
|
||||
# to be a noncharacter or surrogate, or assigned as a character,
|
||||
# control or format code.
|
||||
# - Versions are only tracked from 1.1 onwards, since version 1.0
|
||||
# predated changes required by the ISO 10646 merger.
|
||||
# - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing.
|
||||
# - The supplementary private use code points and the non-character code points
|
||||
# were designated in version 2.0, but not specifically listed in the UCD
|
||||
# until versions 3.0 and 3.1 respectively.
|
||||
#
|
||||
# For details on the contents of each version, see
|
||||
# http://www.unicode.org/versions/enumeratedversions.html.
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
|
||||
* $Date: 2004/02/07 01:01:16 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -28,8 +28,8 @@ abstract public class GenerateBreakTest implements UCD_Types {
|
|||
Normalizer nfd;
|
||||
Normalizer nfkd;
|
||||
|
||||
UnicodeMap sampleMap = null;
|
||||
UnicodeMap map = new UnicodeMap();
|
||||
OldUnicodeMap sampleMap = null;
|
||||
OldUnicodeMap map = new OldUnicodeMap();
|
||||
|
||||
// ====================== Main ===========================
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
|
||||
* $Date: 2004/02/07 01:01:15 $
|
||||
* $Revision: 1.15 $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.16 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,6 +15,7 @@ package com.ibm.text.UCD;
|
|||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
@ -37,11 +38,12 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
|
||||
static PrintWriter log;
|
||||
|
||||
|
||||
public static void makeCaseFold(boolean normalized) throws java.io.IOException {
|
||||
PICK_SHORT = NF_CLOSURE = normalized;
|
||||
|
||||
log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true));
|
||||
log = Utility.openPrintWriter("CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
System.out.println("Writing Log: " + "CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true));
|
||||
|
||||
System.out.println("Making Full Data");
|
||||
Map fullData = getCaseFolding(true, NF_CLOSURE, "");
|
||||
|
@ -64,15 +66,8 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
String filename = "CaseFolding";
|
||||
if (normalized) filename += "-Normalized";
|
||||
String directory = "DerivedData/";
|
||||
String newFile = directory + filename + GenerateData.getFileSuffix(true);
|
||||
PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName);
|
||||
|
||||
out.println("# CaseFolding" + GenerateData.getFileSuffix(false));
|
||||
out.println(GenerateData.generateDateLine());
|
||||
out.println("#");
|
||||
Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out);
|
||||
UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, filename);
|
||||
PrintWriter out = fc.out;
|
||||
|
||||
/*
|
||||
PrintWriter out = new PrintWriter(
|
||||
|
@ -124,9 +119,8 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
drawLine(out, ch, "t", rSimpleTurkish);
|
||||
}
|
||||
}
|
||||
out.close();
|
||||
fc.close();
|
||||
log.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
}
|
||||
|
||||
/* Goal is following (with no entries for 0131 or 0069)
|
||||
|
@ -470,7 +464,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
if (normalize) suffix2 = "-Normalized";
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("SpecialCasingExceptions"
|
||||
+ suffix2 + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
+ suffix2 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
|
||||
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
|
||||
Utility.dot(ch);
|
||||
|
@ -580,12 +574,12 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
log.close();
|
||||
|
||||
System.out.println("Writing");
|
||||
String newFile = "DerivedData/SpecialCasing" + suffix2 + GenerateData.getFileSuffix(true);
|
||||
String newFile = "DerivedData/SpecialCasing" + suffix2 + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = GenerateData.generateBat("DerivedData/", "SpecialCasing", suffix2 + GenerateData.getFileSuffix(true), batName);
|
||||
out.println("# SpecialCasing" + GenerateData.getFileSuffix(false));
|
||||
out.println(GenerateData.generateDateLine());
|
||||
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "SpecialCasing", suffix2 + UnicodeDataFile.getFileSuffix(true), batName);
|
||||
out.println("# SpecialCasing" + UnicodeDataFile.getFileSuffix(false));
|
||||
out.println(UnicodeDataFile.generateDateLine());
|
||||
out.println("#");
|
||||
Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2004/02/07 01:01:15 $
|
||||
* $Revision: 1.32 $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.33 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -95,24 +95,11 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
|
||||
//Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names
|
||||
|
||||
public static String fixFile(String s) {
|
||||
int len = s.length();
|
||||
if (!s.endsWith(".txt")) return s;
|
||||
if (s.charAt(len-6) != 'd') return s;
|
||||
char c = s.charAt(len-5);
|
||||
if (c != 'X' && (c < '0' || '9' < c)) return s;
|
||||
s = s.substring(0,len-6) + s.substring(len-4);
|
||||
System.out.println("Fixing File Name: " + s);
|
||||
return s;
|
||||
}
|
||||
|
||||
static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
|
||||
|
||||
public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
|
||||
output.println("# " + fileName);
|
||||
output.println(generateDateLine());
|
||||
output.println(UnicodeDataFile.generateDateLine());
|
||||
output.println("#");
|
||||
if (headerChoice == HEADER_SCRIPTS) {
|
||||
} else if (headerChoice == HEADER_EXTEND) {
|
||||
|
@ -128,18 +115,6 @@ public class GenerateData implements UCD_Types {
|
|||
output.println();
|
||||
}
|
||||
|
||||
public static String getFileSuffix(boolean withDVersion) {
|
||||
return "-" + Default.ucd().getVersion()
|
||||
+ ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "")
|
||||
+ ".txt";
|
||||
}
|
||||
|
||||
public static String getHTMLFileSuffix(boolean withDVersion) {
|
||||
return "-" + Default.ucd().getVersion()
|
||||
+ ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "")
|
||||
+ ".html";
|
||||
}
|
||||
|
||||
public static void checkDifferences (String targetVersion) throws IOException {
|
||||
System.out.println("Checking Differences");
|
||||
UCD target = UCD.make(targetVersion);
|
||||
|
@ -176,14 +151,14 @@ public class GenerateData implements UCD_Types {
|
|||
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
|
||||
|
||||
|
||||
String newFile = directory + fileName + getFileSuffix(true);
|
||||
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
|
||||
System.out.println("New File: " + newFile);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
System.out.println("Most recent: " + mostRecent);
|
||||
|
||||
doHeader(fileName + getFileSuffix(false), output, headerChoice);
|
||||
doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
|
||||
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
|
||||
UCDProperty up = DerivedProperty.make(i, Default.ucd());
|
||||
if (up == null) continue;
|
||||
|
@ -227,13 +202,13 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
public static void generateCompExclusions() throws IOException {
|
||||
|
||||
String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true);
|
||||
String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat("DerivedData/", "CompositionExclusions", getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
output.println("# CompositionExclusions" + getFileSuffix(false));
|
||||
output.println(generateDateLine());
|
||||
output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false));
|
||||
output.println(UnicodeDataFile.generateDateLine());
|
||||
output.println("#");
|
||||
output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table.");
|
||||
output.println("#");
|
||||
|
@ -289,10 +264,6 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
}
|
||||
|
||||
static String generateDateLine() {
|
||||
return "# Date: " + Default.getDate() + " [MD]";
|
||||
}
|
||||
|
||||
static class CompLister extends PropertyLister {
|
||||
UCD oldUCD;
|
||||
int type;
|
||||
|
@ -509,21 +480,27 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
Iterator blockIterator = Default.ucd().getBlockNames().iterator();
|
||||
while (blockIterator.hasNext()) {
|
||||
addLine(sorted, "blk", "n/a", (String)blockIterator.next());
|
||||
}
|
||||
/*
|
||||
UCD.BlockData blockData = new UCD.BlockData();
|
||||
|
||||
int blockId = 0;
|
||||
while (Default.ucd().getBlockData(blockId++, blockData)) {
|
||||
addLine(sorted, "blk", "n/a", blockData.name);
|
||||
}
|
||||
*/
|
||||
|
||||
String filename = "PropertyAliases";
|
||||
String newFile = "DerivedData/" + filename + getFileSuffix(true);
|
||||
String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
log.println("# " + filename + getFileSuffix(false));
|
||||
log.println(generateDateLine());
|
||||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
@ -538,12 +515,12 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
|
||||
filename = "PropertyValueAliases";
|
||||
newFile = "DerivedData/" + filename + getFileSuffix(true);
|
||||
newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName);
|
||||
mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
log.println("# " + filename + getFileSuffix(false));
|
||||
log.println(generateDateLine());
|
||||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
@ -554,9 +531,9 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
|
||||
filename = "PropertyAliasSummary";
|
||||
newFile = "OtherData/" + filename + getFileSuffix(true);
|
||||
newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
mostRecent = generateBat("OtherData/", filename, getFileSuffix(true), batName);
|
||||
mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
@ -682,66 +659,16 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
static String skeleton(String source) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
source = source.toLowerCase();
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char c = source.charAt(i);
|
||||
if (c == ' ' || c == '_' || c == '-') continue;
|
||||
result.append(c);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
*/
|
||||
// static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
|
||||
|
||||
public static String generateBat(String directory, String fileRoot, String suffix, String[] batName) throws IOException {
|
||||
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), true, true);
|
||||
if (mostRecent != null) {
|
||||
batName[0] = generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix,
|
||||
mostRecent, directory + fileRoot + suffix);
|
||||
} else {
|
||||
System.out.println("No previous version of: " + fileRoot + ".txt");
|
||||
return null;
|
||||
}
|
||||
|
||||
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), false, true);
|
||||
if (lessRecent != null && !mostRecent.equals(lessRecent)) {
|
||||
generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix,
|
||||
lessRecent, directory + fileRoot + suffix);
|
||||
}
|
||||
return mostRecent;
|
||||
}
|
||||
|
||||
public static String generateBatAux(String batName, String oldName, String newName) throws IOException {
|
||||
String fullBatName = batName + ".bat";
|
||||
PrintWriter output = Utility.openPrintWriter(batName + ".bat", Utility.LATIN1_UNIX);
|
||||
|
||||
newName = Utility.getOutputName(newName);
|
||||
System.out.println("Writing BAT to compare " + oldName + " and " + newName);
|
||||
|
||||
File newFile = new File(newName);
|
||||
File oldFile = new File(oldName);
|
||||
output.println("\"C:\\Program Files\\wincmp.exe\" "
|
||||
+ oldFile.getCanonicalFile()
|
||||
+ " "
|
||||
+ newFile.getCanonicalFile());
|
||||
output.close();
|
||||
return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString();
|
||||
}
|
||||
|
||||
|
||||
public static void generateVerticalSlice(int startEnum, int endEnum,
|
||||
int headerChoice, String directory, String file) throws IOException {
|
||||
|
||||
|
||||
String newFile = directory + file + getFileSuffix(true);
|
||||
String newFile = directory + file + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat(directory, file, getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
doHeader(file + getFileSuffix(false), output, headerChoice);
|
||||
doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
|
||||
int last = -1;
|
||||
for (int i = startEnum; i < endEnum; ++i) {
|
||||
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd());
|
||||
|
@ -810,15 +737,15 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
|
||||
|
||||
String newFile = directory + fileName + getFileSuffix(true);
|
||||
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
String[] example = new String[256];
|
||||
|
||||
log.println("# " + fileName + getFileSuffix(false));
|
||||
log.println(generateDateLine());
|
||||
log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
log.println("# Normalization Test Suite");
|
||||
log.println("# Format:");
|
||||
|
@ -1012,10 +939,10 @@ public class GenerateData implements UCD_Types {
|
|||
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
|
||||
|
||||
|
||||
String newFile = directory + filename + getFileSuffix(true);
|
||||
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
DiffPropertyLister dpl;
|
||||
UnicodeSet cummulative = new UnicodeSet();
|
||||
|
||||
|
@ -1095,13 +1022,13 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
static final void generateAge(String directory, String filename) throws IOException {
|
||||
|
||||
String newFile = directory + filename + getFileSuffix(true);
|
||||
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
try {
|
||||
log.println("# " + filename + getFileSuffix(false));
|
||||
log.println(generateDateLine());
|
||||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
log.println("# Unicode Character Database: Derived Property Data");
|
||||
log.println("# This file shows when various code points were designated in Unicode");
|
||||
|
@ -1195,7 +1122,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
public static void listCombiningAccents() throws IOException {
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
PrintWriter log = Utility.openPrintWriter("ListAccents" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
Set set = new TreeSet();
|
||||
Set set2 = new TreeSet();
|
||||
|
||||
|
@ -1232,7 +1159,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
public static void listGreekVowels() throws IOException {
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
|
||||
Set set = new TreeSet();
|
||||
Set set2 = new TreeSet();
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
|
||||
* $Date: 2004/02/07 01:01:14 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -97,10 +97,10 @@ public final class GenerateStandardizedVariants implements UCD_Types {
|
|||
// now write out the results
|
||||
|
||||
String directory = "DerivedData/";
|
||||
String filename = directory + "StandardizedVariants" + GenerateData.getHTMLFileSuffix(true);
|
||||
String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true);
|
||||
PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName);
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
String version = Default.ucd().getVersion();
|
||||
int lastDot = version.lastIndexOf('.');
|
||||
|
|
280
tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
Normal file
280
tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
Normal file
|
@ -0,0 +1,280 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.UnicodeDataFile;
|
||||
|
||||
public class MakeUnicodeFiles {
|
||||
|
||||
static boolean DEBUG = true;
|
||||
|
||||
public static void main() throws IOException {
|
||||
generateFile("Scripts","z");
|
||||
}
|
||||
|
||||
static class OrderedMap {
|
||||
HashMap map = new HashMap();
|
||||
ArrayList keys = new ArrayList();
|
||||
void put(Object o, Object t) {
|
||||
map.put(o,t);
|
||||
keys.add(o);
|
||||
}
|
||||
List keyset() {
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
|
||||
static class PrintStyle {
|
||||
boolean longForm = false;
|
||||
boolean noLabel = false;
|
||||
boolean makeUppercase = false;
|
||||
boolean makeFirstLetterLowercase = false;
|
||||
String skipValue = null;
|
||||
String skipUnassigned = null;
|
||||
boolean orderByRangeStart = false;
|
||||
boolean valueList = false;
|
||||
|
||||
PrintStyle setLongForm(boolean value) {
|
||||
longForm = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setSkipUnassigned(String value) {
|
||||
skipUnassigned = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setNoLabel(boolean value) {
|
||||
noLabel = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setMakeUppercase(boolean value) {
|
||||
makeUppercase = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setMakeFirstLetterLowercase(boolean value) {
|
||||
makeFirstLetterLowercase = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setSkipValue(String value) {
|
||||
skipValue = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setOrderByRangeStart(boolean value) {
|
||||
orderByRangeStart = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setValueList(boolean value) {
|
||||
valueList = value;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
// Style used by generateFile(String) for properties that have no entry in printStyles.
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
// Used by generateFile(String) to match value names against skipValue / skipUnassigned.
static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator();
// Output style per property, looked up by the name returned from UnicodeProperty.getName().
static Map printStyles = new TreeMap(/*skeletonComparator*/);
static {
    printStyles.put("Script", new PrintStyle().setLongForm(true)
        .setMakeUppercase(true).setSkipUnassigned("Common"));
    // "Age" used to be registered twice; the later put replaced the earlier one
    // and silently dropped setNoLabel(true). Both settings now live in one entry.
    printStyles.put("Age", new PrintStyle().setNoLabel(true)
        .setSkipValue("unassigned"));
    printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true)
        .setMakeFirstLetterLowercase(true).setSkipUnassigned("none"));
    printStyles.put("General_Category", new PrintStyle().setNoLabel(true));
    printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown"));
    printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining"));
    printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group")
        .setMakeUppercase(true));
    printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral"));
    printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true)
        .setSkipValue("None").setMakeFirstLetterLowercase(true));
    printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right"));
    printStyles.put("Block", new PrintStyle().setNoLabel(true)
        .setValueList(true));
    printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0"));
    printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA"));
}
|
||||
//PropertyAliases
|
||||
//PropertyValueAliases
|
||||
//CompositionExclusions
|
||||
//SpecialCasing
|
||||
//NormalizationTest
|
||||
//add("CaseFolding", new String[] {"CaseFolding"});
|
||||
// Maps each output file name to the String[] of property names written to that file.
// A TreeMap, so generateFile(String, String) visits the file names in sorted order.
static Map contents = new TreeMap();
|
||||
// Registers the properties to be listed in the file called name,
// replacing any list registered earlier under the same name.
static void add(String name, String[] properties) {
|
||||
contents.put(name, properties);
|
||||
}
|
||||
static {
|
||||
add("Blocks", new String[] {"Block"});
|
||||
add("DerivedAge", new String[] {"Age"});
|
||||
add("Scripts", new String[] {"Script"});
|
||||
add("HangulSyllableType", new String[] {"HangulSyllableType"});
|
||||
if (false) add("DerivedNormalizationProps", new String[] {
|
||||
"FNC", "Full_Composition_Exclusion",
|
||||
"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
|
||||
"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"});
|
||||
|
||||
add("DerivedBidiClass", new String[] {"BidiClass"});
|
||||
add("DerivedBinaryProperties", new String[] {"BidiMirrored"});
|
||||
add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"});
|
||||
add("DerivedDecompositionType", new String[] {"DecompositionType"});
|
||||
add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"});
|
||||
add("DerivedGeneralCategory", new String[] {"GeneralCategory"});
|
||||
add("DerivedJoiningGroup", new String[] {"JoiningGroup"});
|
||||
add("DerivedJoiningType", new String[] {"JoiningType"});
|
||||
add("DerivedLineBreak", new String[] {"LineBreak"});
|
||||
add("DerivedNumericType", new String[] {"NumericType"});
|
||||
add("DerivedNumericValues", new String[] {"NumericValue"});
|
||||
add("PropList", new String[] {
|
||||
"White_Space", "Bidi_Control", "Join_Control",
|
||||
"Dash", "Hyphen", "Quotation_Mark",
|
||||
"Terminal_Punctuation", "Other_Math",
|
||||
"Hex_Digit", "ASCII_Hex_Digit",
|
||||
"Other_Alphabetic",
|
||||
"Ideographic",
|
||||
"Diacritic", "Extender",
|
||||
"Other_Lowercase", "Other_Uppercase",
|
||||
"Noncharacter_Code_Point",
|
||||
"Other_Grapheme_Extend",
|
||||
"Grapheme_Link",
|
||||
"IDS_Binary_Operator", "IDS_Trinary_Operator",
|
||||
"Radical", "Unified_Ideograph",
|
||||
"Other_Default_Ignorable_Code_Point",
|
||||
"Deprecated", "Soft_Dotted",
|
||||
"Logical_Order_Exception",
|
||||
"Other_ID_Start"
|
||||
});
|
||||
add("DerivedCoreProperties", new String[] {
|
||||
"Math", "Alphabetic", "Lowercase", "Uppercase",
|
||||
"ID_Start", "ID_Continue",
|
||||
"XID_Start", "XID_Continue",
|
||||
"Default_Ignorable_Code_Point",
|
||||
"Grapheme_Extend", "Grapheme_Base"
|
||||
});
|
||||
}
|
||||
|
||||
public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
|
||||
Iterator it = contents.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
String propname = (String) it.next();
|
||||
if (propname.compareTo(atOrAfter) < 0) continue;
|
||||
if (propname.compareTo(atOrBefore) > 0) continue;
|
||||
generateFile(propname);
|
||||
}
|
||||
}
|
||||
|
||||
public static void generateFile(String filename) throws IOException {
|
||||
String[] propList = (String[]) contents.get(filename);
|
||||
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
|
||||
PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");
|
||||
UnicodeProperty.Factory toolFactory
|
||||
= ToolUnicodePropertySource.make(Default.ucdVersion());
|
||||
BagFormatter bf2 = new BagFormatter(toolFactory);
|
||||
UnicodeSet unassigned = toolFactory.getSet("gc=cn")
|
||||
.addAll(toolFactory.getSet("gc=cs"));
|
||||
//System.out.println(unassigned.toPattern(true));
|
||||
// .removeAll(toolFactory.getSet("noncharactercodepoint=true"));
|
||||
String separator = bf2.getLineSeparator()
|
||||
+ "# ================================================"
|
||||
+ bf2.getLineSeparator() + bf2.getLineSeparator();
|
||||
|
||||
for (int i = 0; i < propList.length; ++i) {
|
||||
UnicodeProperty prop = toolFactory.getProperty(propList[i]);
|
||||
System.out.println(prop.getName());
|
||||
pw.print(separator);
|
||||
PrintStyle ps = (PrintStyle) printStyles.get(prop.getName());
|
||||
if (ps == null) {
|
||||
ps = DEFAULT_PRINT_STYLE;
|
||||
System.out.println("Using default style!");
|
||||
}
|
||||
if (ps.noLabel) bf2.setLabelSource(null);
|
||||
|
||||
if (ps.valueList) {
|
||||
bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter()))
|
||||
.setNameSource(null)
|
||||
.setShowCount(false)
|
||||
.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
|
||||
} else if (prop.getType() <= prop.EXTENDED_BINARY) {
|
||||
UnicodeSet s = prop.getSet("True");
|
||||
bf2.setValueSource(prop.getName());
|
||||
bf2.showSetNames(pw, s);
|
||||
} else {
|
||||
bf2.setValueSource(prop);
|
||||
Collection aliases = prop.getAvailableValueAliases();
|
||||
if (ps.orderByRangeStart) {
|
||||
System.out.println("Reordering");
|
||||
TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
|
||||
temp2.addAll(aliases);
|
||||
aliases = temp2;
|
||||
}
|
||||
Iterator it = aliases.iterator();
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
UnicodeSet s = prop.getSet(value);
|
||||
|
||||
System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue);
|
||||
System.out.println(s.toPattern(true));
|
||||
|
||||
if (skeletonComparator.compare(value, ps.skipValue) == 0) continue;
|
||||
if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) {
|
||||
s.removeAll(unassigned);
|
||||
}
|
||||
|
||||
if (s.size() == 0) continue;
|
||||
//if (unassigned.containsAll(s)) continue; // skip if all unassigned
|
||||
//if (s.contains(0xD0000)) continue; // skip unassigned
|
||||
pw.print(separator);
|
||||
if (!ps.longForm) value = prop.getShortestValueAlias(value);
|
||||
if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH);
|
||||
if (ps.makeFirstLetterLowercase) {
|
||||
// NOTE: this is ok since we are only working in ASCII
|
||||
value = value.substring(0,1).toLowerCase(Locale.ENGLISH)
|
||||
+ value.substring(1);
|
||||
}
|
||||
bf2.setValueSource(value);
|
||||
bf2.showSetNames(pw, s);
|
||||
}
|
||||
}
|
||||
}
|
||||
udf.close();
|
||||
}
|
||||
static class RangeStartComparator implements Comparator {
|
||||
UnicodeProperty prop;
|
||||
CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();
|
||||
RangeStartComparator(UnicodeProperty prop) {
|
||||
this.prop = prop;
|
||||
}
|
||||
public int compare(Object o1, Object o2) {
|
||||
UnicodeSet s1 = prop.getSet((String)o1);
|
||||
UnicodeSet s2 = prop.getSet((String)o2);
|
||||
if (true) System.out.println("comparing " + o1 + ", " + o2
|
||||
+ s1.toPattern(true) + "?" + s2.toPattern(true)
|
||||
+ ", " + comp.compare(s1, s2));
|
||||
return comp.compare(s1, s2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class ReplaceFilter extends UnicodeProperty.StringFilter {
|
||||
public String remap(String original) {
|
||||
return original.replace('_',' ');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
|
||||
* $Date: 2004/02/07 01:01:13 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2004/02/12 08:23:16 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -237,9 +237,9 @@ public final class TestNormalization {
|
|||
UnicodeSet t = (UnicodeSet) it.next();
|
||||
UnicodeSet l = (UnicodeSet) map.get(t);
|
||||
System.out.println("<tr><td>"
|
||||
+ bf.showSetNames("",l)
|
||||
+ bf.showSetNames(l)
|
||||
+ "</td><td>"
|
||||
+ bf.showSetNames("",t)
|
||||
+ bf.showSetNames(t)
|
||||
+ "</td></tr>");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
@ -15,62 +17,74 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
private static boolean needAgeCache = true;
|
||||
private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];
|
||||
|
||||
private static HashMap cache = new HashMap();
|
||||
private static HashMap factoryCache = new HashMap();
|
||||
|
||||
public static synchronized ToolUnicodePropertySource make(String version) {
|
||||
ToolUnicodePropertySource result = (ToolUnicodePropertySource)cache.get(version);
|
||||
ToolUnicodePropertySource result = (ToolUnicodePropertySource)factoryCache.get(version);
|
||||
if (result != null) return result;
|
||||
result = new ToolUnicodePropertySource(version);
|
||||
cache.put(version, result);
|
||||
factoryCache.put(version, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
private ToolUnicodePropertySource(String version) {
|
||||
ucd = UCD.make(version);
|
||||
version = ucd.getVersion();
|
||||
TreeSet names = new TreeSet();
|
||||
UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
|
||||
Iterator it = names.iterator();
|
||||
while (it.hasNext()) {
|
||||
String name = (String) it.next();
|
||||
//System.out.println("Name: " + name);
|
||||
add(new ToolUnicodeProperty(name));
|
||||
}
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
{set("Name", "na", UnicodeProperty.STRING, "<string>");}
|
||||
public String getValue(int codepoint) {
|
||||
public String _getValue(int codepoint) {
|
||||
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getName(codepoint);
|
||||
}
|
||||
});
|
||||
}.setMain("Name", "na", UnicodeProperty.STRING, version)
|
||||
.setValues("<string>"));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
{set("Block", "blk", UnicodeProperty.ENUMERATED,
|
||||
ucd.getBlockNames(null));}
|
||||
public String getValue(int codepoint) {
|
||||
public String _getValue(int codepoint) {
|
||||
if (codepoint == 0x1D100) {
|
||||
System.out.println("here");
|
||||
}
|
||||
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getBlock(codepoint);
|
||||
}
|
||||
});
|
||||
protected UnicodeMap _getUnicodeMap() {
|
||||
return ucd.blockData;
|
||||
}
|
||||
}.setMain("Block", "blk", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(ucd.getBlockNames(null)));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
{set("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, "<string>");}
|
||||
public String getValue(int codepoint) {
|
||||
public String _getValue(int codepoint) {
|
||||
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getBidiMirror(codepoint);
|
||||
}
|
||||
});
|
||||
}.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version)
|
||||
.setValues("<string>"));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
{set("Case_Folding", "cf", UnicodeProperty.STRING, "<string>");}
|
||||
public String getValue(int codepoint) {
|
||||
public String _getValue(int codepoint) {
|
||||
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD);
|
||||
}
|
||||
});
|
||||
}.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version)
|
||||
.setValues("<string>"));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
{set("Numeric_Value", "nv", UnicodeProperty.NUMERIC, "<number>");}
|
||||
public String getValue(int codepoint) {
|
||||
public String _getValue(int codepoint) {
|
||||
double num = ucd.getNumericValue(codepoint);
|
||||
if (Double.isNaN(num)) return null;
|
||||
return Double.toString(num);
|
||||
}
|
||||
});
|
||||
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version)
|
||||
.setValues("<number>"));
|
||||
}
|
||||
/*
|
||||
"Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
|
||||
|
@ -109,7 +123,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
setName(propertyAlias);
|
||||
}
|
||||
|
||||
public Collection getAvailableValueAliases(Collection result) {
|
||||
public Collection _getAvailableValueAliases(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
int type = getType() & ~EXTENDED_BIT;
|
||||
if (type == STRING) result.add("<string>");
|
||||
else if (type == NUMERIC) result.add("<string>");
|
||||
|
@ -149,34 +164,80 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
}
|
||||
if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase));
|
||||
}
|
||||
if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
|
||||
if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
|
||||
if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
|
||||
//if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
|
||||
//if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
|
||||
//if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection getAliases(Collection result) {
|
||||
String longName = up.getName(UCD_Types.LONG);
|
||||
addUnique(Utility.getUnskeleton(longName, true), result);
|
||||
String shortName = up.getName(UCD_Types.SHORT);
|
||||
addUnique(Utility.getUnskeleton(shortName, false), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection getValueAliases(String valueAlias, Collection result) {
|
||||
// TODO Auto-generated method stub
|
||||
public Collection _getAliases(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
String longName = up.getName(UCD_Types.LONG);
|
||||
addUnique(Utility.getUnskeleton(longName, true), result);
|
||||
String shortName = up.getName(UCD_Types.SHORT);
|
||||
addUnique(Utility.getUnskeleton(shortName, false), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection _getValueAliases(String valueAlias, Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
int type = getType() & ~EXTENDED_BIT;
|
||||
if (type == STRING) return result;
|
||||
else if (type == NUMERIC) return result;
|
||||
else if (type == BINARY) {
|
||||
UnicodeProperty.addUnique(valueAlias, result);
|
||||
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, result);
|
||||
} else if (type == ENUMERATED) {
|
||||
byte style = UCD_Types.LONG;
|
||||
int prop = propMask>>8;
|
||||
boolean titlecase = false;
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
try {
|
||||
switch (prop) {
|
||||
case UCD_Types.CATEGORY>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, result);
|
||||
case UCD_Types.COMBINING_CLASS>>8:
|
||||
addUnique(""+i, result);
|
||||
return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, result);
|
||||
case UCD_Types.BIDI_CLASS>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, result);
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, UCD_Names.DECOMPOSITION_TYPE, result);
|
||||
case UCD_Types.NUMERIC_TYPE>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_NUMERIC_TYPE, UCD_Names.NUMERIC_TYPE, result);
|
||||
case UCD_Types.EAST_ASIAN_WIDTH>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result);
|
||||
case UCD_Types.LINE_BREAK>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
|
||||
case UCD_Types.JOINING_TYPE>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result);
|
||||
case UCD_Types.JOINING_GROUP>>8:
|
||||
return lookup(valueAlias, UCD_Names.JOINING_GROUP, null, result);
|
||||
case UCD_Types.SCRIPT>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_SCRIPT, UCD_Names.SCRIPT, result);
|
||||
case UCD_Types.AGE>>8:
|
||||
return lookup(valueAlias, UCD_Names.AGE, null, result);
|
||||
case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_HANGUL_SYLLABLE_TYPE, UCD_Names.HANGUL_SYLLABLE_TYPE, result);
|
||||
default: throw new IllegalArgumentException("Internal Error: " + prop);
|
||||
}
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new ArrayIndexOutOfBoundsException("not supported yet");
|
||||
}
|
||||
|
||||
public String getValue(int codepoint) {
|
||||
public String _getValue(int codepoint) {
|
||||
byte style = UCD_Types.LONG;
|
||||
String temp = null;
|
||||
boolean titlecase = false;
|
||||
switch (propMask>>8) {
|
||||
case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex(ucd.getCategory(codepoint), style)); break;
|
||||
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex(ucd.getCombiningClass(codepoint), style));
|
||||
if (temp.startsWith("Fixed_")) temp = temp.substring(6);
|
||||
//if (temp.startsWith("Fixed_")) temp = temp.substring(6);
|
||||
break;
|
||||
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex(ucd.getBidiClass(codepoint), style)); break;
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(codepoint), style));
|
||||
|
@ -226,7 +287,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
private int getPropertyTypeInternal() {
|
||||
int result = 0;
|
||||
String name = up.getName(UCD_Types.LONG);
|
||||
if ("Age".equals(name)) return STRING;
|
||||
if ("Age".equals(name)) return ENUMERATED;
|
||||
switch (up.getValueType()) {
|
||||
case UCD_Types.NUMERIC_PROP: result = NUMERIC; break;
|
||||
case UCD_Types.STRING_PROP: result = STRING; break;
|
||||
|
@ -243,5 +304,18 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Returns the version string reported by the UCD instance held by up (up.ucd).
public String _getVersion() {
|
||||
return up.ucd.getVersion();
|
||||
}
|
||||
|
||||
}
|
||||
static Collection lookup(String valueAlias, String[] main, String[] aux, Collection result) {
|
||||
//System.out.println(valueAlias + "=>");
|
||||
int pos = 0xFF & Utility.lookup(valueAlias, main, true);
|
||||
//System.out.println("=>" + aux[pos]);
|
||||
UnicodeProperty.addUnique(valueAlias, result);
|
||||
if (aux == null) return result;
|
||||
return UnicodeProperty.addUnique(aux[pos], result);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2004/02/07 01:01:13 $
|
||||
* $Revision: 1.30 $
|
||||
* $Date: 2004/02/12 08:23:16 $
|
||||
* $Revision: 1.31 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -29,6 +29,7 @@ import java.io.BufferedReader;
|
|||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
|
@ -364,9 +365,19 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
BIDI_R_SET = new UnicodeSet();
|
||||
BIDI_AL_SET = new UnicodeSet();
|
||||
|
||||
|
||||
blockData.getSet("Hebrew",BIDI_R_SET);
|
||||
blockData.getSet("Cypriot_Syllabary",BIDI_R_SET);
|
||||
|
||||
blockData.getSet("Arabic",BIDI_AL_SET);
|
||||
blockData.getSet("Syriac",BIDI_AL_SET);
|
||||
blockData.getSet("Thaana",BIDI_AL_SET);
|
||||
blockData.getSet("Arabic_Presentation_Forms-A",BIDI_AL_SET);
|
||||
blockData.getSet("Arabic_Presentation_Forms-B",BIDI_AL_SET);
|
||||
/*
|
||||
int blockId = 0;
|
||||
BlockData blockData = new BlockData();
|
||||
UnicodeSet s = blockData.get
|
||||
while (getBlockData(blockId++, blockData)) {
|
||||
if (blockData.name.equals("Hebrew")
|
||||
|| blockData.name.equals("Cypriot_Syllabary")
|
||||
|
@ -391,6 +402,7 @@ public final class UCD implements UCD_Types {
|
|||
+ ".." + Utility.hex(blockData.end));
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
System.out.println("BIDI_R_SET: " + BIDI_R_SET);
|
||||
System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
|
||||
|
@ -835,8 +847,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getCategoryID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0 || prop >= UCD_Names.GC.length ? null
|
||||
: (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop];
|
||||
return prop < 0 || prop >= UCD_Names.GENERAL_CATEGORY.length ? null
|
||||
: (style != LONG) ? UCD_Names.GENERAL_CATEGORY[prop] : UCD_Names.LONG_GENERAL_CATEGORY[prop];
|
||||
}
|
||||
|
||||
|
||||
|
@ -898,11 +910,11 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
public static String getBidiClassID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0
|
||||
|| prop >= UCD_Names.BC.length
|
||||
|| prop >= UCD_Names.BIDI_CLASS.length
|
||||
? null
|
||||
: style == SHORT
|
||||
? UCD_Names.BC[prop]
|
||||
: UCD_Names.LONG_BC[prop];
|
||||
? UCD_Names.BIDI_CLASS[prop]
|
||||
: UCD_Names.LONG_BIDI_CLASS[prop];
|
||||
}
|
||||
|
||||
public String getDecompositionTypeID(int codePoint) {
|
||||
|
@ -913,8 +925,8 @@ public final class UCD implements UCD_Types {
|
|||
return getDecompositionTypeID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
public static String getDecompositionTypeID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0 || prop >= UCD_Names.DT.length ? null
|
||||
: style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
|
||||
return prop < 0 || prop >= UCD_Names.LONG_DECOMPOSITION_TYPE.length ? null
|
||||
: style == SHORT ? UCD_Names.DECOMPOSITION_TYPE[prop] : UCD_Names.LONG_DECOMPOSITION_TYPE[prop];
|
||||
}
|
||||
|
||||
public String getNumericTypeID(int codePoint) {
|
||||
|
@ -926,8 +938,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0 || prop >= UCD_Names.NT.length ? null
|
||||
: style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop];
|
||||
return prop < 0 || prop >= UCD_Names.LONG_NUMERIC_TYPE.length ? null
|
||||
: style == SHORT ? UCD_Names.NUMERIC_TYPE[prop] : UCD_Names.LONG_NUMERIC_TYPE[prop];
|
||||
}
|
||||
|
||||
public String getEastAsianWidthID(int codePoint) {
|
||||
|
@ -939,8 +951,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0 || prop >= UCD_Names.EA.length ? null
|
||||
: style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop];
|
||||
return prop < 0 || prop >= UCD_Names.LONG_EAST_ASIAN_WIDTH.length ? null
|
||||
: style != LONG ? UCD_Names.EAST_ASIAN_WIDTH[prop] : UCD_Names.LONG_EAST_ASIAN_WIDTH[prop];
|
||||
}
|
||||
|
||||
public String getLineBreakID(int codePoint) {
|
||||
|
@ -952,8 +964,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getLineBreakID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0 || prop >= UCD_Names.LB.length ? null
|
||||
: style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop];
|
||||
return prop < 0 || prop >= UCD_Names.LINE_BREAK.length ? null
|
||||
: style != LONG ? UCD_Names.LINE_BREAK[prop] : UCD_Names.LONG_LINE_BREAK[prop];
|
||||
}
|
||||
|
||||
public String getJoiningTypeID(int codePoint) {
|
||||
|
@ -993,7 +1005,7 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
public static String getScriptID_fromIndex(byte prop, byte length) {
|
||||
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
|
||||
: (length == SHORT) ? UCD_Names.ABB_SCRIPT[prop] : UCD_Names.SCRIPT[prop];
|
||||
: (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop];
|
||||
}
|
||||
|
||||
public String getAgeID(int codePoint) {
|
||||
|
@ -1553,6 +1565,54 @@ to guarantee identifier closure.
|
|||
}
|
||||
}
|
||||
|
||||
UnicodeMap blockData;
|
||||
public String getBlock(int codePoint) {
|
||||
if (blockData == null) loadBlocks();
|
||||
return (String)blockData.getValue(codePoint);
|
||||
}
|
||||
public Collection getBlockNames() {
|
||||
return getBlockNames(null);
|
||||
}
|
||||
public Collection getBlockNames(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
if (blockData == null) loadBlocks();
|
||||
return blockData.getAvailableValues(result);
|
||||
}
|
||||
public UnicodeSet getBlockSet(String value, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
if (blockData == null) loadBlocks();
|
||||
return blockData.getSet(value, result);
|
||||
}
|
||||
|
||||
private void loadBlocks() {
|
||||
blockData = new UnicodeMap();
|
||||
try {
|
||||
BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
|
||||
try {
|
||||
while (true) {
|
||||
// 0000..007F; Basic Latin
|
||||
String line = Utility.readDataLine(in);
|
||||
if (line == null) break;
|
||||
if (line.length() == 0) continue;
|
||||
int pos1 = line.indexOf('.');
|
||||
int pos2 = line.indexOf(';', pos1);
|
||||
|
||||
//lastBlock = new BlockData();
|
||||
int start = Integer.parseInt(line.substring(0, pos1), 16);
|
||||
int end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
|
||||
String name = line.substring(pos2+1).trim().replace(' ', '_');
|
||||
blockData.putAll(start,end, name);
|
||||
}
|
||||
blockData.setMissing("No_Block");
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("Can't read block file");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public static class BlockData {
|
||||
public int start;
|
||||
public int end;
|
||||
|
@ -1560,13 +1620,17 @@ to guarantee identifier closure.
|
|||
}
|
||||
|
||||
public String NOBLOCK = Utility.getUnskeleton("no block", true);
|
||||
private BlockData lastBlock;
|
||||
|
||||
public String getBlock(int codePoint) {
|
||||
if (blocks == null) loadBlocks();
|
||||
if (codePoint >= lastBlock.start && codePoint <= lastBlock.end) return lastBlock.name;
|
||||
Iterator it = blocks.iterator();
|
||||
while (it.hasNext()) {
|
||||
BlockData data = (BlockData) it.next();
|
||||
if (codePoint >= data.start && codePoint <= data.end) return data.name;
|
||||
lastBlock = (BlockData) it.next();
|
||||
if (codePoint < lastBlock.start) continue;
|
||||
if (codePoint > lastBlock.end) break;
|
||||
return lastBlock.name;
|
||||
}
|
||||
return NOBLOCK;
|
||||
}
|
||||
|
@ -1612,11 +1676,11 @@ to guarantee identifier closure.
|
|||
int pos1 = line.indexOf('.');
|
||||
int pos2 = line.indexOf(';', pos1);
|
||||
|
||||
BlockData blockData = new BlockData();
|
||||
blockData.start = Integer.parseInt(line.substring(0, pos1), 16);
|
||||
blockData.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
|
||||
blockData.name = line.substring(pos2+1).trim().replace(' ', '_');
|
||||
blocks.add(blockData);
|
||||
lastBlock = new BlockData();
|
||||
lastBlock.start = Integer.parseInt(line.substring(0, pos1), 16);
|
||||
lastBlock.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
|
||||
lastBlock.name = line.substring(pos2+1).trim().replace(' ', '_');
|
||||
blocks.add(lastBlock);
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
|
@ -1625,6 +1689,7 @@ to guarantee identifier closure.
|
|||
throw new IllegalArgumentException("Can't read block file");
|
||||
}
|
||||
}
|
||||
*/
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
|
|
|
@ -5,14 +5,16 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2004/02/06 18:30:19 $
|
||||
* $Revision: 1.24 $
|
||||
* $Date: 2004/02/12 08:23:17 $
|
||||
* $Revision: 1.25 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
|
||||
|
@ -234,15 +236,15 @@ final class UCD_Names implements UCD_Types {
|
|||
static final String[] YN_TABLE = {"F", "T"};
|
||||
static final String[] YN_TABLE_LONG = {"False", "True"};
|
||||
|
||||
static String[] SHORT_EA = {
|
||||
static String[] EAST_ASIAN_WIDTH = {
|
||||
"N", "A", "H", "W", "F", "Na"
|
||||
};
|
||||
|
||||
static String[] EA = {
|
||||
static String[] LONG_EAST_ASIAN_WIDTH = {
|
||||
"Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
|
||||
};
|
||||
|
||||
static final String[] LB = {
|
||||
static final String[] LINE_BREAK = {
|
||||
"XX", "OP", "CL", "QU", "GL", "NS", "EX", "SY",
|
||||
"IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
|
||||
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
|
||||
|
@ -255,7 +257,7 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
};
|
||||
|
||||
static final String[] LONG_LB = {
|
||||
static final String[] LONG_LINE_BREAK = {
|
||||
"Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation",
|
||||
"Glue", "Nonstarter", "Exclamation", "BreakSymbols",
|
||||
"InfixNumeric", "PrefixNumeric", "PostfixNumeric",
|
||||
|
@ -270,7 +272,7 @@ final class UCD_Names implements UCD_Types {
|
|||
//"Trailing_Jamo",
|
||||
};
|
||||
|
||||
public static final String[] SCRIPT = {
|
||||
public static final String[] LONG_SCRIPT = {
|
||||
"COMMON", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
|
||||
"LATIN", // LATIN
|
||||
"GREEK", // GREEK
|
||||
|
@ -328,7 +330,7 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
};
|
||||
|
||||
public static final String[] ABB_SCRIPT = {
|
||||
public static final String[] SCRIPT = {
|
||||
"Zyyy", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
|
||||
"Latn", // LATIN
|
||||
"Grek", // GREEK
|
||||
|
@ -398,15 +400,18 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
|
||||
static final String[] AGE = {
|
||||
"UNSPECIFIED",
|
||||
"unassigned",
|
||||
"1.1",
|
||||
"2.0", "2.1",
|
||||
"3.0", "3.1", "3.2",
|
||||
"2.0",
|
||||
"2.1",
|
||||
"3.0",
|
||||
"3.1",
|
||||
"3.2",
|
||||
"4.0"
|
||||
};
|
||||
|
||||
|
||||
static final String[] GC = {
|
||||
static final String[] GENERAL_CATEGORY = {
|
||||
"Cn", // = Other, Not Assigned 0
|
||||
|
||||
"Lu", // = Letter, Uppercase 1
|
||||
|
@ -449,7 +454,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"Pf" // = Punctuation, Final quote 30 (may behave like Ps or Pe depending on usage)
|
||||
};
|
||||
|
||||
static final String[] LONG_GC = {
|
||||
static final String[] LONG_GENERAL_CATEGORY = {
|
||||
"Unassigned", // = Other, Not Assigned 0
|
||||
|
||||
"UppercaseLetter", // = Letter, Uppercase 1
|
||||
|
@ -505,7 +510,7 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
|
||||
|
||||
static final String[] BC = {
|
||||
static final String[] BIDI_CLASS = {
|
||||
"L", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
|
||||
"R", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
|
||||
"EN", // European Number
|
||||
|
@ -520,7 +525,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"<unused>", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
|
||||
};
|
||||
|
||||
static String[] LONG_BC = {
|
||||
static String[] LONG_BIDI_CLASS = {
|
||||
"LeftToRight", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
|
||||
"RightToLeft", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
|
||||
"EuropeanNumber", // European Number
|
||||
|
@ -543,8 +548,8 @@ final class UCD_Names implements UCD_Types {
|
|||
"LOWER", "TITLE", "UPPER", "UNCASED"
|
||||
};
|
||||
|
||||
static String[] DT = {
|
||||
"", // NONE
|
||||
static String[] LONG_DECOMPOSITION_TYPE = {
|
||||
"none", // NONE
|
||||
"canonical", // CANONICAL
|
||||
"compat", // Otherwise unspecified compatibility character.
|
||||
"font", // A font variant (e.g. a blackletter form).
|
||||
|
@ -563,9 +568,8 @@ final class UCD_Names implements UCD_Types {
|
|||
"square", // A CJK squared font variant.
|
||||
"fraction", // A vulgar fraction form.
|
||||
};
|
||||
|
||||
static String[] SHORT_DT = {
|
||||
"", // NONE
|
||||
static String[] DECOMPOSITION_TYPE = {
|
||||
"none", // NONE
|
||||
"can", // CANONICAL
|
||||
"com", // Otherwise unspecified compatibility character.
|
||||
"font", // A font variant (e.g. a blackletter form).
|
||||
|
@ -584,14 +588,19 @@ final class UCD_Names implements UCD_Types {
|
|||
"sqr", // A CJK squared font variant.
|
||||
"fra", // A vulgar fraction form.
|
||||
};
|
||||
static {
|
||||
fixArray(LONG_DECOMPOSITION_TYPE);
|
||||
fixArray(DECOMPOSITION_TYPE);
|
||||
}
|
||||
|
||||
|
||||
static private String[] MIRRORED_TABLE = {
|
||||
"N",
|
||||
"Y"
|
||||
};
|
||||
|
||||
static String[] NT = {
|
||||
"",
|
||||
static String[] LONG_NUMERIC_TYPE = {
|
||||
"none",
|
||||
"numeric",
|
||||
"digit",
|
||||
"decimal",
|
||||
|
@ -602,8 +611,8 @@ final class UCD_Names implements UCD_Types {
|
|||
*/
|
||||
};
|
||||
|
||||
static String[] SHORT_NT = {
|
||||
"",
|
||||
static String[] NUMERIC_TYPE = {
|
||||
"none",
|
||||
"nu",
|
||||
"di",
|
||||
"de",
|
||||
|
@ -613,18 +622,66 @@ final class UCD_Names implements UCD_Types {
|
|||
"ho"
|
||||
*/
|
||||
};
|
||||
|
||||
static {
|
||||
if (LIMIT_CATEGORY != GC.length || LIMIT_CATEGORY != LONG_GC.length) {
|
||||
fixArray(LONG_NUMERIC_TYPE);
|
||||
fixArray(NUMERIC_TYPE);
|
||||
}
|
||||
|
||||
static String[] COMBINING_CLASS = new String[256];
|
||||
static String[] LONG_COMBINING_CLASS = new String[256];
|
||||
// TODO clean this up, just a quick copy of code
|
||||
static {
|
||||
for (int style = SHORT; style <= LONG; ++style)
|
||||
for (int index = 0; index < 256; ++index) {
|
||||
String s = null;
|
||||
switch (index) {
|
||||
case 0: s = style < LONG ? "NR" : "NotReordered"; break;
|
||||
case 1: s = style < LONG ? "OV" : "Overlay"; break;
|
||||
case 7: s = style < LONG ? "NK" : "Nukta"; break;
|
||||
case 8: s = style < LONG ? "KV" : "KanaVoicing"; break;
|
||||
case 9: s = style < LONG ? "VR" : "Virama"; break;
|
||||
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
|
||||
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
|
||||
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
|
||||
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
|
||||
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
|
||||
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
|
||||
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
|
||||
case 216: s = style < LONG ? "ATAR" : "AttachedAboveRight"; break;
|
||||
case 218: s = style < LONG ? "BL" : "BelowLeft"; break;
|
||||
case 220: s = style < LONG ? "B" : "Below"; break;
|
||||
case 222: s = style < LONG ? "BR" : "BelowRight"; break;
|
||||
case 224: s = style < LONG ? "L" : "Left"; break;
|
||||
case 226: s = style < LONG ? "R" : "Right"; break;
|
||||
case 228: s = style < LONG ? "AL" : "AboveLeft"; break;
|
||||
case 230: s = style < LONG ? "A" : "Above"; break;
|
||||
case 232: s = style < LONG ? "AR" : "AboveRight"; break;
|
||||
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
|
||||
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
|
||||
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
|
||||
default: s = style < LONG ? "" + index : "Fixed_" + index;
|
||||
}
|
||||
if (style < LONG) COMBINING_CLASS[index] = s;
|
||||
else LONG_COMBINING_CLASS[index] = s;
|
||||
}
|
||||
if (false) for (int i = 0; i < 256; ++i) {
|
||||
System.out.println(i
|
||||
+ "\t" + COMBINING_CLASS[i]
|
||||
+ "\t" + LONG_COMBINING_CLASS[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
if (LIMIT_CATEGORY != GENERAL_CATEGORY.length || LIMIT_CATEGORY != LONG_GENERAL_CATEGORY.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: category");
|
||||
}
|
||||
if (LIMIT_BIDI_CLASS != BC.length) {
|
||||
if (LIMIT_BIDI_CLASS != BIDI_CLASS.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: bidi");
|
||||
}
|
||||
if (LIMIT_LINE_BREAK != LB.length || LIMIT_LINE_BREAK != LONG_LB.length) {
|
||||
if (LIMIT_LINE_BREAK != LINE_BREAK.length || LIMIT_LINE_BREAK != LONG_LINE_BREAK.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: linebreak");
|
||||
}
|
||||
if (LIMIT_DECOMPOSITION_TYPE != DT.length || LIMIT_DECOMPOSITION_TYPE != SHORT_DT.length) {
|
||||
if (LIMIT_DECOMPOSITION_TYPE != LONG_DECOMPOSITION_TYPE.length || LIMIT_DECOMPOSITION_TYPE != DECOMPOSITION_TYPE.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: decomp type");
|
||||
}
|
||||
if (LIMIT_MIRRORED != MIRRORED_TABLE.length) {
|
||||
|
@ -633,16 +690,16 @@ final class UCD_Names implements UCD_Types {
|
|||
if (LIMIT_CASE != CASE_TABLE.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: case");
|
||||
}
|
||||
if (LIMIT_NUMERIC_TYPE != NT.length) {
|
||||
if (LIMIT_NUMERIC_TYPE != LONG_NUMERIC_TYPE.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: numeric type");
|
||||
}
|
||||
if (LIMIT_EAST_ASIAN_WIDTH != EA.length) {
|
||||
if (LIMIT_EAST_ASIAN_WIDTH != LONG_EAST_ASIAN_WIDTH.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: east Asian Width");
|
||||
}
|
||||
if (LIMIT_BINARY_PROPERTIES != BP.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: binary properties");
|
||||
}
|
||||
if (LIMIT_SCRIPT != SCRIPT.length) {
|
||||
if (LIMIT_SCRIPT != LONG_SCRIPT.length) {
|
||||
System.err.println("!! ERROR !! Enums and Names out of sync: script");
|
||||
}
|
||||
if (LIMIT_AGE != AGE.length) {
|
||||
|
@ -650,7 +707,7 @@ final class UCD_Names implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
public static byte ON = Utility.lookup("ON", BC, true);
|
||||
public static byte ON = Utility.lookup("ON", BIDI_CLASS, true);
|
||||
|
||||
public static String[] HANGUL_SYLLABLE_TYPE = {
|
||||
"NA",
|
||||
|
@ -744,6 +801,16 @@ final class UCD_Names implements UCD_Types {
|
|||
"KHAPH",
|
||||
"FE",
|
||||
};
|
||||
static {
|
||||
fixArray(JOINING_GROUP);
|
||||
}
|
||||
static void fixArray (String[] array) {
|
||||
for (int i = 0; i < array.length; ++i) {
|
||||
array[i] = Utility.getUnskeleton(
|
||||
array[i].toLowerCase(Locale.ENGLISH),
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
public static String[] OLD_JOINING_GROUP = {
|
||||
"<no shaping>",
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
|
||||
* $Date: 2004/02/07 01:01:13 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2004/02/12 08:23:16 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -201,21 +201,21 @@ class UData implements UCD_Types {
|
|||
|
||||
int lastPos = result.length();
|
||||
|
||||
if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GC[generalCategory]).append('\'');
|
||||
if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GENERAL_CATEGORY[generalCategory]).append('\'');
|
||||
if (full || combiningClass != 0) result.append(" cc='").append(combiningClass & 0xFF).append('\'');
|
||||
if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.DT[decompositionType]).append('\'');
|
||||
if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.LONG_DECOMPOSITION_TYPE[decompositionType]).append('\'');
|
||||
if (full || !s.equals(decompositionMapping)) result.append(" dm='").append(Utility.quoteXML(decompositionMapping)).append('\'');
|
||||
|
||||
if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.NT[numericType]).append('\'');
|
||||
if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.LONG_NUMERIC_TYPE[numericType]).append('\'');
|
||||
if (full || !Double.isNaN(numericValue)) result.append(" nv='").append(numericValue).append('\'');
|
||||
|
||||
if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.EA[eastAsianWidth]).append('\'');
|
||||
if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LB[lineBreak]).append('\'');
|
||||
if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.LONG_EAST_ASIAN_WIDTH[eastAsianWidth]).append('\'');
|
||||
if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LINE_BREAK[lineBreak]).append('\'');
|
||||
if (joiningType != -1 && (full || joiningType != JT_U)) result.append(" jt='").append(UCD_Names.JOINING_TYPE[joiningType]).append('\'');
|
||||
if (full || joiningGroup != NO_SHAPING) result.append(" jg='").append(UCD_Names.JOINING_GROUP[joiningGroup]).append('\'');
|
||||
if (full || age != 0) result.append(" ag='").append(UCD_Names.AGE[age]).append('\'');
|
||||
|
||||
if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BC[bidiClass]).append('\'');
|
||||
if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BIDI_CLASS[bidiClass]).append('\'');
|
||||
if (full || !bidiMirror.equals(s)) result.append(" bmg='").append(Utility.quoteXML(bidiMirror)).append('\'');
|
||||
|
||||
if (lastPos != result.length()) {
|
||||
|
|
|
@ -1,109 +0,0 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Attic/UnicodeMap.java,v $
|
||||
* $Date: 2003/04/02 05:16:44 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* Class that maps from codepoints to an index, and optionally a label.
|
||||
*/
|
||||
public class UnicodeMap {
|
||||
UnicodeSet[] sets = new UnicodeSet[50];
|
||||
String[] labels = new String[50];
|
||||
int count = 0;
|
||||
|
||||
public int add(String label, UnicodeSet set) {
|
||||
return add(label, set, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add set
|
||||
*@param removeOld true: remove any collisions from sets already in the map
|
||||
* if false, remove any collisions from this set
|
||||
*@param signal: print a warning when collisions occur
|
||||
*/
|
||||
public int add(String label, UnicodeSet set, boolean removeOld, boolean signal) {
|
||||
// remove from any preceding!!
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (!set.containsSome(sets[i])) continue;
|
||||
if (signal) showOverlap(label, set, i);
|
||||
if (removeOld) {
|
||||
sets[i] = sets[i].removeAll(set);
|
||||
} else {
|
||||
set = set.removeAll(sets[i]);
|
||||
}
|
||||
}
|
||||
sets[count] = set;
|
||||
labels[count++] = label;
|
||||
return (short)(count - 1);
|
||||
}
|
||||
|
||||
public void showOverlap(String label, UnicodeSet set, int i) {
|
||||
UnicodeSet delta = new UnicodeSet(set).retainAll(sets[i]);
|
||||
System.out.println("Warning! Overlap with " + label + " and " + labels[i]
|
||||
+ ": " + delta);
|
||||
}
|
||||
|
||||
public int getIndex(int codepoint) {
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
if (sets[i].contains(codepoint)) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public int getIndexFromLabel(String label) {
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
if (labels[i].equalsIgnoreCase(label)) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public String getLabel(int codepoint) {
|
||||
return getLabelFromIndex(getIndex(codepoint));
|
||||
}
|
||||
|
||||
public String getLabelFromIndex(int index) {
|
||||
if (index < 0 || index >= count) return null;
|
||||
return labels[index];
|
||||
}
|
||||
|
||||
public UnicodeSet getSetFromIndex(int index) {
|
||||
if (index < 0 || index >= count) return null;
|
||||
return new UnicodeSet(sets[index]); // protect from changes
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public int setLabel(int index, String label) {
|
||||
labels[index] = label;
|
||||
return index;
|
||||
}
|
||||
|
||||
public int put(int codepoint, int index) {
|
||||
if (sets[index] == null) {
|
||||
sets[index] = new UnicodeSet();
|
||||
if (index >= count) count = index + 1;
|
||||
}
|
||||
sets[index].add(codepoint);
|
||||
return index;
|
||||
}
|
||||
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||
* $Date: 2004/02/07 01:01:12 $
|
||||
* $Revision: 1.24 $
|
||||
* $Date: 2004/02/12 08:23:16 $
|
||||
* $Revision: 1.25 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -2239,11 +2239,11 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||
int j = UTF32.char32At(s, 0);
|
||||
try {
|
||||
if (q == 0) {
|
||||
check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GC, "GeneralCategory");
|
||||
check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GENERAL_CATEGORY, "GeneralCategory");
|
||||
check(i, Default.ucd().getCombiningClass(i), Default.ucd().getCombiningClass(j), "CanonicalClass");
|
||||
check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BC, "BidiClass");
|
||||
check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BIDI_CLASS, "BidiClass");
|
||||
check(i, Default.ucd().getNumericValue(i), Default.ucd().getNumericValue(j), "NumericValue");
|
||||
check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.NT, "NumericType");
|
||||
check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.LONG_NUMERIC_TYPE, "NumericType");
|
||||
|
||||
if (false) {
|
||||
for (byte k = LOWER; k < LIMIT_CASE; ++k) {
|
||||
|
|
|
@ -28,18 +28,22 @@ public class CallArgs {
|
|||
}
|
||||
int pos = arg.indexOf('.');
|
||||
Method method = null;
|
||||
String className = "Main";
|
||||
String methodName = "";
|
||||
|
||||
if (pos >= 0) {
|
||||
String className = prefix + arg.substring(0,pos);
|
||||
String methodName = arg.substring(pos+1);
|
||||
className = prefix + arg.substring(0,pos);
|
||||
methodName = arg.substring(pos+1);
|
||||
method = tryMethod(className, methodName, methodArgs);
|
||||
} else {
|
||||
method = tryMethod("Main", arg, methodArgs);
|
||||
method = tryMethod(className, arg, methodArgs);
|
||||
if (method == null) {
|
||||
method = tryMethod(arg, "main", methodArgs);
|
||||
className = arg;
|
||||
methodName = "main";
|
||||
method = tryMethod(className, methodName, methodArgs);
|
||||
}
|
||||
}
|
||||
if (method == null) throw new IllegalArgumentException("Bad parameter: " + arg);
|
||||
if (method == null) throw new IllegalArgumentException("Bad parameter: " + className + ", " + methodName);
|
||||
System.out.println(method.getName() + "\t" + bf.join(methodArgs));
|
||||
method.invoke(null,methodArgs);
|
||||
}
|
||||
|
|
127
tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
Normal file
127
tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
Normal file
|
@ -0,0 +1,127 @@
|
|||
package com.ibm.text.utility;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
|
||||
import com.ibm.text.UCD.Default;
|
||||
import com.ibm.text.UCD.GenerateData;
|
||||
import com.ibm.text.UCD.UCD_Types;
|
||||
|
||||
public class UnicodeDataFile {
|
||||
public PrintWriter out;
|
||||
private String newFile;
|
||||
private String batName;
|
||||
private String mostRecent;
|
||||
private UnicodeDataFile(){};
|
||||
|
||||
public static UnicodeDataFile openAndWriteHeader(String directory, String filename) throws IOException {
|
||||
UnicodeDataFile result = new UnicodeDataFile();
|
||||
result.newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
result.out = Utility.openPrintWriter(result.newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
result.mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
result.batName = batName[0];
|
||||
|
||||
result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
result.out.println(generateDateLine());
|
||||
result.out.println("#");
|
||||
try {
|
||||
Utility.appendFile(filename + "Header.txt", Utility.LATIN1, result.out);
|
||||
} catch (FileNotFoundException e) {
|
||||
result.out.println("# Unicode Character Database: Derived Property Data");
|
||||
result.out.println("# Generated algorithmically from the Unicode Character Database");
|
||||
result.out.println("# For documentation, see UCD.html");
|
||||
result.out.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
|
||||
result.out.println("# if they have default property values.");
|
||||
result.out.println("# ================================================");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName);
|
||||
out.close();
|
||||
}
|
||||
|
||||
public static String generateDateLine() {
|
||||
return "# Date: " + Default.getDate() + " [MD]";
|
||||
}
|
||||
|
||||
public static String getHTMLFileSuffix(boolean withDVersion) {
|
||||
return "-" + Default.ucd().getVersion()
|
||||
+ ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
|
||||
+ ".html";
|
||||
}
|
||||
|
||||
public static String getFileSuffix(boolean withDVersion) {
|
||||
return "-" + Default.ucd().getVersion()
|
||||
+ ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
|
||||
+ ".txt";
|
||||
}
|
||||
|
||||
//Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names
|
||||
|
||||
public static String fixFile(String s) {
|
||||
int len = s.length();
|
||||
if (!s.endsWith(".txt")) return s;
|
||||
if (s.charAt(len-6) != 'd') return s;
|
||||
char c = s.charAt(len-5);
|
||||
if (c != 'X' && (c < '0' || '9' < c)) return s;
|
||||
s = s.substring(0,len-6) + s.substring(len-4);
|
||||
System.out.println("Fixing File Name: " + s);
|
||||
return s;
|
||||
}
|
||||
|
||||
private static String generateBatAux(String batName, String oldName, String newName) throws IOException {
|
||||
String fullBatName = batName + ".bat";
|
||||
PrintWriter output = Utility.openPrintWriter(batName + ".bat", Utility.LATIN1_UNIX);
|
||||
|
||||
newName = Utility.getOutputName(newName);
|
||||
System.out.println("Writing BAT to compare " + oldName + " and " + newName);
|
||||
|
||||
File newFile = new File(newName);
|
||||
File oldFile = new File(oldName);
|
||||
output.println("\"C:\\Program Files\\Compare It!\\wincmp3.exe\" "
|
||||
// "\"C:\\Program Files\\wincmp.exe\" "
|
||||
+ oldFile.getCanonicalFile()
|
||||
+ " "
|
||||
+ newFile.getCanonicalFile());
|
||||
output.close();
|
||||
return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString();
|
||||
}
|
||||
|
||||
/*
|
||||
static String skeleton(String source) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
source = source.toLowerCase();
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char c = source.charAt(i);
|
||||
if (c == ' ' || c == '_' || c == '-') continue;
|
||||
result.append(c);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
*/
|
||||
// static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
|
||||
|
||||
public static String generateBat(String directory, String fileRoot, String suffix, String[] outputBatName) throws IOException {
|
||||
String mostRecent = Utility.getMostRecentUnicodeDataFile(UnicodeDataFile.fixFile(fileRoot), Default.ucd().getVersion(), true, true);
|
||||
if (mostRecent != null) {
|
||||
outputBatName[0] = UnicodeDataFile.generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix,
|
||||
mostRecent, directory + fileRoot + suffix);
|
||||
} else {
|
||||
System.out.println("No previous version of: " + fileRoot + ".txt");
|
||||
return null;
|
||||
}
|
||||
|
||||
String lessRecent = Utility.getMostRecentUnicodeDataFile(UnicodeDataFile.fixFile(fileRoot), Default.ucd().getVersion(), false, true);
|
||||
if (lessRecent != null && !mostRecent.equals(lessRecent)) {
|
||||
UnicodeDataFile.generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix,
|
||||
lessRecent, directory + fileRoot + suffix);
|
||||
}
|
||||
return mostRecent;
|
||||
}
|
||||
}
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2004/02/07 01:01:17 $
|
||||
* $Revision: 1.38 $
|
||||
* $Date: 2004/02/12 08:23:14 $
|
||||
* $Revision: 1.39 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -16,6 +16,7 @@ package com.ibm.text.utility;
|
|||
import java.util.*;
|
||||
import java.text.*;
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.Replaceable;
|
||||
|
@ -717,7 +718,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException {
|
||||
File file = new File(directory + filename);
|
||||
Utility.fixDot();
|
||||
System.out.println("Creating File: " + file.getCanonicalPath());
|
||||
System.out.print("Creating File: " + file);
|
||||
System.out.println("\t" + file.getCanonicalPath());
|
||||
File parent = new File(file.getParent());
|
||||
//System.out.println("Creating File: "+ parent);
|
||||
parent.mkdirs();
|
||||
|
@ -1095,7 +1097,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
}
|
||||
|
||||
public static void showSetDifferences(PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2,
|
||||
boolean separateLines, boolean withChar, UnicodeMap names, UCD ucd) {
|
||||
boolean separateLines, boolean withChar, OldUnicodeMap names, UCD ucd) {
|
||||
|
||||
UnicodeSet temp = new UnicodeSet(set1).removeAll(set2);
|
||||
pw.println();
|
||||
|
@ -1135,7 +1137,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
static java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
|
||||
|
||||
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN,
|
||||
boolean withChar, UnicodeMap names, UCD ucd) {
|
||||
boolean withChar, OldUnicodeMap names, UCD ucd) {
|
||||
if (set.size() == 0) {
|
||||
pw.println(prefix + "<none>");
|
||||
pw.flush();
|
||||
|
@ -1196,4 +1198,5 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
private static boolean isSeparateLineIDN(int start, int end, UCD ucd) {
|
||||
return (isSeparateLineIDN(start, ucd) || isSeparateLineIDN(end, ucd));
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue