bunch o' changes

X-SVN-Rev: 9982
This commit is contained in:
Mark Davis 2002-10-05 01:28:58 +00:00
parent d29ea5e179
commit 5529d37324
24 changed files with 1489 additions and 138 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2002/10/03 22:58:17 $
* $Revision: 1.13 $
* $Date: 2002/10/05 01:28:56 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -921,14 +921,35 @@ public class WriteCharts implements UCD_Types {
+ "<br><tt>" + Utility.hex(comp) + "</tt></td>";
}
public static void writeAllocation() throws IOException {
Default.setUCD();
String[] names = new String[300]; // HACK, 300 is plenty for now. Fix if it ever gets larger
int[] starts = new int[names.length];
int[] ends = new int[names.length];
UCD.BlockData blockData = new UCD.BlockData();
int counter = 0;
UnicodeSet[] values = new UnicodeSet[500];
String[] names = new String[values.length];
int[] starts = new int[values.length];
int[] ends = new int[values.length];
int blockId = 0;
while (Default.ucd.getBlockData(blockId++, blockData)) {
names[counter] = blockData.name;
starts[counter] = blockData.start;
ends[counter] = blockData.end;
//System.out.println(names[counter] + ", " + values[counter]);
++counter;
// HACK
if (blockData.name.equals("Tags")) {
names[counter] = "<i>reserved default ignorable</i>";
starts[counter] = 0xE0080;
ends[counter] = 0xE0FFF;
++counter;
}
}
/*
BufferedReader in = Utility.openUnicodeFile("Blocks", "", true, false);
try {
while (true) {
@ -947,42 +968,79 @@ public class WriteCharts implements UCD_Types {
ends[counter] = end;
//System.out.println(names[counter] + ", " + values[counter]);
++counter;
// HACK
if (name.equals("Tags")) {
names[counter] = "<i>reserved default ignorable</i>";
values[counter] = new UnicodeSet(0xE0080, 0xE0FFF);
starts[counter] = 0xE0080;
ends[counter] = 0xE0FFF;
++counter;
}
}
} finally {
in.close();
}
*/
PrintWriter out = Utility.openPrintWriter("Allocation.html", Utility.LATIN1_WINDOWS);
/*
Graphic
Format
Control
Private Use
Surrogate
Noncharacter
Reserved (default ignorable)
Reserved (other)
*/
PrintWriter out = Utility.openPrintWriter("allocation.html", Utility.LATIN1_WINDOWS);
try {
out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
out.println("<title>Unicode Allocation</title></head>");
out.println("<body bgcolor='#FFFFFF'><h1 align='center'><a href='#Notes'>Unicode Allocation</a></h1>");
out.println("<table border='1' width='100%' cellspacing='0'>");
out.println("<tr><th>Start</th><th align='left'>Block Name</th><th align='left'>Size</th></tr>");
UnicodeSetIterator it = new UnicodeSetIterator();
int lastEnd = -1;
for (int i = 0; i < counter; ++i) {
if (starts[i] != lastEnd + 1) {
drawAllocation(out, lastEnd + 1, "<i>reserved</i>", starts[i] - lastEnd + 1, 0);
for (int textOnly = 0; textOnly < 2; ++textOnly) {
out.println("<table border='1' cellspacing='0'>"); // width='100%'
if (textOnly == 0) {
out.println("<tr><th>Start</th><th align='left'>Block Name</th><th align='left'>Size</th></tr>");
} else {
out.println("<tr><th>Block Name</th><th>Start</th><th>Total</th><th>Assigned</th></tr>");
}
int total = values[i].size();
int alloc = 0;
it.reset(values[i]);
while (it.nextRange()) {
for (int j = it.codepoint; j <= it.codepointEnd; ++j) {
int lastEnd = -1;
for (int i = 0; i < counter; ++i) {
if (starts[i] != lastEnd + 1) {
drawAllocation(out, lastEnd + 1, "<i>reserved</i>", starts[i] - lastEnd + 1, 0, "#000000", "#000000", textOnly);
}
int total = ends[i] - starts[i] + 1;
int alloc = 0;
for (int j = starts[i]; j <= ends[i]; ++j) {
if (Default.ucd.isAllocated(j)) ++alloc;
}
//System.out.println(names[i] + "\t" + alloc + "\t" + total);
String color = names[i].indexOf("Surrogates") >= 0 ? "#FF0000"
: names[i].indexOf("Private") >= 0 ? "#0000FF"
: "#00FF00";
String colorReserved = names[i].indexOf("reserved default ignorable") >= 0 ? "#CCCCCC"
: "#000000";
drawAllocation(out, starts[i], names[i], total, alloc, color, colorReserved, textOnly);
lastEnd = ends[i];
}
System.out.println(names[i] + "\t" + alloc + "\t" + total);
drawAllocation(out, starts[i], names[i], total, alloc);
lastEnd = ends[i];
out.println("</table><p>&nbsp;</p>");
}
out.println("</table>");
out.println("<p><a name='Notes'></a>This chart lists all the Unicode blocks and their starting code points. "
+ "The area of each bar is proportional to the total number of code points in each block, "
+ "with green for the proportion of assigned code points. "
out.println("<h2>Key</h2><p><a name='Notes'></a>This chart lists all the Unicode blocks and their starting code points. "
+ "The area of each bar is proportional to the total number of code points in each block. "
+ "The colors have the following significance:<br>"
+ "<table border='1' cellspacing='0' cellpadding='4'>"
+ "<tr><td>Green</td><td>Graphic, Control, Format, Noncharacter* code points</td></tr>"
+ "<tr><td>Red</td><td>Surrogate code points</td></tr>"
+ "<tr><td>Blue</td><td>Private Use code points</td></tr>"
+ "<tr><td>Gray</td><td>Reserved (default ignorable) code points</td></tr>"
+ "<tr><td>Black</td><td>Reserved (other) code points</td></tr>"
+ "</table><br>"
+ "* Control, Format, and Noncharacter are not distinguished from Graphic characters by color, since they are mixed into other blocks. "
+ "Tooltips on the bars show the total number of code points and the number assigned. "
+ "(Remember that assigned <i>code points</i> are not necessarily assigned <i>characters</i>.)"
+ "</p>");
@ -997,23 +1055,27 @@ public class WriteCharts implements UCD_Types {
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
static {nf.setMaximumFractionDigits(0);}
static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc) {
int unalloc = total - alloc;
double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock));
double allocWidth = alloc * totalWidth / total;
double unallocWidth = totalWidth - allocWidth;
out.println("<tr><td align='right'><code>" + Utility.hex(start)
+ "</code></td><td>" + title
+ "</td><td title='total: " + nf.format(total) + ", assigned: " + nf.format(alloc)
+ "'><table border='0' cellspacing='0' cellpadding='0'><tr>");
if (alloc != 0) out.println("<td style='font-size:1;width:" + allocWidth + ";height:" + totalWidth
+ "' bgcolor='#00FF00'>&nbsp;</td>");
if (unalloc != 0) out.println("<td style='font-size:1;width:" + unallocWidth + ";height:" + totalWidth
+ "' bgcolor='#000000'>&nbsp;</td>");
out.println("</tr></table></td></tr>");
static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc, String color, String colorReserved, int textOnly) {
if (textOnly == 0) {
int unalloc = total - alloc;
double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock));
double allocWidth = alloc * totalWidth / total;
double unallocWidth = totalWidth - allocWidth;
out.println("<tr><td align='right'><code>" + Utility.hex(start)
+ "</code></td><td>" + title
+ "</td><td title='total: " + nf.format(total) + ", assigned: " + nf.format(alloc)
+ "'><table border='0' cellspacing='0' cellpadding='0'><tr>");
if (alloc != 0) out.println("<td style='font-size:1;width:" + allocWidth + ";height:" + totalWidth
+ "' bgcolor='" + color + "'>&nbsp;</td>");
if (unalloc != 0) out.println("<td style='font-size:1;width:" + unallocWidth + ";height:" + totalWidth
+ "' bgcolor='" + colorReserved + "'>&nbsp;</td>");
out.println("</tr></table></td></tr>");
} else {
out.println("<tr><td>" + title + "</td><td align='right'>" + start + "</td><td align='right'>" + total + "</td><td align='right'>" + alloc + "</td></tr>");
}
}
}

View file

@ -0,0 +1,47 @@
#
# This file is used to test (1) case conversion, (2) case detection,
# and (3) case-insensitive matching.
# (1) is represented below by function names such as toLower(),
# (2) is represented below by function names such as isLower().
# (3) is represented below by the function name equalsCaseInsensitive().
# (The actual function names will vary depending on software language and/or library.)
#
# The test cases also check whether canonical equivalence is preserved
# by these functions.
#
# Format:
# <src> ; <lower> ; <upper> ; <title> ; <fold> (# <comment>)?
#
# Test:
#
# A. For each line:
# 1. Verify the following equalities:
# lower == toLower(src)
# upper == toUpper(src)
# title == toTitle(src)
# fold == toFold(src)
# 2. Verify that all of the following are true:
# isLower(toLower(lower))
# isUpper(toUpper(upper))
# isTitle(toTitle(title))
# isFold(toFold(fold))
# 3. Verify that all of the following are true:
# equalsCaseInsensitive(src, lower)
# equalsCaseInsensitive(src, upper)
# equalsCaseInsensitive(src, title)
# equalsCaseInsensitive(src, fold)
#
# B. For each code point that is NOT listed as a src:
# 1. Verify the following equalities:
# src == toLower(src) == toUpper(src) == toTitle(src) == toFold(src)
# 2. Verify that all of the following are true:
# isLower(toLower(lower))
# isUpper(toUpper(upper))
# isTitle(toTitle(title))
# isFold(toFold(fold))
# 3. Verify that all of the following are true:
# equalsCaseInsensitive(src, lower)
# equalsCaseInsensitive(src, upper)
# equalsCaseInsensitive(src, title)
# equalsCaseInsensitive(src, fold)
#

View file

@ -0,0 +1,25 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Charts.java,v $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import com.ibm.icu.text.UnicodeSet;
import java.io.*;
import java.util.*;
import com.ibm.icu.text.UTF16;
import com.ibm.text.utility.*;
// Placeholder: this class currently declares no fields or methods.
public class Charts {
}

View file

@ -0,0 +1,106 @@
package com.ibm.text.UCD;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.*;
import java.util.*;
// Enumerated properties will be IntCodePointProperty.
// The string values they return will be the property value names.
// Binary properties are Enumerated properties. They return 0 or 1
/**
 * Skeleton API for a property defined over code points.
 *
 * <p>Enumerated properties will be {@link IntCodePointProperty}; the string values
 * they return will be the property value names. Binary properties are enumerated
 * properties that return 0 or 1.
 *
 * <p>The static lookup/registration methods and the "all values" iterators are
 * stubs in this version: they do nothing or return {@code null}.
 *
 * <p>Intended use by UnicodeSet when it encounters {@code [:name=value:]}:
 * <pre>
 *   CodePointProperty x = getInstance(name);
 *   if (x == null) doError(name, value);
 *   UnicodeSet s = x.getSet(value);
 *   // ... and then use s.
 * </pre>
 */
public abstract class CodePointProperty {

    // Styles for names and string values.
    static final byte SHORT = 0;
    static final byte DEFAULT = 1;
    static final byte LONG = 2;
    static final byte NORMAL_LIMIT = 3;

    /** Gets the property name in the given style. */
    public abstract String getName(byte style);

    /**
     * Gets the value for a code point as a string. The value may also be
     * numeric, etc.; this returns the string equivalent.
     */
    public abstract String getValue(int codePoint, byte style);

    /**
     * Returns true if the code point has the value. Works with any style
     * that {@link #getValue} takes.
     */
    public abstract boolean hasValue(int codePoint, String value);

    /**
     * Returns the set of all code points with that value. Same effect as using
     * {@link #hasValue} one by one, but meant to have a faster internal implementation.
     */
    public abstract UnicodeSet getSet(String value);

    /**
     * Returns a list of all possible values. Logically the same as looping from
     * 0..10FFFF with getValue and getStyleLimit and throwing out duplicates,
     * but meant to be much faster. Stub: currently returns {@code null}.
     */
    static Iterator getAllValues(byte style) {
        return null;
    }

    /** Gets the top value style available for this property. */
    public byte getStyleLimit(byte style) {
        return NORMAL_LIMIT;
    }

    /**
     * Returns true if the value is known to be uniform over a type. This is
     * used for various optimizations, especially for Cn and Co.
     * The base implementation always answers {@code false}.
     */
    public boolean isUniformOverCategory(byte generalCategory) {
        return false;
    }

    // ---- subclasses ----

    /** Property whose values are also available as {@code int}. */
    public abstract static class IntCodePointProperty extends CodePointProperty {
        abstract int getNumericValue(int codePoint);

        abstract int getMaxValue();

        abstract int getMinValue();

        /** Stub: currently returns {@code null}. */
        static Iterator getAllNumericValues() {
            return null;
        }
    }

    /** Property whose values are also available as {@code double}. */
    public abstract static class DoubleCodePointProperty extends CodePointProperty {
        abstract double getNumericValue(int codePoint);

        abstract double getMaxValue();

        abstract double getMinValue();

        /** Stub: currently returns {@code null}. */
        static Iterator getAllNumericValues() {
            return null;
        }
    }

    // ---- registration and lookup ----

    /** Registers a new property. Stub: currently does nothing. */
    static void register(CodePointProperty newProp) {
        //...
    }

    /** Finds a registered property by name. Stub: currently returns {@code null}. */
    static CodePointProperty getInstance(String name) {
        return null;
    }

    /** Returns a list of all registered properties. Stub: currently returns {@code null}. */
    static Iterator getAllRegistered() {
        return null;
    }

    // Open issue: we could have a property like contains("dot").
    // In that case, we would register "contains" as the 'base' name,
    // but allow lookup with string parameters ("dot").
    // Maybe just adding the three methods below; that way we could have
    // [[:letter:]&[:contains(dot):]]

    /** Base implementation: this property takes no parameters. */
    public boolean hasParameters() {
        return false;
    }

    /** Base implementation: ignores the parameters. */
    public void setParameters(String parameters) {
    }

    /** Base implementation: returns {@code null}. */
    public String getParameters() {
        return null;
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
* $Date: 2002/06/13 21:14:05 $
* $Revision: 1.8 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -331,7 +331,7 @@ public final class ConvertUCD implements UCD_Types {
static void readBlocks() throws Exception {
System.out.println("Reading 'Blocks'");
BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, false);
BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1);
String line = "";
try {
String[] parts = new String[20];
@ -376,7 +376,7 @@ public final class ConvertUCD implements UCD_Types {
}
String tempVersion = version;
if (version.equals(UCD.latestVersion)) tempVersion = "";
BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, false);
BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, Utility.LATIN1);
if (input == null) {
System.out.println("COULDN'T OPEN: " + labels[0]);
return;
@ -834,7 +834,7 @@ public final class ConvertUCD implements UCD_Types {
uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true);
} else if (fieldName.equals("ea")) {
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EA, true);
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true);
} else if (fieldName.equals("lb")) {
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
* $Date: 2002/07/30 09:56:41 $
* $Revision: 1.11 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -63,7 +63,7 @@ public class GenerateCaseFolding implements UCD_Types {
out.println("# CaseFolding" + GenerateData.getFileSuffix(false));
out.println(GenerateData.generateDateLine());
out.println("#");
Utility.appendFile("CaseFoldingHeader.txt", false, out);
Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out);
/*
PrintWriter out = new PrintWriter(
@ -561,7 +561,7 @@ public class GenerateCaseFolding implements UCD_Types {
out.println("# SpecialCasing" + GenerateData.getFileSuffix(false));
out.println(GenerateData.generateDateLine());
out.println("#");
Utility.appendFile("SpecialCasingHeader.txt", true, out);
Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);
Iterator it = sorted.keySet().iterator();
int lastOrder = -1;
@ -584,7 +584,7 @@ public class GenerateCaseFolding implements UCD_Types {
case 3: out.println("# Ligatures"); break;
case 4: skipLine = true; break;
case 5: out.println("# No corresponding uppercase precomposed character"); break;
case 6: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
case 6: Utility.appendFile("SpecialCasingIota.txt", Utility.UTF8, out); break;
case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
case 8: skipLine = true; break;
}
@ -592,7 +592,7 @@ public class GenerateCaseFolding implements UCD_Types {
}
out.println(line);
}
Utility.appendFile("SpecialCasingFooter.txt", true, out);
Utility.appendFile("SpecialCasingFooter.txt", Utility.UTF8, out);
out.close();
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile));
}

View file

@ -0,0 +1,94 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java,v $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.*;
import java.io.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
/**
 * Writes CaseTest.txt: one line per code point whose full lower/upper/title/fold
 * mappings are not all identical, in the form
 * {@code src; lower; upper; title; fold  # name}.
 * A second line is written for the code point followed by {@link #testChar}
 * whenever casing the decomposed sequence and then composing differs from
 * casing the code point alone and appending the mark.
 */
abstract public class GenerateCaseTest implements UCD_Types {

    /**
     * Generates the test file.
     *
     * @param args ignored
     * @throws IOException if the output file or the header file cannot be processed
     */
    public static void main(String[] args) throws IOException {
        System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61");
        Default.setUCD();

        PrintWriter out = Utility.openPrintWriter("CaseTest.txt", Utility.UTF8_WINDOWS);
        try {
            out.println("# CaseTest");
            out.println("# Generated: " + Default.getDate() + ", MED");

            Utility.appendFile("CaseTestHeader.txt", Utility.LATIN1, out);

            // Inclusive upper bound: 0x10FFFF is the last code point.
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                Utility.dot(cp);
                if (!Default.ucd.isAllocated(cp)) continue;
                if (Default.ucd.isHangulSyllable(cp)) continue;
                // Skip private-use characters by general category (the category,
                // not the code point, must be compared with PRIVATE_USE).
                byte cat = Default.ucd.getCategory(cp);
                if (cat == PRIVATE_USE) continue;

                String lower = Default.ucd.getCase(cp, FULL, LOWER);
                String upper = Default.ucd.getCase(cp, FULL, UPPER);
                String title = Default.ucd.getCase(cp, FULL, TITLE);
                String fold = Default.ucd.getCase(cp, FULL, FOLD);
                // Nothing to test when every mapping gives the same string.
                if (lower.equals(upper)
                        && lower.equals(title)
                        && lower.equals(fold)) continue;

                String s = UTF16.valueOf(cp);
                write(out, s, true);

                // if (cp == '\u0345') continue; // don't add combining for this special case

                // Second line: the same source followed by the test mark, but only
                // when the result is not simply "mapping of cp" + mark.
                s = s + testChar;

                String s2 = Default.nfd.normalize(s);

                String lower1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, LOWER));
                String upper1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, UPPER));
                String title1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, TITLE));
                String fold1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, FOLD));

                if (lower1.equals(Default.nfc.normalize(lower + testChar))
                        && upper1.equals(Default.nfc.normalize(upper + testChar))
                        && title1.equals(Default.nfc.normalize(title + testChar))
                        && fold1.equals(Default.nfc.normalize(fold + testChar))
                ) continue;

                write(out, s, true);
            }
            out.println("# total lines: " + counter);
        } finally {
            // Close the writer even if generation fails part-way.
            out.close();
        }
    }

    /** Character (U+0316) appended to each source to build the second test line. */
    static final char testChar = '\u0316';

    /** Number of data lines written so far by {@link #write}. */
    static int counter = 0;

    /**
     * Writes one test line for {@code ss}: the source in hex followed by the hex of
     * its lower, upper, title and fold mappings. Each mapping is computed on the
     * NFD form of {@code ss} and then normalized to NFC.
     *
     * @param out       destination writer
     * @param ss        source string
     * @param doComment if true, append a tab and "# " plus the name of {@code ss}
     */
    static void write(PrintWriter out, String ss, boolean doComment) {
        String s = Default.nfd.normalize(ss);
        String lower = Default.nfc.normalize(Default.ucd.getCase(s, FULL, LOWER));
        String upper = Default.nfc.normalize(Default.ucd.getCase(s, FULL, UPPER));
        String title = Default.nfc.normalize(Default.ucd.getCase(s, FULL, TITLE));
        String fold = Default.nfc.normalize(Default.ucd.getCase(s, FULL, FOLD));
        out.println(Utility.hex(ss) + "; "
                + Utility.hex(lower) + "; "
                + Utility.hex(upper) + "; "
                + Utility.hex(title) + "; "
                + Utility.hex(fold)
                + (doComment ? "\t# " + Default.ucd.getName(ss) : "")
        );
        counter++;
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2002/07/30 09:56:41 $
* $Revision: 1.22 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.23 $
*
*******************************************************************************
*/
@ -545,6 +545,10 @@ public class GenerateData implements UCD_Types {
if (i == (BINARY_PROPERTIES | CaseFoldTurkishI)) continue;
if (i == (BINARY_PROPERTIES | Non_break)) continue;
if (type == NUMERIC_TYPE) {
//System.out.println("debug");
}
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
@ -587,8 +591,9 @@ public class GenerateData implements UCD_Types {
}
valueAbb = up.getValue(SHORT);
if (valueAbb.length() == 0) valueAbb = "n/a";
valueAbb = Utility.getUnskeleton(valueAbb, false);
if (valueAbb.length() == 0) valueAbb = "n/a";
//else if (valueAbb.equals(value)) valueAbb = "n/a";
if (type == COMBINING_CLASS) {
@ -643,6 +648,13 @@ public class GenerateData implements UCD_Types {
}
}
UCD.BlockData blockData = new UCD.BlockData();
int blockId = 0;
while (Default.ucd.getBlockData(blockId++, blockData)) {
addLine(sorted, "blk", "n/a", blockData.name);
}
String filename = "PropertyAliases";
String newFile = "DerivedData/" + filename + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
@ -651,7 +663,7 @@ public class GenerateData implements UCD_Types {
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("#");
Utility.appendFile("PropertyAliasHeader.txt", false, log);
Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
log.println();
Utility.print(log, sorted, "\r\n", new MyBreaker(true));
@ -667,7 +679,7 @@ public class GenerateData implements UCD_Types {
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("#");
Utility.appendFile("PropertyValueAliasHeader.txt", false, log);
Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
log.println();
Utility.print(log, sorted, "\r\n", new MyBreaker(false));

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
* $Date: 2002/08/04 21:38:45 $
* $Revision: 1.9 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -45,7 +45,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS);
log.println("<body>");
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, true);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
Map properties = new TreeMap();
@ -502,7 +502,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
if (type == CHINESE) {
System.out.println("Reading chinese_frequency.txt");
br = Utility.openReadFile(BASE_DIR + "dict\\chinese_frequency.txt", true);
br = Utility.openReadFile(BASE_DIR + "dict\\chinese_frequency.txt", Utility.UTF8);
counter = 0;
while (true) {
line = Utility.readDataLine(br);
@ -521,7 +521,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
if (type == JAPANESE) {
System.out.println("Reading japanese_frequency.txt");
br = Utility.openReadFile( BASE_DIR + "dict\\japanese_frequency.txt", true);
br = Utility.openReadFile( BASE_DIR + "dict\\japanese_frequency.txt", Utility.UTF8);
Map japaneseMap = new HashMap();
while (true) {
line = Utility.readDataLine(br);
@ -704,7 +704,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
if (type == JAPANESE) fname = "edict.txt";
System.out.println("Reading " + fname);
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, true);
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, Utility.UTF8);
int counter = 0;
String[] pieces = new String[50];
String line = "";
@ -751,7 +751,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
String fname = "Chinese_override.txt";
System.out.println("Reading " + fname);
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, true);
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, Utility.UTF8);
int counter = 0;
String[] pieces = new String[50];
String line = "";
@ -997,7 +997,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
static void readCDICT() throws IOException {
System.out.println("Reading cdict.txt");
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\cdict.txt", true);
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\cdict.txt", Utility.UTF8);
int counter = 0;
String[] pieces = new String[50];
String line = "";
@ -1075,7 +1075,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
static void readUnihanData(String key) throws java.io.IOException {
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, true);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
int count = 0;
int lineCounter = 0;

View file

@ -0,0 +1,74 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.io.*;
import com.ibm.text.utility.*;
import com.ibm.text.UnicodeSet;
import java.util.*;
/**
 * Reads the Thai word list thai6.ucs and classifies its characters by position:
 * the last non-ignorable character of each line is a "final", the first
 * non-ignorable character is an "initial", and everything between is a "medial".
 * Characters that ever occur medially are removed from the initial and final
 * sets before the three sets are printed.
 */
public class GenerateThaiBreaks {

    /**
     * Runs the classification and prints the three sets.
     *
     * @param args ignored
     * @throws IOException if the word list cannot be opened or read
     */
    public static void main(String[] args) throws IOException {

        BufferedReader br = new BufferedReader(
                new InputStreamReader(
                        new FileInputStream("\\icu4j\\src\\data\\thai6.ucs"), "UnicodeLittle"));
        try {
            Main.setUCD();

            UnicodeSet ignorables = new UnicodeSet("[:M:]");
            ignorables.retain(0x0E00, 0x0E7F); // just Thai block
            ignorables.add(0x0E40, 0x0E44); // add logical order exception
            ignorables.add(0, ' '); // add controls
            ignorables.add('.');

            UnicodeSet initials = new UnicodeSet();
            UnicodeSet finals = new UnicodeSet();
            UnicodeSet medials = new UnicodeSet();

            while (true) {
                String line = br.readLine();
                if (line == null) break;

                // Find the final consonant: the last character that is not ignorable.
                // The scan is bounded at index 0 so that an empty line, or a line made
                // only of ignorables, is skipped instead of running off the front of
                // the string.
                int end = -1;
                for (int i = line.length() - 1; i >= 0; --i) {
                    char c = line.charAt(i);
                    if (!ignorables.contains(c)) {
                        finals.add(c);
                        end = i;
                        break;
                    }
                }
                if (end < 0) continue;

                // Everything before the final: the first non-ignorable is the initial,
                // the rest are medials.
                boolean haveFirst = false;
                for (int i = 0; i < end; ++i) {
                    char c = line.charAt(i);
                    if (ignorables.contains(c)) continue;
                    if (!haveFirst) {
                        initials.add(c);
                        haveFirst = true;
                    } else {
                        medials.add(c);
                    }
                }
            }

            // Keep only characters that are exclusively initial / exclusively final.
            initials.removeAll(medials);
            finals.removeAll(medials);

            Utility.showSetNames("initials: ", initials, false, Main.ucd);
            Utility.showSetNames("finals: ", finals, false, Main.ucd);
            Utility.showSetNames("medials: ", medials, false, Main.ucd);
        } finally {
            br.close();
        }
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $
* $Date: 2002/08/08 15:38:16 $
* $Revision: 1.1 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -65,7 +65,7 @@ public class IANANames implements UCD_Types {
}
public IANANames() throws IOException {
BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", false);
BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", Utility.LATIN1);
try {
boolean atStart = true;
String lastName = "";

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2002/10/01 01:19:16 $
* $Revision: 1.24 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.25 $
*
*******************************************************************************
*/
@ -73,6 +73,8 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();

View file

@ -34,4 +34,4 @@
# In addition, some property names may be the same as some property value names.
#
# The combination of property value and property name is, however, unique.
# For more information, see UTR #24: Regular Expression Guidelines
# For more information, see UTR #18: Regular Expression Guidelines

View file

@ -29,7 +29,7 @@
# and '_' are ignored.
#
# NOTE: The Block property values are in Blocks.txt, and not repeated here.
# For more information on the use of blocks, see UTR #24: Regular Expression Guidelines
# For more information on the use of blocks, see UTR #18: Regular Expression Guidelines
#
# NOTE: Currently there is at most one abbreviated name and one long name for
# property value. However, in the future additional aliases

View file

@ -0,0 +1,103 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.*;
import java.io.*;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.*;
public class QuickTest implements UCD_Types {
static final void test() {
Default.setUCD();
/*
[4] NameStartChar := ":" | [A-Z] | "_" | [a-z] |
[#xC0 - #x2FF] | [#x370 - #x37D] | [#x37F - #x1FFF] |
[#x200C - #x200D] | [#x2070 - #x218F] | [#x2C00 - #x2FEF] |
[#x3001 - #xD7FF] | [#xF900 - #xF9FF] | [#x10000 - #xDFFFF]
[4a] NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F]
*/
UnicodeSet nameStartChar = new UnicodeSet("[\\: A-Z \\_ a-z"
+ "\\u00c0-\\u02FF \\u0370-\\u037D \\u037F-\\u1FFF"
+ "\\u200C-\\u200D \\u2070-\\u218F \\u2C00-\\u2FEF"
+ "\\u3001-\\uD7FF \\uF900-\\uF9FF \\U00010000-\\U000DFFFF]");
UnicodeSet nameChar = new UnicodeSet("[\\- \\. 0-9 \\u00B7 \\u0300-\\u036F]")
.addAll(nameStartChar);
showSet("NameStartChar", nameStartChar);
showDiffs("NameChar", nameChar, "NameStartChar", nameStartChar);
UnicodeSet defaultIgnorable = UnifiedBinaryProperty.make(DERIVED | DefaultIgnorable).getSet();
UnicodeSet whitespace = UnifiedBinaryProperty.make(BINARY_PROPERTIES | White_space).getSet();
UnicodeSet notNFKC = new UnicodeSet();
UnicodeSet privateUse = new UnicodeSet();
UnicodeSet noncharacter = new UnicodeSet();
UnicodeSet format = new UnicodeSet("[:Cf:]");
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isAllocated(i)) continue;
if (!Default.nfkc.isNormalized(i)) notNFKC.add(i);
if (Default.ucd.isNoncharacter(i)) noncharacter.add(i);
if (Default.ucd.getCategory(i) == PRIVATE_USE) privateUse.add(i);
}
showSet("notNFKC in NameChar", new UnicodeSet(notNFKC).retainAll(nameChar));
showSet("notNFKC outside of NameChar", new UnicodeSet(notNFKC).removeAll(nameChar));
showSet("Whitespace in NameChar", new UnicodeSet(nameChar).retainAll(whitespace));
showSet("Whitespace not in NameChar", new UnicodeSet(whitespace).removeAll(nameChar));
showSet("Noncharacters in NameChar", new UnicodeSet(noncharacter).retainAll(noncharacter));
showSet("Noncharacters outside of NameChar", new UnicodeSet(noncharacter).removeAll(nameChar));
showSet("Format in NameChar", new UnicodeSet(nameChar).retainAll(format));
showSet("Other Default_Ignorables in NameChar", new UnicodeSet(defaultIgnorable).removeAll(format).retainAll(nameChar));
showSet("PrivateUse in NameChar", new UnicodeSet(defaultIgnorable).retainAll(privateUse));
UnicodeSet CID_Start = new UnicodeSet("[:ID_Start:]").removeAll(notNFKC);
UnicodeSet CID_Continue = new UnicodeSet("[:ID_Continue:]")
.removeAll(notNFKC).removeAll(format);
UnicodeSet CID_Continue_extras = new UnicodeSet(CID_Continue).removeAll(CID_Start);
showDiffs("NoK_ID_Start", CID_Start, "NameStartChar", nameStartChar);
showDiffs("NoK_ID_Continue_Extras", CID_Continue_extras, "NameChar", nameChar);
System.out.println("Removing canonical singletons");
}
/**
 * Prints the members of {@code set1} that are not in {@code set2},
 * under the heading {@code title1 - title2}.
 */
static void showDiffs(String title1, UnicodeSet set1, String title2, UnicodeSet set2) {
    String heading = title1 + " - " + title2;
    UnicodeSet onlyInFirst = new UnicodeSet(set1).removeAll(set2);
    showSet(heading, onlyInFirst);
}
/**
 * Prints a titled report for a set to standard output: a blank line, the
 * title, and then either "NONE" for an empty set or the count, the pattern
 * and the per-character details.
 */
static void showSet(String title1, UnicodeSet set1) {
    System.out.println();
    System.out.println(title1);
    int count = set1.size();
    if (count != 0) {
        System.out.println("\tCount:" + count);
        System.out.println("\tSet:" + set1.toPattern(true));
        System.out.println("\tDetails:");
        Utility.showSetNames("", set1, false, Default.ucd);
    } else {
        System.out.println("\tNONE");
    }
}
}

View file

@ -0,0 +1,566 @@
package com.ibm.text.UCD;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.lang.UCharacter;
import com.ibm.text.utility.*;
import java.util.*;
import java.io.*;
// Enumerated properties will be IntCodePointProperty.
// The string values they return will be the property value names.
// Binary properties are Enumerated properties. They return 0 or 1
public final class TernaryStore {
// Sentinel returned by Matcher.next() when no further result is available.
static final int DONE = Integer.MIN_VALUE;
// Default value of Node.result: the node does not complete a stored string.
static final int NOT_FOUND = Integer.MIN_VALUE+1;
// for testing: shared printer, assigned by test() and used by the
// showNodes/test helpers to write the debug dump.
static DepthPrinter dp;
/**
 * Test driver: builds and checks a TernaryStore for three word lists
 * (a small fixed list, the Unicode character names, and a word list read
 * from dict\DiploFreq.txt), dumping the node structure to TestTernary.txt.
 *
 * @throws java.io.IOException if the output file or the word list cannot be
 *         opened or read
 */
static void test() throws java.io.IOException {
    Default.setUCD();
    PrintWriter pw = Utility.openPrintWriter("TestTernary.txt", Utility.LATIN1_WINDOWS);
    try {
        dp = new DepthPrinter(pw);
        String[] tests = {"the", "quick", "fish", "fisherman", "fishes",
            "brown", "brow", "bracket", "bright", "brat",
            "brough", "dogs", "upper", "zebra",
            "fisher"};
        test("Simple: ", tests, tests.length);

        // The array is reused for the larger lists; only the first
        // 'counter' entries are passed on as valid.
        tests = new String[300000];
        int counter = 0;
        int i;
        for (i = 0; counter < tests.length && i <= 0x10FFFF; ++i) {
            if (Default.ucd.hasComputableName(i)) continue;
            String temp = UCharacter.getName(i);
            if (temp != null) tests[counter++] = temp.trim();
        }
        System.out.println("max-cp: " + Utility.hex(i));
        test("Unicode Names: ", tests, counter);

        //if (true) return;
        BufferedReader br = Utility.openReadFile(UCD_Types.BASE_DIR + "dict\\DiploFreq.txt", Utility.LATIN1);
        try {
            String line;
            counter = 0;
            while (counter < tests.length) {
                line = Utility.readDataLine(br);
                if (line == null) break;
                if (line.length() == 0) continue;
                Utility.dot(counter);
                // each data line is expected to be "<something>\t<word>"
                int tabPos = line.indexOf('\t');
                if (tabPos < 0) {
                    System.out.println("???" + line);
                    continue;
                }
                tests[counter++] = line.substring(tabPos+1);
            }
        } finally {
            // the reader was previously leaked
            br.close();
        }
        test("French: ", tests, counter);
    } finally {
        pw.close();
    }
}
/**
 * Builds a store from the first {@code len} entries of {@code tests}
 * (entry i is stored with result i), dumps it, and then looks every entry
 * up again, reporting any whose lookup result is not its own index.
 */
static void test(String title, String[] tests, int len) {
    System.out.println();
    System.out.println(title);
    dp.println();
    dp.print(title, 0);
    dp.println();

    Builder builder = new Builder();
    int totalChars = 0;
    int index = 0;
    while (index < len) {
        builder.add(tests[index], index);
        totalChars += tests[index].length();
        ++index;
    }
    System.out.println("charCount: " + totalChars);

    TernaryStore built = builder.build();
    built.showNodes();
    built.checkNodes();

    dp.println("Storage");
    dp.println(built.stringStore.toString());
    System.out.println("StorageSize: " + built.stringStore.toString().length());

    Matcher lookup = built.getMatcher();
    for (int k = 0; k < len; ++k) {
        String word = tests[k];
        int found = test(word, lookup);
        if (found == k) {
            continue;
        }
        System.out.println("\tFail, result: " + word + ", " + found);
    }
}
/**
 * Runs the matcher over {@code s} from position 0 and returns the last
 * result it produced before DONE, or -1 if it produced none.
 */
static int test(String s, Matcher matcher) {
    matcher.reset(s, 0);
    int last = -1;
    while (true) {
        int current = matcher.next();
        if (current == DONE) {
            return last;
        }
        last = current;
    }
}
/**
 * One node of the ternary tree. A node carries a string (initially held
 * directly in tempString, later referenced through stringCode), links to
 * the nodes whose first character is smaller (less) or larger (greater),
 * a link to the continuation (next), and an optional result value.
 */
static final class Node {
    // Direct string, used while stringCode is negative.
    String tempString;
    // Code of the string in the StringStore; negative means "use tempString".
    int stringCode = -1;
    Node less;
    Node greater;
    Node next;
    // NOT_FOUND unless a stored string ends at this node.
    int result = NOT_FOUND;

    /** Returns this node's string, resolving it through the store once a code is set. */
    String getString(StringStore stringStore) {
        if (stringCode < 0) return tempString;
        return stringStore.get(stringCode);
    }

    /** Sets the directly-held string (does not touch stringCode). */
    void setString(String s) {
        tempString = s;
    }

    /**
     * Debug form: this node's string, its result in parentheses if it has
     * one, followed by the same for each node along the next chain.
     */
    public String toString(StringStore store) {
        return getString(store)
            + (result != NOT_FOUND ? "(" + result + ")" : "")
            // must pass the store on; the no-arg toString() is Object's
            + (next != null ? next.toString(store) : "");
    }
}
// Root of the node tree; null until the Builder adds the first node.
Node base;
// Backing storage that node string codes are resolved against.
StringStore stringStore = new StringStore();
/**
 * Walks a TernaryStore along a text, reporting the result of every stored
 * string that is a prefix of the text starting at the reset position.
 * Obtain one from TernaryStore.getMatcher(), call reset(), then call
 * next() until it returns DONE.
 */
final static class Matcher {
    TernaryStore store;
    String s;
    int position;
    Node lastNode;

    /** Starts a new match of {@code s} beginning at {@code position}. */
    void reset(String s, int position) {
        this.s = s;
        this.position = position;
        this.lastNode = store.base;
    }

    // returns the next result
    // or DONE when done
    // sets position to point after end of found string
    int next() {
        while (lastNode != null && position < s.length()) {
            char ch = s.charAt(position++);
            do {
                String nodeString = lastNode.getString(store.stringStore);
                char first = nodeString.charAt(0);
                if (ch == first) {
                    // now check the rest of the string
                    for (int i = 1; i < nodeString.length(); ++i) {
                        // The text may end in the middle of a multi-character
                        // node; treat that as a mismatch instead of reading
                        // past the end of the text.
                        if (position >= s.length()
                                || nodeString.charAt(i) != s.charAt(position++)) {
                            lastNode = null; // keep returning DONE from now on
                            return DONE;
                        }
                    }
                    // if we succeed, return result if there is one
                    int result = lastNode.result;
                    lastNode = lastNode.next;
                    if (result != NOT_FOUND) return result;
                    break; // get next char
                }
                // otherwise branch sideways, keeping same char
                if (ch > first) {
                    lastNode = lastNode.greater;
                } else {
                    lastNode = lastNode.less;
                }
            } while (lastNode != null);
        }
        return DONE;
    }
}
/** Creates a new Matcher bound to this store; call reset() on it before use. */
public Matcher getMatcher() {
    Matcher bound = new Matcher();
    bound.store = this;
    return bound;
}
/**
 * Dumps the whole node tree to the shared DepthPrinter, starting at
 * depth 5. Does nothing when the store has no nodes.
 */
public void showNodes() {
    if (base == null) {
        return; // empty store: nothing to show
    }
    showNodes2(base, "", 5);
}
/**
 * Recursively dumps the subtree rooted at {@code n}: first the "less"
 * branch, then this node (string plus "/result" when it has one), then
 * the continuation at a deeper column, then the "greater" branch.
 * {@code path} records the route taken ("-", ".", "+") for debugging.
 */
public void showNodes2(Node n, String path, int depth) {
    Node lower = n.less;
    if (lower != null) {
        showNodes2(lower, path + "-", depth);
    }

    dp.print("", depth);
    if (false) dp.print(path);
    dp.print(n.getString(stringStore));
    if (n.result != NOT_FOUND) {
        dp.print("/" + n.result);
    }
    dp.println();

    Node following = n.next;
    if (following != null) {
        // continuation is shown to the right of this node's own string
        int childDepth = depth + n.getString(stringStore).length();
        showNodes2(following, path + ".", childDepth);
    }

    Node higher = n.greater;
    if (higher != null) {
        showNodes2(higher, path + "+", depth);
    }
}
// Counters accumulated by checkNodes(Node, NodeInfo) over a node tree.
static class NodeInfo {
// number of nodes visited
int nodeCount;
// nodes whose result is not NOT_FOUND
int resultCount;
// nodes with no "less" branch
int nullLessCount;
// nodes with no "greater" branch
int nullGreaterCount;
// nodes with neither "less" nor "greater" branch and no result
int nullSimpleCount;
// nodes with no "next" continuation
int nullNextCount;
}
/**
 * Gathers statistics over the whole node tree and prints them to standard
 * output. An empty store (no base node) reports all counters as zero.
 */
public void checkNodes() {
    NodeInfo nodeInfo = new NodeInfo();
    if (base != null) { // empty store: nothing to traverse
        checkNodes(base, nodeInfo);
    }
    System.out.println("Nodes: " + nodeInfo.nodeCount);
    System.out.println("nullLessCount: " + nodeInfo.nullLessCount);
    System.out.println("nullGreaterCount: " + nodeInfo.nullGreaterCount);
    System.out.println("nullNextCount: " + nodeInfo.nullNextCount);
    System.out.println("resultCount: " + nodeInfo.resultCount);
    System.out.println("nullSimpleCount: " + nodeInfo.nullSimpleCount);
}
/**
 * Adds the statistics of the subtree rooted at {@code n} to
 * {@code nodeInfo}, visiting the less, next and greater links in that order.
 */
public void checkNodes(Node n, NodeInfo nodeInfo) {
    nodeInfo.nodeCount++;
    boolean hasResult = n.result != NOT_FOUND;
    if (hasResult) {
        nodeInfo.resultCount++;
    }

    if (n.less == null) {
        nodeInfo.nullLessCount++;
        // "simple" node: no sideways branches and no result of its own
        if (!hasResult && n.greater == null) {
            nodeInfo.nullSimpleCount++;
        }
    } else {
        checkNodes(n.less, nodeInfo);
    }

    if (n.next == null) {
        nodeInfo.nullNextCount++;
    } else {
        checkNodes(n.next, nodeInfo);
    }

    if (n.greater == null) {
        nodeInfo.nullGreaterCount++;
    } else {
        checkNodes(n.greater, nodeInfo);
    }
}
/**
 * Wraps a PrintWriter and tracks the current column so that text can be
 * printed at a requested depth, padding with the leader string when the
 * requested depth lies to the right of the current column.
 */
final static class DepthPrinter {
    private PrintWriter pw;
    private int currentDepth = 0;
    private String leader = ".";

    DepthPrinter(PrintWriter pw) {
        this.pw = pw;
    }

    /** Prints a character at the current column. */
    void print(char ch) {
        print(ch, 0);
    }

    /** Prints a string at the current column. */
    void print(String s) {
        print(s, 0);
    }

    /** Prints a character, padding towards {@code depth} first. */
    void print(char ch, int depth) {
        print(String.valueOf(ch), depth);
    }

    /**
     * Prints {@code s}. When {@code depth} is beyond the current column,
     * (depth - current - 1) leaders are written first and the column is
     * taken to be {@code depth}.
     */
    void print(String s, int depth) {
        int gap = depth - currentDepth;
        if (gap > 0) {
            String padding = Utility.repeat(leader, gap - 1);
            pw.print(padding);
            currentDepth = depth;
        }
        pw.print(s);
        currentDepth = currentDepth + s.length();
    }

    /** Ends the line and resets the column. */
    void println() {
        pw.println();
        currentDepth = 0;
    }

    /** Prints {@code s} without padding, ends the line and resets the column. */
    void println(String s) {
        pw.print(s);
        println();
    }
}
/**
 * Compacting storage for the node strings. All strings live in one buffer,
 * each followed by TERMINATOR; a string is identified by the index where it
 * starts, so a string that is a suffix of an earlier one shares its storage.
 * A string containing spaces is stored as a sequence of the codes of its
 * space-separated pieces, and its own code is marked with the 0x8000 bit.
 *
 * Usage: add() every string, call compact() once, then translate strings
 * to codes and codes back to strings with the two get() methods.
 *
 * NOTE(review): piece codes are stored as single chars and the composite
 * flag is bit 0x8000, so this scheme looks limited to buffers shorter than
 * 0x8000 chars, and a piece code equal to TERMINATOR would be misread --
 * confirm before relying on it for large inputs.
 */
final static class StringStore {
    // initially, there is a simple strategy
    private String buffer = "";
    private static final char TERMINATOR = '\u007E';
    private static final int PIECE_LENGTH = 5;
    // Scratch array for Utility.split. Per instance (it used to be static,
    // i.e. shared by every store).
    private String[] pieces = new String[50]; // HACK
    // Strings waiting to be stored by compact(). Per instance: as a static
    // set it carried the strings of earlier stores into every later one.
    private Set strings = new HashSet();

    /** Registers a string to be stored by the next compact(). */
    public void add(String s) {
        strings.add(s);
    }

    /**
     * Stores everything registered with add(): first all the
     * space-separated pieces, then the full strings.
     */
    public void compact() {
        System.out.println("Adding Pieces");
        // add all the pieces
        Iterator it = strings.iterator();
        Set additions = new HashSet();
        while (it.hasNext()) {
            String s = (String)it.next();
            int len = Utility.split(s, ' ', pieces);
            for (int i = 0; i < len; ++i) {
                additions.add(pieces[i]);
            }
        }
        store(additions);
        store(strings);
    }

    /** Stores the given strings, longest first so shorter ones can reuse suffixes. */
    private void store(Set stuff) {
        System.out.println("Sorting");
        // sort them by length, longest first
        Set ordered = new TreeSet();
        Iterator it = stuff.iterator();
        while (it.hasNext()) {
            String s = (String)it.next();
            ordered.add(new Pair(new Integer(-s.length()), s));
        }
        System.out.println("Storing");
        // add them
        it = ordered.iterator();
        while (it.hasNext()) {
            String s = (String)(((Pair)it.next()).second);
            get(s);
        }
    }

    /**
     * Returns the code for {@code s}, storing it first if necessary.
     * Strings with spaces get a composite code (0x8000 bit set).
     */
    private int get(String s) {
        System.out.println("Adding: \'" + s + "\'");
        int index;
        if (s.indexOf(' ') < 0) {
            index = addNoSplit(s);
            System.out.println("\tReturning: " + index);
            return index;
        }
        int len = Utility.split(s, ' ', pieces);
        StringBuffer itemCodes = new StringBuffer();
        for (int i = 0; i < len; ++i) {
            String piece = pieces[i];
            itemCodes.append((char)addNoSplit(piece));
            /*for (int j = 0; j < piece.length(); j += PIECE_LENGTH) {
                int maxLen = j + PIECE_LENGTH;
                if (maxLen > piece.length()) maxLen = piece.length();
                itemCodes.append((char)addNoSplit(piece.substring(j, maxLen)));
            }*/
        }
        index = 0x8000 | addNoSplit(itemCodes.toString()); // mark it as composite
        System.out.println("\tReturning: " + index);
        return index;
    }

    /**
     * Returns the buffer index of {@code s} followed by the terminator,
     * appending it to the buffer when it is not already present.
     */
    private int addNoSplit(String s) {
        System.out.println("\tAdding2: \'" + s + "\'");
        String sTerm = s + TERMINATOR;
        int index = buffer.indexOf(sTerm);
        if (index >= 0) return index;
        index = buffer.length();
        buffer += sTerm;
        System.out.println("\t\tReturning2: " + index);
        return index;
    }

    /**
     * Returns the string for a code produced by this store. For a composite
     * code the pieces are looked up in turn and joined with single spaces.
     */
    public String get(int index) {
        String result;
        System.out.println("Fetching: " + index);
        if ((index & 0x8000) == 0) {
            int end = buffer.indexOf(TERMINATOR, index);
            result = buffer.substring(index, end);
            System.out.println("\tReturning: '" + result + "'");
            return result;
        }
        index &= ~0x8000; // remove 1 bit
        int end = buffer.indexOf(TERMINATOR, index);
        StringBuffer joined = new StringBuffer();
        for (int i = index; i < end; ++i) {
            if (joined.length() != 0) joined.append(" ");
            joined.append(get(buffer.charAt(i)));
        }
        result = joined.toString();
        System.out.println("\tReturning: '" + result + "'");
        return result;
    }

    /** Returns the raw storage buffer. */
    public String toString() {
        return buffer;
    }
}
final static class Builder {
// name -> Integer result, kept sorted by name; filled by add(), consumed by build()
Map map = new TreeMap();
// sorted names, only populated while build() is running
String[] names;
// the store under construction, only set while build() is running
TernaryStore store;
// referenced only by code that is currently commented out
Set set = new TreeSet();
/**
 * Registers {@code name} with the value the built store should report for
 * it. Registering the same name again replaces the earlier value.
 */
public void add(String name, int result) {
    Integer boxed = new Integer(result);
    map.put(name, boxed);
}
/**
 * Builds a TernaryStore from everything registered with add(), then
 * compacts it: chains of nodes are merged, the node strings are moved into
 * the store's StringStore, and each node is switched over to a string code.
 * The builder's map is cleared as part of the build.
 *
 * @return the finished store; it has no nodes if nothing was added
 */
public TernaryStore build() {
    // flatten strings into array (the TreeMap yields them in sorted order)
    names = (String[]) map.keySet().toArray(new String[map.size()]);

    // build nodes
    store = new TernaryStore();
    addNode(0, names.length);

    // free storage
    names = null;
    map.clear();

    System.out.println("compacting");
    // With nothing added there is no base node; skip the tree passes
    // instead of failing on a null node.
    if (store.base != null) {
        compactStore(store.base);
    }
    store.stringStore.compact();
    //compactStrings(store);
    //set.clear(); // free more storage
    if (store.base != null) {
        replaceStrings(store.base);
    }
    //map.clear(); // free storage

    // free storage
    TernaryStore result = store;
    store = null;
    return result;
}
/*
void compactStrings(TernaryStore t) {
// we have a set of Pairs, first is length, second is string
// compact them, word by word
Iterator it = set.iterator();
while (it.hasNext()) {
String string = ((String)((Pair)it.next()).second);
int index = t.stringStore.add(string);
if (true) {
System.out.println("Checking: " + index);
String reverse = t.stringStore.get(index);
if (!reverse.equals(string)) {
System.out.println("source: \'" + string + "\'");
System.out.println("reverse: \'" + reverse + "\'");
throw new IllegalArgumentException("Failed roundtrip");
}
}
map.put(string, new Integer(index));
}
}
*/
/**
 * Switches the subtree rooted at {@code n} from directly-held strings to
 * string codes: each node's string is looked up in the StringStore, the
 * code is recorded and the direct string is dropped.
 */
public void replaceStrings(Node n) {
    StringStore strings = store.stringStore;
    String text = n.getString(strings);
    n.stringCode = strings.get(text);
    n.setString(null);

    Node[] children = { n.less, n.next, n.greater };
    for (int i = 0; i < children.length; ++i) {
        if (children[i] != null) {
            replaceStrings(children[i]);
        }
    }
}
/**
 * Compacts the subtree rooted at {@code n}. While {@code n} has no result
 * and its continuation has no sideways branches, the continuation is folded
 * into {@code n} (strings concatenated, result and next taken over). The
 * resulting string is then registered with the StringStore and the three
 * links are compacted in turn.
 */
public void compactStore(Node n) {
    StringStore strings = store.stringStore;
    for (Node follower = n.next;
            n.result == NOT_FOUND && follower != null
                && follower.greater == null && follower.less == null;
            follower = n.next) {
        n.setString(n.getString(strings) + follower.getString(strings));
        n.result = follower.result;
        n.next = follower.next; // remove old node
    }

    strings.add(n.getString(strings));

    if (n.less != null) {
        compactStore(n.less);
    }
    if (n.next != null) {
        compactStore(n.next);
    }
    if (n.greater != null) {
        compactStore(n.greater);
    }
}
/**
 * Adds names[start..limit) to the tree, middle element first and then each
 * half recursively, so the sorted names are inserted in a balanced order.
 */
private void addNode(int start, int limit) {
    if (limit <= start) {
        return;
    }
    int mid = (start + limit) / 2;
    String name = names[mid];
    Integer value = (Integer) map.get(name);
    addNode(name, value.intValue());
    addNode(start, mid);
    addNode(mid + 1, limit);
}
/**
 * Inserts one string into the tree with the given result.
 * Existing nodes are followed for as long as they match; at the first
 * missing link the remainder of the string is attached as a new chain
 * built by addRest(). Only the first character of each node's string is
 * compared (addRest creates single-character nodes).
 */
private void addNode(String s, int result) {
// empty tree: the whole string becomes the base chain
if (store.base == null) {
store.base = addRest(s, 0, result);
return;
}
Node n = store.base;
// last node whose character matched; receives the result if s ends on existing nodes
Node lastNode = n;
for (int i = 0; i < s.length(); ++i) {
char ch = s.charAt(i);
while (true) {
char first = n.getString(store.stringStore).charAt(0);
if (ch == first) {
if (n.next == null) {
// nothing follows this node yet: attach the characters after position i
n.next = addRest(s, i+1, result);
return;
}
lastNode = n;
n = n.next;
break; // get next char
}
// otherwise branch sideways, keeping same char
if (ch > first) {
if (n.greater == null) {
// no larger sibling: attach the rest of s, starting with this character
n.greater = addRest(s, i, result);
return;
}
n = n.greater;
} else {
if (n.less == null) {
// no smaller sibling: attach the rest of s, starting with this character
n.less = addRest(s, i, result);
return;
}
n = n.less;
}
}
}
// every character matched an existing node: s is a prefix of stored strings
// NOTE(review): for an empty s the loop never runs, so this sets the result on the base node -- confirm empty names never reach here
lastNode.result = result;
}
/**
 * Builds a chain of single-character nodes for s[position..end), linked
 * through next, with {@code result} set on the final node.
 *
 * @return the first node of the chain, or null when position is at or
 *         beyond the end of {@code s}
 */
private Node addRest(String s, int position, int result) {
    Node head = null;
    Node tail = null;
    for (int i = position; i < s.length(); ++i) {
        Node link = new Node();
        link.setString(s.substring(i, i + 1));
        if (tail == null) {
            head = link;
        } else {
            tail.next = link;
        }
        tail = link;
    }
    if (tail != null) {
        tail.result = result;
    }
    return head;
}
}
}

View file

@ -5,21 +5,25 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2002/09/25 06:40:13 $
* $Revision: 1.18 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.19 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.List;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.BitSet;
import java.util.Map;
import java.io.IOException;
import java.io.DataInputStream;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.BufferedReader;
import com.ibm.text.utility.*;
@ -31,7 +35,7 @@ public final class UCD implements UCD_Types {
/**
* Used for the default version.
*/
public static final String latestVersion = "3.2.0";
public static final String latestVersion = "3.2.1";
/**
* Create singleton instance for default (latest) version
@ -651,7 +655,7 @@ public final class UCD implements UCD_Types {
}
public static String getCategoryID_fromIndex(byte prop) {
return UCD_Names.GC[prop];
return getCategoryID_fromIndex(prop, NORMAL);
}
public static String getCategoryID_fromIndex(byte prop, byte style) {
@ -660,7 +664,7 @@ public final class UCD implements UCD_Types {
public String getCombiningClassID(int codePoint) {
return getCombiningClassID_fromIndex(getCombiningClass(codePoint), NORMAL);
return getCombiningClassID(codePoint, NORMAL);
}
public String getCombiningClassID(int codePoint, byte style) {
@ -681,9 +685,9 @@ public final class UCD implements UCD_Types {
case 7: s = style < LONG ? "NK" : "Nukta"; break;
case 8: s = style < LONG ? "KV" : "KanaVoicing"; break;
case 9: s = style < LONG ? "VR" : "Virama"; break;
case 202: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
case 204: s = style < LONG ? "ATB" : "AttachedBelow"; break;
case 206: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
@ -734,7 +738,7 @@ public final class UCD implements UCD_Types {
}
public static String getNumericTypeID_fromIndex(byte prop) {
return UCD_Names.NT[prop];
return getNumericTypeID_fromIndex(prop, NORMAL);
}
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
@ -746,7 +750,7 @@ public final class UCD implements UCD_Types {
}
public static String getEastAsianWidthID_fromIndex(byte prop) {
return UCD_Names.EA[prop];
return getEastAsianWidthID_fromIndex(prop, NORMAL);
}
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
@ -758,7 +762,7 @@ public final class UCD implements UCD_Types {
}
public static String getLineBreakID_fromIndex(byte prop) {
return UCD_Names.LB[prop];
return getLineBreakID_fromIndex(prop, NORMAL);
}
public static String getLineBreakID_fromIndex(byte prop, byte style) {
@ -770,7 +774,7 @@ public final class UCD implements UCD_Types {
}
public static String getJoiningTypeID_fromIndex(byte prop) {
return UCD_Names.JOINING_TYPE[prop];
return getJoiningTypeID_fromIndex(prop, NORMAL);
}
public static String getJoiningTypeID_fromIndex(byte prop, byte style) {
@ -782,7 +786,7 @@ public final class UCD implements UCD_Types {
}
public static String getJoiningGroupID_fromIndex(byte prop) {
return UCD_Names.JOINING_GROUP[prop];
return getJoiningGroupID_fromIndex(prop, NORMAL);
}
public static String getJoiningGroupID_fromIndex(byte prop, byte style) {
@ -795,7 +799,7 @@ public final class UCD implements UCD_Types {
}
public static String getScriptID_fromIndex(byte prop) {
return UCD_Names.SCRIPT[prop];
return getScriptID_fromIndex(prop, NORMAL);
}
public static String getScriptID_fromIndex(byte prop, byte length) {
@ -808,7 +812,7 @@ public final class UCD implements UCD_Types {
}
public static String getAgeID_fromIndex(byte prop) {
return UCD_Names.AGE[prop];
return getAgeID_fromIndex(prop, NORMAL);
}
public static String getAgeID_fromIndex(byte prop, byte style) {
@ -1306,4 +1310,53 @@ to guarantee identifier closure.
}
}
}
/**
 * One entry of the block list, as parsed by loadBlocks() from a line of
 * the form "0000..007F; Basic Latin".
 */
public static class BlockData {
// first code point of the block (parsed from the hex value before "..")
public int start;
// last code point of the block (parsed from the hex value after "..")
public int end;
// block name with spaces replaced by underscores
public String name;
}
/**
 * Copies the data of the block with the given index into {@code output}.
 * The block list is loaded on first use. Callers can enumerate all blocks
 * by counting up from 0 until this returns false.
 *
 * @param blockId index of the block in file order
 * @param output receives name, start and end of the block
 * @return false if there is no block with that index (output untouched)
 */
public boolean getBlockData(int blockId, BlockData output) {
    if (blocks == null) {
        loadBlocks();
    }
    if (blockId < 0 || blockId >= blocks.size()) {
        return false;
    }
    BlockData found = (BlockData) blocks.get(blockId);
    output.name = found.name;
    output.start = found.start;
    output.end = found.end;
    return true;
}
// Block list in file order; null until loadBlocks() has run.
private List blocks = null;

/**
 * Reads the "Blocks" data file for this UCD version into the block list.
 * Each data line has the form "0000..007F; Basic Latin"; spaces in the
 * name are replaced by underscores.
 *
 * @throws IllegalArgumentException if the file cannot be read or a line
 *         does not have the expected form
 */
private void loadBlocks() {
    blocks = new ArrayList();
    try {
        BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
        try {
            while (true) {
                // 0000..007F; Basic Latin
                String line = Utility.readDataLine(in);
                if (line == null) break;
                if (line.length() == 0) continue;
                int pos1 = line.indexOf('.');
                int pos2 = line.indexOf(';', pos1);
                // Report a malformed line by content rather than failing
                // with an index error inside substring().
                if (pos1 < 0 || pos2 < pos1 + 2) {
                    throw new IllegalArgumentException("Malformed line in block file: " + line);
                }
                BlockData blockData = new BlockData();
                blockData.start = Integer.parseInt(line.substring(0, pos1), 16);
                blockData.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
                blockData.name = line.substring(pos2+1).trim().replace(' ', '_');
                blocks.add(blockData);
            }
        } finally {
            in.close();
        }
    } catch (IOException e) {
        // keep the underlying failure visible in the message
        throw new IllegalArgumentException("Can't read block file: " + e);
    }
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2002/06/13 21:14:05 $
* $Revision: 1.14 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
@ -201,11 +201,11 @@ final class UCD_Names implements UCD_Types {
static final String[] YN_TABLE = {"N", "Y"};
static String[] EA = {
static String[] SHORT_EA = {
"N", "A", "H", "W", "F", "Na"
};
static String[] SHORT_EA = {
static String[] EA = {
"Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
};

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2002/08/04 21:38:45 $
* $Revision: 1.15 $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
public static final int dVersion = 9; // change to fix the generated file D version. If less than zero, no "d"
public static final int dVersion = 2; // change to fix the generated file D version. If less than zero, no "d"
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
* $Date: 2002/08/04 21:38:44 $
* $Revision: 1.9 $
* $Date: 2002/10/05 01:28:57 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -299,13 +299,13 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
case COMBINING_CLASS>>8: return ucd.getCombiningClassID_fromIndex((byte)propValue, style);
case BIDI_CLASS>>8: return ucd.getBidiClassID_fromIndex((byte)propValue, style);
case DECOMPOSITION_TYPE>>8: return ucd.getDecompositionTypeID_fromIndex((byte)propValue, style);
case NUMERIC_TYPE>>8: ucd.getNumericTypeID_fromIndex((byte)propValue, style);
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex((byte)propValue);
case NUMERIC_TYPE>>8: return ucd.getNumericTypeID_fromIndex((byte)propValue, style);
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex((byte)propValue, style);
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex((byte)propValue, style);
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex((byte)propValue);
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex((byte)propValue);
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex((byte)propValue, style);
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex((byte)propValue, style);
case BINARY_PROPERTIES>>8: return ucd.getBinaryPropertiesID_fromIndex((byte)propValue, style);
case SCRIPT>>8: return ucd.getScriptID_fromIndex((byte)propValue);
case SCRIPT>>8: return ucd.getScriptID_fromIndex((byte)propValue, style);
case AGE>>8: return ucd.getAgeID_fromIndex((byte)propValue);
/*
case DERIVED>>8:

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
* $Date: 2002/08/08 15:38:16 $
* $Revision: 1.1 $
* $Date: 2002/10/05 01:28:57 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -175,12 +175,12 @@ public final class UnifiedProperty extends UnicodeProperty {
case BIDI_CLASS>>8: return ucd.getBidiClassID_fromIndex(ucd.getBidiClass(cp), style);
case DECOMPOSITION_TYPE>>8: return ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(cp), style);
case NUMERIC_TYPE>>8: return ucd.getNumericTypeID_fromIndex(ucd.getNumericType(cp), style);
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(cp));
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex(ucd.getLineBreak(cp));
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(cp));
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(cp));
case SCRIPT>>8: return ucd.getScriptID_fromIndex(ucd.getScript(cp));
case AGE>>8: return ucd.getAgeID_fromIndex(ucd.getAge(cp));
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(cp), style);
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex(ucd.getLineBreak(cp), style);
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(cp), style);
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(cp), style);
case SCRIPT>>8: return ucd.getScriptID_fromIndex(ucd.getScript(cp), style);
case AGE>>8: return ucd.getAgeID_fromIndex(ucd.getAge(cp), style);
default: throw new IllegalArgumentException("Internal Error");
}
}

View file

@ -0,0 +1,99 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/PoorMansEnum.java,v $
* $Date: 2002/10/05 01:28:57 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
/* Goal for enum is:
* Easy to use
* ID <-> int
* ID <-> string name
*/
package com.ibm.text.utility;
import java.util.*;
public class PoorMansEnum {
protected int value;
protected String name;
protected PoorMansEnum next;
public int toInt() {
return value;
}
public String toString() {
return name;
}
// for subclassers
protected PoorMansEnum() {
}
/** Utility for subclasses
*/
protected static class EnumStore {
private List int2Id = new ArrayList();
private Map string2Id = new HashMap();
private PoorMansEnum last = null;
public PoorMansEnum add(PoorMansEnum id, String name) {
// both string and id must be new!
if (int2Id.indexOf(id) >= 0) {
throw new IllegalArgumentException("ID already stored for \"" + name + '"');
} else if (string2Id.containsKey(name)) {
throw new IllegalArgumentException('"' + name + "\" already stored for ID ");
}
id.value = int2Id.size();
id.name = name;
if (last != null) {
last.next = id;
}
int2Id.add(id);
string2Id.put(name, id);
last = id;
return id;
}
public PoorMansEnum addAlias(PoorMansEnum id, String name) {
// id must be old, string must be new
if (int2Id.indexOf(id) < 0) {
throw new IllegalArgumentException("ID must already be stored for \"" + name + '"');
} else if (string2Id.containsKey(name)) {
throw new IllegalArgumentException('"' + name + "\" already stored for ID ");
}
string2Id.put(name, id);
return id;
}
public Collection getAliases(PoorMansEnum id, Collection output) {
Iterator it = string2Id.keySet().iterator();
while (it.hasNext()) {
Object s = it.next();
if (s == id.name) continue;
if (id == string2Id.get(s)) output.add(s);
}
return output;
}
public int getMax() {
return int2Id.size();
}
public PoorMansEnum get(int value) {
return (PoorMansEnum) int2Id.get(value);
}
public PoorMansEnum get(String name) {
return (PoorMansEnum) string2Id.get(name);
}
}
}

View file

@ -0,0 +1,76 @@
/*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/SampleEnum.java,v $
* $Date: 2002/10/05 01:28:56 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.text.utility;
import java.util.*;
/** Sample Poor-Man's Enum.
* To use as a template, copy and
* <ul>
* <li>replace all instances of "SampleEnum" by your enum's name</li>
* <li>change the enum values to your values</li>
* <li>set any aliases (or remove that section)</li>
* </ul>
*/
public final class SampleEnum extends PoorMansEnum {
// Registry for this enum. It is declared before the values because static
// initializers run in textual order and add() below uses it.
private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
// The enum values, registered in this order (so ALPHA has int value 0).
// FIRST is not a separate value, just the starting point for iteration.
public static final SampleEnum
ALPHA = add("The"),
BETA = add("Quick"),
GAMMA = add("Brown"),
FIRST = ALPHA;
// Aliases: additional names that get(String) resolves to an existing value.
static {
store.addAlias(ALPHA, "A");
}
/* Boilerplate: typed wrappers around the inherited link and the store */
// value registered after this one, or null for the last value
public SampleEnum next() { return (SampleEnum) next; }
// adds this value's alias names to output
public void getAliases(Collection output) { store.getAliases(this, output); }
// lookup by name or alias; null if unknown
public static SampleEnum get(String s) { return (SampleEnum) store.get(s); }
// lookup by int value
public static SampleEnum get(int v) { return (SampleEnum) store.get(v); }
// number of values
public static int getMax() { return store.getMax(); }
// values are only created through add()
private SampleEnum() {}
// creates a new value and registers it under name
private static SampleEnum add(String name) { return (SampleEnum) store.add(new SampleEnum(), name);}
/* just for testing: prints the int/name mappings and the aliases to standard output */
public static void test() {
// int to string, collecting strings as we go
Set s = new TreeSet();
for (int i = 0; i < SampleEnum.getMax(); ++i) {
String n = SampleEnum.get(i).toString();
System.out.println(i + ", " + n);
s.add(n);
}
// String to int
Iterator it = s.iterator();
while (it.hasNext()) {
String n = (String)it.next();
System.out.println(n + ", " + SampleEnum.get(n).toInt());
}
// iteration over all values via the next() links; s is reused to hold each value's aliases
for (SampleEnum current = FIRST; current != null; current = current.next()) {
s.clear();
current.getAliases(s);
System.out.println(current.toInt() + ", " + current + ", " + s);
}
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2002/09/25 06:40:14 $
* $Revision: 1.25 $
* $Date: 2002/10/05 01:28:56 $
* $Revision: 1.26 $
*
*******************************************************************************
*/
@ -26,7 +26,7 @@ import com.ibm.text.UCD.*;
public final class Utility implements UCD_Types { // COMMON UTILITIES
static final boolean UTF8 = true; // TODO -- make argument
// static final boolean UTF8 = true; // TODO -- make argument
public static final char BOM = '\uFEFF';
public static String[] append(String[] array1, String[] array2) {
@ -521,7 +521,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return "<codepoint hex=\"" + hex(c,1) + "\"/>";
}
if (c <= 0x7E || UTF8) {
if (c <= 0x7E) {
return UTF32.valueOf32(c);
}
@ -634,17 +634,45 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
*/
static final byte WINDOWS_MASK = 1, UTF8_MASK = 2;
public static final byte
LATIN1_UNIX = 0,
LATIN1_WINDOWS = WINDOWS_MASK,
UTF8_UNIX = UTF8_MASK,
UTF8_WINDOWS = UTF8_MASK | WINDOWS_MASK;
// Enumeration type for the file encoding options, built on PoorMansEnum
// with the same boilerplate as SampleEnum.
public static final class Encoding extends PoorMansEnum {
// registry of the Encoding values
private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
/* Boilerplate: typed wrappers around the inherited link and the store */
// value registered after this one, or null for the last value
public Encoding next() { return (Encoding) next; }
// adds this value's alias names to output
public void getAliases(Collection output) { store.getAliases(this, output); }
// lookup by name or alias; null if unknown
public static Encoding get(String s) { return (Encoding) store.get(s); }
// lookup by int value
public static Encoding get(int v) { return (Encoding) store.get(v); }
// number of values
public static int getMax() { return store.getMax(); }
// values are only created through add()
private Encoding() {}
// creates a new value and registers it under name
private static Encoding add(String name) { return (Encoding) store.add(new Encoding(), name);}
}
public static final Encoding
LATIN1_UNIX = Encoding.add("LATIN1_UNIX"),
LATIN1_WINDOWS = Encoding.add("LATIN1_WINDOWS"),
UTF8_UNIX = Encoding.add("UTF8_UNIX"),
UTF8_WINDOWS = Encoding.add("UTF8_WINDOWS"),
UTF8 = Encoding.add("UTF8"), // for read-only
LATIN1 = Encoding.add("LATIN1"), // for read-only
FIRST = LATIN1_UNIX;
/*
public static final Encoding
LATIN1_UNIX = Encoding.LATIN1_UNIX,
LATIN1_WINDOWS = Encoding.LATIN1_WINDOWS,
UTF8_UNIX = Encoding.UTF8_UNIX,
UTF8_WINDOWS = Encoding.UTF8_WINDOWS;
*/
// Normally use false, false.
// But for UCD files use true, true
// Or if they are UTF8, use true, false
public static PrintWriter openPrintWriter(String filename, byte options) throws IOException {
public static PrintWriter openPrintWriter(String filename, Encoding options) throws IOException {
File file = new File(getOutputName(filename));
Utility.fixDot();
System.out.println("Creating File: " + file.getCanonicalPath());
@ -655,7 +683,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
new UTF8StreamWriter(
new FileOutputStream(file),
32*1024,
(options & WINDOWS_MASK) == 0, (options & UTF8_MASK) == 0));
options == LATIN1_UNIX || options == UTF8_UNIX,
options == LATIN1_UNIX || options == LATIN1_WINDOWS));
}
public static String getOutputName(String filename) {
@ -714,13 +743,9 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
}
public static void appendFile(String filename, boolean utf8, PrintWriter output) throws IOException {
appendFile(filename, utf8, output, null);
}
public static BufferedReader openReadFile(String filename, boolean UTF8) throws FileNotFoundException, UnsupportedEncodingException {
public static BufferedReader openReadFile(String filename, Encoding encoding) throws FileNotFoundException, UnsupportedEncodingException {
FileInputStream fis = new FileInputStream(filename);
InputStreamReader isr = UTF8 ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
BufferedReader br = new BufferedReader(isr, 32*1024);
return br;
}
@ -769,10 +794,17 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return line;
}
public static void appendFile(String filename, boolean utf8, PrintWriter output, String[] replacementList) throws IOException {
/**
 * Convenience overload: forwards to the four-argument appendFile, passing
 * null as the replacementList.
 */
public static void appendFile(String filename, Encoding encoding, PrintWriter output) throws IOException {
appendFile(filename, encoding, output, null);
}
public static void appendFile(String filename, Encoding encoding, PrintWriter output, String[] replacementList) throws IOException {
BufferedReader br = openReadFile(filename, encoding);
/*
FileInputStream fis = new FileInputStream(filename);
InputStreamReader isr = utf8 ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
BufferedReader br = new BufferedReader(isr, 32*1024);
*/
while (true) {
String line = br.readLine();
if (line == null) break;
@ -861,20 +893,20 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return -1;
}
public static void copyTextFile(String filename, boolean utf8, String newName, String[] replacementList) throws IOException {
public static void copyTextFile(String filename, Encoding encoding, String newName, String[] replacementList) throws IOException {
PrintWriter out = Utility.openPrintWriter(newName, UTF8_WINDOWS);
appendFile(filename, utf8, out, replacementList);
appendFile(filename, encoding, out, replacementList);
out.close();
}
public static void copyTextFile(String filename, boolean utf8, String newName) throws IOException {
copyTextFile(filename, utf8, newName, null);
public static void copyTextFile(String filename, Encoding encoding, String newName) throws IOException {
copyTextFile(filename, encoding, newName, null);
}
public static BufferedReader openUnicodeFile(String filename, String version, boolean show, boolean UTF8) throws IOException {
public static BufferedReader openUnicodeFile(String filename, String version, boolean show, Encoding encoding) throws IOException {
String name = getMostRecentUnicodeDataFile(filename, version, true, show);
if (name == null) return null;
return openReadFile(name, UTF8); // new BufferedReader(new FileReader(name),32*1024);
return openReadFile(name, encoding); // new BufferedReader(new FileReader(name),32*1024);
}
public static String getMostRecentUnicodeDataFile(String filename, String version,