mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
bunch o' changes
X-SVN-Rev: 9982
This commit is contained in:
parent
d29ea5e179
commit
5529d37324
24 changed files with 1489 additions and 138 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2002/10/03 22:58:17 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2002/10/05 01:28:56 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -921,14 +921,35 @@ public class WriteCharts implements UCD_Types {
|
|||
+ "<br><tt>" + Utility.hex(comp) + "</tt></td>";
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static void writeAllocation() throws IOException {
|
||||
Default.setUCD();
|
||||
String[] names = new String[300]; // HACK, 300 is plenty for now. Fix if it ever gets larger
|
||||
int[] starts = new int[names.length];
|
||||
int[] ends = new int[names.length];
|
||||
|
||||
UCD.BlockData blockData = new UCD.BlockData();
|
||||
|
||||
int counter = 0;
|
||||
UnicodeSet[] values = new UnicodeSet[500];
|
||||
String[] names = new String[values.length];
|
||||
int[] starts = new int[values.length];
|
||||
int[] ends = new int[values.length];
|
||||
int blockId = 0;
|
||||
while (Default.ucd.getBlockData(blockId++, blockData)) {
|
||||
names[counter] = blockData.name;
|
||||
starts[counter] = blockData.start;
|
||||
ends[counter] = blockData.end;
|
||||
//System.out.println(names[counter] + ", " + values[counter]);
|
||||
++counter;
|
||||
|
||||
// HACK
|
||||
if (blockData.name.equals("Tags")) {
|
||||
names[counter] = "<i>reserved default ignorable</i>";
|
||||
starts[counter] = 0xE0080;
|
||||
ends[counter] = 0xE0FFF;
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
BufferedReader in = Utility.openUnicodeFile("Blocks", "", true, false);
|
||||
try {
|
||||
while (true) {
|
||||
|
@ -947,42 +968,79 @@ public class WriteCharts implements UCD_Types {
|
|||
ends[counter] = end;
|
||||
//System.out.println(names[counter] + ", " + values[counter]);
|
||||
++counter;
|
||||
|
||||
// HACK
|
||||
if (name.equals("Tags")) {
|
||||
names[counter] = "<i>reserved default ignorable</i>";
|
||||
values[counter] = new UnicodeSet(0xE0080, 0xE0FFF);
|
||||
starts[counter] = 0xE0080;
|
||||
ends[counter] = 0xE0FFF;
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
*/
|
||||
|
||||
PrintWriter out = Utility.openPrintWriter("Allocation.html", Utility.LATIN1_WINDOWS);
|
||||
|
||||
/*
|
||||
Graphic
|
||||
Format
|
||||
Control
|
||||
Private Use
|
||||
Surrogate
|
||||
Noncharacter
|
||||
Reserved (default ignorable)
|
||||
Reserved (other)
|
||||
*/
|
||||
|
||||
PrintWriter out = Utility.openPrintWriter("allocation.html", Utility.LATIN1_WINDOWS);
|
||||
try {
|
||||
out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
out.println("<title>Unicode Allocation</title></head>");
|
||||
out.println("<body bgcolor='#FFFFFF'><h1 align='center'><a href='#Notes'>Unicode Allocation</a></h1>");
|
||||
out.println("<table border='1' width='100%' cellspacing='0'>");
|
||||
out.println("<tr><th>Start</th><th align='left'>Block Name</th><th align='left'>Size</th></tr>");
|
||||
|
||||
UnicodeSetIterator it = new UnicodeSetIterator();
|
||||
|
||||
int lastEnd = -1;
|
||||
for (int i = 0; i < counter; ++i) {
|
||||
if (starts[i] != lastEnd + 1) {
|
||||
drawAllocation(out, lastEnd + 1, "<i>reserved</i>", starts[i] - lastEnd + 1, 0);
|
||||
for (int textOnly = 0; textOnly < 2; ++textOnly) {
|
||||
out.println("<table border='1' cellspacing='0'>"); // width='100%'
|
||||
if (textOnly == 0) {
|
||||
out.println("<tr><th>Start</th><th align='left'>Block Name</th><th align='left'>Size</th></tr>");
|
||||
} else {
|
||||
out.println("<tr><th>Block Name</th><th>Start</th><th>Total</th><th>Assigned</th></tr>");
|
||||
}
|
||||
int total = values[i].size();
|
||||
int alloc = 0;
|
||||
it.reset(values[i]);
|
||||
while (it.nextRange()) {
|
||||
for (int j = it.codepoint; j <= it.codepointEnd; ++j) {
|
||||
int lastEnd = -1;
|
||||
for (int i = 0; i < counter; ++i) {
|
||||
if (starts[i] != lastEnd + 1) {
|
||||
drawAllocation(out, lastEnd + 1, "<i>reserved</i>", starts[i] - lastEnd + 1, 0, "#000000", "#000000", textOnly);
|
||||
}
|
||||
int total = ends[i] - starts[i] + 1;
|
||||
int alloc = 0;
|
||||
for (int j = starts[i]; j <= ends[i]; ++j) {
|
||||
if (Default.ucd.isAllocated(j)) ++alloc;
|
||||
}
|
||||
//System.out.println(names[i] + "\t" + alloc + "\t" + total);
|
||||
String color = names[i].indexOf("Surrogates") >= 0 ? "#FF0000"
|
||||
: names[i].indexOf("Private") >= 0 ? "#0000FF"
|
||||
: "#00FF00";
|
||||
String colorReserved = names[i].indexOf("reserved default ignorable") >= 0 ? "#CCCCCC"
|
||||
: "#000000";
|
||||
drawAllocation(out, starts[i], names[i], total, alloc, color, colorReserved, textOnly);
|
||||
lastEnd = ends[i];
|
||||
}
|
||||
System.out.println(names[i] + "\t" + alloc + "\t" + total);
|
||||
drawAllocation(out, starts[i], names[i], total, alloc);
|
||||
lastEnd = ends[i];
|
||||
out.println("</table><p> </p>");
|
||||
}
|
||||
out.println("</table>");
|
||||
out.println("<p><a name='Notes'></a>This chart lists all the Unicode blocks and their starting code points. "
|
||||
+ "The area of each bar is proportional to the total number of code points in each block, "
|
||||
+ "with green for the proportion of assigned code points. "
|
||||
out.println("<h2>Key</h2><p><a name='Notes'></a>This chart lists all the Unicode blocks and their starting code points. "
|
||||
+ "The area of each bar is proportional to the total number of code points in each block. "
|
||||
+ "The colors have the following significance:<br>"
|
||||
+ "<table border='1' cellspacing='0' cellpadding='4'>"
|
||||
+ "<tr><td>Green</td><td>Graphic, Control, Format, Noncharacter* code points</td></tr>"
|
||||
+ "<tr><td>Red</td><td>Surrogate code points</td></tr>"
|
||||
+ "<tr><td>Blue</td><td>Private Use code points</td></tr>"
|
||||
+ "<tr><td>Gray</td><td>Reserved (default ignorable) code points</td></tr>"
|
||||
+ "<tr><td>Black</td><td>Reserved (other) code points</td></tr>"
|
||||
+ "</table><br>"
|
||||
+ "* Control, Format, and Noncharacter are not distinguished from Graphic characters by color, since they are mixed into other blocks. "
|
||||
+ "Tooltips on the bars show the total number of code points and the number assigned. "
|
||||
+ "(Remember that assigned <i>code points</i> are not necessarily assigned <i>characters</i>.)"
|
||||
+ "</p>");
|
||||
|
@ -997,23 +1055,27 @@ public class WriteCharts implements UCD_Types {
|
|||
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
|
||||
static {nf.setMaximumFractionDigits(0);}
|
||||
|
||||
static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc) {
|
||||
int unalloc = total - alloc;
|
||||
|
||||
double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock));
|
||||
double allocWidth = alloc * totalWidth / total;
|
||||
double unallocWidth = totalWidth - allocWidth;
|
||||
|
||||
out.println("<tr><td align='right'><code>" + Utility.hex(start)
|
||||
+ "</code></td><td>" + title
|
||||
+ "</td><td title='total: " + nf.format(total) + ", assigned: " + nf.format(alloc)
|
||||
+ "'><table border='0' cellspacing='0' cellpadding='0'><tr>");
|
||||
|
||||
if (alloc != 0) out.println("<td style='font-size:1;width:" + allocWidth + ";height:" + totalWidth
|
||||
+ "' bgcolor='#00FF00'> </td>");
|
||||
if (unalloc != 0) out.println("<td style='font-size:1;width:" + unallocWidth + ";height:" + totalWidth
|
||||
+ "' bgcolor='#000000'> </td>");
|
||||
out.println("</tr></table></td></tr>");
|
||||
static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc, String color, String colorReserved, int textOnly) {
|
||||
if (textOnly == 0) {
|
||||
int unalloc = total - alloc;
|
||||
|
||||
double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock));
|
||||
double allocWidth = alloc * totalWidth / total;
|
||||
double unallocWidth = totalWidth - allocWidth;
|
||||
|
||||
out.println("<tr><td align='right'><code>" + Utility.hex(start)
|
||||
+ "</code></td><td>" + title
|
||||
+ "</td><td title='total: " + nf.format(total) + ", assigned: " + nf.format(alloc)
|
||||
+ "'><table border='0' cellspacing='0' cellpadding='0'><tr>");
|
||||
|
||||
if (alloc != 0) out.println("<td style='font-size:1;width:" + allocWidth + ";height:" + totalWidth
|
||||
+ "' bgcolor='" + color + "'> </td>");
|
||||
if (unalloc != 0) out.println("<td style='font-size:1;width:" + unallocWidth + ";height:" + totalWidth
|
||||
+ "' bgcolor='" + colorReserved + "'> </td>");
|
||||
out.println("</tr></table></td></tr>");
|
||||
} else {
|
||||
out.println("<tr><td>" + title + "</td><td align='right'>" + start + "</td><td align='right'>" + total + "</td><td align='right'>" + alloc + "</td></tr>");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
47
tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt
Normal file
47
tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt
Normal file
|
@ -0,0 +1,47 @@
|
|||
#
|
||||
# This file is used to test (1) case conversion, (2) case detection,
|
||||
# and (3) case-insensitive matching.
|
||||
# (1) is represented below by function names such as toLower(),
|
||||
# (2) is represented below by function names such as isLower().
|
||||
# (3) is represented below by the function name equalsCaseInsensitive().
|
||||
# (The actual function names will vary depending on software language and/or library.)
|
||||
#
|
||||
# The test cases also check whether canonical equivalence is preserved
|
||||
# by these functions.
|
||||
#
|
||||
# Format:
|
||||
# <src> ; <lower> ; <upper> ; <title> ; <fold> (# <comment>)?
|
||||
#
|
||||
# Test:
|
||||
#
|
||||
# A. For each line:
|
||||
# 1. Verify the following equalities:
|
||||
# lower == toLower(src)
|
||||
# upper == toUpper(src)
|
||||
# title == toTitle(src)
|
||||
# fold == toFold(src)
|
||||
# 2. Verify that all of the following are true:
|
||||
# isLower(toLower(lower))
|
||||
# isUpper(toUpper(upper))
|
||||
# isTitle(toTitle(title))
|
||||
# isFold(toFold(fold))
|
||||
# 3. Verify that all of the following are true:
|
||||
# equalsCaseInsensitive(src, lower)
|
||||
# equalsCaseInsensitive(src, upper)
|
||||
# equalsCaseInsensitive(src, title)
|
||||
# equalsCaseInsensitive(src, fold)
|
||||
#
|
||||
# B. For each code point that is NOT listed as a src:
|
||||
# 1. Verify the following equalities:
|
||||
# src == toLower(src) == toUpper(src) == toTitle(src) == toFold(src)
|
||||
# 2. Verify that all of the following are true:
|
||||
# isLower(toLower(lower))
|
||||
# isUpper(toUpper(upper))
|
||||
# isTitle(toTitle(title))
|
||||
# isFold(toFold(fold))
|
||||
# 3. Verify that all of the following are true:
|
||||
# equalsCaseInsensitive(src, lower)
|
||||
# equalsCaseInsensitive(src, upper)
|
||||
# equalsCaseInsensitive(src, title)
|
||||
# equalsCaseInsensitive(src, fold)
|
||||
#
|
25
tools/unicodetools/com/ibm/text/UCD/Charts.java
Normal file
25
tools/unicodetools/com/ibm/text/UCD/Charts.java
Normal file
|
@ -0,0 +1,25 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Charts.java,v $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import java.io.*;
|
||||
|
||||
import java.util.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
|
||||
/** Empty placeholder: this revision of the class declares no fields or methods. */
public class Charts {
|
||||
}
|
106
tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java
Normal file
106
tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java
Normal file
|
@ -0,0 +1,106 @@
|
|||
package com.ibm.text.UCD;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.*;
|
||||
import java.util.*;
|
||||
|
||||
// Enumerated properties will be IntCodePointProperty.
|
||||
// The string values they return will be the property value names.
|
||||
// Binary properties are Enumerated properties. They return 0 or 1
|
||||
|
||||
abstract public class CodePointProperty {
|
||||
// styles for names and string values
|
||||
static final byte SHORT = 0, DEFAULT = 1, LONG = 2, NORMAL_LIMIT = 3;
|
||||
|
||||
// gets the property name
|
||||
abstract public String getName(byte style);
|
||||
|
||||
// value may also be numeric, etc, but this returns string equivalent.
|
||||
abstract public String getValue(int codePoint, byte style);
|
||||
|
||||
// returns true if the code point has the value
|
||||
// works with any style that getValue takes
|
||||
abstract public boolean hasValue(int codePoint, String value);
|
||||
|
||||
// returns the set of all code points with that value.
|
||||
// same effect as using hasValue one by one, but faster internal implementation
|
||||
abstract public UnicodeSet getSet(String value);
|
||||
|
||||
// returns a list of all possible values
|
||||
// logically the same as looping from 0..10FFFF with getValue and getStyleLimit,
|
||||
// and throwing out duplicates, but much faster.
|
||||
static Iterator getAllValues(byte style) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// gets top value style available for this property
|
||||
public byte getStyleLimit(byte style) {
|
||||
return NORMAL_LIMIT;
|
||||
}
|
||||
|
||||
// returns true if the value is known to be uniform over a type.
|
||||
// this is used for various optimizations, especially for Cn & Co
|
||||
public boolean isUniformOverCategory(byte generalCategory) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// subclasses
|
||||
|
||||
static abstract public class IntCodePointProperty extends CodePointProperty {
|
||||
abstract int getNumericValue(int codePoint);
|
||||
abstract int getMaxValue();
|
||||
abstract int getMinValue();
|
||||
static Iterator getAllNumericValues() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static abstract public class DoubleCodePointProperty extends CodePointProperty {
|
||||
abstract double getNumericValue(int codePoint);
|
||||
abstract double getMaxValue();
|
||||
abstract double getMinValue();
|
||||
static Iterator getAllNumericValues() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// registration and lookup
|
||||
|
||||
// register a new property
|
||||
static void register(CodePointProperty newProp) {
|
||||
//...
|
||||
}
|
||||
|
||||
// finds a registered property by name
|
||||
static CodePointProperty getInstance(String name) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// returns a list of all registered properties
|
||||
static Iterator getAllRegistered() {
|
||||
return null;
|
||||
}
|
||||
|
||||
// UnicodeSet would use these internally to handle properties. That is, when
|
||||
// it encountered ... [:name=value:] ...
|
||||
// it would do:
|
||||
// CodePointProperty x = getInstance(name);
|
||||
// if (x != null) doError(name, value);
|
||||
// UnicodeSet s = x.getSet(value);
|
||||
// and then use s.
|
||||
|
||||
// open issue: we could have a property like: contains("dot")
|
||||
// in that case, we would register "contains" as the 'base' name,
|
||||
// but allow lookup with string parameters ("dot")
|
||||
// Maybe just adding:
|
||||
|
||||
public boolean hasParameters() {
|
||||
return false;
|
||||
}
|
||||
public void setParameters(String parameters) {}
|
||||
public String getParameters() {
|
||||
return null;
|
||||
}
|
||||
|
||||
// that way we could have [[:letter:]&[:contains(dot):]]
|
||||
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
|
||||
* $Date: 2002/06/13 21:14:05 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -331,7 +331,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
|
||||
static void readBlocks() throws Exception {
|
||||
System.out.println("Reading 'Blocks'");
|
||||
BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, false);
|
||||
BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1);
|
||||
String line = "";
|
||||
try {
|
||||
String[] parts = new String[20];
|
||||
|
@ -376,7 +376,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
}
|
||||
String tempVersion = version;
|
||||
if (version.equals(UCD.latestVersion)) tempVersion = "";
|
||||
BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, false);
|
||||
BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, Utility.LATIN1);
|
||||
if (input == null) {
|
||||
System.out.println("COULDN'T OPEN: " + labels[0]);
|
||||
return;
|
||||
|
@ -834,7 +834,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true);
|
||||
|
||||
} else if (fieldName.equals("ea")) {
|
||||
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EA, true);
|
||||
uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true);
|
||||
} else if (fieldName.equals("lb")) {
|
||||
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true);
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
|
||||
* $Date: 2002/07/30 09:56:41 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -63,7 +63,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
out.println("# CaseFolding" + GenerateData.getFileSuffix(false));
|
||||
out.println(GenerateData.generateDateLine());
|
||||
out.println("#");
|
||||
Utility.appendFile("CaseFoldingHeader.txt", false, out);
|
||||
Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out);
|
||||
|
||||
/*
|
||||
PrintWriter out = new PrintWriter(
|
||||
|
@ -561,7 +561,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
out.println("# SpecialCasing" + GenerateData.getFileSuffix(false));
|
||||
out.println(GenerateData.generateDateLine());
|
||||
out.println("#");
|
||||
Utility.appendFile("SpecialCasingHeader.txt", true, out);
|
||||
Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);
|
||||
|
||||
Iterator it = sorted.keySet().iterator();
|
||||
int lastOrder = -1;
|
||||
|
@ -584,7 +584,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
case 3: out.println("# Ligatures"); break;
|
||||
case 4: skipLine = true; break;
|
||||
case 5: out.println("# No corresponding uppercase precomposed character"); break;
|
||||
case 6: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
|
||||
case 6: Utility.appendFile("SpecialCasingIota.txt", Utility.UTF8, out); break;
|
||||
case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
|
||||
case 8: skipLine = true; break;
|
||||
}
|
||||
|
@ -592,7 +592,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
}
|
||||
out.println(line);
|
||||
}
|
||||
Utility.appendFile("SpecialCasingFooter.txt", true, out);
|
||||
Utility.appendFile("SpecialCasingFooter.txt", Utility.UTF8, out);
|
||||
out.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile));
|
||||
}
|
||||
|
|
94
tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java
Normal file
94
tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java
Normal file
|
@ -0,0 +1,94 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java,v $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
abstract public class GenerateCaseTest implements UCD_Types {
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61");
|
||||
Default.setUCD();
|
||||
|
||||
PrintWriter out = Utility.openPrintWriter("CaseTest.txt", Utility.UTF8_WINDOWS);
|
||||
|
||||
out.println("# CaseTest");
|
||||
out.println("# Generated: " + Default.getDate() + ", MED");
|
||||
Utility.appendFile("CaseTestHeader.txt", Utility.LATIN1, out);
|
||||
|
||||
for (int cp = 0; cp < 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
if (!Default.ucd.isAllocated(cp)) continue;
|
||||
if (Default.ucd.isHangulSyllable(cp)) continue;
|
||||
byte cat = Default.ucd.getCategory(cp);
|
||||
if (cp == PRIVATE_USE) continue;
|
||||
|
||||
String lower = Default.ucd.getCase(cp, FULL, LOWER);
|
||||
String upper = Default.ucd.getCase(cp, FULL, UPPER);
|
||||
String title = Default.ucd.getCase(cp, FULL, TITLE);
|
||||
String fold = Default.ucd.getCase(cp, FULL, FOLD);
|
||||
if (lower.equals(upper)
|
||||
&& lower.equals(title)
|
||||
&& lower.equals(fold)) continue;
|
||||
|
||||
String s = UTF16.valueOf(cp);
|
||||
write(out, s, true);
|
||||
|
||||
// if (cp == '\u0345') continue; // don't add combining for this special case
|
||||
|
||||
s = s + testChar;
|
||||
|
||||
String s2 = Default.nfd.normalize(s);
|
||||
|
||||
String lower1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, LOWER));
|
||||
String upper1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, UPPER));
|
||||
String title1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, TITLE));
|
||||
String fold1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, FOLD));
|
||||
|
||||
if (lower1.equals(Default.nfc.normalize(lower+testChar))
|
||||
&& upper1.equals(Default.nfc.normalize(upper+testChar))
|
||||
&& title1.equals(Default.nfc.normalize(title+testChar))
|
||||
&& fold1.equals(Default.nfc.normalize(fold+testChar))
|
||||
) continue;
|
||||
|
||||
write(out, s, true);
|
||||
}
|
||||
out.println("# total lines: " + counter);
|
||||
out.close();
|
||||
}
|
||||
|
||||
static final char testChar = '\u0316';
|
||||
static int counter = 0;
|
||||
|
||||
static void write(PrintWriter out, String ss, boolean doComment) {
|
||||
String s = Default.nfd.normalize(ss);
|
||||
String lower = Default.nfc.normalize(Default.ucd.getCase(s, FULL, LOWER));
|
||||
String upper = Default.nfc.normalize(Default.ucd.getCase(s, FULL, UPPER));
|
||||
String title = Default.nfc.normalize(Default.ucd.getCase(s, FULL, TITLE));
|
||||
String fold = Default.nfc.normalize(Default.ucd.getCase(s, FULL, FOLD));
|
||||
out.println(Utility.hex(ss) + "; "
|
||||
+ Utility.hex(lower) + "; "
|
||||
+ Utility.hex(upper) + "; "
|
||||
+ Utility.hex(title) + "; "
|
||||
+ Utility.hex(fold)
|
||||
+ (doComment ? "\t# " + Default.ucd.getName(ss) : "")
|
||||
);
|
||||
counter++;
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2002/07/30 09:56:41 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -545,6 +545,10 @@ public class GenerateData implements UCD_Types {
|
|||
if (i == (BINARY_PROPERTIES | CaseFoldTurkishI)) continue;
|
||||
if (i == (BINARY_PROPERTIES | Non_break)) continue;
|
||||
|
||||
if (type == NUMERIC_TYPE) {
|
||||
//System.out.println("debug");
|
||||
}
|
||||
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
|
@ -587,8 +591,9 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
valueAbb = up.getValue(SHORT);
|
||||
if (valueAbb.length() == 0) valueAbb = "n/a";
|
||||
valueAbb = Utility.getUnskeleton(valueAbb, false);
|
||||
if (valueAbb.length() == 0) valueAbb = "n/a";
|
||||
//else if (valueAbb.equals(value)) valueAbb = "n/a";
|
||||
|
||||
|
||||
if (type == COMBINING_CLASS) {
|
||||
|
@ -643,6 +648,13 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
UCD.BlockData blockData = new UCD.BlockData();
|
||||
|
||||
int blockId = 0;
|
||||
while (Default.ucd.getBlockData(blockId++, blockData)) {
|
||||
addLine(sorted, "blk", "n/a", blockData.name);
|
||||
}
|
||||
|
||||
String filename = "PropertyAliases";
|
||||
String newFile = "DerivedData/" + filename + getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
|
@ -651,7 +663,7 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# " + filename + getFileSuffix(false));
|
||||
log.println(generateDateLine());
|
||||
log.println("#");
|
||||
Utility.appendFile("PropertyAliasHeader.txt", false, log);
|
||||
Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
Utility.print(log, sorted, "\r\n", new MyBreaker(true));
|
||||
|
@ -667,7 +679,7 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# " + filename + getFileSuffix(false));
|
||||
log.println(generateDateLine());
|
||||
log.println("#");
|
||||
Utility.appendFile("PropertyValueAliasHeader.txt", false, log);
|
||||
Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
Utility.print(log, sorted, "\r\n", new MyBreaker(false));
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
|
||||
* $Date: 2002/08/04 21:38:45 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -45,7 +45,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS);
|
||||
log.println("<body>");
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, true);
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
|
||||
|
||||
Map properties = new TreeMap();
|
||||
|
||||
|
@ -502,7 +502,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
|
||||
if (type == CHINESE) {
|
||||
System.out.println("Reading chinese_frequency.txt");
|
||||
br = Utility.openReadFile(BASE_DIR + "dict\\chinese_frequency.txt", true);
|
||||
br = Utility.openReadFile(BASE_DIR + "dict\\chinese_frequency.txt", Utility.UTF8);
|
||||
counter = 0;
|
||||
while (true) {
|
||||
line = Utility.readDataLine(br);
|
||||
|
@ -521,7 +521,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
if (type == JAPANESE) {
|
||||
System.out.println("Reading japanese_frequency.txt");
|
||||
|
||||
br = Utility.openReadFile( BASE_DIR + "dict\\japanese_frequency.txt", true);
|
||||
br = Utility.openReadFile( BASE_DIR + "dict\\japanese_frequency.txt", Utility.UTF8);
|
||||
Map japaneseMap = new HashMap();
|
||||
while (true) {
|
||||
line = Utility.readDataLine(br);
|
||||
|
@ -704,7 +704,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
if (type == JAPANESE) fname = "edict.txt";
|
||||
|
||||
System.out.println("Reading " + fname);
|
||||
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, true);
|
||||
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, Utility.UTF8);
|
||||
int counter = 0;
|
||||
String[] pieces = new String[50];
|
||||
String line = "";
|
||||
|
@ -751,7 +751,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
String fname = "Chinese_override.txt";
|
||||
|
||||
System.out.println("Reading " + fname);
|
||||
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, true);
|
||||
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, Utility.UTF8);
|
||||
int counter = 0;
|
||||
String[] pieces = new String[50];
|
||||
String line = "";
|
||||
|
@ -997,7 +997,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
|
||||
static void readCDICT() throws IOException {
|
||||
System.out.println("Reading cdict.txt");
|
||||
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\cdict.txt", true);
|
||||
BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\cdict.txt", Utility.UTF8);
|
||||
int counter = 0;
|
||||
String[] pieces = new String[50];
|
||||
String line = "";
|
||||
|
@ -1075,7 +1075,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
|
||||
static void readUnihanData(String key) throws java.io.IOException {
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, true);
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
|
||||
|
||||
int count = 0;
|
||||
int lineCounter = 0;
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
import java.io.*;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.text.UnicodeSet;
|
||||
import java.util.*;
|
||||
|
||||
public class GenerateThaiBreaks {
|
||||
public static void main(String [] args) throws IOException {
|
||||
|
||||
BufferedReader br = new BufferedReader(
|
||||
new InputStreamReader(
|
||||
new FileInputStream("\\icu4j\\src\\data\\thai6.ucs"), "UnicodeLittle"));
|
||||
try {
|
||||
Main.setUCD();
|
||||
UnicodeSet ignorables = new UnicodeSet("[:M:]");
|
||||
ignorables.retain(0x0E00, 0x0E7F); // just Thai block
|
||||
ignorables.add(0x0E40, 0x0E44); // add logical order exception
|
||||
ignorables.add(0, ' '); // add controls
|
||||
ignorables.add('.');
|
||||
|
||||
UnicodeSet initials = new UnicodeSet();
|
||||
UnicodeSet finals = new UnicodeSet();
|
||||
UnicodeSet medials = new UnicodeSet();
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
int end;
|
||||
|
||||
// find final consonant
|
||||
for (int i = line.length() - 1; ; --i) {
|
||||
char c = line.charAt(i);
|
||||
if (!ignorables.contains(c)) {
|
||||
finals.add(c);
|
||||
end = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
boolean haveFirst = false;
|
||||
for (int i = 0; i < end; ++i) {
|
||||
char c = line.charAt(i);
|
||||
if (ignorables.contains(c)) continue;
|
||||
if (!haveFirst) {
|
||||
initials.add(c);
|
||||
haveFirst = true;
|
||||
} else {
|
||||
medials.add(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
initials.removeAll(medials);
|
||||
finals.removeAll(medials);
|
||||
Utility.showSetNames("initials: ", initials, false, Main.ucd);
|
||||
Utility.showSetNames("finals: ", finals, false, Main.ucd);
|
||||
Utility.showSetNames("medials: ", medials, false, Main.ucd);
|
||||
} finally {
|
||||
br.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $
|
||||
* $Date: 2002/08/08 15:38:16 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -65,7 +65,7 @@ public class IANANames implements UCD_Types {
|
|||
}
|
||||
|
||||
public IANANames() throws IOException {
|
||||
BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", false);
|
||||
BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", Utility.LATIN1);
|
||||
try {
|
||||
boolean atStart = true;
|
||||
String lastName = "";
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2002/10/01 01:19:16 $
|
||||
* $Revision: 1.24 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.25 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -73,6 +73,8 @@ public final class Main implements UCD_Types {
|
|||
|
||||
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
|
||||
|
||||
else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
|
||||
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
|
||||
|
||||
|
|
|
@ -34,4 +34,4 @@
|
|||
# In addition, some property names may be the same as some property value names.
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
# For more information, see UTR #24: Regular Expression Guidelines
|
||||
# For more information, see UTR #18: Regular Expression Guidelines
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
# and '_' are ignored.
|
||||
#
|
||||
# NOTE: The Block property values are in Blocks.txt, and not repeated here.
|
||||
# For more information on the use of blocks, see UTR #24: Regular Expression Guidelines
|
||||
# For more information on the use of blocks, see UTR #18: Regular Expression Guidelines
|
||||
#
|
||||
# NOTE: Currently there is at most one abbreviated name and one long name for
|
||||
# property value. However, in the future additional aliases
|
||||
|
|
103
tools/unicodetools/com/ibm/text/UCD/QuickTest.java
Normal file
103
tools/unicodetools/com/ibm/text/UCD/QuickTest.java
Normal file
|
@ -0,0 +1,103 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
public class QuickTest implements UCD_Types {
|
||||
static final void test() {
|
||||
Default.setUCD();
|
||||
/*
|
||||
[4] NameStartChar := ":" | [A-Z] | "_" | [a-z] |
|
||||
[#xC0 - #x2FF] | [#x370 - #x37D] | [#x37F - #x1FFF] |
|
||||
[#x200C - #x200D] | [#x2070 - #x218F] | [#x2C00 - #x2FEF] |
|
||||
[#x3001 - #xD7FF] | [#xF900 - #xF9FF] | [#x10000 - #xDFFFF]
|
||||
|
||||
[4a] NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F]
|
||||
*/
|
||||
UnicodeSet nameStartChar = new UnicodeSet("[\\: A-Z \\_ a-z"
|
||||
+ "\\u00c0-\\u02FF \\u0370-\\u037D \\u037F-\\u1FFF"
|
||||
+ "\\u200C-\\u200D \\u2070-\\u218F \\u2C00-\\u2FEF"
|
||||
+ "\\u3001-\\uD7FF \\uF900-\\uF9FF \\U00010000-\\U000DFFFF]");
|
||||
|
||||
UnicodeSet nameChar = new UnicodeSet("[\\- \\. 0-9 \\u00B7 \\u0300-\\u036F]")
|
||||
.addAll(nameStartChar);
|
||||
|
||||
showSet("NameStartChar", nameStartChar);
|
||||
showDiffs("NameChar", nameChar, "NameStartChar", nameStartChar);
|
||||
|
||||
|
||||
UnicodeSet defaultIgnorable = UnifiedBinaryProperty.make(DERIVED | DefaultIgnorable).getSet();
|
||||
UnicodeSet whitespace = UnifiedBinaryProperty.make(BINARY_PROPERTIES | White_space).getSet();
|
||||
|
||||
UnicodeSet notNFKC = new UnicodeSet();
|
||||
UnicodeSet privateUse = new UnicodeSet();
|
||||
UnicodeSet noncharacter = new UnicodeSet();
|
||||
UnicodeSet format = new UnicodeSet("[:Cf:]");
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isAllocated(i)) continue;
|
||||
if (!Default.nfkc.isNormalized(i)) notNFKC.add(i);
|
||||
if (Default.ucd.isNoncharacter(i)) noncharacter.add(i);
|
||||
if (Default.ucd.getCategory(i) == PRIVATE_USE) privateUse.add(i);
|
||||
}
|
||||
|
||||
showSet("notNFKC in NameChar", new UnicodeSet(notNFKC).retainAll(nameChar));
|
||||
showSet("notNFKC outside of NameChar", new UnicodeSet(notNFKC).removeAll(nameChar));
|
||||
|
||||
showSet("Whitespace in NameChar", new UnicodeSet(nameChar).retainAll(whitespace));
|
||||
showSet("Whitespace not in NameChar", new UnicodeSet(whitespace).removeAll(nameChar));
|
||||
|
||||
|
||||
showSet("Noncharacters in NameChar", new UnicodeSet(noncharacter).retainAll(noncharacter));
|
||||
showSet("Noncharacters outside of NameChar", new UnicodeSet(noncharacter).removeAll(nameChar));
|
||||
|
||||
showSet("Format in NameChar", new UnicodeSet(nameChar).retainAll(format));
|
||||
showSet("Other Default_Ignorables in NameChar", new UnicodeSet(defaultIgnorable).removeAll(format).retainAll(nameChar));
|
||||
showSet("PrivateUse in NameChar", new UnicodeSet(defaultIgnorable).retainAll(privateUse));
|
||||
|
||||
UnicodeSet CID_Start = new UnicodeSet("[:ID_Start:]").removeAll(notNFKC);
|
||||
UnicodeSet CID_Continue = new UnicodeSet("[:ID_Continue:]")
|
||||
.removeAll(notNFKC).removeAll(format);
|
||||
|
||||
UnicodeSet CID_Continue_extras = new UnicodeSet(CID_Continue).removeAll(CID_Start);
|
||||
|
||||
showDiffs("NoK_ID_Start", CID_Start, "NameStartChar", nameStartChar);
|
||||
showDiffs("NoK_ID_Continue_Extras", CID_Continue_extras, "NameChar", nameChar);
|
||||
|
||||
System.out.println("Removing canonical singletons");
|
||||
}
|
||||
|
||||
static void showDiffs(String title1, UnicodeSet set1, String title2, UnicodeSet set2) {
|
||||
showSet(title1 + " - " + title2, new UnicodeSet(set1).removeAll(set2));
|
||||
}
|
||||
|
||||
static void showSet(String title1, UnicodeSet set1) {
|
||||
System.out.println();
|
||||
System.out.println(title1);
|
||||
if (set1.size() == 0) {
|
||||
System.out.println("\tNONE");
|
||||
return;
|
||||
}
|
||||
System.out.println("\tCount:" + set1.size());
|
||||
System.out.println("\tSet:" + set1.toPattern(true));
|
||||
System.out.println("\tDetails:");
|
||||
Utility.showSetNames("", set1, false, Default.ucd);
|
||||
}
|
||||
}
|
566
tools/unicodetools/com/ibm/text/UCD/TernaryStore.java
Normal file
566
tools/unicodetools/com/ibm/text/UCD/TernaryStore.java
Normal file
|
@ -0,0 +1,566 @@
|
|||
package com.ibm.text.UCD;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.text.utility.*;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
// Enumerated properties will be IntCodePointProperty.
|
||||
// The string values they return will be the property value names.
|
||||
// Binary properties are Enumerated properties. They return 0 or 1
|
||||
|
||||
public final class TernaryStore {
|
||||
|
||||
static final int DONE = Integer.MIN_VALUE;
|
||||
static final int NOT_FOUND = Integer.MIN_VALUE+1;
|
||||
|
||||
// for testing
|
||||
static DepthPrinter dp;
|
||||
|
||||
static void test() throws java.io.IOException {
|
||||
Default.setUCD();
|
||||
|
||||
PrintWriter pw = Utility.openPrintWriter("TestTernary.txt", Utility.LATIN1_WINDOWS);
|
||||
try {
|
||||
dp = new DepthPrinter(pw);
|
||||
|
||||
String[] tests = {"the", "quick", "fish", "fisherman", "fishes",
|
||||
"brown", "brow", "bracket", "bright", "brat",
|
||||
"brough", "dogs", "upper", "zebra",
|
||||
"fisher"};
|
||||
test("Simple: ", tests, tests.length);
|
||||
|
||||
|
||||
tests = new String[300000];
|
||||
int counter = 0;
|
||||
int i;
|
||||
for (i = 0; counter < tests.length && i <= 0x10FFFF; ++i) {
|
||||
if (Default.ucd.hasComputableName(i)) continue;
|
||||
|
||||
String temp = UCharacter.getName(i);
|
||||
if (temp != null) tests[counter++] = temp.trim();
|
||||
}
|
||||
System.out.println("max-cp: " + Utility.hex(i));
|
||||
test("Unicode Names: ", tests, counter);
|
||||
|
||||
//if (true) return;
|
||||
|
||||
BufferedReader br = Utility.openReadFile(UCD_Types.BASE_DIR + "dict\\DiploFreq.txt", Utility.LATIN1);
|
||||
String line;
|
||||
counter = 0;
|
||||
while (counter < tests.length) {
|
||||
line = Utility.readDataLine(br);
|
||||
if (line == null) break;
|
||||
if (line.length() == 0) continue;
|
||||
Utility.dot(counter);
|
||||
int tabPos = line.indexOf('\t');
|
||||
if (tabPos < 0) {
|
||||
System.out.println("???" + line);
|
||||
continue;
|
||||
}
|
||||
tests[counter++] = line.substring(tabPos+1);
|
||||
}
|
||||
test("French: ", tests, counter);
|
||||
} finally {
|
||||
pw.close();
|
||||
}
|
||||
}
|
||||
|
||||
static void test(String title, String[] tests, int len) {
|
||||
System.out.println();
|
||||
System.out.println(title);
|
||||
dp.println();
|
||||
dp.print(title, 0);
|
||||
dp.println();
|
||||
TernaryStore.Builder builder = new TernaryStore.Builder();
|
||||
int charCount = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
builder.add(tests[i], i);
|
||||
charCount += tests[i].length();
|
||||
}
|
||||
System.out.println("charCount: " + charCount);
|
||||
TernaryStore store = builder.build();
|
||||
store.showNodes();
|
||||
store.checkNodes();
|
||||
|
||||
dp.println("Storage");
|
||||
dp.println(store.stringStore.toString());
|
||||
System.out.println("StorageSize: " + store.stringStore.toString().length());
|
||||
|
||||
Matcher matcher = store.getMatcher();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int check = test(tests[i], matcher);
|
||||
if (check != i) {
|
||||
System.out.println("\tFail, result: " + tests[i] + ", " + check);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int test(String s, Matcher matcher) {
|
||||
matcher.reset(s, 0);
|
||||
int lastResult = -1;
|
||||
for (int result = matcher.next(); result != DONE; result = matcher.next()) {
|
||||
lastResult = result;
|
||||
}
|
||||
return lastResult;
|
||||
}
|
||||
|
||||
static final class Node {
|
||||
String getString(StringStore stringStore) {
|
||||
if (stringCode < 0) return tempString;
|
||||
return stringStore.get(stringCode);
|
||||
}
|
||||
void setString(String s) {
|
||||
tempString = s;
|
||||
}
|
||||
String tempString;
|
||||
int stringCode = -1;
|
||||
Node less;
|
||||
Node greater;
|
||||
Node next;
|
||||
int result = NOT_FOUND;
|
||||
|
||||
public String toString(StringStore store) {
|
||||
return getString(store)
|
||||
+ (result != NOT_FOUND ? "(" + result + ")" : "")
|
||||
+ (next != null ? next.toString() : "");
|
||||
}
|
||||
}
|
||||
|
||||
Node base;
|
||||
StringStore stringStore = new StringStore();
|
||||
|
||||
final static class Matcher {
|
||||
TernaryStore store;
|
||||
String s;
|
||||
int position;
|
||||
Node lastNode;
|
||||
|
||||
void reset(String s, int position) {
|
||||
this.s = s;
|
||||
this.position = position;
|
||||
this.lastNode = store.base;
|
||||
}
|
||||
|
||||
// returns the next result
|
||||
// or DONE when done
|
||||
// sets position to point after end of found string
|
||||
|
||||
int next() {
|
||||
while (lastNode != null && position < s.length()) {
|
||||
char ch = s.charAt(position++);
|
||||
do {
|
||||
String nodeString = lastNode.getString(store.stringStore);
|
||||
char first = nodeString.charAt(0);
|
||||
if (ch == first) {
|
||||
// now check the rest of the string
|
||||
for (int i = 1; i < nodeString.length(); ++i) {
|
||||
char other = nodeString.charAt(i);
|
||||
if (other != s.charAt(position++)) {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
// if we succeed, return result if there is one
|
||||
int result = lastNode.result;
|
||||
lastNode = lastNode.next;
|
||||
if (result != NOT_FOUND) return result;
|
||||
break; // get next char
|
||||
}
|
||||
// otherwise branch sideways, keeping same char
|
||||
if (ch > first) {
|
||||
lastNode = lastNode.greater;
|
||||
} else {
|
||||
lastNode = lastNode.less;
|
||||
}
|
||||
} while (lastNode != null);
|
||||
}
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
public Matcher getMatcher() {
|
||||
Matcher result = new Matcher();
|
||||
result.store = this;
|
||||
return result;
|
||||
}
|
||||
|
||||
public void showNodes() {
|
||||
showNodes2(base, "", 5);
|
||||
}
|
||||
|
||||
public void showNodes2(Node n, String path, int depth) {
|
||||
if (n.less != null) {
|
||||
showNodes2(n.less, path+"-", depth);
|
||||
}
|
||||
dp.print("", depth);
|
||||
if (false) dp.print(path);
|
||||
dp.print(n.getString(stringStore));
|
||||
if (n.result != NOT_FOUND) dp.print("/" + n.result);
|
||||
dp.println();
|
||||
if (n.next != null) {
|
||||
showNodes2(n.next, path+".", depth+n.getString(stringStore).length());
|
||||
}
|
||||
if (n.greater != null) {
|
||||
showNodes2(n.greater, path+"+", depth);
|
||||
}
|
||||
}
|
||||
|
||||
static class NodeInfo {
|
||||
int nodeCount;
|
||||
int resultCount;
|
||||
int nullLessCount;
|
||||
int nullGreaterCount;
|
||||
int nullSimpleCount;
|
||||
int nullNextCount;
|
||||
}
|
||||
|
||||
public void checkNodes() {
|
||||
NodeInfo nodeInfo = new NodeInfo();
|
||||
checkNodes(base, nodeInfo);
|
||||
System.out.println("Nodes: " + nodeInfo.nodeCount);
|
||||
System.out.println("nullLessCount: " + nodeInfo.nullLessCount);
|
||||
System.out.println("nullGreaterCount: " + nodeInfo.nullGreaterCount);
|
||||
System.out.println("nullNextCount: " + nodeInfo.nullNextCount);
|
||||
System.out.println("resultCount: " + nodeInfo.resultCount);
|
||||
System.out.println("nullSimpleCount: " + nodeInfo.nullSimpleCount);
|
||||
}
|
||||
|
||||
public void checkNodes(Node n, NodeInfo nodeInfo) {
|
||||
nodeInfo.nodeCount++;
|
||||
if (n.result != NOT_FOUND) nodeInfo.resultCount++;
|
||||
if (n.less != null) {
|
||||
checkNodes(n.less, nodeInfo);
|
||||
} else {
|
||||
nodeInfo.nullLessCount++;
|
||||
if (n.greater == null && n.result == NOT_FOUND) nodeInfo.nullSimpleCount++;
|
||||
}
|
||||
if (n.next != null) {
|
||||
checkNodes(n.next, nodeInfo);
|
||||
} else {
|
||||
nodeInfo.nullNextCount++;
|
||||
}
|
||||
if (n.greater != null) {
|
||||
checkNodes(n.greater, nodeInfo);
|
||||
} else {
|
||||
nodeInfo.nullGreaterCount++;
|
||||
}
|
||||
}
|
||||
|
||||
final static class DepthPrinter {
|
||||
private PrintWriter pw;
|
||||
private int currentDepth = 0;
|
||||
private String leader = ".";
|
||||
|
||||
DepthPrinter(PrintWriter pw) {
|
||||
this.pw = pw;
|
||||
}
|
||||
|
||||
void print(char ch) {
|
||||
print(ch, 0);
|
||||
}
|
||||
|
||||
void print(String s) {
|
||||
print(s, 0);
|
||||
}
|
||||
|
||||
void print(char ch, int depth) {
|
||||
print(String.valueOf(ch), depth);
|
||||
}
|
||||
|
||||
void print(String s, int depth) {
|
||||
int delta = depth - currentDepth;
|
||||
if (delta > 0) {
|
||||
pw.print(Utility.repeat(leader, delta - 1));
|
||||
currentDepth = depth;
|
||||
}
|
||||
pw.print(s);
|
||||
currentDepth += s.length();
|
||||
}
|
||||
|
||||
void println() {
|
||||
pw.println();
|
||||
currentDepth = 0;
|
||||
}
|
||||
|
||||
void println(String s) {
|
||||
pw.print(s);
|
||||
pw.println();
|
||||
currentDepth = 0;
|
||||
}
|
||||
}
|
||||
|
||||
final static class StringStore {
|
||||
// initially, there is a simple strategy
|
||||
|
||||
private String buffer = "";
|
||||
private static final char TERMINATOR = '\u007E';
|
||||
private static final int PIECE_LENGTH = 5;
|
||||
private static String[] pieces = new String[50]; // HACK
|
||||
private static Set strings = new HashSet();
|
||||
|
||||
public void add(String s) {
|
||||
strings.add(s);
|
||||
}
|
||||
|
||||
public void compact() {
|
||||
System.out.println("Adding Pieces");
|
||||
// add all the pieces
|
||||
Iterator it = strings.iterator();
|
||||
Set additions = new HashSet();
|
||||
while (it.hasNext()) {
|
||||
String s = (String)it.next();
|
||||
int len = Utility.split(s, ' ', pieces);
|
||||
for (int i = 0; i < len; ++i) {
|
||||
additions.add(pieces[i]);
|
||||
}
|
||||
}
|
||||
|
||||
store(additions);
|
||||
store(strings);
|
||||
}
|
||||
|
||||
private void store(Set stuff) {
|
||||
System.out.println("Sorting");
|
||||
// sort them by length, longest first
|
||||
Set ordered = new TreeSet();
|
||||
Iterator it = stuff.iterator();
|
||||
while (it.hasNext()) {
|
||||
String s = (String)it.next();
|
||||
ordered.add(new Pair(new Integer(-s.length()), s));
|
||||
}
|
||||
System.out.println("Storing");
|
||||
// add them
|
||||
it = ordered.iterator();
|
||||
while (it.hasNext()) {
|
||||
String s = (String)(((Pair)it.next()).second);
|
||||
get(s);
|
||||
}
|
||||
}
|
||||
|
||||
private int get(String s) {
|
||||
System.out.println("Adding: \'" + s + "\'");
|
||||
int index;
|
||||
if (s.indexOf(' ') < 0) {
|
||||
index = addNoSplit(s);
|
||||
System.out.println("\tReturning: " + index);
|
||||
return index;
|
||||
}
|
||||
int len = Utility.split(s, ' ', pieces);
|
||||
StringBuffer itemCodes = new StringBuffer();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String piece = pieces[i];
|
||||
itemCodes.append((char)addNoSplit(piece));
|
||||
/*for (int j = 0; j < piece.length(); j += PIECE_LENGTH) {
|
||||
int maxLen = j + PIECE_LENGTH;
|
||||
if (maxLen > piece.length()) maxLen = piece.length();
|
||||
itemCodes.append((char)addNoSplit(piece.substring(j, maxLen)));
|
||||
}*/
|
||||
}
|
||||
index = 0x8000 | addNoSplit(itemCodes.toString()); // mark it as composite
|
||||
System.out.println("\tReturning: " + index);
|
||||
return index;
|
||||
}
|
||||
|
||||
private int addNoSplit(String s) {
|
||||
System.out.println("\tAdding2: \'" + s + "\'");
|
||||
String sTerm = s + TERMINATOR;
|
||||
int index = buffer.indexOf(sTerm);
|
||||
if (index >= 0) return index;
|
||||
|
||||
index = buffer.length();
|
||||
buffer += sTerm;
|
||||
System.out.println("\t\tReturning2: " + index);
|
||||
return index;
|
||||
}
|
||||
|
||||
public String get(int index) {
|
||||
String result;
|
||||
System.out.println("Fetching: " + index);
|
||||
|
||||
if ((index & 0x8000) == 0) {
|
||||
int end = buffer.indexOf(TERMINATOR, index);
|
||||
result = buffer.substring(index, end);
|
||||
System.out.println("\tReturning: '" + result + "'");
|
||||
return result;
|
||||
}
|
||||
index &= ~0x8000; // remove 1 bit
|
||||
|
||||
int end = buffer.indexOf(TERMINATOR, index);
|
||||
result = "";
|
||||
for (int i = index; i < end; ++i) {
|
||||
if (result.length() != 0) result += " ";
|
||||
result += get(buffer.charAt(i));
|
||||
}
|
||||
System.out.println("\tReturning: '" + result + "'");
|
||||
return result;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final static class Builder {
|
||||
Map map = new TreeMap();
|
||||
String[] names;
|
||||
TernaryStore store;
|
||||
Set set = new TreeSet();
|
||||
|
||||
public void add(String name, int result) {
|
||||
map.put(name, new Integer(result));
|
||||
}
|
||||
|
||||
public TernaryStore build() {
|
||||
// flatten strings into array
|
||||
names = new String[map.size()];
|
||||
Iterator it = map.keySet().iterator();
|
||||
int count = 0;
|
||||
while (it.hasNext()) {
|
||||
names[count++] = (String) it.next();
|
||||
if (false) {
|
||||
dp.print((count-1) + " " + names[count-1]);
|
||||
dp.println();
|
||||
}
|
||||
}
|
||||
|
||||
// build nodes
|
||||
store = new TernaryStore();
|
||||
addNode(0, names.length);
|
||||
|
||||
// free storage
|
||||
names = null;
|
||||
map.clear();
|
||||
|
||||
System.out.println("compacting");
|
||||
compactStore(store.base);
|
||||
store.stringStore.compact();
|
||||
|
||||
//compactStrings(store);
|
||||
//set.clear(); // free more storage
|
||||
|
||||
replaceStrings(store.base);
|
||||
//map.clear(); // free storage
|
||||
|
||||
// free storage
|
||||
TernaryStore result = store;
|
||||
store = null;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
void compactStrings(TernaryStore t) {
|
||||
// we have a set of Pairs, first is length, second is string
|
||||
// compact them, word by word
|
||||
Iterator it = set.iterator();
|
||||
while (it.hasNext()) {
|
||||
String string = ((String)((Pair)it.next()).second);
|
||||
int index = t.stringStore.add(string);
|
||||
if (true) {
|
||||
System.out.println("Checking: " + index);
|
||||
String reverse = t.stringStore.get(index);
|
||||
if (!reverse.equals(string)) {
|
||||
System.out.println("source: \'" + string + "\'");
|
||||
System.out.println("reverse: \'" + reverse + "\'");
|
||||
throw new IllegalArgumentException("Failed roundtrip");
|
||||
}
|
||||
}
|
||||
|
||||
map.put(string, new Integer(index));
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
public void replaceStrings(Node n) {
|
||||
n.stringCode = store.stringStore.get(n.getString(store.stringStore));
|
||||
n.setString(null);
|
||||
if (n.less != null) replaceStrings(n.less);
|
||||
if (n.next != null) replaceStrings(n.next);
|
||||
if (n.greater != null) replaceStrings(n.greater);
|
||||
}
|
||||
|
||||
public void compactStore(Node n) {
|
||||
Node nextNode = n.next;
|
||||
if (false) dp.println(n.toString());
|
||||
while (n.result == NOT_FOUND && nextNode != null && nextNode.greater == null
|
||||
&& nextNode.less == null) {
|
||||
n.setString(n.getString(store.stringStore) + nextNode.getString(store.stringStore));
|
||||
n.result = nextNode.result;
|
||||
n.next = nextNode = nextNode.next; // remove old node
|
||||
}
|
||||
// add strings sorted by length, longest first
|
||||
store.stringStore.add(n.getString(store.stringStore));
|
||||
|
||||
if (n.less != null) compactStore(n.less);
|
||||
if (n.next != null) compactStore(n.next);
|
||||
if (n.greater != null) compactStore(n.greater);
|
||||
}
|
||||
|
||||
private void addNode(int start, int limit) {
|
||||
if (start >= limit) return;
|
||||
int mid = (start + limit) / 2;
|
||||
//System.out.println("start: " + start + ", mid: " + mid + ", limit: " + limit);
|
||||
//System.out.println("adding: " + names[mid]);
|
||||
addNode(names[mid], ((Integer)map.get(names[mid])).intValue());
|
||||
addNode(start, mid);
|
||||
addNode(mid+1, limit);
|
||||
}
|
||||
|
||||
private void addNode(String s, int result) {
|
||||
if (store.base == null) {
|
||||
store.base = addRest(s, 0, result);
|
||||
return;
|
||||
}
|
||||
Node n = store.base;
|
||||
Node lastNode = n;
|
||||
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
char ch = s.charAt(i);
|
||||
while (true) {
|
||||
char first = n.getString(store.stringStore).charAt(0);
|
||||
if (ch == first) {
|
||||
if (n.next == null) {
|
||||
n.next = addRest(s, i+1, result);
|
||||
return;
|
||||
}
|
||||
lastNode = n;
|
||||
n = n.next;
|
||||
break; // get next char
|
||||
}
|
||||
// otherwise branch sideways, keeping same char
|
||||
if (ch > first) {
|
||||
if (n.greater == null) {
|
||||
n.greater = addRest(s, i, result);
|
||||
return;
|
||||
}
|
||||
n = n.greater;
|
||||
} else {
|
||||
if (n.less == null) {
|
||||
n.less = addRest(s, i, result);
|
||||
return;
|
||||
}
|
||||
n = n.less;
|
||||
}
|
||||
}
|
||||
}
|
||||
lastNode.result = result;
|
||||
}
|
||||
|
||||
private Node addRest(String s, int position, int result) {
|
||||
Node lastNode = null;
|
||||
for (int i = s.length() - 1; i >= position; --i) {
|
||||
Node n = new Node();
|
||||
n.setString(s.substring(i, i+1)); // + "" to force a new string
|
||||
if (lastNode == null) {
|
||||
n.result = result;
|
||||
}
|
||||
n.next = lastNode;
|
||||
lastNode = n;
|
||||
}
|
||||
return lastNode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5,21 +5,25 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2002/09/25 06:40:13 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.BitSet;
|
||||
import java.util.Map;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.BufferedReader;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
|
@ -31,7 +35,7 @@ public final class UCD implements UCD_Types {
|
|||
/**
|
||||
* Used for the default version.
|
||||
*/
|
||||
public static final String latestVersion = "3.2.0";
|
||||
public static final String latestVersion = "3.2.1";
|
||||
|
||||
/**
|
||||
* Create singleton instance for default (latest) version
|
||||
|
@ -651,7 +655,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getCategoryID_fromIndex(byte prop) {
|
||||
return UCD_Names.GC[prop];
|
||||
return getCategoryID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getCategoryID_fromIndex(byte prop, byte style) {
|
||||
|
@ -660,7 +664,7 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
|
||||
public String getCombiningClassID(int codePoint) {
|
||||
return getCombiningClassID_fromIndex(getCombiningClass(codePoint), NORMAL);
|
||||
return getCombiningClassID(codePoint, NORMAL);
|
||||
}
|
||||
|
||||
public String getCombiningClassID(int codePoint, byte style) {
|
||||
|
@ -681,9 +685,9 @@ public final class UCD implements UCD_Types {
|
|||
case 7: s = style < LONG ? "NK" : "Nukta"; break;
|
||||
case 8: s = style < LONG ? "KV" : "KanaVoicing"; break;
|
||||
case 9: s = style < LONG ? "VR" : "Virama"; break;
|
||||
case 202: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
|
||||
case 204: s = style < LONG ? "ATB" : "AttachedBelow"; break;
|
||||
case 206: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
|
||||
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
|
||||
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
|
||||
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
|
||||
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
|
||||
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
|
||||
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
|
||||
|
@ -734,7 +738,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getNumericTypeID_fromIndex(byte prop) {
|
||||
return UCD_Names.NT[prop];
|
||||
return getNumericTypeID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
|
||||
|
@ -746,7 +750,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getEastAsianWidthID_fromIndex(byte prop) {
|
||||
return UCD_Names.EA[prop];
|
||||
return getEastAsianWidthID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
|
||||
|
@ -758,7 +762,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getLineBreakID_fromIndex(byte prop) {
|
||||
return UCD_Names.LB[prop];
|
||||
return getLineBreakID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getLineBreakID_fromIndex(byte prop, byte style) {
|
||||
|
@ -770,7 +774,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getJoiningTypeID_fromIndex(byte prop) {
|
||||
return UCD_Names.JOINING_TYPE[prop];
|
||||
return getJoiningTypeID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getJoiningTypeID_fromIndex(byte prop, byte style) {
|
||||
|
@ -782,7 +786,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getJoiningGroupID_fromIndex(byte prop) {
|
||||
return UCD_Names.JOINING_GROUP[prop];
|
||||
return getJoiningGroupID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getJoiningGroupID_fromIndex(byte prop, byte style) {
|
||||
|
@ -795,7 +799,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getScriptID_fromIndex(byte prop) {
|
||||
return UCD_Names.SCRIPT[prop];
|
||||
return getScriptID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getScriptID_fromIndex(byte prop, byte length) {
|
||||
|
@ -808,7 +812,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getAgeID_fromIndex(byte prop) {
|
||||
return UCD_Names.AGE[prop];
|
||||
return getAgeID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
|
||||
public static String getAgeID_fromIndex(byte prop, byte style) {
|
||||
|
@ -1306,4 +1310,53 @@ to guarantee identifier closure.
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class BlockData {
|
||||
public int start;
|
||||
public int end;
|
||||
public String name;
|
||||
}
|
||||
|
||||
public boolean getBlockData(int blockId, BlockData output) {
|
||||
if (blocks == null) loadBlocks();
|
||||
BlockData temp;
|
||||
try {
|
||||
temp = (BlockData) blocks.get(blockId);
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
return false;
|
||||
}
|
||||
output.name = temp.name;
|
||||
output.start = temp.start;
|
||||
output.end = temp.end;
|
||||
return true;
|
||||
}
|
||||
|
||||
private List blocks = null;
|
||||
|
||||
private void loadBlocks() {
|
||||
blocks = new ArrayList();
|
||||
try {
|
||||
BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
|
||||
try {
|
||||
while (true) {
|
||||
// 0000..007F; Basic Latin
|
||||
String line = Utility.readDataLine(in);
|
||||
if (line == null) break;
|
||||
if (line.length() == 0) continue;
|
||||
int pos1 = line.indexOf('.');
|
||||
int pos2 = line.indexOf(';', pos1);
|
||||
|
||||
BlockData blockData = new BlockData();
|
||||
blockData.start = Integer.parseInt(line.substring(0, pos1), 16);
|
||||
blockData.end = Integer.parseInt(line.substring(pos1+2, pos2), 16);
|
||||
blockData.name = line.substring(pos2+1).trim().replace(' ', '_');
|
||||
blocks.add(blockData);
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("Can't read block file");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2002/06/13 21:14:05 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -201,11 +201,11 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
static final String[] YN_TABLE = {"N", "Y"};
|
||||
|
||||
static String[] EA = {
|
||||
static String[] SHORT_EA = {
|
||||
"N", "A", "H", "W", "F", "Na"
|
||||
};
|
||||
|
||||
static String[] SHORT_EA = {
|
||||
static String[] EA = {
|
||||
"Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
|
||||
};
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2002/08/04 21:38:45 $
|
||||
* $Revision: 1.15 $
|
||||
* $Date: 2002/10/05 01:28:58 $
|
||||
* $Revision: 1.16 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
|
|||
|
||||
public interface UCD_Types {
|
||||
|
||||
public static final int dVersion = 9; // change to fix the generated file D version. If less than zero, no "d"
|
||||
public static final int dVersion = 2; // change to fix the generated file D version. If less than zero, no "d"
|
||||
|
||||
public static final String BASE_DIR = "C:\\DATA\\";
|
||||
public static final String UCD_DIR = BASE_DIR + "UCD\\";
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
|
||||
* $Date: 2002/08/04 21:38:44 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2002/10/05 01:28:57 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -299,13 +299,13 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
|
|||
case COMBINING_CLASS>>8: return ucd.getCombiningClassID_fromIndex((byte)propValue, style);
|
||||
case BIDI_CLASS>>8: return ucd.getBidiClassID_fromIndex((byte)propValue, style);
|
||||
case DECOMPOSITION_TYPE>>8: return ucd.getDecompositionTypeID_fromIndex((byte)propValue, style);
|
||||
case NUMERIC_TYPE>>8: ucd.getNumericTypeID_fromIndex((byte)propValue, style);
|
||||
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex((byte)propValue);
|
||||
case NUMERIC_TYPE>>8: return ucd.getNumericTypeID_fromIndex((byte)propValue, style);
|
||||
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex((byte)propValue, style);
|
||||
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex((byte)propValue, style);
|
||||
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex((byte)propValue);
|
||||
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex((byte)propValue);
|
||||
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex((byte)propValue, style);
|
||||
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex((byte)propValue, style);
|
||||
case BINARY_PROPERTIES>>8: return ucd.getBinaryPropertiesID_fromIndex((byte)propValue, style);
|
||||
case SCRIPT>>8: return ucd.getScriptID_fromIndex((byte)propValue);
|
||||
case SCRIPT>>8: return ucd.getScriptID_fromIndex((byte)propValue, style);
|
||||
case AGE>>8: return ucd.getAgeID_fromIndex((byte)propValue);
|
||||
/*
|
||||
case DERIVED>>8:
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
|
||||
* $Date: 2002/08/08 15:38:16 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2002/10/05 01:28:57 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -175,12 +175,12 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
case BIDI_CLASS>>8: return ucd.getBidiClassID_fromIndex(ucd.getBidiClass(cp), style);
|
||||
case DECOMPOSITION_TYPE>>8: return ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(cp), style);
|
||||
case NUMERIC_TYPE>>8: return ucd.getNumericTypeID_fromIndex(ucd.getNumericType(cp), style);
|
||||
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(cp));
|
||||
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex(ucd.getLineBreak(cp));
|
||||
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(cp));
|
||||
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(cp));
|
||||
case SCRIPT>>8: return ucd.getScriptID_fromIndex(ucd.getScript(cp));
|
||||
case AGE>>8: return ucd.getAgeID_fromIndex(ucd.getAge(cp));
|
||||
case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(cp), style);
|
||||
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex(ucd.getLineBreak(cp), style);
|
||||
case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(cp), style);
|
||||
case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(cp), style);
|
||||
case SCRIPT>>8: return ucd.getScriptID_fromIndex(ucd.getScript(cp), style);
|
||||
case AGE>>8: return ucd.getAgeID_fromIndex(ucd.getAge(cp), style);
|
||||
default: throw new IllegalArgumentException("Internal Error");
|
||||
}
|
||||
}
|
||||
|
|
99
tools/unicodetools/com/ibm/text/utility/PoorMansEnum.java
Normal file
99
tools/unicodetools/com/ibm/text/utility/PoorMansEnum.java
Normal file
|
@ -0,0 +1,99 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/PoorMansEnum.java,v $
|
||||
* $Date: 2002/10/05 01:28:57 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Goal for enum is:
|
||||
* Easy to use
|
||||
* ID <-> int
|
||||
* ID <-> string name
|
||||
*/
|
||||
package com.ibm.text.utility;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class PoorMansEnum {
|
||||
protected int value;
|
||||
protected String name;
|
||||
protected PoorMansEnum next;
|
||||
|
||||
public int toInt() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
|
||||
// for subclassers
|
||||
|
||||
protected PoorMansEnum() {
|
||||
}
|
||||
|
||||
/** Utility for subclasses
|
||||
*/
|
||||
protected static class EnumStore {
|
||||
private List int2Id = new ArrayList();
|
||||
private Map string2Id = new HashMap();
|
||||
private PoorMansEnum last = null;
|
||||
|
||||
public PoorMansEnum add(PoorMansEnum id, String name) {
|
||||
// both string and id must be new!
|
||||
if (int2Id.indexOf(id) >= 0) {
|
||||
throw new IllegalArgumentException("ID already stored for \"" + name + '"');
|
||||
} else if (string2Id.containsKey(name)) {
|
||||
throw new IllegalArgumentException('"' + name + "\" already stored for ID ");
|
||||
}
|
||||
id.value = int2Id.size();
|
||||
id.name = name;
|
||||
if (last != null) {
|
||||
last.next = id;
|
||||
}
|
||||
int2Id.add(id);
|
||||
string2Id.put(name, id);
|
||||
last = id;
|
||||
return id;
|
||||
}
|
||||
|
||||
public PoorMansEnum addAlias(PoorMansEnum id, String name) {
|
||||
// id must be old, string must be new
|
||||
if (int2Id.indexOf(id) < 0) {
|
||||
throw new IllegalArgumentException("ID must already be stored for \"" + name + '"');
|
||||
} else if (string2Id.containsKey(name)) {
|
||||
throw new IllegalArgumentException('"' + name + "\" already stored for ID ");
|
||||
}
|
||||
string2Id.put(name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
public Collection getAliases(PoorMansEnum id, Collection output) {
|
||||
Iterator it = string2Id.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Object s = it.next();
|
||||
if (s == id.name) continue;
|
||||
if (id == string2Id.get(s)) output.add(s);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
public int getMax() {
|
||||
return int2Id.size();
|
||||
}
|
||||
|
||||
public PoorMansEnum get(int value) {
|
||||
return (PoorMansEnum) int2Id.get(value);
|
||||
}
|
||||
|
||||
public PoorMansEnum get(String name) {
|
||||
return (PoorMansEnum) string2Id.get(name);
|
||||
}
|
||||
}
|
||||
}
|
76
tools/unicodetools/com/ibm/text/utility/SampleEnum.java
Normal file
76
tools/unicodetools/com/ibm/text/utility/SampleEnum.java
Normal file
|
@ -0,0 +1,76 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/SampleEnum.java,v $
|
||||
* $Date: 2002/10/05 01:28:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.utility;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/** Sample Poor-Man's Enum.
|
||||
* To use as a template, copy and
|
||||
* <ul>
|
||||
* <li>replace all instances of "SampleEnum" by your enum's name</li>
|
||||
* <li>change the enum values to your values</li>
|
||||
* <li>set any aliases (or remove that section)</li>
|
||||
* </ul>
|
||||
*/
|
||||
public final class SampleEnum extends PoorMansEnum {
|
||||
private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
|
||||
|
||||
public static final SampleEnum
|
||||
ALPHA = add("The"),
|
||||
BETA = add("Quick"),
|
||||
GAMMA = add("Brown"),
|
||||
|
||||
FIRST = ALPHA;
|
||||
|
||||
static {
|
||||
store.addAlias(ALPHA, "A");
|
||||
}
|
||||
|
||||
/* Boilerplate */
|
||||
public SampleEnum next() { return (SampleEnum) next; }
|
||||
public void getAliases(Collection output) { store.getAliases(this, output); }
|
||||
public static SampleEnum get(String s) { return (SampleEnum) store.get(s); }
|
||||
public static SampleEnum get(int v) { return (SampleEnum) store.get(v); }
|
||||
public static int getMax() { return store.getMax(); }
|
||||
|
||||
private SampleEnum() {}
|
||||
private static SampleEnum add(String name) { return (SampleEnum) store.add(new SampleEnum(), name);}
|
||||
|
||||
|
||||
|
||||
/* just for testing */
|
||||
public static void test() {
|
||||
// int to string, collecting strings as we go
|
||||
Set s = new TreeSet();
|
||||
for (int i = 0; i < SampleEnum.getMax(); ++i) {
|
||||
String n = SampleEnum.get(i).toString();
|
||||
System.out.println(i + ", " + n);
|
||||
s.add(n);
|
||||
}
|
||||
// String to int
|
||||
Iterator it = s.iterator();
|
||||
while (it.hasNext()) {
|
||||
String n = (String)it.next();
|
||||
System.out.println(n + ", " + SampleEnum.get(n).toInt());
|
||||
}
|
||||
|
||||
// iteration
|
||||
for (SampleEnum current = FIRST; current != null; current = current.next()) {
|
||||
s.clear();
|
||||
current.getAliases(s);
|
||||
System.out.println(current.toInt() + ", " + current + ", " + s);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2002/09/25 06:40:14 $
|
||||
* $Revision: 1.25 $
|
||||
* $Date: 2002/10/05 01:28:56 $
|
||||
* $Revision: 1.26 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -26,7 +26,7 @@ import com.ibm.text.UCD.*;
|
|||
|
||||
public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
|
||||
static final boolean UTF8 = true; // TODO -- make argument
|
||||
// static final boolean UTF8 = true; // TODO -- make argument
|
||||
public static final char BOM = '\uFEFF';
|
||||
|
||||
public static String[] append(String[] array1, String[] array2) {
|
||||
|
@ -521,7 +521,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
return "<codepoint hex=\"" + hex(c,1) + "\"/>";
|
||||
}
|
||||
|
||||
if (c <= 0x7E || UTF8) {
|
||||
if (c <= 0x7E) {
|
||||
return UTF32.valueOf32(c);
|
||||
}
|
||||
|
||||
|
@ -634,17 +634,45 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
}
|
||||
*/
|
||||
|
||||
static final byte WINDOWS_MASK = 1, UTF8_MASK = 2;
|
||||
public static final byte
|
||||
LATIN1_UNIX = 0,
|
||||
LATIN1_WINDOWS = WINDOWS_MASK,
|
||||
UTF8_UNIX = UTF8_MASK,
|
||||
UTF8_WINDOWS = UTF8_MASK | WINDOWS_MASK;
|
||||
public static final class Encoding extends PoorMansEnum {
|
||||
private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
|
||||
|
||||
/* Boilerplate */
|
||||
public Encoding next() { return (Encoding) next; }
|
||||
public void getAliases(Collection output) { store.getAliases(this, output); }
|
||||
public static Encoding get(String s) { return (Encoding) store.get(s); }
|
||||
public static Encoding get(int v) { return (Encoding) store.get(v); }
|
||||
public static int getMax() { return store.getMax(); }
|
||||
|
||||
private Encoding() {}
|
||||
private static Encoding add(String name) { return (Encoding) store.add(new Encoding(), name);}
|
||||
}
|
||||
|
||||
public static final Encoding
|
||||
LATIN1_UNIX = Encoding.add("LATIN1_UNIX"),
|
||||
LATIN1_WINDOWS = Encoding.add("LATIN1_WINDOWS"),
|
||||
UTF8_UNIX = Encoding.add("UTF8_UNIX"),
|
||||
UTF8_WINDOWS = Encoding.add("UTF8_WINDOWS"),
|
||||
|
||||
UTF8 = Encoding.add("UTF8"), // for read-only
|
||||
LATIN1 = Encoding.add("LATIN1"), // for read-only
|
||||
|
||||
FIRST = LATIN1_UNIX;
|
||||
|
||||
|
||||
/*
|
||||
public static final Encoding
|
||||
LATIN1_UNIX = Encoding.LATIN1_UNIX,
|
||||
LATIN1_WINDOWS = Encoding.LATIN1_WINDOWS,
|
||||
UTF8_UNIX = Encoding.UTF8_UNIX,
|
||||
UTF8_WINDOWS = Encoding.UTF8_WINDOWS;
|
||||
*/
|
||||
|
||||
|
||||
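// options is one of the Encoding constants:
|
||||
// LATIN1_UNIX, LATIN1_WINDOWS, UTF8_UNIX or UTF8_WINDOWS.
|
||||
// (The old "false, false" / "true, true" / "true, false" guidance does not match this signature, which takes a single Encoding.)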
|
||||
public static PrintWriter openPrintWriter(String filename, byte options) throws IOException {
|
||||
public static PrintWriter openPrintWriter(String filename, Encoding options) throws IOException {
|
||||
File file = new File(getOutputName(filename));
|
||||
Utility.fixDot();
|
||||
System.out.println("Creating File: " + file.getCanonicalPath());
|
||||
|
@ -655,7 +683,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
new UTF8StreamWriter(
|
||||
new FileOutputStream(file),
|
||||
32*1024,
|
||||
(options & WINDOWS_MASK) == 0, (options & UTF8_MASK) == 0));
|
||||
options == LATIN1_UNIX || options == UTF8_UNIX,
|
||||
options == LATIN1_UNIX || options == LATIN1_WINDOWS));
|
||||
}
|
||||
|
||||
public static String getOutputName(String filename) {
|
||||
|
@ -714,13 +743,9 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
}
|
||||
}
|
||||
|
||||
public static void appendFile(String filename, boolean utf8, PrintWriter output) throws IOException {
|
||||
appendFile(filename, utf8, output, null);
|
||||
}
|
||||
|
||||
public static BufferedReader openReadFile(String filename, boolean UTF8) throws FileNotFoundException, UnsupportedEncodingException {
|
||||
public static BufferedReader openReadFile(String filename, Encoding encoding) throws FileNotFoundException, UnsupportedEncodingException {
|
||||
FileInputStream fis = new FileInputStream(filename);
|
||||
InputStreamReader isr = UTF8 ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
|
||||
InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
|
||||
BufferedReader br = new BufferedReader(isr, 32*1024);
|
||||
return br;
|
||||
}
|
||||
|
@ -769,10 +794,17 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
return line;
|
||||
}
|
||||
|
||||
public static void appendFile(String filename, boolean utf8, PrintWriter output, String[] replacementList) throws IOException {
|
||||
public static void appendFile(String filename, Encoding encoding, PrintWriter output) throws IOException {
|
||||
appendFile(filename, encoding, output, null);
|
||||
}
|
||||
|
||||
public static void appendFile(String filename, Encoding encoding, PrintWriter output, String[] replacementList) throws IOException {
|
||||
BufferedReader br = openReadFile(filename, encoding);
|
||||
/*
|
||||
FileInputStream fis = new FileInputStream(filename);
|
||||
InputStreamReader isr = utf8 ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
|
||||
InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
|
||||
BufferedReader br = new BufferedReader(isr, 32*1024);
|
||||
*/
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
|
@ -861,20 +893,20 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
return -1;
|
||||
}
|
||||
|
||||
public static void copyTextFile(String filename, boolean utf8, String newName, String[] replacementList) throws IOException {
|
||||
public static void copyTextFile(String filename, Encoding encoding, String newName, String[] replacementList) throws IOException {
|
||||
PrintWriter out = Utility.openPrintWriter(newName, UTF8_WINDOWS);
|
||||
appendFile(filename, utf8, out, replacementList);
|
||||
appendFile(filename, encoding, out, replacementList);
|
||||
out.close();
|
||||
}
|
||||
|
||||
public static void copyTextFile(String filename, boolean utf8, String newName) throws IOException {
|
||||
copyTextFile(filename, utf8, newName, null);
|
||||
public static void copyTextFile(String filename, Encoding encoding, String newName) throws IOException {
|
||||
copyTextFile(filename, encoding, newName, null);
|
||||
}
|
||||
|
||||
public static BufferedReader openUnicodeFile(String filename, String version, boolean show, boolean UTF8) throws IOException {
|
||||
public static BufferedReader openUnicodeFile(String filename, String version, boolean show, Encoding encoding) throws IOException {
|
||||
String name = getMostRecentUnicodeDataFile(filename, version, true, show);
|
||||
if (name == null) return null;
|
||||
return openReadFile(name, UTF8); // new BufferedReader(new FileReader(name),32*1024);
|
||||
return openReadFile(name, encoding); // new BufferedReader(new FileReader(name),32*1024);
|
||||
}
|
||||
|
||||
public static String getMostRecentUnicodeDataFile(String filename, String version,
|
||||
|
|
Loading…
Add table
Reference in a new issue