ICU-0 ; misc updates

X-SVN-Rev: 12601
This commit is contained in:
Mark Davis 2003-07-07 15:58:57 +00:00
parent 9ddbcf3ebe
commit 8864f1fd40
6 changed files with 386 additions and 26 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.12 $
* $Date: 2003/07/07 15:58:57 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -79,7 +79,8 @@ public class Main {
else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest();
else if (arg.equalsIgnoreCase("short")) shortPrint = true;
else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
else if (arg.equalsIgnoreCase("probe")) Probe.test();
else {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
* $Date: 2003/02/25 23:38:22 $
* $Revision: 1.11 $
* $Date: 2003/07/07 15:58:57 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -44,6 +44,10 @@ public final class GenerateHanTransliterator implements UCD_Types {
log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS);
log.println("<body>");
log.println("<head>");
log.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
log.println("<title>Unihan check</title>");
log.println("</head>");
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
@ -244,6 +248,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
static final int CHINESE = 2, JAPANESE = 1, DEFINITION = 0;
static final boolean DO_SIMPLE = true;
static final boolean SKIP_OVERRIDES = true;
public static void main(int typeIn) {
type = typeIn;
@ -277,16 +282,18 @@ public final class GenerateHanTransliterator implements UCD_Types {
log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS);
log.print('\uFEFF');
log.println();
log.println("@*Override Data");
log.println();
readOverrides(type);
log.println();
log.println("@*DICT Data");
log.println();
readCDICTDefinitions(type);
if (!SKIP_OVERRIDES) {
log.println();
log.println("@*Override Data");
log.println();
readOverrides(type);
log.println();
log.println("@*DICT Data");
log.println();
readCDICTDefinitions(type);
}
log.println();
log.println("@Unihan Data");
log.println();
@ -1151,7 +1158,8 @@ U+7878
int cp = line.charAt(i);
int script = Default.ucd.getScript(cp);
if (script != HAN_SCRIPT) {
if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT) {
if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT
&& cp != 0x30FB && cp != 0x30FC) {
System.out.println("Huh: " + Default.ucd.getCodeAndName(cp));
}
continue;
@ -1887,7 +1895,7 @@ Bad pinyin data: \u4E7F ? LE
if (definition.length() == 0) {
Utility.fixDot();
System.out.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
err.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
} else {
addCheck(UTF16.valueOf(cp), definition, line);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
* $Date: 2003/05/02 21:46:33 $
* $Revision: 1.10 $
* $Date: 2003/07/07 15:58:57 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -17,12 +17,57 @@ import java.util.*;
import java.io.*;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.util.Currency;
import java.math.BigDecimal;
import java.util.regex.*;
import com.ibm.icu.text.*;
import com.ibm.text.utility.*;
public class TestData implements UCD_Types {
public static void main (String[] args) throws IOException {
Default.setUCD();
UnicodeSet us = getSetForName("LATIN LETTER.*P");
Utility.showSetNames("",us,false,Default.ucd);
us = getSetForName(".*VARIA(TION|NT).*");
Utility.showSetNames("",us,false,Default.ucd);
if (true) return;
/*showSet();
*/
String x = "[[[:s:][:p:]&[:ascii:]] | [\\u2190-\\u2BFF] | "
+ "[[:s:][:p:]"
// + "&[:decompositiontype=none:]"
// + "- [:id_continue:]"
+ "-[:sk:]"
+ "]]";
PrintWriter pw = Utility.openPrintWriter("Syntax.txt", Utility.UTF8_WINDOWS);
showSet(pw, x, false);
showSet(pw, "[[\\u2000-\\u205F]-" + x + "]", true);
showSet(pw, "[[:whitespace:]&[:decompositiontype=none:]]", false);
pw.close();
if (true) return;
testFormatHack();
if (true) return;
testConvertToBDD();
if (true) return;
System.out.println("Shift: " + SHIFT + ", Mask: " + Long.toHexString(MASK));
showNumber(-5);
showNumber(0);
showNumber(5);
showNumber(500);
showNumber(5000000);
if (true) return;
String script = args[0];
PrintWriter log = Utility.openPrintWriter("TranslitSkeleton_" + script + ".txt", Utility.UTF8_WINDOWS);
try {
@ -39,6 +84,305 @@ public class TestData implements UCD_Types {
log.close();
}
}
static private UnicodeSet getSetForName(String regexPattern) {
UnicodeSet result = new UnicodeSet();
Pattern p = Pattern.compile(regexPattern);
Matcher m = p.matcher("");
for (int i = 0; i < 0x10FFFF; ++i) {
Utility.dot(i);
if (!Default.ucd.isAssigned(i)) continue;
byte cat = Default.ucd.getCategory(i);
if (cat == PRIVATE_USE) continue;
m.reset(Default.ucd.getName(i));
if (m.matches()) {
result.add(i);
}
}
return result;
}
private static void showSet(PrintWriter pw, String x, boolean separateLines) {
pw.println("****************************");
System.out.println(x);
UnicodeSet ss = new UnicodeSet(x);
pw.println(x);
Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd);
pw.println("****************************");
}
static int SHIFT = 6;
static int MASK = (1<<6) - 1;
static int OTHER = 0xFF & ~MASK;
static void showNumber(float x) {
System.out.println("Number: " + x);
//long bits = Double.doubleToLongBits(x);
long bits = (Float.floatToIntBits(x) + 0L) << 32;
System.out.println("IEEE: " + Long.toBinaryString(bits));
System.out.print("Broken: ");
long lastShift = 64-SHIFT;
for (long shift = 64-SHIFT; shift > 0; shift -= SHIFT) {
long temp = bits >>> shift;
temp &= MASK;
if (temp != 0) lastShift = shift;
temp |= OTHER;
String piece = Long.toBinaryString(temp);
System.out.print(" " + piece);
}
System.out.println();
System.out.print("Bytes: 1B");
for (long shift = 64-SHIFT; shift >= lastShift; shift -= SHIFT) {
long temp = bits >>> shift;
temp &= MASK;
temp |= OTHER;
if (shift == lastShift) {
temp &= ~0x80;
}
String piece = Long.toHexString(temp).toUpperCase();
System.out.print(" " + piece);
}
System.out.println();
}
static int findFirstNonZero(String digits) {
for (int i = 0; i < digits.length(); ++i) {
if (digits.charAt(i) != '0') return i;
}
return digits.length();
}
static String remove(String s, int start, int limit) {
return s.substring(0, start) + s.substring(limit);
}
static String hexByte(int i) {
String result = Integer.toHexString(i).toUpperCase();
if (result.length() == 1) result = '0' + result;
return result;
}
// dumb implementation
static String convertToBCD(String digits) {
// fix negatives, remove leading zeros, get decimal
int[] pairs = new int[120];
boolean negative = false;
boolean removedNegative = false;
boolean removedDecimal = false;
int leadZeros = 0;
int trailZeros = 0;
if (digits.charAt(0) == '-') {
negative = true;
removedNegative = true;
digits = remove(digits, 0, 1);
}
while (digits.length() > 0 && digits.charAt(0) == '0') {
digits = remove(digits, 0, 1);
leadZeros++;
}
int decimalOffset = digits.indexOf('.');
if (decimalOffset < 0) {
decimalOffset = digits.length();
} else {
digits = digits = remove(digits, decimalOffset, decimalOffset+1);
removedDecimal = true;
}
// remove trailing zeros
while (digits.length() > 0 && digits.charAt(digits.length() - 1) == '0') {
digits = remove(digits, digits.length() - 1, digits.length());
trailZeros++;
}
// make the digits even (in non-fraction part)
if (((decimalOffset) & 1) != 0) {
digits = '0' + digits; // make even
++decimalOffset;
leadZeros--;
}
if (((digits.length()) & 1) != 0) {
digits = digits + '0'; // make even
trailZeros--;
}
// handle 0
if (digits.length() == 0) {
negative = false;
digits = "00";
leadZeros -= 2;
}
// store exponent
int exp = decimalOffset/2;
if (!negative) exp |= 0x80;
else exp = (~exp) & 0x7F;
String result = hexByte(exp);
for (int i = 0; i < digits.length(); i += 2) {
int base100 = ((digits.charAt(i) - '0')*10 + (digits.charAt(i+1) - '0')) << 1;
if (i < digits.length() - 2) base100 |= 0x1; // mark all but last
if (negative) base100 = (~base100) & 0xFF;
result += "." + hexByte(base100);
}
/**
// add a secondary weight
// assume we don't care about more than too many leads/trails
leadZeros += 2; // make non-negative; might have padded by 2, for 0
trailZeros += 2; // make non-negative; might have padded by 1
if (leadZeros > 7) leadZeros = 7;
if (trailZeros > 7) trailZeros = 7;
int secondary = (removedNegative ? 0 : 0x80) // only for zero
| (leadZeros << 4)
| (removedDecimal ? 0 : 0x08)
| (trailZeros);
result += ";" + hexByte(secondary);
*/
return result;
}
static int stamp = 0;
static void add(Map m, String s) {
add2(m, s);
add2(m, "0" + s);
if (s.indexOf('.') >= 0) {
add2(m, s + "0");
add2(m, "0" + s + "0");
} else {
add2(m, s + ".");
add2(m, "0" + s + ".");
add2(m, s + ".0");
add2(m, "0" + s + ".0");
}
}
static void add2(Map m, String s) {
add3(m,s);
if (s.indexOf('-') < 0) add3(m, "-" + s);
}
private static void add3(Map m, String s) {
String base = convertToBCD(s);
base += "|" + Math.random() + stamp++; // just something for uniqueness
m.put(base, s);
}
static boolean SHOW_ALL = true;
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH);
static {
nf.setGroupingUsed(false);
}
static String cleanToString(double d) {
return nf.format(d);
}
static void testConvertToBDD() {
System.out.println("Starting Test");
double[] testList = {0, 0.00000001, 0.001, 5, 10, 50, 100, 1000, 100000000};
Map m = new TreeMap();
for (int i = 0; i < testList.length; ++i) {
double d = testList[i];
add(m, cleanToString(d));
add(m, cleanToString(d + 0.1));
add(m, cleanToString(d + 1));
add(m, cleanToString(d + 1.1));
if (d > 0.1) add(m, cleanToString(d - 0.1));
if (d > 1.0) add(m, cleanToString(d - 1.0));
if (d > 1.1) add(m, cleanToString(d - 1.1));
}
Iterator it = m.keySet().iterator();
String lastKey = "";
String lastValue = "";
boolean lastPrinted = false;
double lastNumber = Double.NEGATIVE_INFINITY;
int errorCount = 0;
while (it.hasNext()) {
String key = (String) it.next();
String value = (String) m.get(key);
key = key.substring(0, key.indexOf('|')); // remove stamp
double number = Double.parseDouble(value);
if (lastNumber > number) {
if (!lastPrinted) System.out.println("\t" + lastValue + "\t" + lastKey);
System.out.println("Fail:\t" + value + "\t" + key);
lastPrinted = true;
errorCount++;
} else if (SHOW_ALL) {
System.out.println("\t" + value + "\t" + key);
lastPrinted = true;
}
lastNumber = number;
lastKey = key;
lastValue = value;
}
System.out.println("Done Test, " + errorCount + " Errors");
}
static void testFormatHack() {
String[] testCurrencies = {"USD","GBP","JPY","EUR"};
Locale[] testLocales = NumberFormat.getAvailableLocales();
for (int i = 0; i < testLocales.length; ++i) {
// since none of this should vary by country, we'll just do by language
if (!testLocales[i].getCountry().equals("")) continue;
System.out.println(testLocales[i].getDisplayName());
for (int j = 0; j < testCurrencies.length; ++j) {
NumberFormat nf = getCurrencyFormat(
Currency.getInstance(testCurrencies[j]), testLocales[i], true);
String newVersion = nf.format(1234.567);
System.out.print("\t" + newVersion);
nf = getCurrencyFormat(
Currency.getInstance(testCurrencies[j]), testLocales[i], false);
String oldVersion = nf.format(1234.567);
if (!oldVersion.equals(newVersion)) {
System.out.print(" (" + oldVersion + ")");
}
}
System.out.println();
}
}
static NumberFormat getCurrencyFormat(Currency currency, Locale displayLocale, boolean ICU26) {
// code for ICU 2.6
if (ICU26) {
NumberFormat result = NumberFormat.getCurrencyInstance();
result.setCurrency(currency);
return result;
}
// ugly work-around for 2.4
DecimalFormat result = (DecimalFormat)NumberFormat.getCurrencyInstance(displayLocale);
HackCurrencyInfo hack = (HackCurrencyInfo)(hackData.get(currency.getCurrencyCode()));
result.setMinimumFractionDigits(hack.decimals);
result.setMaximumFractionDigits(hack.decimals);
result.setRoundingIncrement(hack.rounding);
DecimalFormatSymbols symbols = result.getDecimalFormatSymbols();
symbols.setCurrencySymbol(hack.symbol);
result.setDecimalFormatSymbols(symbols);
return result;
}
static Map hackData = new HashMap();
static class HackCurrencyInfo {
int decimals;
double rounding;
String symbol;
HackCurrencyInfo(int decimals, double rounding, String symbol) {
this.decimals = decimals;
this.rounding = rounding;
this.symbol = symbol;
}
}
static {
hackData.put("USD", new HackCurrencyInfo(2, 0.01, "$"));
hackData.put("GBP", new HackCurrencyInfo(2, 0.01, "\u00a3"));
hackData.put("JPY", new HackCurrencyInfo(0, 1, "\u00a5"));
hackData.put("EUR", new HackCurrencyInfo(2, 0.01, "\u20AC"));
}
/*
System.out.println("START");

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2003/05/02 21:46:33 $
* $Revision: 1.26 $
* $Date: 2003/07/07 15:58:56 $
* $Revision: 1.27 $
*
*******************************************************************************
*/
@ -35,7 +35,7 @@ public final class UCD implements UCD_Types {
/**
* Used for the default version.
*/
public static final String latestVersion = "4.0.0";
public static final String latestVersion = "4.0.1";
/**
* Create singleton instance for default (latest) version

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Main.java,v $
* $Date: 2002/06/22 21:01:25 $
* $Revision: 1.1 $
* $Date: 2003/07/07 15:58:56 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -48,6 +48,12 @@ public class Main {
}
static public void main (String[] args) {
for (int i = 0; i < args.length; ++i) {
String arg = args[i];
if (arg.equalsIgnoreCase("probe")) Probe.test("da");
}
if (true) return;
for (CollatorStyle i = CollatorStyle.ZEROED; i != null; i = i.next()) {
System.out.println(i);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2003/05/02 21:46:33 $
* $Revision: 1.33 $
* $Date: 2003/07/07 15:58:56 $
* $Revision: 1.34 $
*
*******************************************************************************
*/
@ -643,6 +643,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
private static final String[] searchPath = {
"EXTRAS",
"4.0.1",
"4.0.0",
"3.2.0",
"3.1.1",