From 8864f1fd40e16561051d30f05dfdc4365c9cce66 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 7 Jul 2003 15:58:57 +0000 Subject: [PATCH] ICU-0 ; misc updates X-SVN-Rev: 12601 --- tools/unicodetools/com/ibm/text/UCA/Main.java | 7 +- .../text/UCD/GenerateHanTransliterator.java | 36 +- .../com/ibm/text/UCD/TestData.java | 348 +++++++++++++++++- tools/unicodetools/com/ibm/text/UCD/UCD.java | 6 +- .../com/ibm/text/utility/Main.java | 10 +- .../com/ibm/text/utility/Utility.java | 5 +- 6 files changed, 386 insertions(+), 26 deletions(-) diff --git a/tools/unicodetools/com/ibm/text/UCA/Main.java b/tools/unicodetools/com/ibm/text/UCA/Main.java index 65fde0eaf9f..f3b55d3c287 100644 --- a/tools/unicodetools/com/ibm/text/UCA/Main.java +++ b/tools/unicodetools/com/ibm/text/UCA/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ -* $Date: 2003/04/25 01:39:15 $ -* $Revision: 1.12 $ +* $Date: 2003/07/07 15:58:57 $ +* $Revision: 1.13 $ * ******************************************************************************* */ @@ -79,7 +79,8 @@ public class Main { else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest(); else if (arg.equalsIgnoreCase("short")) shortPrint = true; - else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation(); + else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation(); + else if (arg.equalsIgnoreCase("probe")) Probe.test(); else { diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java index 2f57802c75a..1652e4c3443 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $ -* $Date: 2003/02/25 23:38:22 $ -* $Revision: 1.11 $ +* $Date: 2003/07/07 15:58:57 $ +* $Revision: 1.12 $ * ******************************************************************************* */ @@ -44,6 +44,10 @@ public final class GenerateHanTransliterator implements UCD_Types { log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS); log.println(""); + log.println(""); + log.println(""); + log.println("Unihan check"); + log.println(""); BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8); @@ -244,6 +248,7 @@ public final class GenerateHanTransliterator implements UCD_Types { static final int CHINESE = 2, JAPANESE = 1, DEFINITION = 0; static final boolean DO_SIMPLE = true; + static final boolean SKIP_OVERRIDES = true; public static void main(int typeIn) { type = typeIn; @@ -277,16 +282,18 @@ public final class GenerateHanTransliterator implements UCD_Types { log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS); log.print('\uFEFF'); - log.println(); - log.println("@*Override Data"); - log.println(); - readOverrides(type); - - log.println(); - log.println("@*DICT Data"); - log.println(); - readCDICTDefinitions(type); - + if (!SKIP_OVERRIDES) { + log.println(); + log.println("@*Override Data"); + log.println(); + readOverrides(type); + + log.println(); + log.println("@*DICT Data"); + log.println(); + readCDICTDefinitions(type); + } + log.println(); log.println("@Unihan Data"); log.println(); @@ -1151,7 +1158,8 @@ U+7878 int cp = line.charAt(i); int script = Default.ucd.getScript(cp); if (script != HAN_SCRIPT) { - if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT) { + if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT + && cp != 0x30FB && cp != 0x30FC) { System.out.println("Huh: " + Default.ucd.getCodeAndName(cp)); } continue; @@ -1887,7 +1895,7 @@ Bad pinyin data: \u4E7F ? LE if (definition.length() == 0) { Utility.fixDot(); - System.out.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line)); + err.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line)); } else { addCheck(UTF16.valueOf(cp), definition, line); } diff --git a/tools/unicodetools/com/ibm/text/UCD/TestData.java b/tools/unicodetools/com/ibm/text/UCD/TestData.java index 0cf4c47e314..bee114fcd47 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestData.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $ -* $Date: 2003/05/02 21:46:33 $ -* $Revision: 1.10 $ +* $Date: 2003/07/07 15:58:57 $ +* $Revision: 1.11 $ * ******************************************************************************* */ @@ -17,12 +17,57 @@ import java.util.*; import java.io.*; import java.text.DateFormat; import java.text.SimpleDateFormat; +import com.ibm.icu.text.NumberFormat; +import com.ibm.icu.util.Currency; +import java.math.BigDecimal; + +import java.util.regex.*; import com.ibm.icu.text.*; import com.ibm.text.utility.*; public class TestData implements UCD_Types { public static void main (String[] args) throws IOException { + + Default.setUCD(); + + UnicodeSet us = getSetForName("LATIN LETTER.*P"); + Utility.showSetNames("",us,false,Default.ucd); + + us = getSetForName(".*VARIA(TION|NT).*"); + Utility.showSetNames("",us,false,Default.ucd); + + if (true) return; + + /*showSet(); + */ + String x = "[[[:s:][:p:]&[:ascii:]] | [\\u2190-\\u2BFF] | " + + "[[:s:][:p:]" + // + "&[:decompositiontype=none:]" + // + "- [:id_continue:]" + + "-[:sk:]" + + "]]"; + PrintWriter pw = Utility.openPrintWriter("Syntax.txt", Utility.UTF8_WINDOWS); + showSet(pw, x, false); + showSet(pw, "[[\\u2000-\\u205F]-" + x + "]", true); + showSet(pw, "[[:whitespace:]&[:decompositiontype=none:]]", false); + pw.close(); + + if (true) return; + + testFormatHack(); + if (true) return; + testConvertToBDD(); + if (true) return; + + System.out.println("Shift: " + SHIFT + ", Mask: " + Long.toHexString(MASK)); + showNumber(-5); + showNumber(0); + showNumber(5); + showNumber(500); + showNumber(5000000); + if (true) return; + String script = args[0]; PrintWriter log = Utility.openPrintWriter("TranslitSkeleton_" + script + ".txt", Utility.UTF8_WINDOWS); try { @@ -39,6 +84,305 @@ public class TestData implements UCD_Types { log.close(); } } + + static private UnicodeSet getSetForName(String regexPattern) { + UnicodeSet result = new UnicodeSet(); + Pattern p = Pattern.compile(regexPattern); + Matcher m = p.matcher(""); + for (int i = 0; i < 0x10FFFF; ++i) { + Utility.dot(i); + if (!Default.ucd.isAssigned(i)) continue; + byte cat = Default.ucd.getCategory(i); + if (cat == PRIVATE_USE) continue; + m.reset(Default.ucd.getName(i)); + if (m.matches()) { + result.add(i); + } + } + return result; + } + + private static void showSet(PrintWriter pw, String x, boolean separateLines) { + pw.println("****************************"); + System.out.println(x); + UnicodeSet ss = new UnicodeSet(x); + pw.println(x); + Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd); + pw.println("****************************"); + } + + static int SHIFT = 6; + static int MASK = (1<<6) - 1; + static int OTHER = 0xFF & ~MASK; + + static void showNumber(float x) { + System.out.println("Number: " + x); + //long bits = Double.doubleToLongBits(x); + long bits = (Float.floatToIntBits(x) + 0L) << 32; + System.out.println("IEEE: " + Long.toBinaryString(bits)); + System.out.print("Broken: "); + long lastShift = 64-SHIFT; + for (long shift = 64-SHIFT; shift > 0; shift -= SHIFT) { + long temp = bits >>> shift; + temp &= MASK; + if (temp != 0) lastShift = shift; + temp |= OTHER; + String piece = Long.toBinaryString(temp); + System.out.print(" " + piece); + } + System.out.println(); + System.out.print("Bytes: 1B"); + for (long shift = 64-SHIFT; shift >= lastShift; shift -= SHIFT) { + long temp = bits >>> shift; + temp &= MASK; + temp |= OTHER; + if (shift == lastShift) { + temp &= ~0x80; + } + String piece = Long.toHexString(temp).toUpperCase(); + System.out.print(" " + piece); + } + System.out.println(); + } + + static int findFirstNonZero(String digits) { + for (int i = 0; i < digits.length(); ++i) { + if (digits.charAt(i) != '0') return i; + } + return digits.length(); + } + + static String remove(String s, int start, int limit) { + return s.substring(0, start) + s.substring(limit); + } + + static String hexByte(int i) { + String result = Integer.toHexString(i).toUpperCase(); + if (result.length() == 1) result = '0' + result; + return result; + } + + // dumb implementation + static String convertToBCD(String digits) { + + // fix negatives, remove leading zeros, get decimal + + int[] pairs = new int[120]; + boolean negative = false; + boolean removedNegative = false; + boolean removedDecimal = false; + int leadZeros = 0; + int trailZeros = 0; + + if (digits.charAt(0) == '-') { + negative = true; + removedNegative = true; + digits = remove(digits, 0, 1); + } + while (digits.length() > 0 && digits.charAt(0) == '0') { + digits = remove(digits, 0, 1); + leadZeros++; + } + int decimalOffset = digits.indexOf('.'); + if (decimalOffset < 0) { + decimalOffset = digits.length(); + } else { + digits = digits = remove(digits, decimalOffset, decimalOffset+1); + removedDecimal = true; + } + + // remove trailing zeros + while (digits.length() > 0 && digits.charAt(digits.length() - 1) == '0') { + digits = remove(digits, digits.length() - 1, digits.length()); + trailZeros++; + } + + // make the digits even (in non-fraction part) + if (((decimalOffset) & 1) != 0) { + digits = '0' + digits; // make even + ++decimalOffset; + leadZeros--; + } + if (((digits.length()) & 1) != 0) { + digits = digits + '0'; // make even + trailZeros--; + } + + // handle 0 + if (digits.length() == 0) { + negative = false; + digits = "00"; + leadZeros -= 2; + } + + // store exponent + int exp = decimalOffset/2; + if (!negative) exp |= 0x80; + else exp = (~exp) & 0x7F; + String result = hexByte(exp); + for (int i = 0; i < digits.length(); i += 2) { + int base100 = ((digits.charAt(i) - '0')*10 + (digits.charAt(i+1) - '0')) << 1; + if (i < digits.length() - 2) base100 |= 0x1; // mark all but last + if (negative) base100 = (~base100) & 0xFF; + result += "." + hexByte(base100); + } + + /** + // add a secondary weight + // assume we don't care about more than too many leads/trails + leadZeros += 2; // make non-negative; might have padded by 2, for 0 + trailZeros += 2; // make non-negative; might have padded by 1 + if (leadZeros > 7) leadZeros = 7; + if (trailZeros > 7) trailZeros = 7; + int secondary = (removedNegative ? 0 : 0x80) // only for zero + | (leadZeros << 4) + | (removedDecimal ? 0 : 0x08) + | (trailZeros); + result += ";" + hexByte(secondary); + */ + + return result; + } + + static int stamp = 0; + static void add(Map m, String s) { + add2(m, s); + add2(m, "0" + s); + if (s.indexOf('.') >= 0) { + add2(m, s + "0"); + add2(m, "0" + s + "0"); + } else { + add2(m, s + "."); + add2(m, "0" + s + "."); + add2(m, s + ".0"); + add2(m, "0" + s + ".0"); + } + } + + static void add2(Map m, String s) { + add3(m,s); + if (s.indexOf('-') < 0) add3(m, "-" + s); + } + + private static void add3(Map m, String s) { + String base = convertToBCD(s); + base += "|" + Math.random() + stamp++; // just something for uniqueness + m.put(base, s); + } + + static boolean SHOW_ALL = true; + + static NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH); + static { + nf.setGroupingUsed(false); + } + + static String cleanToString(double d) { + return nf.format(d); + } + + static void testConvertToBDD() { + System.out.println("Starting Test"); + double[] testList = {0, 0.00000001, 0.001, 5, 10, 50, 100, 1000, 100000000}; + Map m = new TreeMap(); + + for (int i = 0; i < testList.length; ++i) { + double d = testList[i]; + add(m, cleanToString(d)); + add(m, cleanToString(d + 0.1)); + add(m, cleanToString(d + 1)); + add(m, cleanToString(d + 1.1)); + if (d > 0.1) add(m, cleanToString(d - 0.1)); + if (d > 1.0) add(m, cleanToString(d - 1.0)); + if (d > 1.1) add(m, cleanToString(d - 1.1)); + } + Iterator it = m.keySet().iterator(); + String lastKey = ""; + String lastValue = ""; + boolean lastPrinted = false; + double lastNumber = Double.NEGATIVE_INFINITY; + int errorCount = 0; + while (it.hasNext()) { + String key = (String) it.next(); + String value = (String) m.get(key); + key = key.substring(0, key.indexOf('|')); // remove stamp + double number = Double.parseDouble(value); + if (lastNumber > number) { + if (!lastPrinted) System.out.println("\t" + lastValue + "\t" + lastKey); + System.out.println("Fail:\t" + value + "\t" + key); + lastPrinted = true; + errorCount++; + } else if (SHOW_ALL) { + System.out.println("\t" + value + "\t" + key); + lastPrinted = true; + } + lastNumber = number; + lastKey = key; + lastValue = value; + } + System.out.println("Done Test, " + errorCount + " Errors"); + } + + static void testFormatHack() { + String[] testCurrencies = {"USD","GBP","JPY","EUR"}; + Locale[] testLocales = NumberFormat.getAvailableLocales(); + for (int i = 0; i < testLocales.length; ++i) { + // since none of this should vary by country, we'll just do by language + if (!testLocales[i].getCountry().equals("")) continue; + System.out.println(testLocales[i].getDisplayName()); + for (int j = 0; j < testCurrencies.length; ++j) { + NumberFormat nf = getCurrencyFormat( + Currency.getInstance(testCurrencies[j]), testLocales[i], true); + String newVersion = nf.format(1234.567); + System.out.print("\t" + newVersion); + nf = getCurrencyFormat( + Currency.getInstance(testCurrencies[j]), testLocales[i], false); + String oldVersion = nf.format(1234.567); + if (!oldVersion.equals(newVersion)) { + System.out.print(" (" + oldVersion + ")"); + } + } + System.out.println(); + } + } + + static NumberFormat getCurrencyFormat(Currency currency, Locale displayLocale, boolean ICU26) { + // code for ICU 2.6 + if (ICU26) { + NumberFormat result = NumberFormat.getCurrencyInstance(); + result.setCurrency(currency); + return result; + } + + // ugly work-around for 2.4 + DecimalFormat result = (DecimalFormat)NumberFormat.getCurrencyInstance(displayLocale); + HackCurrencyInfo hack = (HackCurrencyInfo)(hackData.get(currency.getCurrencyCode())); + result.setMinimumFractionDigits(hack.decimals); + result.setMaximumFractionDigits(hack.decimals); + result.setRoundingIncrement(hack.rounding); + DecimalFormatSymbols symbols = result.getDecimalFormatSymbols(); + symbols.setCurrencySymbol(hack.symbol); + result.setDecimalFormatSymbols(symbols); + return result; + } + + static Map hackData = new HashMap(); + static class HackCurrencyInfo { + int decimals; + double rounding; + String symbol; + HackCurrencyInfo(int decimals, double rounding, String symbol) { + this.decimals = decimals; + this.rounding = rounding; + this.symbol = symbol; + } + } + static { + hackData.put("USD", new HackCurrencyInfo(2, 0.01, "$")); + hackData.put("GBP", new HackCurrencyInfo(2, 0.01, "\u00a3")); + hackData.put("JPY", new HackCurrencyInfo(0, 1, "\u00a5")); + hackData.put("EUR", new HackCurrencyInfo(2, 0.01, "\u20AC")); + } /* System.out.println("START"); diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java index 5540a2c2c0c..aa1d21b5aab 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $ -* $Date: 2003/05/02 21:46:33 $ -* $Revision: 1.26 $ +* $Date: 2003/07/07 15:58:56 $ +* $Revision: 1.27 $ * ******************************************************************************* */ @@ -35,7 +35,7 @@ public final class UCD implements UCD_Types { /** * Used for the default version. */ - public static final String latestVersion = "4.0.0"; + public static final String latestVersion = "4.0.1"; /** * Create singleton instance for default (latest) version diff --git a/tools/unicodetools/com/ibm/text/utility/Main.java b/tools/unicodetools/com/ibm/text/utility/Main.java index 8ef1f6b4a6f..1f4637987e6 100644 --- a/tools/unicodetools/com/ibm/text/utility/Main.java +++ b/tools/unicodetools/com/ibm/text/utility/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Main.java,v $ -* $Date: 2002/06/22 21:01:25 $ -* $Revision: 1.1 $ +* $Date: 2003/07/07 15:58:56 $ +* $Revision: 1.2 $ * ******************************************************************************* */ @@ -48,6 +48,12 @@ public class Main { } static public void main (String[] args) { + for (int i = 0; i < args.length; ++i) { + String arg = args[i]; + if (arg.equalsIgnoreCase("probe")) Probe.test("da"); + } + if (true) return; + for (CollatorStyle i = CollatorStyle.ZEROED; i != null; i = i.next()) { System.out.println(i); } diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index 8e916d5cb83..ba4205fa35a 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2003/05/02 21:46:33 $ -* $Revision: 1.33 $ +* $Date: 2003/07/07 15:58:56 $ +* $Revision: 1.34 $ * ******************************************************************************* */ @@ -643,6 +643,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES private static final String[] searchPath = { "EXTRAS", + "4.0.1", "4.0.0", "3.2.0", "3.1.1",