From 8864f1fd40e16561051d30f05dfdc4365c9cce66 Mon Sep 17 00:00:00 2001
From: Mark Davis <mark@macchiato.com>
Date: Mon, 7 Jul 2003 15:58:57 +0000
Subject: [PATCH] ICU-0 ; misc updates

X-SVN-Rev: 12601
---
 tools/unicodetools/com/ibm/text/UCA/Main.java |   7 +-
 .../text/UCD/GenerateHanTransliterator.java   |  36 +-
 .../com/ibm/text/UCD/TestData.java            | 348 +++++++++++++++++-
 tools/unicodetools/com/ibm/text/UCD/UCD.java  |   6 +-
 .../com/ibm/text/utility/Main.java            |  10 +-
 .../com/ibm/text/utility/Utility.java         |   5 +-
 6 files changed, 386 insertions(+), 26 deletions(-)
diff --git a/tools/unicodetools/com/ibm/text/UCA/Main.java b/tools/unicodetools/com/ibm/text/UCA/Main.java
index 65fde0eaf9f..f3b55d3c287 100644
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
-* $Date: 2003/04/25 01:39:15 $ 
-* $Revision: 1.12 $
+* $Date: 2003/07/07 15:58:57 $ 
+* $Revision: 1.13 $
 *
 *******************************************************************************
 */
@@ -79,7 +79,8 @@ public class Main {
             else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest();
             else if (arg.equalsIgnoreCase("short")) shortPrint = true;
             
-            else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
+			else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
+			else if (arg.equalsIgnoreCase("probe")) Probe.test();
             
             
             else {
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
index 2f57802c75a..1652e4c3443 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
-* $Date: 2003/02/25 23:38:22 $
-* $Revision: 1.11 $
+* $Date: 2003/07/07 15:58:57 $
+* $Revision: 1.12 $
 *
 *******************************************************************************
 */
@@ -44,6 +44,10 @@ public final class GenerateHanTransliterator implements UCD_Types {
 
         log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS);
         log.println("<body>");
+        log.println("<head>");
+        log.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+        log.println("<title>Unihan check</title>");
+        log.println("</head>");
 
         BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8); 
         
@@ -244,6 +248,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
     static final int CHINESE = 2, JAPANESE = 1, DEFINITION = 0;
     
     static final boolean DO_SIMPLE = true;
+    static final boolean SKIP_OVERRIDES = true;
     
     public static void main(int typeIn) {
     	type = typeIn;
@@ -277,16 +282,18 @@ public final class GenerateHanTransliterator implements UCD_Types {
             log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS);
             log.print('\uFEFF');
             
-            log.println();
-            log.println("@*Override Data");
-            log.println();
-            readOverrides(type);
-
-            log.println();
-            log.println("@*DICT Data");
-            log.println();
-            readCDICTDefinitions(type);
-            
+            if (!SKIP_OVERRIDES) {
+                log.println();
+                log.println("@*Override Data");
+                log.println();
+                readOverrides(type);
+    
+                log.println();
+                log.println("@*DICT Data");
+                log.println();
+                readCDICTDefinitions(type);
+            }
+          
             log.println();
             log.println("@Unihan Data");
             log.println();
@@ -1151,7 +1158,8 @@ U+7878	
                         int cp = line.charAt(i);
                         int script = Default.ucd.getScript(cp);
                         if (script != HAN_SCRIPT) {
-                            if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT) {
+                            if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT 
+                                && cp != 0x30FB && cp != 0x30FC) {
                                 System.out.println("Huh: " + Default.ucd.getCodeAndName(cp));
                             }
                             continue;
@@ -1887,7 +1895,7 @@ Bad pinyin data: \u4E7F	?	LE
 
         if (definition.length() == 0) {
             Utility.fixDot();
-            System.out.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
+            err.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
         } else {
             addCheck(UTF16.valueOf(cp), definition, line);
         }
diff --git a/tools/unicodetools/com/ibm/text/UCD/TestData.java b/tools/unicodetools/com/ibm/text/UCD/TestData.java
index 0cf4c47e314..bee114fcd47 100644
--- a/tools/unicodetools/com/ibm/text/UCD/TestData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
-* $Date: 2003/05/02 21:46:33 $
-* $Revision: 1.10 $
+* $Date: 2003/07/07 15:58:57 $
+* $Revision: 1.11 $
 *
 *******************************************************************************
 */
@@ -17,12 +17,57 @@ import java.util.*;
 import java.io.*;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.util.Currency;
+import java.math.BigDecimal;
+
+import java.util.regex.*;
 
 import com.ibm.icu.text.*;
 import com.ibm.text.utility.*;
 
 public class TestData implements UCD_Types {
 	public static void main (String[] args) throws IOException {
+        
+        Default.setUCD();
+        
+        UnicodeSet us = getSetForName("LATIN LETTER.*P");
+        Utility.showSetNames("",us,false,Default.ucd);
+        
+        us = getSetForName(".*VARIA(TION|NT).*");
+        Utility.showSetNames("",us,false,Default.ucd);
+       
+        if (true) return;
+        
+        /*showSet();
+        */
+        String x = "[[[:s:][:p:]&[:ascii:]] | [\\u2190-\\u2BFF] | "
+        + "[[:s:][:p:]"
+   //     + "&[:decompositiontype=none:]"
+   // + "- [:id_continue:]"
+        + "-[:sk:]"
+        + "]]";
+        PrintWriter pw = Utility.openPrintWriter("Syntax.txt", Utility.UTF8_WINDOWS);
+        showSet(pw, x, false);
+        showSet(pw, "[[\\u2000-\\u205F]-" + x + "]", true);
+        showSet(pw, "[[:whitespace:]&[:decompositiontype=none:]]", false);
+        pw.close();
+        
+        if (true) return;
+		
+		testFormatHack();
+		if (true) return;
+		testConvertToBDD();
+		if (true) return;
+		
+		System.out.println("Shift: " + SHIFT + ", Mask: " + Long.toHexString(MASK));
+		showNumber(-5);
+		showNumber(0);
+		showNumber(5);
+		showNumber(500);
+		showNumber(5000000);
+		if (true) return;
+		
 		String script = args[0];
 		PrintWriter log = Utility.openPrintWriter("TranslitSkeleton_" + script + ".txt", Utility.UTF8_WINDOWS);
 		try {
@@ -39,6 +84,305 @@ public class TestData implements UCD_Types {
 			log.close();
 		}
 	}
+    
+    static private UnicodeSet getSetForName(String regexPattern) {
+        UnicodeSet result = new UnicodeSet();
+        Pattern p = Pattern.compile(regexPattern);
+        Matcher m = p.matcher("");
+        for (int i = 0; i < 0x10FFFF; ++i) {
+            Utility.dot(i);
+            if (!Default.ucd.isAssigned(i)) continue;
+            byte cat = Default.ucd.getCategory(i);
+            if (cat == PRIVATE_USE) continue;
+            m.reset(Default.ucd.getName(i));
+            if (m.matches()) {
+                result.add(i);
+            }
+        }
+        return result;
+    }
+
+    private static void showSet(PrintWriter pw, String x, boolean separateLines) {
+        pw.println("****************************");
+        System.out.println(x);
+        UnicodeSet ss = new UnicodeSet(x);
+        pw.println(x);
+        Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd);
+        pw.println("****************************");
+    }
+	
+	static int SHIFT = 6;
+	static int MASK = (1<<6) - 1;
+	static int OTHER = 0xFF & ~MASK;
+
+	static void showNumber(float x) {
+		System.out.println("Number: " + x);
+		//long bits = Double.doubleToLongBits(x);
+		long bits = (Float.floatToIntBits(x) + 0L) << 32;
+		System.out.println("IEEE: " + Long.toBinaryString(bits));
+		System.out.print("Broken: ");
+		long lastShift = 64-SHIFT;
+		for (long shift = 64-SHIFT; shift > 0; shift -= SHIFT) {
+			long temp = bits >>> shift;
+			temp &= MASK;
+			if (temp != 0) lastShift = shift;
+			temp |= OTHER;
+			String piece = Long.toBinaryString(temp);
+			System.out.print(" " + piece);
+		}
+		System.out.println();
+		System.out.print("Bytes: 1B");
+		for (long shift = 64-SHIFT; shift >= lastShift; shift -= SHIFT) {
+			long temp = bits >>> shift;
+			temp &= MASK;
+			temp |= OTHER;
+			if (shift == lastShift) {
+				temp &= ~0x80;
+			} 
+			String piece = Long.toHexString(temp).toUpperCase();
+			System.out.print(" " + piece);
+		}
+		System.out.println();
+	}
+	
+	static int findFirstNonZero(String digits) {
+		for (int i = 0; i < digits.length(); ++i) {
+			if (digits.charAt(i) != '0') return i;
+		}
+		return digits.length();
+	}
+	
+	static String remove(String s, int start, int limit) {
+		return s.substring(0, start) + s.substring(limit);
+	}
+	
+	static String hexByte(int i) {
+		String result = Integer.toHexString(i).toUpperCase();
+		if (result.length() == 1) result = '0' + result;
+		return result;
+	}
+	
+	// dumb implementation
+	static String convertToBCD(String digits) {
+		
+		// fix negatives, remove leading zeros, get decimal
+		
+		int[] pairs = new int[120];
+		boolean negative = false;
+		boolean removedNegative = false;
+		boolean removedDecimal = false;
+		int leadZeros = 0;
+		int trailZeros = 0;
+
+		if (digits.charAt(0) == '-') {
+			negative = true;
+			removedNegative = true;
+			digits = remove(digits, 0, 1);
+		}
+		while (digits.length() > 0 && digits.charAt(0) == '0') {
+			digits = remove(digits, 0, 1);
+			leadZeros++;
+		}
+		int decimalOffset = digits.indexOf('.');
+		if (decimalOffset < 0) {
+			decimalOffset = digits.length();
+		} else {
+			digits = digits = remove(digits, decimalOffset, decimalOffset+1);
+			removedDecimal = true;
+		}
+		
+		// remove trailing zeros
+		while (digits.length() > 0 && digits.charAt(digits.length() - 1) == '0') {
+			digits = remove(digits, digits.length() - 1, digits.length());
+			trailZeros++;
+		}
+		
+		// make the digits even (in non-fraction part)
+		if (((decimalOffset) & 1) != 0) {
+			digits = '0' + digits; // make even
+			++decimalOffset;
+			leadZeros--;
+		}
+		if (((digits.length()) & 1) != 0) {
+			digits = digits + '0'; // make even
+			trailZeros--;
+		}
+		
+		// handle 0
+		if (digits.length() == 0) {
+			negative = false;
+			digits = "00";
+			leadZeros -= 2;
+		} 
+		
+		// store exponent
+		int exp = decimalOffset/2;
+		if (!negative) exp |= 0x80;
+		else exp = (~exp) & 0x7F;
+		String result = hexByte(exp);
+		for (int i = 0; i < digits.length(); i += 2) {
+			int base100 = ((digits.charAt(i) - '0')*10 + (digits.charAt(i+1) - '0')) << 1;
+			if (i < digits.length() - 2) base100 |= 0x1; // mark all but last
+			if (negative) base100 = (~base100) & 0xFF;
+			result += "." + hexByte(base100);
+		}
+		
+		/**
+		// add a secondary weight
+		// assume we don't care about more than too many leads/trails
+		leadZeros += 2; // make non-negative; might have padded by 2, for 0
+		trailZeros += 2; // make non-negative; might have padded by 1
+		if (leadZeros > 7) leadZeros = 7;
+		if (trailZeros > 7) trailZeros = 7;
+		int secondary = (removedNegative ? 0 : 0x80) // only for zero
+						| (leadZeros << 4)
+						| (removedDecimal ? 0 : 0x08)
+						| (trailZeros);
+		result += ";" + hexByte(secondary);
+		*/
+		
+		return result;
+	}
+	
+	static int stamp = 0;
+	static void add(Map m, String s) {
+		add2(m, s);
+		add2(m, "0" + s);
+		if (s.indexOf('.') >= 0) {
+			add2(m, s + "0");
+			add2(m, "0" + s + "0");
+		} else {
+			add2(m, s + ".");
+			add2(m, "0" + s + ".");
+			add2(m, s + ".0");
+			add2(m, "0" + s + ".0");
+		}
+	}
+	
+	static void add2(Map m, String s) {
+		add3(m,s);
+		if (s.indexOf('-') < 0) add3(m, "-" + s);
+	}
+
+	private static void add3(Map m, String s) {
+		String base = convertToBCD(s);
+		base += "|" + Math.random() + stamp++; // just something for uniqueness
+		m.put(base, s);
+	}
+	
+	static boolean SHOW_ALL = true;
+	
+	static NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH);
+	static {
+		nf.setGroupingUsed(false);
+	}
+	
+	static String cleanToString(double d) {
+		return nf.format(d);
+	}
+	
+	static void testConvertToBDD() {
+		System.out.println("Starting Test");
+		double[] testList = {0, 0.00000001, 0.001, 5, 10, 50, 100, 1000, 100000000};
+		Map m = new TreeMap();
+		
+		for (int i = 0; i < testList.length; ++i) {
+			double d = testList[i];
+			add(m, cleanToString(d));
+			add(m, cleanToString(d + 0.1));
+			add(m, cleanToString(d + 1));
+			add(m, cleanToString(d + 1.1));
+			if (d > 0.1) add(m, cleanToString(d - 0.1));
+			if (d > 1.0) add(m, cleanToString(d - 1.0));
+			if (d > 1.1) add(m, cleanToString(d - 1.1));
+		}
+		Iterator it = m.keySet().iterator();
+		String lastKey = "";
+		String lastValue = "";
+		boolean lastPrinted = false;
+		double lastNumber = Double.NEGATIVE_INFINITY;
+		int errorCount = 0;
+		while (it.hasNext()) {
+			String key = (String) it.next();
+			String value = (String) m.get(key);
+			key = key.substring(0, key.indexOf('|')); // remove stamp
+			double number = Double.parseDouble(value);
+			if (lastNumber > number) {
+				if (!lastPrinted) System.out.println("\t" + lastValue + "\t" + lastKey);
+				System.out.println("Fail:\t" + value + "\t" + key);
+				lastPrinted = true;
+				errorCount++;
+			} else if (SHOW_ALL) {
+				System.out.println("\t" + value + "\t" + key);
+				lastPrinted = true;
+			}
+			lastNumber = number;
+			lastKey = key;
+			lastValue = value;
+		}
+		System.out.println("Done Test, " + errorCount + " Errors");
+	}
+	
+	static void testFormatHack() {
+		String[] testCurrencies = {"USD","GBP","JPY","EUR"};
+		Locale[] testLocales = NumberFormat.getAvailableLocales();
+		for (int i = 0; i < testLocales.length; ++i) {
+			// since none of this should vary by country, we'll just do by language
+			if (!testLocales[i].getCountry().equals("")) continue;
+			System.out.println(testLocales[i].getDisplayName());
+			for (int j = 0; j < testCurrencies.length; ++j) {
+				NumberFormat nf = getCurrencyFormat(
+					Currency.getInstance(testCurrencies[j]), testLocales[i], true);
+				String newVersion = nf.format(1234.567);
+				System.out.print("\t" + newVersion);
+				nf = getCurrencyFormat(
+					Currency.getInstance(testCurrencies[j]), testLocales[i], false);
+				String oldVersion = nf.format(1234.567);
+				if (!oldVersion.equals(newVersion)) {
+					System.out.print(" (" + oldVersion + ")");
+				}
+			}
+			System.out.println();
+		}
+	}
+	
+	static NumberFormat getCurrencyFormat(Currency currency, Locale displayLocale, boolean ICU26) {
+		// code for ICU 2.6
+		if (ICU26) {
+			NumberFormat result = NumberFormat.getCurrencyInstance();
+			result.setCurrency(currency);
+			return result;
+		}
+
+		// ugly work-around for 2.4
+		DecimalFormat result = (DecimalFormat)NumberFormat.getCurrencyInstance(displayLocale);
+		HackCurrencyInfo hack = (HackCurrencyInfo)(hackData.get(currency.getCurrencyCode()));
+		result.setMinimumFractionDigits(hack.decimals);
+		result.setMaximumFractionDigits(hack.decimals);
+		result.setRoundingIncrement(hack.rounding);
+		DecimalFormatSymbols symbols = result.getDecimalFormatSymbols();
+		symbols.setCurrencySymbol(hack.symbol);
+		result.setDecimalFormatSymbols(symbols);
+		return result;
+	}
+	
+	static Map hackData = new HashMap();
+	static class HackCurrencyInfo {
+		int decimals;
+		double rounding;
+		String symbol;
+		HackCurrencyInfo(int decimals, double rounding, String symbol) {
+			this.decimals = decimals;
+			this.rounding = rounding;
+			this.symbol = symbol;
+		}
+	}
+	static {
+		hackData.put("USD", new HackCurrencyInfo(2, 0.01, "$"));
+		hackData.put("GBP", new HackCurrencyInfo(2, 0.01, "\u00a3"));
+		hackData.put("JPY", new HackCurrencyInfo(0, 1, "\u00a5"));
+		hackData.put("EUR", new HackCurrencyInfo(2, 0.01, "\u20AC"));
+	}
     /*
 
         System.out.println("START");
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java
index 5540a2c2c0c..aa1d21b5aab 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2003/05/02 21:46:33 $
-* $Revision: 1.26 $
+* $Date: 2003/07/07 15:58:56 $
+* $Revision: 1.27 $
 *
 *******************************************************************************
 */
@@ -35,7 +35,7 @@ public final class UCD implements UCD_Types {
     /**
      * Used for the default version.
      */
-    public static final String latestVersion = "4.0.0";
+    public static final String latestVersion = "4.0.1";
 
     /**
      * Create singleton instance for default (latest) version
diff --git a/tools/unicodetools/com/ibm/text/utility/Main.java b/tools/unicodetools/com/ibm/text/utility/Main.java
index 8ef1f6b4a6f..1f4637987e6 100644
--- a/tools/unicodetools/com/ibm/text/utility/Main.java
+++ b/tools/unicodetools/com/ibm/text/utility/Main.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Main.java,v $
-* $Date: 2002/06/22 21:01:25 $
-* $Revision: 1.1 $
+* $Date: 2003/07/07 15:58:56 $
+* $Revision: 1.2 $
 *
 *******************************************************************************
 */
@@ -48,6 +48,12 @@ public class Main {
     }
     
     static public void main (String[] args) {
+		for (int i = 0; i < args.length; ++i) {
+ 			String arg = args[i];
+ 			if (arg.equalsIgnoreCase("probe")) Probe.test("da");
+		}
+		if (true) return;
+
         for (CollatorStyle i = CollatorStyle.ZEROED; i != null; i = i.next()) {
             System.out.println(i);
         }
diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java
index 8e916d5cb83..ba4205fa35a 100644
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2003/05/02 21:46:33 $
-* $Revision: 1.33 $
+* $Date: 2003/07/07 15:58:56 $
+* $Revision: 1.34 $
 *
 *******************************************************************************
 */
@@ -643,6 +643,7 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
 
     private static final String[] searchPath = {
         "EXTRAS",
+        "4.0.1",
         "4.0.0",
         "3.2.0",
         "3.1.1",