mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-0 ; misc updates
X-SVN-Rev: 12601
This commit is contained in:
parent
9ddbcf3ebe
commit
8864f1fd40
6 changed files with 386 additions and 26 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2003/07/07 15:58:57 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -79,7 +79,8 @@ public class Main {
|
|||
else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest();
|
||||
else if (arg.equalsIgnoreCase("short")) shortPrint = true;
|
||||
|
||||
else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
|
||||
else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
|
||||
else if (arg.equalsIgnoreCase("probe")) Probe.test();
|
||||
|
||||
|
||||
else {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
|
||||
* $Date: 2003/02/25 23:38:22 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2003/07/07 15:58:57 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -44,6 +44,10 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
|
||||
log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS);
|
||||
log.println("<body>");
|
||||
log.println("<head>");
|
||||
log.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
log.println("<title>Unihan check</title>");
|
||||
log.println("</head>");
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
|
||||
|
||||
|
@ -244,6 +248,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
static final int CHINESE = 2, JAPANESE = 1, DEFINITION = 0;
|
||||
|
||||
static final boolean DO_SIMPLE = true;
|
||||
static final boolean SKIP_OVERRIDES = true;
|
||||
|
||||
public static void main(int typeIn) {
|
||||
type = typeIn;
|
||||
|
@ -277,16 +282,18 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS);
|
||||
log.print('\uFEFF');
|
||||
|
||||
log.println();
|
||||
log.println("@*Override Data");
|
||||
log.println();
|
||||
readOverrides(type);
|
||||
|
||||
log.println();
|
||||
log.println("@*DICT Data");
|
||||
log.println();
|
||||
readCDICTDefinitions(type);
|
||||
|
||||
if (!SKIP_OVERRIDES) {
|
||||
log.println();
|
||||
log.println("@*Override Data");
|
||||
log.println();
|
||||
readOverrides(type);
|
||||
|
||||
log.println();
|
||||
log.println("@*DICT Data");
|
||||
log.println();
|
||||
readCDICTDefinitions(type);
|
||||
}
|
||||
|
||||
log.println();
|
||||
log.println("@Unihan Data");
|
||||
log.println();
|
||||
|
@ -1151,7 +1158,8 @@ U+7878
|
|||
int cp = line.charAt(i);
|
||||
int script = Default.ucd.getScript(cp);
|
||||
if (script != HAN_SCRIPT) {
|
||||
if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT) {
|
||||
if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT
|
||||
&& cp != 0x30FB && cp != 0x30FC) {
|
||||
System.out.println("Huh: " + Default.ucd.getCodeAndName(cp));
|
||||
}
|
||||
continue;
|
||||
|
@ -1887,7 +1895,7 @@ Bad pinyin data: \u4E7F ? LE
|
|||
|
||||
if (definition.length() == 0) {
|
||||
Utility.fixDot();
|
||||
System.out.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
|
||||
err.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
|
||||
} else {
|
||||
addCheck(UTF16.valueOf(cp), definition, line);
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
||||
* $Date: 2003/05/02 21:46:33 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2003/07/07 15:58:57 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,12 +17,57 @@ import java.util.*;
|
|||
import java.io.*;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import java.util.regex.*;
|
||||
|
||||
import com.ibm.icu.text.*;
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
public class TestData implements UCD_Types {
|
||||
public static void main (String[] args) throws IOException {
|
||||
|
||||
Default.setUCD();
|
||||
|
||||
UnicodeSet us = getSetForName("LATIN LETTER.*P");
|
||||
Utility.showSetNames("",us,false,Default.ucd);
|
||||
|
||||
us = getSetForName(".*VARIA(TION|NT).*");
|
||||
Utility.showSetNames("",us,false,Default.ucd);
|
||||
|
||||
if (true) return;
|
||||
|
||||
/*showSet();
|
||||
*/
|
||||
String x = "[[[:s:][:p:]&[:ascii:]] | [\\u2190-\\u2BFF] | "
|
||||
+ "[[:s:][:p:]"
|
||||
// + "&[:decompositiontype=none:]"
|
||||
// + "- [:id_continue:]"
|
||||
+ "-[:sk:]"
|
||||
+ "]]";
|
||||
PrintWriter pw = Utility.openPrintWriter("Syntax.txt", Utility.UTF8_WINDOWS);
|
||||
showSet(pw, x, false);
|
||||
showSet(pw, "[[\\u2000-\\u205F]-" + x + "]", true);
|
||||
showSet(pw, "[[:whitespace:]&[:decompositiontype=none:]]", false);
|
||||
pw.close();
|
||||
|
||||
if (true) return;
|
||||
|
||||
testFormatHack();
|
||||
if (true) return;
|
||||
testConvertToBDD();
|
||||
if (true) return;
|
||||
|
||||
System.out.println("Shift: " + SHIFT + ", Mask: " + Long.toHexString(MASK));
|
||||
showNumber(-5);
|
||||
showNumber(0);
|
||||
showNumber(5);
|
||||
showNumber(500);
|
||||
showNumber(5000000);
|
||||
if (true) return;
|
||||
|
||||
String script = args[0];
|
||||
PrintWriter log = Utility.openPrintWriter("TranslitSkeleton_" + script + ".txt", Utility.UTF8_WINDOWS);
|
||||
try {
|
||||
|
@ -39,6 +84,305 @@ public class TestData implements UCD_Types {
|
|||
log.close();
|
||||
}
|
||||
}
|
||||
|
||||
static private UnicodeSet getSetForName(String regexPattern) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
Pattern p = Pattern.compile(regexPattern);
|
||||
Matcher m = p.matcher("");
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
Utility.dot(i);
|
||||
if (!Default.ucd.isAssigned(i)) continue;
|
||||
byte cat = Default.ucd.getCategory(i);
|
||||
if (cat == PRIVATE_USE) continue;
|
||||
m.reset(Default.ucd.getName(i));
|
||||
if (m.matches()) {
|
||||
result.add(i);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void showSet(PrintWriter pw, String x, boolean separateLines) {
|
||||
pw.println("****************************");
|
||||
System.out.println(x);
|
||||
UnicodeSet ss = new UnicodeSet(x);
|
||||
pw.println(x);
|
||||
Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd);
|
||||
pw.println("****************************");
|
||||
}
|
||||
|
||||
static int SHIFT = 6;
|
||||
static int MASK = (1<<6) - 1;
|
||||
static int OTHER = 0xFF & ~MASK;
|
||||
|
||||
static void showNumber(float x) {
|
||||
System.out.println("Number: " + x);
|
||||
//long bits = Double.doubleToLongBits(x);
|
||||
long bits = (Float.floatToIntBits(x) + 0L) << 32;
|
||||
System.out.println("IEEE: " + Long.toBinaryString(bits));
|
||||
System.out.print("Broken: ");
|
||||
long lastShift = 64-SHIFT;
|
||||
for (long shift = 64-SHIFT; shift > 0; shift -= SHIFT) {
|
||||
long temp = bits >>> shift;
|
||||
temp &= MASK;
|
||||
if (temp != 0) lastShift = shift;
|
||||
temp |= OTHER;
|
||||
String piece = Long.toBinaryString(temp);
|
||||
System.out.print(" " + piece);
|
||||
}
|
||||
System.out.println();
|
||||
System.out.print("Bytes: 1B");
|
||||
for (long shift = 64-SHIFT; shift >= lastShift; shift -= SHIFT) {
|
||||
long temp = bits >>> shift;
|
||||
temp &= MASK;
|
||||
temp |= OTHER;
|
||||
if (shift == lastShift) {
|
||||
temp &= ~0x80;
|
||||
}
|
||||
String piece = Long.toHexString(temp).toUpperCase();
|
||||
System.out.print(" " + piece);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
static int findFirstNonZero(String digits) {
|
||||
for (int i = 0; i < digits.length(); ++i) {
|
||||
if (digits.charAt(i) != '0') return i;
|
||||
}
|
||||
return digits.length();
|
||||
}
|
||||
|
||||
static String remove(String s, int start, int limit) {
|
||||
return s.substring(0, start) + s.substring(limit);
|
||||
}
|
||||
|
||||
static String hexByte(int i) {
|
||||
String result = Integer.toHexString(i).toUpperCase();
|
||||
if (result.length() == 1) result = '0' + result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// dumb implementation
|
||||
static String convertToBCD(String digits) {
|
||||
|
||||
// fix negatives, remove leading zeros, get decimal
|
||||
|
||||
int[] pairs = new int[120];
|
||||
boolean negative = false;
|
||||
boolean removedNegative = false;
|
||||
boolean removedDecimal = false;
|
||||
int leadZeros = 0;
|
||||
int trailZeros = 0;
|
||||
|
||||
if (digits.charAt(0) == '-') {
|
||||
negative = true;
|
||||
removedNegative = true;
|
||||
digits = remove(digits, 0, 1);
|
||||
}
|
||||
while (digits.length() > 0 && digits.charAt(0) == '0') {
|
||||
digits = remove(digits, 0, 1);
|
||||
leadZeros++;
|
||||
}
|
||||
int decimalOffset = digits.indexOf('.');
|
||||
if (decimalOffset < 0) {
|
||||
decimalOffset = digits.length();
|
||||
} else {
|
||||
digits = digits = remove(digits, decimalOffset, decimalOffset+1);
|
||||
removedDecimal = true;
|
||||
}
|
||||
|
||||
// remove trailing zeros
|
||||
while (digits.length() > 0 && digits.charAt(digits.length() - 1) == '0') {
|
||||
digits = remove(digits, digits.length() - 1, digits.length());
|
||||
trailZeros++;
|
||||
}
|
||||
|
||||
// make the digits even (in non-fraction part)
|
||||
if (((decimalOffset) & 1) != 0) {
|
||||
digits = '0' + digits; // make even
|
||||
++decimalOffset;
|
||||
leadZeros--;
|
||||
}
|
||||
if (((digits.length()) & 1) != 0) {
|
||||
digits = digits + '0'; // make even
|
||||
trailZeros--;
|
||||
}
|
||||
|
||||
// handle 0
|
||||
if (digits.length() == 0) {
|
||||
negative = false;
|
||||
digits = "00";
|
||||
leadZeros -= 2;
|
||||
}
|
||||
|
||||
// store exponent
|
||||
int exp = decimalOffset/2;
|
||||
if (!negative) exp |= 0x80;
|
||||
else exp = (~exp) & 0x7F;
|
||||
String result = hexByte(exp);
|
||||
for (int i = 0; i < digits.length(); i += 2) {
|
||||
int base100 = ((digits.charAt(i) - '0')*10 + (digits.charAt(i+1) - '0')) << 1;
|
||||
if (i < digits.length() - 2) base100 |= 0x1; // mark all but last
|
||||
if (negative) base100 = (~base100) & 0xFF;
|
||||
result += "." + hexByte(base100);
|
||||
}
|
||||
|
||||
/**
|
||||
// add a secondary weight
|
||||
// assume we don't care about more than too many leads/trails
|
||||
leadZeros += 2; // make non-negative; might have padded by 2, for 0
|
||||
trailZeros += 2; // make non-negative; might have padded by 1
|
||||
if (leadZeros > 7) leadZeros = 7;
|
||||
if (trailZeros > 7) trailZeros = 7;
|
||||
int secondary = (removedNegative ? 0 : 0x80) // only for zero
|
||||
| (leadZeros << 4)
|
||||
| (removedDecimal ? 0 : 0x08)
|
||||
| (trailZeros);
|
||||
result += ";" + hexByte(secondary);
|
||||
*/
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Counter appended (and post-incremented) by add3 when it builds map keys.
static int stamp = 0;
|
||||
static void add(Map m, String s) {
|
||||
add2(m, s);
|
||||
add2(m, "0" + s);
|
||||
if (s.indexOf('.') >= 0) {
|
||||
add2(m, s + "0");
|
||||
add2(m, "0" + s + "0");
|
||||
} else {
|
||||
add2(m, s + ".");
|
||||
add2(m, "0" + s + ".");
|
||||
add2(m, s + ".0");
|
||||
add2(m, "0" + s + ".0");
|
||||
}
|
||||
}
|
||||
|
||||
static void add2(Map m, String s) {
|
||||
add3(m,s);
|
||||
if (s.indexOf('-') < 0) add3(m, "-" + s);
|
||||
}
|
||||
|
||||
private static void add3(Map m, String s) {
|
||||
String base = convertToBCD(s);
|
||||
base += "|" + Math.random() + stamp++; // just something for uniqueness
|
||||
m.put(base, s);
|
||||
}
|
||||
|
||||
// When true, testConvertToBDD prints every entry, not only the failing ones.
static boolean SHOW_ALL = true;
|
||||
|
||||
// English-locale number formatter used by cleanToString.
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH);
|
||||
static {
|
||||
// Disable grouping separators in the formatted output.
nf.setGroupingUsed(false);
|
||||
}
|
||||
|
||||
static String cleanToString(double d) {
|
||||
return nf.format(d);
|
||||
}
|
||||
|
||||
static void testConvertToBDD() {
|
||||
System.out.println("Starting Test");
|
||||
double[] testList = {0, 0.00000001, 0.001, 5, 10, 50, 100, 1000, 100000000};
|
||||
Map m = new TreeMap();
|
||||
|
||||
for (int i = 0; i < testList.length; ++i) {
|
||||
double d = testList[i];
|
||||
add(m, cleanToString(d));
|
||||
add(m, cleanToString(d + 0.1));
|
||||
add(m, cleanToString(d + 1));
|
||||
add(m, cleanToString(d + 1.1));
|
||||
if (d > 0.1) add(m, cleanToString(d - 0.1));
|
||||
if (d > 1.0) add(m, cleanToString(d - 1.0));
|
||||
if (d > 1.1) add(m, cleanToString(d - 1.1));
|
||||
}
|
||||
Iterator it = m.keySet().iterator();
|
||||
String lastKey = "";
|
||||
String lastValue = "";
|
||||
boolean lastPrinted = false;
|
||||
double lastNumber = Double.NEGATIVE_INFINITY;
|
||||
int errorCount = 0;
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
String value = (String) m.get(key);
|
||||
key = key.substring(0, key.indexOf('|')); // remove stamp
|
||||
double number = Double.parseDouble(value);
|
||||
if (lastNumber > number) {
|
||||
if (!lastPrinted) System.out.println("\t" + lastValue + "\t" + lastKey);
|
||||
System.out.println("Fail:\t" + value + "\t" + key);
|
||||
lastPrinted = true;
|
||||
errorCount++;
|
||||
} else if (SHOW_ALL) {
|
||||
System.out.println("\t" + value + "\t" + key);
|
||||
lastPrinted = true;
|
||||
}
|
||||
lastNumber = number;
|
||||
lastKey = key;
|
||||
lastValue = value;
|
||||
}
|
||||
System.out.println("Done Test, " + errorCount + " Errors");
|
||||
}
|
||||
|
||||
static void testFormatHack() {
|
||||
String[] testCurrencies = {"USD","GBP","JPY","EUR"};
|
||||
Locale[] testLocales = NumberFormat.getAvailableLocales();
|
||||
for (int i = 0; i < testLocales.length; ++i) {
|
||||
// since none of this should vary by country, we'll just do by language
|
||||
if (!testLocales[i].getCountry().equals("")) continue;
|
||||
System.out.println(testLocales[i].getDisplayName());
|
||||
for (int j = 0; j < testCurrencies.length; ++j) {
|
||||
NumberFormat nf = getCurrencyFormat(
|
||||
Currency.getInstance(testCurrencies[j]), testLocales[i], true);
|
||||
String newVersion = nf.format(1234.567);
|
||||
System.out.print("\t" + newVersion);
|
||||
nf = getCurrencyFormat(
|
||||
Currency.getInstance(testCurrencies[j]), testLocales[i], false);
|
||||
String oldVersion = nf.format(1234.567);
|
||||
if (!oldVersion.equals(newVersion)) {
|
||||
System.out.print(" (" + oldVersion + ")");
|
||||
}
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
static NumberFormat getCurrencyFormat(Currency currency, Locale displayLocale, boolean ICU26) {
|
||||
// code for ICU 2.6
|
||||
if (ICU26) {
|
||||
NumberFormat result = NumberFormat.getCurrencyInstance();
|
||||
result.setCurrency(currency);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ugly work-around for 2.4
|
||||
DecimalFormat result = (DecimalFormat)NumberFormat.getCurrencyInstance(displayLocale);
|
||||
HackCurrencyInfo hack = (HackCurrencyInfo)(hackData.get(currency.getCurrencyCode()));
|
||||
result.setMinimumFractionDigits(hack.decimals);
|
||||
result.setMaximumFractionDigits(hack.decimals);
|
||||
result.setRoundingIncrement(hack.rounding);
|
||||
DecimalFormatSymbols symbols = result.getDecimalFormatSymbols();
|
||||
symbols.setCurrencySymbol(hack.symbol);
|
||||
result.setDecimalFormatSymbols(symbols);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Currency code (String) -> HackCurrencyInfo; read by getCurrencyFormat on its work-around path.
static Map hackData = new HashMap();
|
||||
static class HackCurrencyInfo {
|
||||
int decimals;
|
||||
double rounding;
|
||||
String symbol;
|
||||
HackCurrencyInfo(int decimals, double rounding, String symbol) {
|
||||
this.decimals = decimals;
|
||||
this.rounding = rounding;
|
||||
this.symbol = symbol;
|
||||
}
|
||||
}
|
||||
// Populate hackData for the four currencies exercised by testFormatHack.
static {
|
||||
// arguments: fraction digits, rounding increment, currency symbol
hackData.put("USD", new HackCurrencyInfo(2, 0.01, "$"));
|
||||
hackData.put("GBP", new HackCurrencyInfo(2, 0.01, "\u00a3"));
|
||||
hackData.put("JPY", new HackCurrencyInfo(0, 1, "\u00a5"));
|
||||
hackData.put("EUR", new HackCurrencyInfo(2, 0.01, "\u20AC"));
|
||||
}
|
||||
/*
|
||||
|
||||
System.out.println("START");
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2003/05/02 21:46:33 $
|
||||
* $Revision: 1.26 $
|
||||
* $Date: 2003/07/07 15:58:56 $
|
||||
* $Revision: 1.27 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -35,7 +35,7 @@ public final class UCD implements UCD_Types {
|
|||
/**
|
||||
* Used for the default version.
|
||||
*/
|
||||
public static final String latestVersion = "4.0.0";
|
||||
public static final String latestVersion = "4.0.1";
|
||||
|
||||
/**
|
||||
* Create singleton instance for default (latest) version
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Main.java,v $
|
||||
* $Date: 2002/06/22 21:01:25 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/07/07 15:58:56 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -48,6 +48,12 @@ public class Main {
|
|||
}
|
||||
|
||||
static public void main (String[] args) {
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
String arg = args[i];
|
||||
if (arg.equalsIgnoreCase("probe")) Probe.test("da");
|
||||
}
|
||||
if (true) return;
|
||||
|
||||
for (CollatorStyle i = CollatorStyle.ZEROED; i != null; i = i.next()) {
|
||||
System.out.println(i);
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2003/05/02 21:46:33 $
|
||||
* $Revision: 1.33 $
|
||||
* $Date: 2003/07/07 15:58:56 $
|
||||
* $Revision: 1.34 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -643,6 +643,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
|
||||
private static final String[] searchPath = {
|
||||
"EXTRAS",
|
||||
"4.0.1",
|
||||
"4.0.0",
|
||||
"3.2.0",
|
||||
"3.1.1",
|
||||
|
|
Loading…
Add table
Reference in a new issue