diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java b/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java index 4cf9d9f8ad4..d2c4685fb6a 100755 --- a/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java +++ b/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java,v $ - * $Date: 2001/11/03 05:44:32 $ - * $Revision: 1.4 $ + * $Date: 2001/11/13 00:30:14 $ + * $Revision: 1.5 $ * ***************************************************************************************** */ @@ -21,6 +21,7 @@ import java.io.*; public class WriteCharts { public static void main(String[] args) throws IOException { + testSet(); String testSet = ""; if (args.length == 0) args = all; for (int i = 0; i < args.length; ++i) { @@ -34,6 +35,16 @@ public class WriteCharts { } } + public static void testSet() { + UnicodeSet s = new UnicodeSet("[[\u0020-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]"); + int count = s.getRangeCount(); + for (int i = 0; i < count; ++i) { + int start = s.getRangeStart(i); + int end = s.getRangeEnd(i); + System.out.println(Integer.toString(start,16) + ".." + Integer.toString(end,16)); + } + } + static final String[] all = { "Cyrillic-Latin", "Greek-Latin", "el-Latin", @@ -88,6 +99,12 @@ public class WriteCharts { UnicodeSet privateUse = new UnicodeSet("[:private use:]"); Map map = new TreeMap(); + + UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet); + targetSetPlusAnyways.addAll(okAnyway); + + UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet); + sourceSetPlusAnyways.addAll(okAnyway); int count = sourceSet.getRangeCount(); for (int i = 0; i < count; ++i) { @@ -97,14 +114,14 @@ public class WriteCharts { String ss = UTF16.valueOf(j); String ts = t.transliterate(ss); char group = 0; - if (!isIn(ts, targetSet)) { + if (!containsAll(targetSetPlusAnyways, ts)) { group |= 1; } if (UTF16.countCodePoint(ts) == 1) { leftOverSet.remove(UTF16.charAt(ts,0)); } String rt = inverse.transliterate(ts); - if (!isIn(rt, sourceSet)) { + if (!containsAll(sourceSetPlusAnyways, rt)) { group |= 2; } else if (!ss.equals(rt)) { group |= 4; @@ -114,10 +131,11 @@ public class WriteCharts { group |= 16; } - map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss, + map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + + "\u0000" + ss, "" + ss + "
" + hex.transliterate(ss) + "" - + ts + "
" + hex.transliterate(ts) + "" - + rt + "
" + hex.transliterate(rt) + "" ); + + ts + "
" + hex.transliterate(ts) + "" + + rt + "
" + hex.transliterate(rt) + "" ); } } @@ -128,14 +146,14 @@ public class WriteCharts { int end = leftOverSet.getRangeEnd(i); for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) { String ts = UTF16.valueOf(j); - String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0); - if (!decomp.equals(ts)) continue; + // String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0); + // if (!decomp.equals(ts)) continue; String rt = inverse.transliterate(ts); String flag = ""; char group = 0x80; - if (!isIn(rt, sourceSet)) { + if (!containsAll(sourceSetPlusAnyways, rt)) { group |= 8; } if (containsSome(privateUse, rt)) { @@ -204,6 +222,7 @@ public class WriteCharts { static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]"); + /* // tests whether a string is in a set. Also checks for Common and Inherited public static boolean isIn(String s, UnicodeSet set) { int cp; @@ -215,8 +234,9 @@ public class WriteCharts { } return true; } + */ - // tests whether a string is in a set. Also checks for Common and Inherited + // tests whether a string is in a set. public static boolean containsSome(UnicodeSet set, String s) { int cp; for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) { @@ -226,6 +246,16 @@ public class WriteCharts { return false; } + // tests whether a string is in a set. + public static boolean containsAll(UnicodeSet set, String s) { + int cp; + for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) { + cp = UTF16.charAt(s, i); + if (!set.contains(cp)) return false; + } + return true; + } + } \ No newline at end of file diff --git a/icu4j/src/com/ibm/test/translit/WriteCharts.java b/icu4j/src/com/ibm/test/translit/WriteCharts.java index a2089be54b1..4e21ba43568 100755 --- a/icu4j/src/com/ibm/test/translit/WriteCharts.java +++ b/icu4j/src/com/ibm/test/translit/WriteCharts.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/WriteCharts.java,v $ - * $Date: 2001/11/03 05:44:32 $ - * $Revision: 1.4 $ + * $Date: 2001/11/13 00:30:14 $ + * $Revision: 1.5 $ * ***************************************************************************************** */ @@ -21,6 +21,7 @@ import java.io.*; public class WriteCharts { public static void main(String[] args) throws IOException { + testSet(); String testSet = ""; if (args.length == 0) args = all; for (int i = 0; i < args.length; ++i) { @@ -34,6 +35,16 @@ public class WriteCharts { } } + public static void testSet() { + UnicodeSet s = new UnicodeSet("[[\u0020-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]"); + int count = s.getRangeCount(); + for (int i = 0; i < count; ++i) { + int start = s.getRangeStart(i); + int end = s.getRangeEnd(i); + System.out.println(Integer.toString(start,16) + ".." + Integer.toString(end,16)); + } + } + static final String[] all = { "Cyrillic-Latin", "Greek-Latin", "el-Latin", @@ -88,6 +99,12 @@ public class WriteCharts { UnicodeSet privateUse = new UnicodeSet("[:private use:]"); Map map = new TreeMap(); + + UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet); + targetSetPlusAnyways.addAll(okAnyway); + + UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet); + sourceSetPlusAnyways.addAll(okAnyway); int count = sourceSet.getRangeCount(); for (int i = 0; i < count; ++i) { @@ -97,14 +114,14 @@ public class WriteCharts { String ss = UTF16.valueOf(j); String ts = t.transliterate(ss); char group = 0; - if (!isIn(ts, targetSet)) { + if (!containsAll(targetSetPlusAnyways, ts)) { group |= 1; } if (UTF16.countCodePoint(ts) == 1) { leftOverSet.remove(UTF16.charAt(ts,0)); } String rt = inverse.transliterate(ts); - if (!isIn(rt, sourceSet)) { + if (!containsAll(sourceSetPlusAnyways, rt)) { group |= 2; } else if (!ss.equals(rt)) { group |= 4; @@ -114,10 +131,11 @@ public class WriteCharts { group |= 16; } - map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss, + map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + + "\u0000" + ss, "" + ss + "
" + hex.transliterate(ss) + "" - + ts + "
" + hex.transliterate(ts) + "" - + rt + "
" + hex.transliterate(rt) + "" ); + + ts + "
" + hex.transliterate(ts) + "" + + rt + "
" + hex.transliterate(rt) + "" ); } } @@ -128,14 +146,14 @@ public class WriteCharts { int end = leftOverSet.getRangeEnd(i); for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) { String ts = UTF16.valueOf(j); - String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0); - if (!decomp.equals(ts)) continue; + // String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0); + // if (!decomp.equals(ts)) continue; String rt = inverse.transliterate(ts); String flag = ""; char group = 0x80; - if (!isIn(rt, sourceSet)) { + if (!containsAll(sourceSetPlusAnyways, rt)) { group |= 8; } if (containsSome(privateUse, rt)) { @@ -204,6 +222,7 @@ public class WriteCharts { static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]"); + /* // tests whether a string is in a set. Also checks for Common and Inherited public static boolean isIn(String s, UnicodeSet set) { int cp; @@ -215,8 +234,9 @@ public class WriteCharts { } return true; } + */ - // tests whether a string is in a set. Also checks for Common and Inherited + // tests whether a string is in a set. public static boolean containsSome(UnicodeSet set, String s) { int cp; for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) { @@ -226,6 +246,16 @@ public class WriteCharts { return false; } + // tests whether a string is in a set. + public static boolean containsAll(UnicodeSet set, String s) { + int cp; + for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) { + cp = UTF16.charAt(s, i); + if (!set.contains(cp)) return false; + } + return true; + } + } \ No newline at end of file