ICU-1422 fixed chart to reveal halfwidth/fullwidth forms

X-SVN-Rev: 6808
2025-04-07 06:25:30 +00:00 · 2001-11-13 00:30:14 +00:00 · 2001-11-13 00:30:14 +00:00 · e6733853a0
commit e6733853a0
parent 32e60b2248
2 changed files with 82 additions and 22 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java,v $
- * $Date: 2001/11/03 05:44:32 $
- * $Revision: 1.4 $
+ * $Date: 2001/11/13 00:30:14 $
+ * $Revision: 1.5 $
 *
 *****************************************************************************************
 */
@ -21,6 +21,7 @@ import java.io.*;

 public class WriteCharts {
    public static void main(String[] args) throws IOException {
+        testSet();
        String testSet = "";
        if (args.length == 0) args = all;
        for (int i = 0; i < args.length; ++i) {
@ -34,6 +35,16 @@ public class WriteCharts {
        }
    }
    
+    // Debug aid: prints every code point range of a sample set as "start..end" in hex.
+    public static void testSet() {
+        // The leading space uses a doubled backslash so UnicodeSet receives an escape; compiled to a raw space it would be skipped as pattern whitespace.
+        UnicodeSet s = new UnicodeSet("[[\\u0020-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
+        int count = s.getRangeCount();
+        for (int i = 0; i < count; ++i) {
+            System.out.println(Integer.toString(s.getRangeStart(i),16) + ".." + Integer.toString(s.getRangeEnd(i),16));
+        }
+    }
+    
    static final String[] all = {
        "Cyrillic-Latin", "Greek-Latin", 
        "el-Latin",
@ -88,6 +99,12 @@ public class WriteCharts {
        UnicodeSet privateUse = new UnicodeSet("[:private use:]");
            
        Map map = new TreeMap();
+        
+        UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
+        targetSetPlusAnyways.addAll(okAnyway);
+        
+        UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
+        sourceSetPlusAnyways.addAll(okAnyway);
                
        int count = sourceSet.getRangeCount();
        for (int i = 0; i < count; ++i) {
@ -97,14 +114,14 @@ public class WriteCharts {
                String ss = UTF16.valueOf(j);
                String ts = t.transliterate(ss);
                char group = 0;
-                if (!isIn(ts, targetSet)) {
+                if (!containsAll(targetSetPlusAnyways, ts)) {
                    group |= 1;
                }
                if (UTF16.countCodePoint(ts) == 1) {
                    leftOverSet.remove(UTF16.charAt(ts,0));
                }
                String rt = inverse.transliterate(ts);
-                if (!isIn(rt, sourceSet)) {
+                if (!containsAll(sourceSetPlusAnyways, rt)) {
                    group |= 2;
                } else if (!ss.equals(rt)) {
                    group |= 4;
@ -114,10 +131,11 @@ public class WriteCharts {
                    group |= 16;
                }
                    
-                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss, 
+                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0))
+                        + "\u0000" + ss, 
                    "<tr><td>" + ss + "<br><tt>" + hex.transliterate(ss) + "</tt></td><td>"
-                    + ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
-                    + rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
+                        + ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
+                        + rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
            }
        }
        
@ -128,14 +146,14 @@ public class WriteCharts {
            int end = leftOverSet.getRangeEnd(i);
            for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) {
                String ts = UTF16.valueOf(j);
-                String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
-                if (!decomp.equals(ts)) continue;
+                // String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
+                // if (!decomp.equals(ts)) continue;
                
                String rt = inverse.transliterate(ts);
                String flag = "";
                char group = 0x80;
                    
-                if (!isIn(rt, sourceSet)) {
+                if (!containsAll(sourceSetPlusAnyways, rt)) {
                    group |= 8;
                }
                if (containsSome(privateUse, rt)) {
@ -204,6 +222,7 @@ public class WriteCharts {
    
    static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
    
+    /*
    // tests whether a string is in a set. Also checks for Common and Inherited
    public static boolean isIn(String s, UnicodeSet set) {
        int cp;
@ -215,8 +234,9 @@ public class WriteCharts {
        }
        return true;
    }
+    */
    
-    // tests whether a string is in a set. Also checks for Common and Inherited
+    // tests whether any code point of a string is in a set.
    public static boolean containsSome(UnicodeSet set, String s) {
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
@ -226,6 +246,16 @@ public class WriteCharts {
        return false;
    }
    
+    // True only if every code point of s is in set (vacuously true for an empty s).
+    public static boolean containsAll(UnicodeSet set, String s) {
+        for (int i = 0; i < s.length(); ) {
+            int cp = UTF16.charAt(s, i);
+            if (!set.contains(cp)) return false;
+            i += UTF16.getCharCount(cp); // step by the code point's UTF-16 length, not the index's
+        }
+        return true;
+    }
+    
    
 }
  
--- a/icu4j/src/com/ibm/test/translit/WriteCharts.java
+++ b/icu4j/src/com/ibm/test/translit/WriteCharts.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/WriteCharts.java,v $
- * $Date: 2001/11/03 05:44:32 $
- * $Revision: 1.4 $
+ * $Date: 2001/11/13 00:30:14 $
+ * $Revision: 1.5 $
 *
 *****************************************************************************************
 */
@ -21,6 +21,7 @@ import java.io.*;

 public class WriteCharts {
    public static void main(String[] args) throws IOException {
+        testSet();
        String testSet = "";
        if (args.length == 0) args = all;
        for (int i = 0; i < args.length; ++i) {
@ -34,6 +35,16 @@ public class WriteCharts {
        }
    }
    
+    // Debug aid: prints every code point range of a sample set as "start..end" in hex.
+    public static void testSet() {
+        // The leading space uses a doubled backslash so UnicodeSet receives an escape; compiled to a raw space it would be skipped as pattern whitespace.
+        UnicodeSet s = new UnicodeSet("[[\\u0020-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
+        int count = s.getRangeCount();
+        for (int i = 0; i < count; ++i) {
+            System.out.println(Integer.toString(s.getRangeStart(i),16) + ".." + Integer.toString(s.getRangeEnd(i),16));
+        }
+    }
+    
    static final String[] all = {
        "Cyrillic-Latin", "Greek-Latin", 
        "el-Latin",
@ -88,6 +99,12 @@ public class WriteCharts {
        UnicodeSet privateUse = new UnicodeSet("[:private use:]");
            
        Map map = new TreeMap();
+        
+        UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
+        targetSetPlusAnyways.addAll(okAnyway);
+        
+        UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
+        sourceSetPlusAnyways.addAll(okAnyway);
                
        int count = sourceSet.getRangeCount();
        for (int i = 0; i < count; ++i) {
@ -97,14 +114,14 @@ public class WriteCharts {
                String ss = UTF16.valueOf(j);
                String ts = t.transliterate(ss);
                char group = 0;
-                if (!isIn(ts, targetSet)) {
+                if (!containsAll(targetSetPlusAnyways, ts)) {
                    group |= 1;
                }
                if (UTF16.countCodePoint(ts) == 1) {
                    leftOverSet.remove(UTF16.charAt(ts,0));
                }
                String rt = inverse.transliterate(ts);
-                if (!isIn(rt, sourceSet)) {
+                if (!containsAll(sourceSetPlusAnyways, rt)) {
                    group |= 2;
                } else if (!ss.equals(rt)) {
                    group |= 4;
@ -114,10 +131,11 @@ public class WriteCharts {
                    group |= 16;
                }
                    
-                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss, 
+                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0))
+                        + "\u0000" + ss, 
                    "<tr><td>" + ss + "<br><tt>" + hex.transliterate(ss) + "</tt></td><td>"
-                    + ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
-                    + rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
+                        + ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
+                        + rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
            }
        }
        
@ -128,14 +146,14 @@ public class WriteCharts {
            int end = leftOverSet.getRangeEnd(i);
            for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) {
                String ts = UTF16.valueOf(j);
-                String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
-                if (!decomp.equals(ts)) continue;
+                // String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
+                // if (!decomp.equals(ts)) continue;
                
                String rt = inverse.transliterate(ts);
                String flag = "";
                char group = 0x80;
                    
-                if (!isIn(rt, sourceSet)) {
+                if (!containsAll(sourceSetPlusAnyways, rt)) {
                    group |= 8;
                }
                if (containsSome(privateUse, rt)) {
@ -204,6 +222,7 @@ public class WriteCharts {
    
    static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
    
+    /*
    // tests whether a string is in a set. Also checks for Common and Inherited
    public static boolean isIn(String s, UnicodeSet set) {
        int cp;
@ -215,8 +234,9 @@ public class WriteCharts {
        }
        return true;
    }
+    */
    
-    // tests whether a string is in a set. Also checks for Common and Inherited
+    // tests whether any code point of a string is in a set.
    public static boolean containsSome(UnicodeSet set, String s) {
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
@ -226,6 +246,16 @@ public class WriteCharts {
        return false;
    }
    
+    // True only if every code point of s is in set (vacuously true for an empty s).
+    public static boolean containsAll(UnicodeSet set, String s) {
+        for (int i = 0; i < s.length(); ) {
+            int cp = UTF16.charAt(s, i);
+            if (!set.contains(cp)) return false;
+            i += UTF16.getCharCount(cp); // step by the code point's UTF-16 length, not the index's
+        }
+        return true;
+    }
+    
    
 }