mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-1422 fixed chart to reveal half-width/fullwidth forms
X-SVN-Rev: 6808
This commit is contained in:
parent
32e60b2248
commit
e6733853a0
2 changed files with 82 additions and 22 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java,v $
|
||||
* $Date: 2001/11/03 05:44:32 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2001/11/13 00:30:14 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -21,6 +21,7 @@ import java.io.*;
|
|||
|
||||
public class WriteCharts {
|
||||
public static void main(String[] args) throws IOException {
|
||||
testSet();
|
||||
String testSet = "";
|
||||
if (args.length == 0) args = all;
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
|
@ -34,6 +35,16 @@ public class WriteCharts {
|
|||
}
|
||||
}
|
||||
|
||||
public static void testSet() {
|
||||
UnicodeSet s = new UnicodeSet("[[\u0020-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
|
||||
int count = s.getRangeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
int start = s.getRangeStart(i);
|
||||
int end = s.getRangeEnd(i);
|
||||
System.out.println(Integer.toString(start,16) + ".." + Integer.toString(end,16));
|
||||
}
|
||||
}
|
||||
|
||||
static final String[] all = {
|
||||
"Cyrillic-Latin", "Greek-Latin",
|
||||
"el-Latin",
|
||||
|
@ -88,6 +99,12 @@ public class WriteCharts {
|
|||
UnicodeSet privateUse = new UnicodeSet("[:private use:]");
|
||||
|
||||
Map map = new TreeMap();
|
||||
|
||||
UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
|
||||
targetSetPlusAnyways.addAll(okAnyway);
|
||||
|
||||
UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
|
||||
sourceSetPlusAnyways.addAll(okAnyway);
|
||||
|
||||
int count = sourceSet.getRangeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
|
@ -97,14 +114,14 @@ public class WriteCharts {
|
|||
String ss = UTF16.valueOf(j);
|
||||
String ts = t.transliterate(ss);
|
||||
char group = 0;
|
||||
if (!isIn(ts, targetSet)) {
|
||||
if (!containsAll(targetSetPlusAnyways, ts)) {
|
||||
group |= 1;
|
||||
}
|
||||
if (UTF16.countCodePoint(ts) == 1) {
|
||||
leftOverSet.remove(UTF16.charAt(ts,0));
|
||||
}
|
||||
String rt = inverse.transliterate(ts);
|
||||
if (!isIn(rt, sourceSet)) {
|
||||
if (!containsAll(sourceSetPlusAnyways, rt)) {
|
||||
group |= 2;
|
||||
} else if (!ss.equals(rt)) {
|
||||
group |= 4;
|
||||
|
@ -114,10 +131,11 @@ public class WriteCharts {
|
|||
group |= 16;
|
||||
}
|
||||
|
||||
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss,
|
||||
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0))
|
||||
+ "\u0000" + ss,
|
||||
"<tr><td>" + ss + "<br><tt>" + hex.transliterate(ss) + "</tt></td><td>"
|
||||
+ ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
|
||||
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
|
||||
+ ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
|
||||
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -128,14 +146,14 @@ public class WriteCharts {
|
|||
int end = leftOverSet.getRangeEnd(i);
|
||||
for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) {
|
||||
String ts = UTF16.valueOf(j);
|
||||
String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
|
||||
if (!decomp.equals(ts)) continue;
|
||||
// String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
|
||||
// if (!decomp.equals(ts)) continue;
|
||||
|
||||
String rt = inverse.transliterate(ts);
|
||||
String flag = "";
|
||||
char group = 0x80;
|
||||
|
||||
if (!isIn(rt, sourceSet)) {
|
||||
if (!containsAll(sourceSetPlusAnyways, rt)) {
|
||||
group |= 8;
|
||||
}
|
||||
if (containsSome(privateUse, rt)) {
|
||||
|
@ -204,6 +222,7 @@ public class WriteCharts {
|
|||
|
||||
static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
|
||||
|
||||
/*
|
||||
// tests whether a string is in a set. Also checks for Common and Inherited
|
||||
public static boolean isIn(String s, UnicodeSet set) {
|
||||
int cp;
|
||||
|
@ -215,8 +234,9 @@ public class WriteCharts {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
// tests whether a string is in a set. Also checks for Common and Inherited
|
||||
// tests whether a string is in a set.
|
||||
public static boolean containsSome(UnicodeSet set, String s) {
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
|
||||
|
@ -226,6 +246,16 @@ public class WriteCharts {
|
|||
return false;
|
||||
}
|
||||
|
||||
// tests whether a string is in a set.
|
||||
public static boolean containsAll(UnicodeSet set, String s) {
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
if (!set.contains(cp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/WriteCharts.java,v $
|
||||
* $Date: 2001/11/03 05:44:32 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2001/11/13 00:30:14 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -21,6 +21,7 @@ import java.io.*;
|
|||
|
||||
public class WriteCharts {
|
||||
public static void main(String[] args) throws IOException {
|
||||
testSet();
|
||||
String testSet = "";
|
||||
if (args.length == 0) args = all;
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
|
@ -34,6 +35,16 @@ public class WriteCharts {
|
|||
}
|
||||
}
|
||||
|
||||
public static void testSet() {
|
||||
UnicodeSet s = new UnicodeSet("[[\u0020-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
|
||||
int count = s.getRangeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
int start = s.getRangeStart(i);
|
||||
int end = s.getRangeEnd(i);
|
||||
System.out.println(Integer.toString(start,16) + ".." + Integer.toString(end,16));
|
||||
}
|
||||
}
|
||||
|
||||
static final String[] all = {
|
||||
"Cyrillic-Latin", "Greek-Latin",
|
||||
"el-Latin",
|
||||
|
@ -88,6 +99,12 @@ public class WriteCharts {
|
|||
UnicodeSet privateUse = new UnicodeSet("[:private use:]");
|
||||
|
||||
Map map = new TreeMap();
|
||||
|
||||
UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
|
||||
targetSetPlusAnyways.addAll(okAnyway);
|
||||
|
||||
UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
|
||||
sourceSetPlusAnyways.addAll(okAnyway);
|
||||
|
||||
int count = sourceSet.getRangeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
|
@ -97,14 +114,14 @@ public class WriteCharts {
|
|||
String ss = UTF16.valueOf(j);
|
||||
String ts = t.transliterate(ss);
|
||||
char group = 0;
|
||||
if (!isIn(ts, targetSet)) {
|
||||
if (!containsAll(targetSetPlusAnyways, ts)) {
|
||||
group |= 1;
|
||||
}
|
||||
if (UTF16.countCodePoint(ts) == 1) {
|
||||
leftOverSet.remove(UTF16.charAt(ts,0));
|
||||
}
|
||||
String rt = inverse.transliterate(ts);
|
||||
if (!isIn(rt, sourceSet)) {
|
||||
if (!containsAll(sourceSetPlusAnyways, rt)) {
|
||||
group |= 2;
|
||||
} else if (!ss.equals(rt)) {
|
||||
group |= 4;
|
||||
|
@ -114,10 +131,11 @@ public class WriteCharts {
|
|||
group |= 16;
|
||||
}
|
||||
|
||||
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss,
|
||||
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0))
|
||||
+ "\u0000" + ss,
|
||||
"<tr><td>" + ss + "<br><tt>" + hex.transliterate(ss) + "</tt></td><td>"
|
||||
+ ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
|
||||
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
|
||||
+ ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
|
||||
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -128,14 +146,14 @@ public class WriteCharts {
|
|||
int end = leftOverSet.getRangeEnd(i);
|
||||
for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) {
|
||||
String ts = UTF16.valueOf(j);
|
||||
String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
|
||||
if (!decomp.equals(ts)) continue;
|
||||
// String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
|
||||
// if (!decomp.equals(ts)) continue;
|
||||
|
||||
String rt = inverse.transliterate(ts);
|
||||
String flag = "";
|
||||
char group = 0x80;
|
||||
|
||||
if (!isIn(rt, sourceSet)) {
|
||||
if (!containsAll(sourceSetPlusAnyways, rt)) {
|
||||
group |= 8;
|
||||
}
|
||||
if (containsSome(privateUse, rt)) {
|
||||
|
@ -204,6 +222,7 @@ public class WriteCharts {
|
|||
|
||||
static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
|
||||
|
||||
/*
|
||||
// tests whether a string is in a set. Also checks for Common and Inherited
|
||||
public static boolean isIn(String s, UnicodeSet set) {
|
||||
int cp;
|
||||
|
@ -215,8 +234,9 @@ public class WriteCharts {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
// tests whether a string is in a set. Also checks for Common and Inherited
|
||||
// tests whether a string is in a set.
|
||||
public static boolean containsSome(UnicodeSet set, String s) {
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
|
||||
|
@ -226,6 +246,16 @@ public class WriteCharts {
|
|||
return false;
|
||||
}
|
||||
|
||||
// tests whether a string is in a set.
|
||||
public static boolean containsAll(UnicodeSet set, String s) {
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
if (!set.contains(cp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue