mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-4700 misc fixes
X-SVN-Rev: 18773
This commit is contained in:
parent
99d3e09191
commit
6509d8087c
5 changed files with 198 additions and 47 deletions
|
@ -14,6 +14,7 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
@ -39,13 +40,23 @@ public final class CollectionUtilities {
|
|||
return target;
|
||||
}
|
||||
|
||||
public static Collection addAll(Collection target, Iterator source) {
|
||||
public static Collection addAll(Iterator source, Collection target) {
|
||||
while (source.hasNext()) {
|
||||
target.add(source.next());
|
||||
}
|
||||
return target; // for chaining
|
||||
}
|
||||
|
||||
public static int size(Iterator source) {
|
||||
int result = 0;
|
||||
while (source.hasNext()) {
|
||||
source.next();
|
||||
++result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public static Map asMap(Object[][] source) {
|
||||
return asMap(source, new HashMap(), false);
|
||||
}
|
||||
|
@ -409,4 +420,84 @@ public final class CollectionUtilities {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Modifies Unicode set to flatten the strings. Eg [abc{da}] => [abcd]
|
||||
* Returns the set for chaining.
|
||||
* @param exemplar1
|
||||
* @return
|
||||
*/
|
||||
public static UnicodeSet flatten(UnicodeSet exemplar1) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
boolean gotString = false;
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(exemplar1); it.nextRange();) {
|
||||
if (it.codepoint == it.IS_STRING) {
|
||||
result.addAll(it.string);
|
||||
gotString = true;
|
||||
} else {
|
||||
result.add(it.codepoint, it.codepointEnd);
|
||||
}
|
||||
}
|
||||
if (gotString) exemplar1.set(result);
|
||||
return exemplar1;
|
||||
}
|
||||
|
||||
/**
|
||||
* For producing filtered iterators
|
||||
*/
|
||||
public static abstract class FilteredIterator implements Iterator {
|
||||
private Iterator baseIterator;
|
||||
private static final Object EMPTY = new Object();
|
||||
private static final Object DONE = new Object();
|
||||
private Object nextObject = EMPTY;
|
||||
public FilteredIterator set(Iterator baseIterator) {
|
||||
this.baseIterator = baseIterator;
|
||||
return this;
|
||||
}
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Doesn't support removal");
|
||||
}
|
||||
public Object next() {
|
||||
Object result = nextObject;
|
||||
nextObject = EMPTY;
|
||||
return result;
|
||||
}
|
||||
public boolean hasNext() {
|
||||
if (nextObject == DONE) return false;
|
||||
if (nextObject != EMPTY) return true;
|
||||
while (baseIterator.hasNext()) {
|
||||
nextObject = baseIterator.next();
|
||||
if (isIncluded(nextObject)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
nextObject = DONE;
|
||||
return false;
|
||||
}
|
||||
abstract public boolean isIncluded(Object item);
|
||||
}
|
||||
|
||||
public static class PrefixIterator extends FilteredIterator {
|
||||
private String prefix;
|
||||
public PrefixIterator set(Iterator baseIterator, String prefix) {
|
||||
super.set(baseIterator);
|
||||
this.prefix = prefix;
|
||||
return this;
|
||||
}
|
||||
public boolean isIncluded(Object item) {
|
||||
return ((String)item).startsWith(prefix);
|
||||
}
|
||||
}
|
||||
|
||||
public static class RegexIterator extends FilteredIterator {
|
||||
private Matcher matcher;
|
||||
public RegexIterator set(Iterator baseIterator, Matcher matcher) {
|
||||
super.set(baseIterator);
|
||||
this.matcher = matcher;
|
||||
return this;
|
||||
}
|
||||
public boolean isIncluded(Object item) {
|
||||
return matcher.reset((String)item).matches();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2004, International Business Machines Corporation and *
|
||||
* Copyright (C) 2002-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -151,6 +151,7 @@ public abstract class Tabber {
|
|||
setPostfix("</tr>");
|
||||
}
|
||||
public void setParameters(int count, String params) {
|
||||
while (count >= parameters.size()) parameters.add(null);
|
||||
parameters.set(count,params);
|
||||
}
|
||||
|
||||
|
|
|
@ -406,7 +406,11 @@ public class MakeNamesChart {
|
|||
//String hex = Utility.hex(cp);
|
||||
//return "<img alt='" + hex + "' src='http://www.unicode.org/cgi-bin/refglyph?24-" + hex + "'>";
|
||||
}
|
||||
|
||||
int type = Default.ucd().getCategory(cp);
|
||||
if (type == UCD.Cn || type == UCD.Co || type == UCD.Cs) {
|
||||
return "\u2588";
|
||||
}
|
||||
String result = BagFormatter.toHTML.transliterate(UTF16.valueOf(cp));
|
||||
if (type == UCD.Me || type == UCD.Mn) {
|
||||
result = "\u25CC" + result;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
|
||||
* $Date: 2005/11/01 00:10:54 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2005/11/08 05:19:59 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -32,8 +32,8 @@ import com.ibm.text.utility.*;
|
|||
public class QuickTest implements UCD_Types {
|
||||
public static void main(String[] args) throws IOException {
|
||||
try {
|
||||
//getBidiMirrored();
|
||||
getCaseFoldingUnstable();
|
||||
getBidiMirrored();
|
||||
//getCaseFoldingUnstable();
|
||||
if (true) return;
|
||||
getHasAllNormalizations();
|
||||
getLengths("NFC", Default.nfc());
|
||||
|
@ -115,61 +115,116 @@ public class QuickTest implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
static UnicodeMap.Composer MyComposer = new UnicodeMap.Composer(){
|
||||
public Object compose(int codePoint, Object a, Object b) {
|
||||
if (a == null) return b;
|
||||
if (b == null) return a;
|
||||
return a + "; " + b;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
private static void getBidiMirrored() {
|
||||
ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
|
||||
static void add(UnicodeMap map, int cp, String s) {
|
||||
String x = (String) map.getValue(cp);
|
||||
if (x == null) map.put(cp, s);
|
||||
else map.put(cp, x + "; " + s);
|
||||
}
|
||||
|
||||
private static void getBidiMirrored() throws IOException {
|
||||
//UnicodeMap.Composer composer;
|
||||
//ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
|
||||
UnicodeSet proposed = new UnicodeSet("[\u0F3A-\u0F3D\u169B\u169C\u2018-\u201F\u301D-\u301F\uFD3E\uFD3F\uFE59-\uFE5E\uFE64\uFE65\\U0001D6DB\\U0001D715\\U0001D74F\\U0001D789\\U0001D7C3]");
|
||||
//UnicodeSet proposed = new UnicodeSet("[\u0F3A-\u0F3D\u169B\u169C\u2018-\u201F\u301D-\u301F\uFD3E\uFD3F\uFE59-\uFE5E\uFE64\uFE65]");
|
||||
UnicodeMap status = new UnicodeMap();
|
||||
status.putAll(foo.getSet("generalcategory=ps"), "*open/close*");
|
||||
status.putAll(foo.getSet("generalcategory=pe"), "*open/close*");
|
||||
status.putAll(foo.getSet("generalcategory=pi"), "*open/close*");
|
||||
status.putAll(foo.getSet("generalcategory=pf"), "*open/close*");
|
||||
UCD ucd31 = UCD.make("3.1.0");
|
||||
for (int cp = 0; cp < 0x10FFFF; ++cp) {
|
||||
if (!Default.ucd().isAssigned(cp)) continue;
|
||||
if (Default.ucd().isPUA(cp)) continue;
|
||||
|
||||
if (proposed.contains(cp)) {
|
||||
add(status, cp, "***");
|
||||
}
|
||||
|
||||
int type = Default.ucd().getCategory(cp);
|
||||
if (type == UCD.Ps || type == Pe || type == Pi || type == Pf) {
|
||||
add(status, cp, "Px");
|
||||
}
|
||||
|
||||
String s = Default.ucd().getBidiMirror(cp);
|
||||
if (!s.equals(UTF16.valueOf(cp))) add(status, cp, "bmg");
|
||||
|
||||
if (ucd31.getBinaryProperty(cp,BidiMirrored)) {
|
||||
add(status, cp, "bmp3.1");
|
||||
} else if (Default.ucd().getBinaryProperty(cp,BidiMirrored)) {
|
||||
add(status, cp, "bmp5.0");
|
||||
} else if (!Default.nfkc().isNormalized(cp)) {
|
||||
String ss = Default.nfkc().normalize(cp);
|
||||
if (isBidiMirrored(ss)) {
|
||||
add(status, cp, "bmp(" + Utility.hex(ss) + ")");
|
||||
String name = Default.ucd().getName(cp);
|
||||
if (name.indexOf("VERTICAL") < 0) proposed.add(cp);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (type == Sm) {
|
||||
add(status, cp, "Sm");
|
||||
}
|
||||
else if (Default.ucd().getBinaryProperty(cp,Math_Property)) {
|
||||
String ss = Default.nfkc().normalize(cp);
|
||||
if (UTF16.countCodePoint(ss) == 1) {
|
||||
int cp2 = UTF16.charAt(ss, 0);
|
||||
int type2 = Default.ucd().getCategory(cp2);
|
||||
if (type2 == UCD.Lu || type2 == Ll || type2 == Lo || type2 == Nd) {
|
||||
//System.out.println("Skipping: " + Default.ucd().getCodeAndName(cp));
|
||||
} else {
|
||||
add(status, cp, "S-Math");
|
||||
}
|
||||
} else {
|
||||
add(status, cp, "S-Math");
|
||||
}
|
||||
}
|
||||
|
||||
UnicodeSet bidiMirroredSet = foo.getSet("bidimirrored=true");
|
||||
status.putAll(bidiMirroredSet, "*core*");
|
||||
UnicodeSet bidiMirroringSet = new UnicodeSet();
|
||||
UnicodeProperty x = foo.getProperty("bidimirroringglyph");
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
String s = x.getValue(i);
|
||||
if (!s.equals(UTF16.valueOf(i))) bidiMirroringSet.add(i);
|
||||
}
|
||||
status.putAll(new UnicodeSet(bidiMirroredSet).removeAll(bidiMirroringSet), "no bidi mirroring");
|
||||
UnicodeSet mathSet = foo.getSet("generalcategory=sm");
|
||||
status.putAll(mathSet, "math");
|
||||
// temp = new UnicodeMap();
|
||||
// UnicodeSet special = new UnicodeSet("[<>]");
|
||||
// for (UnicodeSetIterator it = new UnicodeSetIterator(mathSet); it.next();) {
|
||||
// String s = Default.nfkd().normalize(it.codepoint);
|
||||
// if (special.containsSome(s)) temp.put(it.codepoint, "*special*");
|
||||
// }
|
||||
// status.composeWith(temp, MyComposer);
|
||||
|
||||
UnicodeSet special = new UnicodeSet("[<>]");
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(mathSet); it.next();) {
|
||||
String s = Default.nfkd().normalize(it.codepoint);
|
||||
if (special.containsSome(s)) status.put(it.codepoint, "*special*");
|
||||
}
|
||||
//showStatus(status);
|
||||
// close under nfd
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (!Default.ucd().isAssigned(i)) continue;
|
||||
if (!Default.ucd().isPUA(i)) continue;
|
||||
if (Default.nfkc().isNormalized(i)) continue;
|
||||
String oldValue = (String) status.getValue(i);
|
||||
if (oldValue != null) continue;
|
||||
String s = Default.nfkc().normalize(i);
|
||||
if (UTF16.countCodePoint(s) != 1) continue;
|
||||
int cp = UTF16.charAt(s, 0);
|
||||
String value = (String)status.getValue(cp);
|
||||
if (value != null) status.put(i, "nfc-closure-" + value);
|
||||
|
||||
}
|
||||
showStatus(status, bidiMirroredSet);
|
||||
//proposed = status.getSet("Px");
|
||||
System.out.println(proposed);
|
||||
//showStatus(status);
|
||||
PrintWriter pw = BagFormatter.openUTF8Writer(UCD.GEN_DIR, "bidimirroring_chars.txt");
|
||||
showStatus(pw, status);
|
||||
pw.close();
|
||||
}
|
||||
|
||||
private static boolean isBidiMirrored(String ss) {
|
||||
int cp;
|
||||
for (int i = 0; i < ss.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(ss, i);
|
||||
if (!Default.ucd().getBinaryProperty(cp,BidiMirrored)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static BagFormatter bf = new BagFormatter();
|
||||
private static void showStatus(UnicodeMap status, UnicodeSet x) {
|
||||
private static void showStatus(PrintWriter pw, UnicodeMap status) {
|
||||
Collection list = new TreeSet(status.getAvailableValues());
|
||||
for (Iterator it = list.iterator(); it.hasNext(); ) {
|
||||
String value = (String) it.next();
|
||||
if (value == null) continue;
|
||||
UnicodeSet set = status.getSet(value);
|
||||
for (UnicodeSetIterator umi = new UnicodeSetIterator(set); umi.next();) {
|
||||
System.out.println(Utility.hex(umi.codepoint)
|
||||
+ (value.startsWith("*") ? ";\tBidi_Mirrored" : "")
|
||||
+ "\t#\t" + value
|
||||
pw.println(Utility.hex(umi.codepoint)
|
||||
//+ (value.startsWith("*") ? ";\tBidi_Mirrored" : "")
|
||||
+ "\t# " + value
|
||||
+ "\t\t( " + UTF16.valueOf(umi.codepoint) + " ) "
|
||||
//+ ";\t" + (x.contains(umi.codepoint) ? "O" : "")
|
||||
+ "\t" + Default.ucd().getName(umi.codepoint));
|
||||
}
|
||||
|
|
|
@ -137,8 +137,8 @@ Show [$name:«.*LETTER.*» - $alphabetic]
|
|||
|
||||
# Pattern characters are invariant!
|
||||
# Add after 4.1.0
|
||||
#$Pattern_Whitespace = $×Pattern_Whitespace
|
||||
#$Pattern_Syntax = $×Pattern_Syntax
|
||||
$Pattern_Whitespace = $×Pattern_Whitespace
|
||||
$Pattern_Syntax = $×Pattern_Syntax
|
||||
|
||||
#BIDI invariant constants
|
||||
Let $R_blocks = [$block:Kharoshthi $block:Hebrew $block:Cypriot_Syllabary \u07C0-\u08FF \uFB1D-\uFB4F \U00010840-\U00010FFF]
|
||||
|
|
Loading…
Add table
Reference in a new issue