mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 15:42:14 +00:00
no message
X-SVN-Rev: 14925
This commit is contained in:
parent
efe870f9b6
commit
506399c2d3
3 changed files with 249 additions and 24 deletions
|
@ -6,6 +6,8 @@ import java.io.IOException;
|
|||
import java.io.PrintWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.text.ParseException;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
|
@ -18,12 +20,17 @@ import java.util.ResourceBundle;
|
|||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.Tabber;
|
||||
import com.ibm.icu.dev.test.util.UnicodeLabel;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.SymbolTable;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeMatcher;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.UnicodeDataFile;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
@ -52,8 +59,9 @@ public class MakeUnicodeFiles {
|
|||
|
||||
// Class-wide debug switch, off by default. Its readers are outside the
// visible part of this file -- presumably it gates diagnostic output; confirm
// against the rest of the class.
static boolean DEBUG = false;
|
||||
|
||||
public static void main() throws IOException {
|
||||
generateFile();
|
||||
public static void main(String[] args) throws IOException {
|
||||
//generateFile();
|
||||
testInvariants(ToolUnicodePropertySource.make(Default.ucdVersion()));
|
||||
}
|
||||
|
||||
static class Format {
|
||||
|
@ -1096,8 +1104,215 @@ public class MakeUnicodeFiles {
|
|||
return nameStr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shared, reusable matcher for "<left> <relation> <right>", where the relation
// is one of = > < ! ? and each side is captured as exactly ONE character that
// is not a relation character. It is not referenced by testInvariants, which
// parses lines with a ParsePosition instead. A Matcher is stateful, so this
// shared instance must not be used from more than one thread at a time.
static Matcher invariantLine = Pattern.compile("([^=><!?])\\s*([=><!?])\\s*([^=><!?])").matcher("");
|
||||
|
||||
// The single-character relation operators that testInvariants accepts between
// the left-hand and right-hand set on an invariant line: = > < ! ?
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[=><!?]");
|
||||
|
||||
static void testInvariants(UnicodeProperty.Factory factory) throws IOException {
|
||||
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
|
||||
BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
|
||||
BagFormatter bf = new BagFormatter();
|
||||
SymbolTable st = factory.getSymbolTable();
|
||||
ParsePosition pp = new ParsePosition(0);
|
||||
int parseErrorCount = 0;
|
||||
int testFailureCount = 0;
|
||||
while (true) {
|
||||
String rightSide = null;
|
||||
String leftSide = null;
|
||||
String line = in.readLine();
|
||||
if (line == null) break;
|
||||
line = line.trim();
|
||||
int pos = line.indexOf('#');
|
||||
if (pos >= 0) line = line.substring(0,pos).trim();
|
||||
if (line.length() == 0) continue;
|
||||
|
||||
char relation = 0;
|
||||
UnicodeSet leftSet = null;
|
||||
UnicodeSet rightSet = null;
|
||||
try {
|
||||
pp.setIndex(0);
|
||||
leftSet = new UnicodeSet(line, pp, st);
|
||||
leftSide = line.substring(0,pp.getIndex());
|
||||
eatWhitespace(line, pp);
|
||||
relation = line.charAt(pp.getIndex());
|
||||
if (!INVARIANT_RELATIONS.contains(relation)) {
|
||||
throw new ParseException("Invalid relation", pp.getIndex());
|
||||
}
|
||||
pp.setIndex(pp.getIndex()+1); // skip char
|
||||
eatWhitespace(line, pp);
|
||||
int start = pp.getIndex();
|
||||
rightSet = new UnicodeSet(line, pp, st);
|
||||
rightSide = line.substring(start,pp.getIndex());
|
||||
eatWhitespace(line, pp);
|
||||
if (line.length() != pp.getIndex()) {
|
||||
throw new ParseException("Extra characters at end", pp.getIndex());
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
out.println("PARSE ERROR:\t" + line.substring(0,e.getErrorOffset())
|
||||
+ "<@>" + line.substring(e.getErrorOffset()));
|
||||
out.println();
|
||||
out.println("**** START Error Info ****");
|
||||
out.println(e.getMessage());
|
||||
out.println("**** END Error Info ****");
|
||||
out.println();
|
||||
parseErrorCount++;
|
||||
continue;
|
||||
} catch (IllegalArgumentException e) {
|
||||
out.println("PARSE ERROR:\t" + line);
|
||||
out.println();
|
||||
out.println("**** START Error Info ****");
|
||||
out.println(e.getMessage());
|
||||
out.println("**** END Error Info ****");
|
||||
out.println();
|
||||
parseErrorCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean ok = true;
|
||||
switch(relation) {
|
||||
case '=': ok = leftSet.equals(rightSet); break;
|
||||
case '>': ok = leftSet.containsAll(rightSet); break;
|
||||
case '<': ok = rightSet.containsAll(leftSet); break;
|
||||
case '!': ok = leftSet.containsNone(rightSet); break;
|
||||
case '?': ok = !leftSet.equals(rightSet)
|
||||
&& !leftSet.containsAll(rightSet)
|
||||
&& !rightSet.containsAll(leftSet)
|
||||
&& !leftSet.containsNone(rightSet);
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Internal Error");
|
||||
}
|
||||
out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH) + ":\t" + line);
|
||||
if (ok) continue;
|
||||
out.println();
|
||||
out.println("**** START Error Info ****");
|
||||
bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
|
||||
out.println("**** END Error Info ****");
|
||||
out.println();
|
||||
testFailureCount++;
|
||||
}
|
||||
out.println();
|
||||
out.println("**** SUMMARY ****");
|
||||
out.println();
|
||||
out.println("ParseErrorCount=" + parseErrorCount);
|
||||
out.println("TestFailureCount=" + testFailureCount);
|
||||
out.close();
|
||||
System.out.println("ParseErrorCount=" + parseErrorCount);
|
||||
System.out.println("TestFailureCount=" + testFailureCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param line
|
||||
* @param pp
|
||||
*/
|
||||
private static void eatWhitespace(String line, ParsePosition pp) {
|
||||
int cp = 0;
|
||||
int i;
|
||||
for (i = pp.getIndex(); i < line.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(line, i);
|
||||
if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(cp)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pp.setIndex(i);
|
||||
}
|
||||
|
||||
/*
|
||||
static class PropertySymbolTable implements SymbolTable {
|
||||
static boolean DEBUG = false;
|
||||
UnicodeProperty.Factory factory;
|
||||
//static Matcher identifier = Pattern.compile("([:letter:] [\\_\\-[:letter:][:number:]]*)").matcher("");
|
||||
|
||||
PropertySymbolTable (UnicodeProperty.Factory factory) {
|
||||
this.factory = factory;
|
||||
}
|
||||
|
||||
public char[] lookup(String s) {
|
||||
if (DEBUG) System.out.println("\tLooking up " + s);
|
||||
int pos = s.indexOf('=');
|
||||
if (pos < 0) return null; // should never happen
|
||||
UnicodeProperty prop = factory.getProperty(s.substring(0,pos));
|
||||
if (prop == null) {
|
||||
throw new IllegalArgumentException("Invalid Property: " + s + "\r\nUse "
|
||||
+ showSet(factory.getAvailableNames()));
|
||||
}
|
||||
String value = s.substring(pos+1);
|
||||
UnicodeSet set = prop.getSet(value);
|
||||
if (set.size() == 0) {
|
||||
throw new IllegalArgumentException("Empty Property-Value: " + s + "\r\nUse "
|
||||
+ showSet(prop.getAvailableValues()));
|
||||
}
|
||||
if (DEBUG) System.out.println("\tReturning " + set.toPattern(true));
|
||||
return set.toPattern(true).toCharArray(); // really ugly
|
||||
}
|
||||
|
||||
private String showSet(List list) {
|
||||
StringBuffer result = new StringBuffer("[");
|
||||
boolean first = true;
|
||||
for (Iterator it = list.iterator(); it.hasNext();) {
|
||||
if (!first) result.append(", ");
|
||||
else first = false;
|
||||
result.append(it.next().toString());
|
||||
}
|
||||
result.append("]");
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public UnicodeMatcher lookupMatcher(int ch) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String parseReference(String text, ParsePosition pos, int limit) {
|
||||
if (DEBUG) System.out.println("\tParsing <" + text.substring(pos.getIndex(),limit) + ">");
|
||||
int start = pos.getIndex();
|
||||
int i = getIdentifier(text, start, limit);
|
||||
if (i == start) return null;
|
||||
String prop = text.substring(start, i);
|
||||
String value = "true";
|
||||
if (i < limit) {
|
||||
int cp = text.charAt(i);
|
||||
if (cp == ':' || cp == '=') {
|
||||
int j = getIdentifier(text, i+1, limit);
|
||||
value = text.substring(i+1, j);
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
pos.setIndex(i);
|
||||
if (DEBUG) System.out.println("\tParsed <" + prop + ">=<" + value + ">");
|
||||
return prop + '=' + value;
|
||||
}
|
||||
|
||||
private int getIdentifier(String text, int start, int limit) {
|
||||
if (DEBUG) System.out.println("\tGetID <" + text.substring(start,limit) + ">");
|
||||
int cp = 0;
|
||||
int i;
|
||||
for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(text, i);
|
||||
if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (DEBUG) System.out.println("\tGotID <" + text.substring(start,i) + ">");
|
||||
return i;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* getCombo(UnicodeProperty.Factory factory, String line) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
String[] pieces = Utility.split(line, '+');
|
||||
for (int i = 0; i < pieces.length; ++i) {
|
||||
String[] parts = Utility.split(pieces[i],':');
|
||||
String prop = parts[0].trim();
|
||||
String value = "true";
|
||||
if (parts.length > 1) value = parts[1].trim();
|
||||
UnicodeProperty p = factory.getProperty(prop);
|
||||
result.addAll(p.getSet(value));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
static class OrderedMap {
|
||||
|
|
|
@ -1,8 +1,15 @@
|
|||
Generate: DerivedCoreProperties
|
||||
DeltaVersion: 11
|
||||
Generate:
|
||||
DeltaVersion: 13
|
||||
|
||||
File: Blocks
|
||||
Property: Block
|
||||
# Note: When comparing block names, casing, whitespace, hyphens,
|
||||
# and underbars are ignored.
|
||||
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
|
||||
# For more information on the comparison of property values,
|
||||
# see UCD.html.
|
||||
#
|
||||
# Code points not explicitly listed in this file are given the value No_Block.
|
||||
Format: valueList
|
||||
|
||||
File: CaseFolding
|
||||
|
@ -102,8 +109,7 @@ Property: XID_Continue
|
|||
|
||||
Property: Default_Ignorable_Code_Point
|
||||
# Derived Property: Default_Ignorable_Code_Point
|
||||
# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - White_Space
|
||||
|
||||
# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters - White_Space - Annotation_characters
|
||||
|
||||
Property: Grapheme_Extend
|
||||
# Derived Property: Grapheme_Extend
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2004/03/11 19:03:16 $
|
||||
* $Revision: 1.41 $
|
||||
* $Date: 2004/04/10 16:49:19 $
|
||||
* $Revision: 1.42 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -463,21 +463,25 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
* and returns the number of pieces.
|
||||
*/
|
||||
public static int split(String s, char divider, String[] output) {
|
||||
int last = 0;
|
||||
int current = 0;
|
||||
int i;
|
||||
for (i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) {
|
||||
output[current++] = s.substring(last,i);
|
||||
last = i+1;
|
||||
}
|
||||
}
|
||||
output[current++] = s.substring(last,i);
|
||||
int result = current;
|
||||
while (current < output.length) {
|
||||
output[current++] = "";
|
||||
}
|
||||
return result;
|
||||
try {
|
||||
int last = 0;
|
||||
int current = 0;
|
||||
int i;
|
||||
for (i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) {
|
||||
output[current++] = s.substring(last,i);
|
||||
last = i+1;
|
||||
}
|
||||
}
|
||||
output[current++] = s.substring(last,i);
|
||||
int result = current;
|
||||
while (current < output.length) {
|
||||
output[current++] = "";
|
||||
}
|
||||
return result;
|
||||
} catch (RuntimeException e) {
|
||||
throw new RuntimeException("Failure at line: " + s, e);
|
||||
}
|
||||
}
|
||||
|
||||
public static String[] split(String s, char divider) {
|
||||
|
|
Loading…
Add table
Reference in a new issue