ICU-4677 misc

X-SVN-Rev: 18288
This commit is contained in:
Mark Davis 2005-07-19 17:21:01 +00:00
parent 198efcc77c
commit ddb17f01a1
2 changed files with 61 additions and 21 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateConfusables.java,v $
* $Date: 2005/07/02 21:24:46 $
* $Revision: 1.6 $
* $Date: 2005/07/19 17:21:00 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -43,13 +43,13 @@ import com.ibm.icu.dev.test.util.UnicodeLabel;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodePropertySource;
import com.ibm.icu.dev.test.util.XEquivalenceClass;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.UCD.GenerateConfusables.WholeScript.UnicodeSetToScript;
import com.ibm.text.utility.Utility;
import com.ibm.text.utility.XEquivalenceClass;
import com.sun.corba.se.connection.GetEndPointInfoAgainException;
import com.ibm.icu.dev.test.util.CollectionUtilities;
@ -431,8 +431,8 @@ public class GenerateConfusables {
//reviews.putAll(UNASSIGNED, "");
out.print("\uFEFF");
out.println("# Review List for IDN");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("");
UnicodeSet fullSet = reviews.getSet("").complement();
@ -487,8 +487,8 @@ public class GenerateConfusables {
PrintWriter out = BagFormatter.openUTF8Writer(outdir, "idnchars.txt");
out.println("# Recommended Identifier Profiles for IDN");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("");
out.println("# Output Characters");
@ -557,8 +557,8 @@ public class GenerateConfusables {
"xidmodifications.txt");
out.println("# Security Profile for General Identifiers");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("");
out.println("# Characters restricted");
@ -614,8 +614,8 @@ public class GenerateConfusables {
//someRemovals = removals;
out = BagFormatter.openUTF8Writer(outdir, "draft-restrictions.txt");
out.println("# Characters restricted in domain names");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("#");
out.println("# This file contains a draft list of characters for use in");
out.println("# UTR #36: Unicode Security Considerations");
@ -1149,8 +1149,8 @@ public class GenerateConfusables {
public void writeSource(String directory, String filename) throws IOException {
PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
out.println("# Source File for IDN Confusables");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("");
dataMixedAnycase.writeSource(out);
out.close();
@ -1160,8 +1160,8 @@ public class GenerateConfusables {
PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
out.print('\uFEFF');
out.println("# Recommended confusable mapping for IDN");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("");
if (appendFile) {
@ -1369,8 +1369,8 @@ public class GenerateConfusables {
UnicodeSet representable = new UnicodeSet();
out.print('\uFEFF');
out.println("# Summary: Recommended confusable mapping for IDN");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("");
MyEquivalenceClass data = dataMixedAnycase;
Set items = data.getOrderedExplicitItems();
@ -1494,8 +1494,8 @@ public class GenerateConfusables {
PrintWriter out = BagFormatter.openUTF8Writer(outdir, filename);
out.print('\uFEFF');
out.println("# Summary: Whole-Script Confusables");
out.println("# $Revision: 1.6 $");
out.println("# $Date: 2005/07/02 21:24:46 $");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# This data is used for determining whether a strings is a");
out.println("# whole-script or mixed-script confusable.");
out.println("# The mappings here ignore common and inherited script characters,");

View file

@ -7,6 +7,7 @@ import java.util.BitSet;
import java.util.List;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UTF16;
@ -72,7 +73,7 @@ public class TestIdentifiers {
/**
 * Creates a tester for the given case type, remembering the type and
 * loading the data keyed by it.
 *
 * NOTE(review): this text comes from a rendered diff, so both the
 * loadFile(...) call and the loadWholeScriptConfusables(...) call are shown;
 * presumably they are the before/after forms of a single renamed call and
 * only one exists in the real file -- confirm against the checked-in source.
 *
 * @param caseType case type passed through to the loader
 * @throws IOException if the loader fails to read its data
 */
TestIdentifiers(String caseType) throws IOException {
this.caseType = caseType;
loadFile(caseType);
loadWholeScriptConfusables(caseType);
}
private static class UnicodeSetToScript {
@ -101,8 +102,47 @@ public class TestIdentifiers {
// Tables with one slot per UScript code value (sized by UScript.CODE_LIMIT).
// NOTE(review): presumably indexed by source script code -- confirm in the loader.
UnicodeSetToScript[][] scriptToUnicodeSetToScript = new UnicodeSetToScript[UScript.CODE_LIMIT][];
UnicodeSet[] fastReject = new UnicodeSet[UScript.CODE_LIMIT];
// Filled by loadIdentifiers(): maps each code point range read from
// idnchars.txt to its type string, except ranges typed "nonstarting".
UnicodeMap idnChars = new UnicodeMap();
// Filled by loadIdentifiers(): code points whose type in idnchars.txt is "nonstarting".
UnicodeSet nonstarting = new UnicodeSet();
/**
 * Loads "idnchars.txt" from {@code indir} into {@code idnChars} and
 * {@code nonstarting}.
 *
 * Each non-empty data line is split on ';'. Field 0 is either a single
 * hexadecimal code point or a "start..end" hexadecimal range; field 1 is a
 * type string. Ranges whose type is "nonstarting" are added to
 * {@code nonstarting}; every other range is stored in {@code idnChars}
 * with the (interned) type as its value.
 *
 * The reader is always closed, including when a line fails to parse.
 *
 * @throws IOException if the file cannot be opened, or if closing the
 *         reader fails after a successful read
 * @throws RuntimeException wrapping any failure while reading or parsing,
 *         with the offending line in the message
 */
void loadIdentifiers() throws IOException {
    BufferedReader br = BagFormatter.openUTF8Reader(indir,
            "idnchars.txt");
    String line = null;
    boolean succeeded = false;
    try {
        while (true) {
            // NOTE(review): readDataLine presumably strips comments and
            // returns null at end of input -- confirm in Utility.
            line = Utility.readDataLine(br);
            if (line == null) {
                break;
            }
            if (line.length() == 0) {
                continue;
            }
            String[] pieces = Utility.split(line, ';');
            // part 0 is a single code point or a "start..end" range, in hex
            String range = pieces[0].trim();
            int rangeDivider = range.indexOf("..");
            int start, end;
            if (rangeDivider < 0) {
                start = end = Integer.parseInt(range, 16);
            } else {
                start = Integer.parseInt(range.substring(0, rangeDivider),
                        16);
                end = Integer.parseInt(range.substring(rangeDivider + 2),
                        16);
            }
            // part 1 is the type assigned to the range
            String type = pieces[1].trim().intern();
            if (type.equals("nonstarting")) {
                nonstarting.add(start, end);
            } else {
                idnChars.putAll(start, end, type);
            }
        }
        succeeded = true;
    } catch (Exception e) {
        throw new RuntimeException("Failure on line " + line, e);
    } finally {
        if (succeeded) {
            // Normal path: let a close failure propagate to the caller.
            br.close();
        } else {
            // Failure path: still release the reader, but do not let a
            // close error replace the more informative parse exception.
            try {
                br.close();
            } catch (IOException ignored) {
                // intentionally ignored; the original failure is being thrown
            }
        }
    }
}
void loadFile(String filterType) throws IOException {
void loadWholeScriptConfusables(String filterType) throws IOException {
UnicodeSet[][] script_script_set = new UnicodeSet[UScript.CODE_LIMIT][UScript.CODE_LIMIT];
for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
script_script_set[i] = new UnicodeSet[UScript.CODE_LIMIT];