mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-4677 misc
X-SVN-Rev: 18288
This commit is contained in:
parent
198efcc77c
commit
ddb17f01a1
2 changed files with 61 additions and 21 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateConfusables.java,v $
|
||||
* $Date: 2005/07/02 21:24:46 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2005/07/19 17:21:00 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -43,13 +43,13 @@ import com.ibm.icu.dev.test.util.UnicodeLabel;
|
|||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.dev.test.util.UnicodePropertySource;
|
||||
import com.ibm.icu.dev.test.util.XEquivalenceClass;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.text.UCD.GenerateConfusables.WholeScript.UnicodeSetToScript;
|
||||
import com.ibm.text.utility.Utility;
|
||||
import com.ibm.text.utility.XEquivalenceClass;
|
||||
import com.sun.corba.se.connection.GetEndPointInfoAgainException;
|
||||
import com.ibm.icu.dev.test.util.CollectionUtilities;
|
||||
|
||||
|
@ -431,8 +431,8 @@ public class GenerateConfusables {
|
|||
//reviews.putAll(UNASSIGNED, "");
|
||||
out.print("\uFEFF");
|
||||
out.println("# Review List for IDN");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("");
|
||||
|
||||
UnicodeSet fullSet = reviews.getSet("").complement();
|
||||
|
@ -487,8 +487,8 @@ public class GenerateConfusables {
|
|||
PrintWriter out = BagFormatter.openUTF8Writer(outdir, "idnchars.txt");
|
||||
|
||||
out.println("# Recommended Identifier Profiles for IDN");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
|
||||
out.println("");
|
||||
out.println("# Output Characters");
|
||||
|
@ -557,8 +557,8 @@ public class GenerateConfusables {
|
|||
"xidmodifications.txt");
|
||||
|
||||
out.println("# Security Profile for General Identifiers");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("");
|
||||
|
||||
out.println("# Characters restricted");
|
||||
|
@ -614,8 +614,8 @@ public class GenerateConfusables {
|
|||
//someRemovals = removals;
|
||||
out = BagFormatter.openUTF8Writer(outdir, "draft-restrictions.txt");
|
||||
out.println("# Characters restricted in domain names");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("#");
|
||||
out.println("# This file contains a draft list of characters for use in");
|
||||
out.println("# UTR #36: Unicode Security Considerations");
|
||||
|
@ -1149,8 +1149,8 @@ public class GenerateConfusables {
|
|||
public void writeSource(String directory, String filename) throws IOException {
|
||||
PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
|
||||
out.println("# Source File for IDN Confusables");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("");
|
||||
dataMixedAnycase.writeSource(out);
|
||||
out.close();
|
||||
|
@ -1160,8 +1160,8 @@ public class GenerateConfusables {
|
|||
PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
|
||||
out.print('\uFEFF');
|
||||
out.println("# Recommended confusable mapping for IDN");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("");
|
||||
|
||||
if (appendFile) {
|
||||
|
@ -1369,8 +1369,8 @@ public class GenerateConfusables {
|
|||
UnicodeSet representable = new UnicodeSet();
|
||||
out.print('\uFEFF');
|
||||
out.println("# Summary: Recommended confusable mapping for IDN");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("");
|
||||
MyEquivalenceClass data = dataMixedAnycase;
|
||||
Set items = data.getOrderedExplicitItems();
|
||||
|
@ -1494,8 +1494,8 @@ public class GenerateConfusables {
|
|||
PrintWriter out = BagFormatter.openUTF8Writer(outdir, filename);
|
||||
out.print('\uFEFF');
|
||||
out.println("# Summary: Whole-Script Confusables");
|
||||
out.println("# $Revision: 1.6 $");
|
||||
out.println("# $Date: 2005/07/02 21:24:46 $");
|
||||
out.println("# $Revision: 1.7 $");
|
||||
out.println("# $Date: 2005/07/19 17:21:00 $");
|
||||
out.println("# This data is used for determining whether a strings is a");
|
||||
out.println("# whole-script or mixed-script confusable.");
|
||||
out.println("# The mappings here ignore common and inherited script characters,");
|
||||
|
|
|
@ -7,6 +7,7 @@ import java.util.BitSet;
|
|||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
@ -72,7 +73,7 @@ public class TestIdentifiers {
|
|||
|
||||
// Constructor: stores the requested case type on the instance, then loads
// the data for that case type. May throw IOException from the load call.
// NOTE(review): this diff view shows both loadFile(caseType) and
// loadWholeScriptConfusables(caseType); presumably the first is the removed
// line and the second its renamed replacement -- confirm against the file.
TestIdentifiers(String caseType) throws IOException {
|
||||
this.caseType = caseType;
|
||||
loadFile(caseType);
|
||||
loadWholeScriptConfusables(caseType);
|
||||
}
|
||||
|
||||
private static class UnicodeSetToScript {
|
||||
|
@ -101,8 +102,47 @@ public class TestIdentifiers {
|
|||
|
||||
// Outer array has one slot per script code (sized UScript.CODE_LIMIT); the
// inner arrays are not allocated here.
UnicodeSetToScript[][] scriptToUnicodeSetToScript = new UnicodeSetToScript[UScript.CODE_LIMIT][];
|
||||
// One slot per script code (sized UScript.CODE_LIMIT); entries start out null.
UnicodeSet[] fastReject = new UnicodeSet[UScript.CODE_LIMIT];
|
||||
|
||||
// Filled by loadIdentifiers(): maps each code point range read from
// "idnchars.txt" to its type label, except ranges labeled "nonstarting".
UnicodeMap idnChars = new UnicodeMap();
|
||||
// Filled by loadIdentifiers(): code points whose type label is "nonstarting".
UnicodeSet nonstarting = new UnicodeSet();
|
||||
|
||||
void loadIdentifiers() throws IOException {
|
||||
BufferedReader br = BagFormatter.openUTF8Reader(indir,
|
||||
"idnchars.txt");
|
||||
String line = null;
|
||||
try {
|
||||
while (true) {
|
||||
line = Utility.readDataLine(br);
|
||||
if (line == null)
|
||||
break;
|
||||
if (line.length() == 0)
|
||||
continue;
|
||||
String[] pieces = Utility.split(line, ';');
|
||||
// part 0 is range
|
||||
String range = pieces[0].trim();
|
||||
int rangeDivider = range.indexOf("..");
|
||||
int start, end;
|
||||
if (rangeDivider < 0) {
|
||||
start = end = Integer.parseInt(range, 16);
|
||||
} else {
|
||||
start = Integer.parseInt(range.substring(0, rangeDivider),
|
||||
16);
|
||||
end = Integer.parseInt(range.substring(rangeDivider + 2),
|
||||
16);
|
||||
}
|
||||
// part 1 is script1
|
||||
String type = pieces[1].trim().intern();
|
||||
if (type.equals("nonstarting")) nonstarting.add(start,end);
|
||||
else idnChars.putAll(start, end, type);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw (RuntimeException) new RuntimeException("Failure on line "
|
||||
+ line).initCause(e);
|
||||
}
|
||||
br.close();
|
||||
}
|
||||
|
||||
void loadFile(String filterType) throws IOException {
|
||||
void loadWholeScriptConfusables(String filterType) throws IOException {
|
||||
UnicodeSet[][] script_script_set = new UnicodeSet[UScript.CODE_LIMIT][UScript.CODE_LIMIT];
|
||||
for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
|
||||
script_script_set[i] = new UnicodeSet[UScript.CODE_LIMIT];
|
||||
|
|
Loading…
Add table
Reference in a new issue