ICU-0 updates for security

X-SVN-Rev: 18047
This commit is contained in:
Mark Davis 2005-06-24 23:51:52 +00:00
parent 1453a364a7
commit 1dbdb6ec78
6 changed files with 933 additions and 302 deletions

File diff suppressed because it is too large Load diff

View file

@ -202,7 +202,7 @@ class GenerateStringPrep implements UCD_Types {
UnicodeMap scripts = ToolUnicodePropertySource.make("").getProperty("script").getUnicodeMap();
UnicodeMap blocks = ToolUnicodePropertySource.make("").getProperty("block").getUnicodeMap();
UnicodeMap.Composer myCompose = new UnicodeMap.Composer() {
public Object compose(Object a, Object b) {
public Object compose(int codePoint, Object a, Object b) {
return a + "\t" + b;
}
};

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
* $Date: 2005/06/21 21:28:31 $
* $Revision: 1.5 $
* $Date: 2005/06/24 23:51:52 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -16,12 +16,87 @@ package com.ibm.text.UCD;
import java.util.*;
import java.io.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.utility.*;
public class QuickTest implements UCD_Types {
/**
 * Running record of the largest code-unit length reported so far, together
 * with every code point that was reported with that length.
 */
public static class Length {
    /** Label printed in front of each new-record line. */
    String title;
    /** Multiplier applied to a code-unit count to get the printed byte count. */
    int bytesPerCodeUnit;
    /** Code point that most recently set a new record; -1 until one is seen. */
    int longestCodePoint = -1;
    /** Largest code-unit length reported so far. */
    int longestLength = 0;
    /** All code points reported with a length equal to {@link #longestLength}. */
    UnicodeSet longestSet = new UnicodeSet();

    /**
     * @param title label printed in front of each new-record line
     * @param bytesPerCodeUnit factor used to turn a code-unit count into bytes
     */
    Length(String title, int bytesPerCodeUnit) {
        this.title = title;
        this.bytesPerCodeUnit = bytesPerCodeUnit;
    }

    /**
     * Reports one code point and its length. A length below the current
     * record is ignored; a tie joins the record set; a strictly larger
     * length resets the record set and prints a line describing it.
     *
     * @param codePoint the code point being reported
     * @param codeUnitLength its length in code units
     */
    void add(int codePoint, int codeUnitLength) {
        if (codeUnitLength < longestLength) {
            return;
        }
        if (codeUnitLength == longestLength) {
            longestSet.add(codePoint);
            return;
        }
        // Strictly longer than anything seen before: start a new record.
        longestCodePoint = codePoint;
        longestLength = codeUnitLength;
        longestSet.clear();
        longestSet.add(codePoint);
        int byteCount = codeUnitLength * bytesPerCodeUnit;
        String description = Default.ucd().getCodeAndName(codePoint);
        System.out.println(title + " \t(" + byteCount + " bytes, "
                + codeUnitLength + " code units) \t"
                + description);
    }
}
/**
 * Entry point. Calls getLengths four times, labelled "NFC", "NFD", "NFKC"
 * and "NFKD", each time with the normalizer returned by the matching
 * Default accessor, and then prints "Done".
 *
 * @param args command-line arguments; not read
 * @throws IOException if getLengths throws it
 */
public static void main(String[] args) throws IOException {
getLengths("NFC", Default.nfc());
getLengths("NFD", Default.nfd());
getLengths("NFKC", Default.nfkc());
getLengths("NFKD", Default.nfkd());
System.out.println("Done");
}
/**
 * Category bit mask tested against {@code getCategoryMask}: code points whose
 * mask intersects it (unassigned, private use, surrogate) are not measured.
 */
static final int skip = (1<<UCD.UNASSIGNED) | (1<<UCD.PRIVATE_USE) | (1<<UCD.SURROGATE);

/**
 * Normalizes every code point from 0 to 0x10FFFF that is not excluded by
 * {@link #skip} and tracks the longest result measured three ways: UTF-8
 * bytes, UTF-16 code units and code points (UTF-32 code units). Each new
 * record is printed by the Length tracker as it is found. If some code point
 * holds the record in all three measures, one such code point is printed
 * as the "Common Exemplar".
 *
 * @param title label for this normalization form, used as the prefix of every printed line
 * @param normalizer normalizer applied to each code point
 * @throws IOException if computing the UTF-8 length fails
 */
private static void getLengths(String title, Normalizer normalizer) throws IOException {
    System.out.println();
    // The second argument is the size of one code unit in bytes; it scales the
    // printed byte count. UTF-8 units are 1 byte, UTF-16 units 2, UTF-32 units 4.
    Length utf8Len = new Length(title + "\tUTF8", 1);
    Length utf16Len = new Length(title + "\tUTF16", 2);
    Length utf32Len = new Length(title + "\tUTF32", 4);
    for (int i = 0; i <= 0x10FFFF; ++i) {
        int type = Default.ucd().getCategoryMask(i);
        if ((type & skip) != 0) continue;
        String norm = normalizer.normalize(i);
        utf8Len.add(i, getUTF8Length(norm));
        utf16Len.add(i, norm.length()); // String.length() counts UTF-16 code units
        utf32Len.add(i, UTF16.countCodePoint(norm));
    }
    // Code points that are record holders under all three measures at once.
    UnicodeSet common = new UnicodeSet(utf8Len.longestSet)
        .retainAll(utf16Len.longestSet)
        .retainAll(utf32Len.longestSet);
    if (common.size() > 0) {
        UnicodeSetIterator it = new UnicodeSetIterator(common);
        it.next(); // advance to the first element; the set is known to be non-empty
        System.out.println("Common Exemplar: " + Default.ucd().getCodeAndName(it.codepoint));
    }
}
// No longer used by getUTF8Length. Kept declared only because they are
// package-visible and code outside this method may still refer to them.
static ByteArrayOutputStream utf8baos;
static Writer utf8bw;

/**
 * Returns the number of bytes in the UTF-8 encoding of a string.
 *
 * Each call encodes independently. The previous implementation pushed the
 * text through one shared OutputStreamWriter, whose encoder holds back a
 * trailing unpaired high surrogate: that call came up short and the pending
 * unit was then counted against the next call's string.
 *
 * @param source text to measure; must not be null
 * @return length in bytes of {@code source} encoded as UTF-8; an unpaired
 *         surrogate is encoded as the one-byte replacement '?'
 * @throws IOException declared for compatibility with existing callers
 *         (covers UnsupportedEncodingException, which cannot occur for UTF-8)
 */
static int getUTF8Length(String source) throws IOException {
    return source.getBytes("UTF-8").length;
}
static final void test() {
String test2 = "ab\u263ac";
StringTokenizer st = new StringTokenizer(test2, "\u263a");

View file

@ -0,0 +1,30 @@
package com.ibm.text.UCD;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.TransliteratorUtilities;
import com.ibm.icu.text.Transliterator;
/**
 * Command-line tool: registers a transliterator under the id "any-temp"
 * using the directory "com/ibm/text/UCD/", runs it over the contents of the
 * file named by the first argument, and writes the result next to that file
 * under the same name prefixed with "new-".
 */
public class UseTransliterator {
    /** Directory handed to registerTransliteratorFromFile. */
    private static final String RULES_DIR = "com/ibm/text/UCD/";
    /** Id under which the transliterator is registered and then looked up. */
    private static final String TRANSLITERATOR_ID = "any-temp";

    /**
     * @param args {@code args[0]} is the path of the file to transliterate
     * @throws IOException if reading the input or writing the output fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            // Previously this surfaced as an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: UseTransliterator <filename>");
            return;
        }
        try {
            String filename = args[0];
            File f2 = new File(RULES_DIR);
            System.out.println(f2.getAbsolutePath());
            TransliteratorUtilities.registerTransliteratorFromFile(RULES_DIR, TRANSLITERATOR_ID);
            Transliterator t = Transliterator.getInstance(TRANSLITERATOR_ID);

            // Resolve to an absolute path first: for a bare file name,
            // File.getParent() is null and would produce the directory "null/".
            File f = new File(filename).getAbsoluteFile();
            String parent = f.getParent();
            String dir = (parent == null) ? "" : parent + File.separator;

            String fileContents = TransliteratorUtilities.getFileContents(dir, f.getName());
            String newContents = t.transliterate(fileContents);

            PrintWriter pw = BagFormatter.openUTF8Writer(dir, "new-" + f.getName());
            try {
                pw.write(newContents);
                // PrintWriter never throws on write; ask it explicitly whether output failed.
                if (pw.checkError()) {
                    throw new IOException("Error writing " + dir + "new-" + f.getName());
                }
            } finally {
                pw.close();
            }
        } finally {
            // Printed whether or not the run succeeded, as before.
            System.out.println("Done");
        }
    }
}

View file

@ -0,0 +1 @@
::[[:control:][:default_ignorable_code_point:]-[:whitespace:]] any-hex;

View file

@ -0,0 +1,245 @@
package com.ibm.text.utility;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Groups objects into equivalence classes built up from pairwise
 * {@link #add(Object, Object, Object)} calls, and remembers the reason(s)
 * given for each directly added pair so that a chain of reasons linking two
 * equivalent items can be reported by {@link #getReasons(Object, Object)}.
 * Items are compared with {@code equals}/{@code hashCode}.
 */
public class XEquivalenceClass {

    /**
     * Quick manual test: adds a fixed series of pairs and, after each one,
     * prints every known item with its sample, its equivalence set and the
     * reason chains from the item to its sample.
     */
    static public void main(String[] args) {
        XEquivalenceClass demo = new XEquivalenceClass("NONE");
        String[][] pairs = {{"b","a1"}, {"b", "c"}, {"a1", "c"}, {"d", "e"}, {"e", "f"}, {"c", "d"}};
        for (int i = 0; i < pairs.length; ++i) {
            String first = pairs[i][0];
            String second = pairs[i][1];
            System.out.println("Adding: " + first + ", " + second);
            demo.add(first, second, new Integer(i));
            Iterator items = demo.getExplicitItems().iterator();
            while (items.hasNext()) {
                Object item = items.next();
                Object sample = demo.getSample(item);
                System.out.println("\t" + item + ";\t" + sample + ";\t" + demo.getEquivalences(item));
                System.out.println("\t\t" + demo.getReasons(item, sample));
            }
        }
    }

    /** Maps each explicitly added item to the (shared) set holding its whole class. */
    private Map itemToPartition = new HashMap();
    /** Maps item a to a map from item b to the set of reasons given for the pair (a, b). */
    private Map reasonsByPair = new HashMap();
    /** Reason recorded when a pair is added with a null reason. */
    private Object defaultReason;

    /**
     * Creates an empty instance.
     *
     * @param defaultReason reason recorded for pairs that are added without one
     */
    public XEquivalenceClass(Object defaultReason) {
        this.defaultReason = defaultReason;
    }

    /**
     * Marks two items as equivalent, recording the default reason.
     *
     * @return this object, for chaining
     */
    public XEquivalenceClass add(Object a, Object b) {
        return add(a, b, null);
    }

    /**
     * Marks two items as equivalent and records why. The reason is only
     * consulted by {@link #getReasons(Object, Object)}. Does nothing when
     * {@code a.equals(b)}.
     *
     * @param reason the reason, or null to record the default reason
     * @return this object, for chaining
     */
    public XEquivalenceClass add(Object a, Object b, Object reason) {
        if (a.equals(b)) {
            return this;
        }
        Object effectiveReason = (reason != null) ? reason : defaultReason;
        // Reasons are stored in both directions so paths can be walked either way.
        addReason(a, b, effectiveReason);
        addReason(b, a, effectiveReason);

        Set setForA = (Set) itemToPartition.get(a);
        Set setForB = (Set) itemToPartition.get(b);

        if (setForA != null && setForB != null) {
            // Both already known. Identity comparison is intended: every
            // member of a class maps to the very same set instance.
            if (setForA != setForB) {
                setForA.addAll(setForB);
                // Re-point everything that used to map to b's set.
                Iterator moved = setForB.iterator();
                while (moved.hasNext()) {
                    itemToPartition.put(moved.next(), setForA);
                }
            }
            return this;
        }

        if (setForA != null) {
            // Only b is new: it joins a's class.
            setForA.add(b);
            itemToPartition.put(b, setForA);
            return this;
        }

        // a is new. If b is new as well, start a fresh class around b first.
        if (setForB == null) {
            setForB = new HashSet();
            setForB.add(b);
            itemToPartition.put(b, setForB);
        }
        setForB.add(a);
        itemToPartition.put(a, setForB);
        return this;
    }

    /**
     * Replays every (a, b, reason) triple recorded in another instance into
     * this one. Simple, unoptimized implementation.
     *
     * @return this object, for chaining
     */
    public XEquivalenceClass addAll(XEquivalenceClass other) {
        Iterator outer = other.reasonsByPair.entrySet().iterator();
        while (outer.hasNext()) {
            Map.Entry fromEntry = (Map.Entry) outer.next();
            Object a = fromEntry.getKey();
            Map reasonsFromA = (Map) fromEntry.getValue();
            Iterator inner = reasonsFromA.entrySet().iterator();
            while (inner.hasNext()) {
                Map.Entry toEntry = (Map.Entry) inner.next();
                Object b = toEntry.getKey();
                Set pairReasons = (Set) toEntry.getValue();
                Iterator reasonIt = pairReasons.iterator();
                while (reasonIt.hasNext()) {
                    add(a, b, reasonIt.next());
                }
            }
        }
        return this;
    }

    /**
     * Records one reason for the ordered pair (a, b), creating the nested
     * containers on first use.
     */
    private void addReason(Object a, Object b, Object reason) {
        Map reasonsFromA = (Map) reasonsByPair.get(a);
        if (reasonsFromA == null) {
            reasonsFromA = new HashMap();
            reasonsByPair.put(a, reasonsFromA);
        }
        Set pairReasons = (Set) reasonsFromA.get(b);
        if (pairReasons == null) {
            pairReasons = new HashSet();
            reasonsFromA.put(b, pairReasons);
        }
        pairReasons.add(reason);
    }

    /**
     * Returns an unmodifiable view of every item that has been passed to
     * {@code add} as part of a pair of unequal items. (Any other object is
     * equivalent only to itself.)
     */
    public Set getExplicitItems() {
        return Collections.unmodifiableSet(itemToPartition.keySet());
    }

    /**
     * Returns an unmodifiable set of all objects equivalent to {@code a},
     * including {@code a} itself. For an item never added, a one-element
     * set is manufactured.
     */
    public Set getEquivalences(Object a) {
        Set partition = (Set) itemToPartition.get(a);
        if (partition != null) {
            return Collections.unmodifiableSet(partition);
        }
        Set selfOnly = new HashSet();
        selfOnly.add(a);
        return Collections.unmodifiableSet(selfOnly);
    }

    /**
     * Returns true iff {@code a.equals(b)} or the two items are in the same
     * equivalence class.
     */
    public boolean isEquivalent(Object a, Object b) {
        if (a.equals(b)) {
            return true;
        }
        Set partition = (Set) itemToPartition.get(a);
        return partition != null && partition.contains(b);
    }

    /**
     * Returns some member of the equivalence class of {@code a}; for an item
     * never added, returns {@code a} itself.
     */
    public Object getSample(Object a) {
        Set partition = (Set) itemToPartition.get(a);
        return (partition == null) ? a : partition.iterator().next();
    }

    /** Predicate used by {@link #getSample(Object, Filter)} to choose a sample. */
    public interface Filter {
        boolean matches(Object o);
    }

    /**
     * Returns the first member of the equivalence class of {@code a} (in the
     * class's iteration order) accepted by the filter, or {@code a} itself
     * when none is accepted or {@code a} was never added.
     */
    public Object getSample(Object a, Filter f) {
        Set partition = (Set) itemToPartition.get(a);
        if (partition != null) {
            Iterator members = partition.iterator();
            while (members.hasNext()) {
                Object candidate = members.next();
                if (f.matches(candidate)) {
                    return candidate;
                }
            }
        }
        return a;
    }

    /**
     * Returns a new set holding one sample from each equivalence class.
     */
    public Set getSamples() {
        Set covered = new HashSet();
        Set samples = new HashSet();
        Iterator entries = itemToPartition.entrySet().iterator();
        while (entries.hasNext()) {
            Map.Entry entry = (Map.Entry) entries.next();
            if (covered.contains(entry.getKey())) {
                continue; // this item's class has already contributed a sample
            }
            Set partition = (Set) entry.getValue();
            samples.add(partition.iterator().next());
            covered.addAll(partition);
        }
        return samples;
    }

    /**
     * Returns the shortest reason chains connecting {@code a} to {@code b},
     * as a list of lists. Each sublist has the form
     * [reasons, obj, reasons, obj, ..., reasons], where each reasons element
     * is the set of reasons recorded for stepping from one object to the
     * next; {@code a} and {@code b} themselves are not included.<br>
     * Returns null if either item is unknown, they are in different classes,
     * or {@code a.equals(b)}.
     */
    public List getReasons(Object a, Object b) {
        Set partitionOfA = (Set) itemToPartition.get(a);
        Set partitionOfB = (Set) itemToPartition.get(b);
        if (partitionOfA == null || partitionOfB == null) {
            return null;
        }
        if (partitionOfA != partitionOfB) {
            return null;
        }
        if (a.equals(b)) {
            return null;
        }

        // Plain breadth-first search over whole paths; no attempt at optimization.
        ArrayList startPath = new ArrayList();
        startPath.add(a);
        ArrayList frontier = new ArrayList();
        frontier.add(startPath);

        List foundPaths = new ArrayList();
        Set visitedEarlier = new HashSet();
        visitedEarlier.add(a);

        // Each round lengthens every path in the frontier by one step.
        do {
            ArrayList nextFrontier = new ArrayList();
            Set visitedNow = new HashSet();
            for (int p = 0; p < frontier.size(); ++p) {
                ArrayList path = (ArrayList) frontier.get(p);
                Object tail = path.get(path.size() - 1);
                Map edges = (Map) reasonsByPair.get(tail);
                Iterator edgeIt = edges.entrySet().iterator();
                while (edgeIt.hasNext()) {
                    Map.Entry edge = (Map.Entry) edgeIt.next();
                    Object neighbor = edge.getKey();
                    if (visitedEarlier.contains(neighbor)) {
                        continue; // already reached by a shorter path
                    }
                    visitedNow.add(neighbor);
                    ArrayList longer = new ArrayList(path);
                    longer.add(edge.getValue());
                    longer.add(neighbor);
                    nextFrontier.add(longer);
                    if (neighbor.equals(b)) {
                        // Report the path without its first (a) and last (b) elements.
                        foundPaths.add(new ArrayList(longer.subList(1, longer.size() - 1)));
                    }
                }
            }
            frontier = nextFrontier;
            visitedEarlier.addAll(visitedNow);
        } while (foundPaths.isEmpty());
        return foundPaths;
    }
}