ICU-0 updates for security

X-SVN-Rev: 18047
2025-04-13 17:01:16 +00:00 · 2005-06-24 23:51:52 +00:00 · 2005-06-24 23:51:52 +00:00 · 1dbdb6ec78
commit 1dbdb6ec78
parent 1453a364a7
6 changed files with 933 additions and 302 deletions
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
@ -202,7 +202,7 @@ class GenerateStringPrep implements UCD_Types {
 		UnicodeMap scripts = ToolUnicodePropertySource.make("").getProperty("script").getUnicodeMap();
 		UnicodeMap blocks = ToolUnicodePropertySource.make("").getProperty("block").getUnicodeMap();
 		UnicodeMap.Composer myCompose = new UnicodeMap.Composer() {
-			public Object compose(Object a, Object b) {
+			public Object compose(int codePoint, Object a, Object b) {
 				return a + "\t" + b;
 			}
 		};
--- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
-* $Date: 2005/06/21 21:28:31 $
-* $Revision: 1.5 $
+* $Date: 2005/06/24 23:51:52 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -16,12 +16,87 @@ package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;

+import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;

 import com.ibm.text.utility.*;

 public class QuickTest implements UCD_Types {
+	
+	public static class Length {
+		String title;
+		int bytesPerCodeUnit;
+		int longestCodePoint = -1;
+		int longestLength = 0;
+		UnicodeSet longestSet = new UnicodeSet();
+		Length(String title, int bytesPerCodeUnit) {
+			this.title = title;
+			this.bytesPerCodeUnit = bytesPerCodeUnit;
+		}
+		void add(int codePoint, int codeUnitLength) {
+			if (codeUnitLength > longestLength) {
+				longestCodePoint = codePoint;
+				longestLength = codeUnitLength;
+				longestSet.clear();
+				longestSet.add(codePoint);
+				System.out.println(title + " \t(" + codeUnitLength*bytesPerCodeUnit + " bytes, "
+						+ codeUnitLength + " code units) \t"
+						+ Default.ucd().getCodeAndName(codePoint));				
+			} else if (codeUnitLength == longestLength) {
+				longestSet.add(codePoint);
+			}
+		}
+	}
+	
+	public static void main(String[] args) throws IOException {
+		getLengths("NFC", Default.nfc());
+		getLengths("NFD", Default.nfd());
+		getLengths("NFKC", Default.nfkc());
+		getLengths("NFKD", Default.nfkd());
+		System.out.println("Done");
+	}
+	
+	// Category-mask bits for the code points getLengths ignores: unassigned, private use, surrogate.
+	static final int skip = (1<<UCD.UNASSIGNED) | (1<<UCD.PRIVATE_USE) | (1<<UCD.SURROGATE);
+	/**
+	 * Normalizes every code point from 0 to 0x10FFFF whose category mask does not
+	 * intersect {@link #skip}, and feeds the length of the result, measured in
+	 * UTF-8, UTF-16 and UTF-32 code units, to one {@link Length} tracker each.
+	 * The trackers print a line whenever a new maximum is reached. Finally, if some
+	 * code point is among the longest in all three encoding forms, the first such
+	 * code point is printed as the "Common Exemplar".
+	 *
+	 * @param title label prefixed to each report line
+	 * @param normalizer normalizer applied to each code point
+	 * @throws IOException if measuring the UTF-8 length fails
+	 */
+	private static void getLengths(String title, Normalizer normalizer) throws IOException {
+		System.out.println();
+		// The second argument is the size of one code unit in bytes; it scales the
+		// "bytes" figure in the report, so it must be 1, 2 and 4 respectively.
+		Length utf8Len = new Length(title + "\tUTF8", 1);
+		Length utf16Len = new Length(title + "\tUTF16", 2);
+		Length utf32Len = new Length(title + "\tUTF32", 4);
+		for (int i = 0; i <= 0x10FFFF; ++i) {
+			int type = Default.ucd().getCategoryMask(i);
+			if ((type & skip) != 0) continue;
+			String norm = normalizer.normalize(i);
+			utf8Len.add(i, getUTF8Length(norm));
+			utf16Len.add(i, norm.length()); // String length is already in UTF-16 code units
+			utf32Len.add(i, UTF16.countCodePoint(norm));
+		}
+		// code points that are simultaneously the longest in all three forms
+		UnicodeSet common = new UnicodeSet(utf8Len.longestSet)
+			.retainAll(utf16Len.longestSet)
+			.retainAll(utf32Len.longestSet);
+		if (common.size() > 0) {
+			UnicodeSetIterator it = new UnicodeSetIterator(common);
+			it.next(); // advance to the first element; the set is known to be non-empty
+			System.out.println("Common Exemplar: " + Default.ucd().getCodeAndName(it.codepoint));
+		}
+	}
+
+	// Byte sink and UTF-8 writer shared by all calls to getUTF8Length; created on first use.
+	static ByteArrayOutputStream utf8baos;
+	static Writer utf8bw;
+
+	/**
+	 * Returns the number of bytes produced when {@code source} is written through
+	 * the shared UTF-8 writer into the (freshly reset) shared byte buffer.
+	 */
+	static int getUTF8Length(String source) throws IOException {
+		Writer encoder = utf8bw;
+		if (encoder == null) {
+			utf8baos = new ByteArrayOutputStream();
+			encoder = new OutputStreamWriter(utf8baos, "UTF-8");
+			utf8bw = encoder;
+		}
+		ByteArrayOutputStream sink = utf8baos;
+		sink.reset();
+		encoder.write(source);
+		encoder.flush();
+		return sink.size();
+	}
 	static final void test() {
 		String test2 = "ab\u263ac";
 		StringTokenizer st = new StringTokenizer(test2, "\u263a");
--- a/tools/unicodetools/com/ibm/text/UCD/UseTransliterator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UseTransliterator.java
@ -0,0 +1,30 @@
+package com.ibm.text.UCD;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.test.util.TransliteratorUtilities;
+import com.ibm.icu.text.Transliterator;
+
+public class UseTransliterator {
+	public static void main(String[] args) throws IOException {
+		try {
+			String filename = args[0];
+			File f2 = new File("com/ibm/text/UCD/");
+			System.out.println(f2.getAbsolutePath());
+			TransliteratorUtilities.registerTransliteratorFromFile("com/ibm/text/UCD/", "any-temp");
+			Transliterator t = Transliterator.getInstance("any-temp");
+			File f = new File(filename);
+			String fileContents = TransliteratorUtilities.getFileContents(f.getParent() + File.separator, f.getName());
+			String newContents = t.transliterate(fileContents);
+			PrintWriter pw = BagFormatter.openUTF8Writer(f.getParent() + File.separator, "new-" + f.getName());
+			pw.write(newContents);
+			pw.close();
+		} finally {
+			// TODO Auto-generated catch block
+			System.out.println("Done");
+		}
+	}
+}
--- a/tools/unicodetools/com/ibm/text/UCD/any_temp.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/any_temp.txt
@ -0,0 +1 @@
+::[[:control:][:default_ignorable_code_point:]-[:whitespace:]] any-hex;
--- a/tools/unicodetools/com/ibm/text/utility/XEquivalenceClass.java
+++ b/tools/unicodetools/com/ibm/text/utility/XEquivalenceClass.java
@ -0,0 +1,245 @@
+package com.ibm.text.utility;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class XEquivalenceClass {
+	
+	// quick test: add a few pairs and dump the state of every explicit item after each step
+	static public void main(String[] args) {
+		String[][] pairs = {{"b","a1"}, {"b", "c"}, {"a1", "c"}, {"d", "e"}, {"e", "f"}, {"c", "d"}};
+		XEquivalenceClass classes = new XEquivalenceClass("NONE");
+		for (int index = 0; index < pairs.length; ++index) {
+			String first = pairs[index][0];
+			String second = pairs[index][1];
+			System.out.println("Adding: " + first + ", " + second);
+			// the pair's index doubles as its reason
+			classes.add(first, second, new Integer(index));
+			Iterator items = classes.getExplicitItems().iterator();
+			while (items.hasNext()) {
+				Object item = items.next();
+				Object sample = classes.getSample(item);
+				System.out.println("\t" + item  + ";\t" + sample + ";\t" + classes.getEquivalences(item));
+				System.out.println("\t\t" + classes.getReasons(item, sample));
+			}
+		}
+	}
+	
+	private Map toPartitionSet = new HashMap(); // item -> the Set of all items equivalent to it (shared by every member)
+	private Map obj_obj_reasons = new HashMap(); // a -> Map of (b -> Set of reasons recorded for the pair a,b)
+	private Object defaultReason; // substituted when add() is called with a null reason
+	
+	
+	/**
+	 * Creates an empty equivalence class.
+	 *
+	 * @param defaultReason the reason recorded for a pair when add() is given
+	 *        no reason (a null reason)
+	 */
+	public XEquivalenceClass(Object defaultReason) {
+		this.defaultReason = defaultReason;
+	}
+	
+	/**
+	 * Add two equivalent items without an explicit reason; the default reason
+	 * given to the constructor is recorded for the pair.
+	 *
+	 * @return this object, so calls can be chained
+	 */
+	public XEquivalenceClass add(Object a, Object b) {
+		return add(a,b,null);
+	}
+	
+	/**
+	 * Declares a and b equivalent and records why. The reason is stored for both
+	 * directions and is only consulted by getReasons; a null reason is replaced
+	 * by the default reason. Adding an item to itself does nothing.
+	 *
+	 * @return this object, so calls can be chained
+	 */
+	public XEquivalenceClass add(Object a, Object b, Object reason) {
+		if (a.equals(b)) {
+			return this;
+		}
+		Object why = (reason != null) ? reason : defaultReason;
+		addReason(a, b, why);
+		addReason(b, a, why);
+
+		Set setForA = (Set) toPartitionSet.get(a);
+		Set setForB = (Set) toPartitionSet.get(b);
+
+		if (setForA != null && setForB != null) {
+			// both already known: nothing to do if they share a set (identity check is intended)
+			if (setForA == setForB) {
+				return this;
+			}
+			// otherwise fold b's set into a's and repoint every member of b's set
+			setForA.addAll(setForB);
+			Iterator moved = setForB.iterator();
+			while (moved.hasNext()) {
+				toPartitionSet.put(moved.next(), setForA);
+			}
+			return this;
+		}
+
+		if (setForA != null) {
+			// only b is new: it joins a's set
+			setForA.add(b);
+			toPartitionSet.put(b, setForA);
+			return this;
+		}
+
+		// a is new; if b is new as well, start a set for b first
+		if (setForB == null) {
+			setForB = new HashSet();
+			setForB.add(b);
+			toPartitionSet.put(b, setForB);
+		}
+		setForB.add(a);
+		toPartitionSet.put(a, setForB);
+		return this;
+	}
+	
+	/**
+	 * Copies every (a, b, reason) triple recorded in the other class into this
+	 * one by replaying it through add.
+	 *
+	 * @return this object, so calls can be chained
+	 */
+	public XEquivalenceClass addAll(XEquivalenceClass other) {
+		// Straightforward replay; no attempt at optimization
+		Iterator sources = other.obj_obj_reasons.entrySet().iterator();
+		while (sources.hasNext()) {
+			Map.Entry sourceEntry = (Map.Entry) sources.next();
+			Object from = sourceEntry.getKey();
+			Map targets = (Map) sourceEntry.getValue();
+			Iterator targetEntries = targets.entrySet().iterator();
+			while (targetEntries.hasNext()) {
+				Map.Entry targetEntry = (Map.Entry) targetEntries.next();
+				Object to = targetEntry.getKey();
+				Set recorded = (Set) targetEntry.getValue();
+				Iterator whys = recorded.iterator();
+				while (whys.hasNext()) {
+					add(from, to, whys.next());
+				}
+			}
+		}
+		return this;
+	}
+	
+	/**
+	 * Records reason under the ordered pair (a, b), creating the per-a map and
+	 * the per-pair reason set on first use.
+	 */
+	private void addReason(Object a, Object b, Object reason) {
+		Map reasonsByTarget = (Map) obj_obj_reasons.get(a);
+		if (reasonsByTarget == null) {
+			reasonsByTarget = new HashMap();
+			obj_obj_reasons.put(a, reasonsByTarget);
+		}
+		Set pairReasons = (Set) reasonsByTarget.get(b);
+		if (pairReasons == null) {
+			pairReasons = new HashSet();
+			reasonsByTarget.put(b, pairReasons);
+		}
+		pairReasons.add(reason);
+	}
+
+	/**
+	 * Returns an unmodifiable view of all the explicit items, i.e. every item that
+	 * has been placed in a partition set by add. (Any non-explicit items only
+	 * have themselves as equivalences.) The view is backed by the internal map's
+	 * key set rather than being a copy.
+	 *
+	 * @return unmodifiable set of the explicit items
+	 */
+	public Set getExplicitItems() {
+		return Collections.unmodifiableSet(toPartitionSet.keySet());
+	}
+	
+	/**
+	 * Returns an unmodifiable set holding a and everything equivalent to it.
+	 * For an item that was never added, the result contains just the item itself.
+	 */
+	public Set getEquivalences(Object a) {
+		Set members = (Set) toPartitionSet.get(a);
+		if (members != null) {
+			return Collections.unmodifiableSet(members);
+		}
+		// unknown item: it is equivalent only to itself
+		Set alone = new HashSet();
+		alone.add(a);
+		return Collections.unmodifiableSet(alone);
+	}
+	
+	/**
+	 * Tells whether a and b are equal or have been placed in the same
+	 * equivalence set.
+	 */
+	public boolean isEquivalent(Object a, Object b) {
+		if (a.equals(b)) {
+			return true;
+		}
+		Set members = (Set) toPartitionSet.get(a);
+		return members != null && members.contains(b);
+	}
+	
+	/**
+	 * Returns one member of a's equivalence set: the first element the set's
+	 * iterator yields, or a itself when a was never added.
+	 */
+	public Object getSample(Object a) {
+		Set members = (Set) toPartitionSet.get(a);
+		return (members == null) ? a : members.iterator().next();
+	}
+	
+	/**
+	 * Predicate used by getSample(Object, Filter) to choose among equivalents.
+	 */
+	public interface Filter {
+		boolean matches(Object o);
+	}
+
+	/**
+	 * Returns the first member of a's equivalence set accepted by f, in the set's
+	 * iteration order. Falls back to a itself when a was never added or when no
+	 * member is accepted.
+	 */
+	public Object getSample(Object a, Filter f) {
+		Set members = (Set) toPartitionSet.get(a);
+		if (members != null) {
+			Iterator candidates = members.iterator();
+			while (candidates.hasNext()) {
+				Object candidate = candidates.next();
+				if (f.matches(candidate)) {
+					return candidate;
+				}
+			}
+		}
+		return a;
+	}
+
+	/**
+	 * Collects one representative per equivalence set: for each set, the first
+	 * element its iterator yields.
+	 *
+	 * @return a new, modifiable set of representatives
+	 */
+	public Set getSamples() {
+		Set representatives = new HashSet();
+		Set covered = new HashSet();
+		Iterator items = toPartitionSet.keySet().iterator();
+		while (items.hasNext()) {
+			Object item = items.next();
+			if (!covered.contains(item)) {
+				// first time this set is met: take its sample and mark all its members
+				Set partition = (Set) toPartitionSet.get(item);
+				representatives.add(partition.iterator().next());
+				covered.addAll(partition);
+			}
+		}
+		return representatives;
+	}
+	
+
+	/**
+	 * Returns a list of lists. Each sublist is in the form [reasons, obj, reasons, obj,..., reasons]
+	 * where each reasons is a set of reasons to go from one obj to the next; the
+	 * endpoints a and b themselves are left out of the sublist.<br>
+	 * The search proceeds one link at a time and stops after the first round in
+	 * which b is reached, so every sublist has the same (smallest) number of links.<br>
+	 * Returns null if there is no connection: either item is not an explicit item,
+	 * the two are in different equivalence sets, or a.equals(b).
+	 *
+	 * @param a start item
+	 * @param b end item
+	 * @return list of reason chains from a to b, or null as described above
+	 */
+	public List getReasons(Object a, Object b) {
+		// simple breadth-first search over the recorded reason links
+		// no attempt at optimization
+		Set aPartitionSet = (Set) toPartitionSet.get(a);
+		Set bPartitionSet = (Set) toPartitionSet.get(b);
+		
+		// see if they connect (identity comparison of the shared partition set)
+		if (aPartitionSet == null || bPartitionSet == null || aPartitionSet != bPartitionSet || a.equals(b)) return null;
+		
+		ArrayList list = new ArrayList();
+		list.add(a);
+		ArrayList lists = new ArrayList(); // all partial paths of the current length, each starting with a
+		lists.add(list);
+
+		// this will contain the results
+		List foundLists = new ArrayList();
+		Set sawLastTime = new HashSet(); // items reached in earlier rounds
+		sawLastTime.add(a);
+		
+		// each time, we extend the lists by one (adding multiple other lists)
+		while (foundLists.size() == 0) {
+			ArrayList extendedList = new ArrayList();
+			Set sawThisTime = new HashSet();
+			for (Iterator it = lists.iterator(); it.hasNext();) {
+				ArrayList lista = (ArrayList) it.next();
+				Object last = lista.get(lista.size()-1);
+				Map obj_reasons = (Map) obj_obj_reasons.get(last);
+				for (Iterator it2 = obj_reasons.keySet().iterator(); it2.hasNext();) {
+					Object item = it2.next();
+					if (sawLastTime.contains(item)) {
+						continue; // skip since we have shorter
+					}
+					sawThisTime.add(item);
+					Set reasons = (Set) obj_reasons.get(item);
+					ArrayList lista2 = (ArrayList)lista.clone();
+					lista2.add(reasons);
+					lista2.add(item);
+					extendedList.add(lista2);
+					if (item.equals(b)) {
+						// remove first and last, i.e. the endpoints a and b
+						ArrayList found = (ArrayList)lista2.clone();
+						found.remove(0);
+						found.remove(found.size()-1);
+						foundLists.add(found);
+					}
+				}
+			}
+			lists = extendedList;
+			sawLastTime.addAll(sawThisTime); // merged only after the round, so equal-length alternatives are all kept
+		}
+		return foundLists;
+	}
+}
				`@ -0,0 +1 @@`
				`::[[:control:][:default_ignorable_code_point:]-[:whitespace:]] any-hex;`