diff --git a/icu4j/src/com/ibm/icu/text/CanonicalIterator.java b/icu4j/src/com/ibm/icu/text/CanonicalIterator.java index 9db4255206e..1109cde7b33 100755 --- a/icu4j/src/com/ibm/icu/text/CanonicalIterator.java +++ b/icu4j/src/com/ibm/icu/text/CanonicalIterator.java @@ -117,29 +117,30 @@ public final class CanonicalIterator { } // find the segments - List list = new ArrayList(); + List segmentList = new ArrayList(); int cp; int start = 0; // i should be the end of the first code point + // break up the string into segments int i = UTF16.findOffsetFromCodePoint(source, 1); for (; i < source.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(source, i); if (NormalizerImpl.isCanonSafeStart(cp)) { - list.add(source.substring(start, i)); // add up to i + segmentList.add(source.substring(start, i)); // add up to i start = i; } } - list.add(source.substring(start, i)); // add last one + segmentList.add(source.substring(start, i)); // add last one // allocate the arrays, and find the strings that are CE to each segment - pieces = new String[list.size()][]; - current = new int[list.size()]; + pieces = new String[segmentList.size()][]; + current = new int[segmentList.size()]; for (i = 0; i < pieces.length; ++i) { if (PROGRESS) System.out.println("SEGMENT"); - pieces[i] = getEquivalents((String) list.get(i)); + pieces[i] = getEquivalents((String) segmentList.get(i)); } } @@ -285,12 +286,12 @@ public final class CanonicalIterator { StringBuffer workingBuffer = new StringBuffer(); // cycle through all the characters - int cp=0,end=0; + int cp=0; int[] range = new int[2]; for (int i = 0; i < segment.length(); i += UTF16.getCharCount(cp)) { // see if any character is at the start of some decomposition - cp = UTF16.charAt(segment, i);; + cp = UTF16.charAt(segment, i); USerializedSet starts = new USerializedSet(); if (!NormalizerImpl.getCanonStartSet(cp, starts)) { @@ -298,28 +299,27 @@ public final class CanonicalIterator { } int j=0; // if so, see which decompositions 
match - for(j = 0, cp = end+1; cp <= end ||starts.getRange(j++, range); ++cp) { - if(cp>end){ - cp=range[0]; - end=range[1]; + int rangeCount = starts.countRanges(); + for(j = 0; j < rangeCount; ++j) { + starts.getRange(j, range); + int end=range[1]; + for (int cp2 = range[0]; cp2 <= end; ++cp2) { + Set remainder = extract(cp2, segment, i, workingBuffer); + if (remainder == null) continue; + + // there were some matches, so add all the possibilities to the set. + String prefix= segment.substring(0,i); + prefix += UTF16.valueOf(cp2); + //int el = -1; + Iterator iter = remainder.iterator(); + while (iter.hasNext()) { + String item = (String) iter.next(); + String toAdd = new String(prefix); + toAdd += item; + result.add(toAdd); + //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd))); + } } - - Set remainder = extract(cp, segment, i,workingBuffer); - if (remainder == null) continue; - - // there were some matches, so add all the possibilities to the set. - String prefix= segment.substring(0,i); - prefix += UTF16.valueOf(cp); - //int el = -1; - Iterator iter = remainder.iterator(); - while (iter.hasNext()) { - String item = (String) iter.next(); - String toAdd = new String(prefix); - toAdd += item; - result.add(toAdd); - //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd))); - } - } } return result; diff --git a/tools/unicodetools/com/ibm/text/UCA/Main.java b/tools/unicodetools/com/ibm/text/UCA/Main.java index f914671af27..e64ae924bef 100644 --- a/tools/unicodetools/com/ibm/text/UCA/Main.java +++ b/tools/unicodetools/com/ibm/text/UCA/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ -* $Date: 2005/04/06 08:48:16 $ -* $Revision: 1.19 $ +* $Date: 2005/04/06 15:15:43 $ +* $Revision: 1.20 $ * ******************************************************************************* */ @@ -33,7 +33,6 @@ public class Main { }; public static void 
main(String args[]) throws Exception { - //checkCanonicalIterator(); // NOTE: so far, we don't need to build the UCA with anything but the latest versions. // A few changes would need to be made to the code to do older versions. try { @@ -101,6 +100,9 @@ public class Main { else if (arg.equalsIgnoreCase("short")) shortPrint = !shortPrint; else if (arg.equalsIgnoreCase("noCE")) noCE = !noCE; + else if (arg.equalsIgnoreCase("checkCanonicalIterator")) checkCanonicalIterator(); + + else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation(); // else if (arg.equalsIgnoreCase("probe")) Probe.test(); @@ -148,16 +150,16 @@ public class Main { System.out.println("first implicit: " + Utility.hex((long)(firstImplicit & 0xFFFFFFFFL))); CanonicalIterator it = new CanonicalIterator(""); - String[] tests = new String[] {"\uF900"}; + String[] tests = new String[] {"\uF900", "\u00C5d\u0307\u0327"}; for (int j = 0; j < tests.length; ++j) { - System.out.println(tests[j]); + System.out.println(Default.ucd().getCodeAndName(tests[j])); it.setSource(tests[j]); String ss; for (int i = 0; (ss = it.next()) != null; ++i) { - System.out.println(i + "\t" + Utility.hex(ss)); + System.out.println(i + "\t" + Default.ucd().getCodeAndName(ss)); } } - if (true) throw new IllegalArgumentException(); + // verify that nothing breaks for (int i = 0; i < 0x10FFFF; ++i) { int cat = UCharacter.getType(i); if (cat == UCharacter.UNASSIGNED || cat == UCharacter.PRIVATE_USE || cat == UCharacter.SURROGATE) continue;