ICU-7869 added getBucketIndex and a test. It doesn't quite work right yet, but checking in so that Andy can look at it.

X-SVN-Rev: 28540
2025-04-21 12:40:02 +00:00 · 2010-08-28 01:15:48 +00:00 · 2010-08-28 01:15:48 +00:00 · fe40416511
commit fe40416511
parent c91d109999
2 changed files with 185 additions and 80 deletions
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
@ -508,6 +508,52 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
        return this;
    }

+    /**
+     * Get the bucket number for the given name. This routine permits callers to implement their own bucket handling
+     * mechanisms, including client-server handling. For example, when a new name is created on the client, it can ask
+     * the server for the bucket for that name, and the sortkey (using getCollator). Once the client has that
+     * information, it can put the name into the right bucket, and sort it within that bucket, without having access to
+     * the index or collator.
+     * <p>
+     * Note that the bucket number (and sort key) are only valid for the settings of the current AlphabeticIndex; if
+     * those are changed, then the bucket number and sort key must be regenerated.
+     * 
+     * @param name
+     *            Name to look up, such as a person's name
+     * @return the zero-based position of the bucket for the name, counted in this index's iteration order.
+     *         NOTE(review): the commit message says this does not quite work yet; the value -1 would be
+     *         returned if the name compared less than the first bucket's lower boundary -- confirm.
+     * @draft ICU 4.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public int getBucketIndex(CharSequence name) {
+        if (buckets == null) {
+            buckets = getIndexBuckets();
+        }
+        if (langType == LangType.SIMPLIFIED) {
+            String hackPrefix = hackName(name, collatorPrimaryOnly);
+            if (hackPrefix != null) {
+                name = hackPrefix + name;
+            }
+        }
+
+        // TODO use a binary search; for now, walk the buckets in order and stop at the first boundary above the name
+        int result = -1;
+        for (Bucket<V> bucket : this) {
+            if (bucket.lowerBoundary == null) { // last bucket
+                return result;
+            }
+            int comp = collatorPrimaryOnly.compare(name, bucket.lowerBoundary);
+            if (comp < 0) { // the first boundary is always "", and so -1 will never be returned
+                return result;
+            } else if (comp == 0) {
+                return result + 1;
+            }
+            result++;
+        }
+        return result;
+    }
+
    /**
     * Clear the index.
     * 
@ -705,73 +751,73 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
    "[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze();
    private static final UnicodeSet TO_TRY = new UnicodeSet("[:^nfcqc=no:]").removeAll(IGNORE_SCRIPTS).freeze();

-//    /**
-//     * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
-//     * collation.
-//     * 
-//     * @param ruleBasedCollator
-//     *            TODO
-//     * @param comparator
-//     * @param lowerLimit
-//     * @param testScript
-//     * 
-//     * @return
-//     */
-//
-//    private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
-//        String[] results = new String[UScript.CODE_LIMIT];
-//        for (String current : TO_TRY) {
-//            if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
-//                // symbols.
-//                continue;
-//            }
-//            int script = UScript.getScript(current.codePointAt(0));
-//            if (results[script] == null) {
-//                results[script] = current;
-//            } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
-//                results[script] = current;
-//            }
-//        }
-//
-//        try {
-//            UnicodeSet extras = new UnicodeSet();
-//            UnicodeSet expansions = new UnicodeSet();
-//            ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
-//            extras.addAll(expansions).removeAll(TO_TRY);
-//            if (extras.size() != 0) {
-//                Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
-//                for (String current : extras) {
-//                    if (!TO_TRY.containsAll(current))
-//                        continue;
-//                    if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
-//                        continue;
-//                    }
-//                    int script = UScript.getScript(current.codePointAt(0));
-//                    if (results[script] == null) {
-//                        results[script] = current;
-//                    } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
-//                        results[script] = current;
-//                    }
-//                }
-//            }
-//        } catch (Exception e) {
-//        } // why have a checked exception???
-//
-//        TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
-//        for (int i = 0; i < results.length; ++i) {
-//            if (results[i] != null) {
-//                sorted.add(results[i]);
-//            }
-//        }
-//        if (true) {
-//            for (String s : sorted) {
-//                System.out.println("\"" + s + "\",");
-//            }
-//        }
-//
-//        List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
-//        return result;
-//    }
+    //    /**
+    //     * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
+    //     * collation.
+    //     * 
+    //     * @param ruleBasedCollator
+    //     *            TODO
+    //     * @param comparator
+    //     * @param lowerLimit
+    //     * @param testScript
+    //     * 
+    //     * @return
+    //     */
+    //
+    //    private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
+    //        String[] results = new String[UScript.CODE_LIMIT];
+    //        for (String current : TO_TRY) {
+    //            if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
+    //                // symbols.
+    //                continue;
+    //            }
+    //            int script = UScript.getScript(current.codePointAt(0));
+    //            if (results[script] == null) {
+    //                results[script] = current;
+    //            } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
+    //                results[script] = current;
+    //            }
+    //        }
+    //
+    //        try {
+    //            UnicodeSet extras = new UnicodeSet();
+    //            UnicodeSet expansions = new UnicodeSet();
+    //            ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
+    //            extras.addAll(expansions).removeAll(TO_TRY);
+    //            if (extras.size() != 0) {
+    //                Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
+    //                for (String current : extras) {
+    //                    if (!TO_TRY.containsAll(current))
+    //                        continue;
+    //                    if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
+    //                        continue;
+    //                    }
+    //                    int script = UScript.getScript(current.codePointAt(0));
+    //                    if (results[script] == null) {
+    //                        results[script] = current;
+    //                    } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
+    //                        results[script] = current;
+    //                    }
+    //                }
+    //            }
+    //        } catch (Exception e) {
+    //        } // why have a checked exception???
+    //
+    //        TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
+    //        for (int i = 0; i < results.length; ++i) {
+    //            if (results[i] != null) {
+    //                sorted.add(results[i]);
+    //            }
+    //        }
+    //        if (true) {
+    //            for (String s : sorted) {
+    //                System.out.println("\"" + s + "\",");
+    //            }
+    //        }
+    //
+    //        List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
+    //        return result;
+    //    }

    private static final PreferenceComparator PREFERENCE_COMPARATOR = new PreferenceComparator();
    private int maxLabelCount = 99;
@ -1001,7 +1047,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
    /**
     * HACKS
     */
-    private static CharSequence hackName(CharSequence name, Comparator comparator) {
+    private static String hackName(CharSequence name, Comparator comparator) {
        if (!UNIHAN.contains(Character.codePointAt(name, 0))) {
            return null;
        }
--- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java
+++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java
@ -13,9 +13,13 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;

 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.dev.test.util.CollectionUtilities;
+import com.ibm.icu.impl.Row;
+import com.ibm.icu.impl.Row.R3;
+import com.ibm.icu.impl.Row.R4;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.AlphabeticIndex;
@ -23,6 +27,7 @@ import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.AlphabeticIndex.Bucket;
+import com.ibm.icu.text.AlphabeticIndex.Record;
 import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
 import com.ibm.icu.util.ULocale;

@ -183,7 +188,7 @@ public class AlphabeticIndexTest extends TestFmwk {
            checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron");
        }
    }
-    
+
    public void TestInflow() {
        Object[][] tests = {
                {0, ULocale.ENGLISH},
@ -311,7 +316,7 @@ public class AlphabeticIndexTest extends TestFmwk {

            // Join the elements of the list to a string with delimiter ":"
            StringBuilder sb = new StringBuilder();
-            Iterator iter = indexCharacters.iterator();
+            Iterator<String> iter = indexCharacters.iterator();
            while (iter.hasNext()) {
                sb.append(iter.next());
                if (!iter.hasNext()) {
@ -396,17 +401,71 @@ public class AlphabeticIndexTest extends TestFmwk {
            }
        }
    }
+
+    public void TestClientSupport() {
+        for (String localeString : new String[] {"zh"}) { // KEY_LOCALES
+            ULocale ulocale = new ULocale(localeString);
+            AlphabeticIndex<Double> indexCharacters = new AlphabeticIndex<Double>(ulocale).addLabels(ULocale.ENGLISH);
+            RuleBasedCollator collator = indexCharacters.getCollator();
+            for (String name : SimpleTests) {
+                indexCharacters.addRecord(name, (double)name.length());
+            }
+            // make my own copy: one sorted set per bucket label, filled below via getBucketIndex (simulating a client)
+            List<String> myBucketLabels = indexCharacters.getLabels();
+            ArrayList<Set<R4>> myBucketContents = new ArrayList<Set<R4>>(myBucketLabels.size());
+            for (int i = 0; i < myBucketLabels.size(); ++i) {
+                myBucketContents.add(new TreeSet<R4>());
+            }
+            int counter = 0;
+            for (String name : SimpleTests) {
+                int bucketIndex = indexCharacters.getBucketIndex(name);
+                Set<R4> myBucket = myBucketContents.get(bucketIndex);
+                myBucket.add(Row.of(collator.getRawCollationKey(name, null), name, name.length(), (double) counter++));
+            }
+            // now compare the index's own buckets against my copy, label by label and record by record
+            int index = 0;
+            for (AlphabeticIndex.Bucket<Double> bucket : indexCharacters) {
+                String bucketLabel = bucket.getLabel();
+                String myLabel = myBucketLabels.get(index);
+                if (!bucketLabel.equals(myLabel)) {
+                    assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel);
+                }
+                Set<R4> myBucket = myBucketContents.get(index);
+                Iterator<R4> myBucketIterator = myBucket.iterator();
+                int recordIndex = 0;
+                for (Record<Double> record : bucket) {
+                    String myName = null;
+                    if (myBucketIterator.hasNext()) {
+                        R4 myRecord = myBucketIterator.next();
+                        myName = (String) myRecord.get1();
+                    }
+                    if (!record.getName().equals(myName)) {
+                        assertEquals(ulocale + "\t" + bucketLabel + "\t" + 
+                        		"Record Names (" + recordIndex++ + ":)", record.getName(), myName);
+                    }
+                }
+                while (myBucketIterator.hasNext()) {
+                    R4 myRecord = myBucketIterator.next();
+                    String myName = (String) myRecord.get1();
+                    assertEquals(ulocale + "\t" + bucketLabel + "\t" +
+                    		"Record Names (" + recordIndex++ + ":)", null, myName);
+                }
+                index++;
+            }
+        }
+    }
+
    public void TestZZZ() {
-//            int x = 3;
-//            AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
-//            UnicodeSet additions = new UnicodeSet();
-//            additions.add(0x410).add(0x415);  // Cyrillic
-//            // additions.add(0x391).add(0x393);     // Greek
-//            index.addLabels(additions);
-//            int lc = index.getLabels().size();
-//            List  labels = index.getLabels();
-//            System.out.println("Label Count = " + lc + "\t" + labels);
-//            System.out.println("Bucket Count =" + index.getBucketCount());
+        //            int x = 3;
+        //            AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
+        //            UnicodeSet additions = new UnicodeSet();
+        //            additions.add(0x410).add(0x415);  // Cyrillic
+        //            // additions.add(0x391).add(0x393);     // Greek
+        //            index.addLabels(additions);
+        //            int lc = index.getLabels().size();
+        //            List  labels = index.getLabels();
+        //            System.out.println("Label Count = " + lc + "\t" + labels);
+        //            System.out.println("Bucket Count =" + index.getBucketCount());
    }

    public void TestSimplified() {