ICU-7869 added getBucketIndex and a test. It doesn't quite work right yet, but checking in so that Andy can look at it.

X-SVN-Rev: 28540
This commit is contained in:
Mark Davis 2010-08-28 01:15:48 +00:00
parent c91d109999
commit fe40416511
2 changed files with 185 additions and 80 deletions

View file

@ -508,6 +508,52 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
return this;
}
/**
 * Get the bucket number for the given name. This routine permits callers to implement their own bucket handling
 * mechanisms, including client-server handling. For example, when a new name is created on the client, it can ask
 * the server for the bucket for that name, and the sortkey (using getCollator). Once the client has that
 * information, it can put the name into the right bucket, and sort it within that bucket, without having access to
 * the index or collator.
 * <p>
 * Note that the bucket number (and sort key) are only valid for the settings of the current AlphabeticIndex; if
 * those are changed, then the bucket number and sort key must be regenerated.
 *
 * @param name
 *            the name to look up; must not be null
 * @return the zero-based position, in the iteration order of this index, of the bucket the name is assigned to
 * @draft ICU 4.6
 * @provisional This API might change or be removed in a future release.
 */
public int getBucketIndex(CharSequence name) {
    if (buckets == null) {
        buckets = getIndexBuckets();
    }
    // hackName looks at the first code point of the name, so it must not be called with an empty name;
    // an empty name simply compares equal to the first ("") boundary below.
    if (langType == LangType.SIMPLIFIED && name.length() > 0) {
        String hackPrefix = hackName(name, collatorPrimaryOnly);
        if (hackPrefix != null) {
            name = hackPrefix + name;
        }
    }
    // TODO use a binary search
    // 'result' is always the index of the bucket visited just before the current one.
    int result = -1;
    for (Bucket<V> bucket : this) {
        if (bucket.lowerBoundary == null) { // last bucket
            // NOTE(review): this yields the index of the bucket *before* the one with the null boundary;
            // confirm that is the intended target.
            return result;
        }
        int comp = collatorPrimaryOnly.compare(name, bucket.lowerBoundary);
        if (comp < 0) { // the first boundary is always "", and so -1 will never be returned
            // The name sorts before this bucket's lower boundary, so it belongs to the previous bucket.
            return result;
        } else if (comp == 0) {
            // The name is primary-equal to this bucket's lower boundary, so it belongs to this bucket.
            return result + 1;
        }
        result++;
    }
    // The name sorts after every lower boundary: it belongs to the final bucket visited.
    return result;
}
/**
* Clear the index.
*
@ -705,73 +751,73 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
"[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze();
// Frozen set of candidate characters: everything that is not NFC_Quick_Check=No, with IGNORE_SCRIPTS removed.
private static final UnicodeSet TO_TRY = new UnicodeSet("[:^nfcqc=no:]").removeAll(IGNORE_SCRIPTS).freeze();
// /**
// * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
// * collation.
// *
// * @param ruleBasedCollator
// * TODO
// * @param comparator
// * @param lowerLimit
// * @param testScript
// *
// * @return
// */
//
// private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
// String[] results = new String[UScript.CODE_LIMIT];
// for (String current : TO_TRY) {
// if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
// // symbols.
// continue;
// }
// int script = UScript.getScript(current.codePointAt(0));
// if (results[script] == null) {
// results[script] = current;
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
// results[script] = current;
// }
// }
//
// try {
// UnicodeSet extras = new UnicodeSet();
// UnicodeSet expansions = new UnicodeSet();
// ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
// extras.addAll(expansions).removeAll(TO_TRY);
// if (extras.size() != 0) {
// Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
// for (String current : extras) {
// if (!TO_TRY.containsAll(current))
// continue;
// if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
// continue;
// }
// int script = UScript.getScript(current.codePointAt(0));
// if (results[script] == null) {
// results[script] = current;
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
// results[script] = current;
// }
// }
// }
// } catch (Exception e) {
// } // why have a checked exception???
//
// TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
// for (int i = 0; i < results.length; ++i) {
// if (results[i] != null) {
// sorted.add(results[i]);
// }
// }
// if (true) {
// for (String s : sorted) {
// System.out.println("\"" + s + "\",");
// }
// }
//
// List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
// return result;
// }
// /**
// * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
// * collation.
// *
// * @param ruleBasedCollator
// * TODO
// * @param comparator
// * @param lowerLimit
// * @param testScript
// *
// * @return
// */
//
// private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
// String[] results = new String[UScript.CODE_LIMIT];
// for (String current : TO_TRY) {
// if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
// // symbols.
// continue;
// }
// int script = UScript.getScript(current.codePointAt(0));
// if (results[script] == null) {
// results[script] = current;
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
// results[script] = current;
// }
// }
//
// try {
// UnicodeSet extras = new UnicodeSet();
// UnicodeSet expansions = new UnicodeSet();
// ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
// extras.addAll(expansions).removeAll(TO_TRY);
// if (extras.size() != 0) {
// Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
// for (String current : extras) {
// if (!TO_TRY.containsAll(current))
// continue;
// if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
// continue;
// }
// int script = UScript.getScript(current.codePointAt(0));
// if (results[script] == null) {
// results[script] = current;
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
// results[script] = current;
// }
// }
// }
// } catch (Exception e) {
// } // why have a checked exception???
//
// TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
// for (int i = 0; i < results.length; ++i) {
// if (results[i] != null) {
// sorted.add(results[i]);
// }
// }
// if (true) {
// for (String s : sorted) {
// System.out.println("\"" + s + "\",");
// }
// }
//
// List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
// return result;
// }
// Single shared PreferenceComparator instance.
private static final PreferenceComparator PREFERENCE_COMPARATOR = new PreferenceComparator();
// Initial value is 99. NOTE(review): presumably the upper limit on the number of index labels; confirm against its users.
private int maxLabelCount = 99;
@ -1001,7 +1047,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
/**
* HACKS
*/
private static CharSequence hackName(CharSequence name, Comparator comparator) {
private static String hackName(CharSequence name, Comparator comparator) {
if (!UNIHAN.contains(Character.codePointAt(name, 0))) {
return null;
}

View file

@ -13,9 +13,13 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.util.CollectionUtilities;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.impl.Row.R4;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.AlphabeticIndex;
@ -23,6 +27,7 @@ import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.AlphabeticIndex.Bucket;
import com.ibm.icu.text.AlphabeticIndex.Record;
import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
import com.ibm.icu.util.ULocale;
@ -183,7 +188,7 @@ public class AlphabeticIndexTest extends TestFmwk {
checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron");
}
}
public void TestInflow() {
Object[][] tests = {
{0, ULocale.ENGLISH},
@ -311,7 +316,7 @@ public class AlphabeticIndexTest extends TestFmwk {
// Join the elements of the list to a string with delimiter ":"
StringBuilder sb = new StringBuilder();
Iterator iter = indexCharacters.iterator();
Iterator<String> iter = indexCharacters.iterator();
while (iter.hasNext()) {
sb.append(iter.next());
if (!iter.hasNext()) {
@ -396,17 +401,71 @@ public class AlphabeticIndexTest extends TestFmwk {
}
}
}
/**
 * Checks that a client can rebuild the index's bucket contents using only
 * {@code getBucketIndex} and raw collation keys from {@code getCollator()}.
 * <p>
 * Every name in SimpleTests is added to an AlphabeticIndex, and is also placed into a locally maintained list
 * of sets (one per label) at the position reported by {@code getBucketIndex}. The labels and the record names
 * of the real buckets are then compared, in order, against the local copy.
 */
public void TestClientSupport() {
    for (String localeString : new String[] {"zh"}) { // KEY_LOCALES
        ULocale ulocale = new ULocale(localeString);
        AlphabeticIndex<Double> indexCharacters = new AlphabeticIndex<Double>(ulocale).addLabels(ULocale.ENGLISH);
        RuleBasedCollator collator = indexCharacters.getCollator();
        for (String name : SimpleTests) {
            indexCharacters.addRecord(name, (double)name.length());
        }

        // make my own copy
        List<String> myBucketLabels = indexCharacters.getLabels();
        ArrayList<Set<R4>> myBucketContents = new ArrayList<Set<R4>>(myBucketLabels.size());
        for (int i = 0; i < myBucketLabels.size(); ++i) {
            myBucketContents.add(new TreeSet<R4>());
        }
        int counter = 0;
        for (String name : SimpleTests) {
            int bucketIndex = indexCharacters.getBucketIndex(name);
            Set<R4> myBucket = myBucketContents.get(bucketIndex);
            // Each row starts with the raw collation key; the trailing counter keeps rows for repeated names distinct.
            myBucket.add(Row.of(collator.getRawCollationKey(name, null), name, name.length(), (double) counter++));
        }

        // now compare
        int index = 0;
        for (AlphabeticIndex.Bucket<Double> bucket : indexCharacters) {
            String bucketLabel = bucket.getLabel();
            String myLabel = myBucketLabels.get(index);
            if (!bucketLabel.equals(myLabel)) {
                assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel);
            }
            Set<R4> myBucket = myBucketContents.get(index);
            Iterator<R4> myBucketIterator = myBucket.iterator();
            int recordIndex = 0;
            for (Record<Double> record : bucket) {
                // null stands for "my copy has run out of records for this bucket"
                String myName = null;
                if (myBucketIterator.hasNext()) {
                    R4 myRecord = myBucketIterator.next();
                    myName = (String) myRecord.get1();
                }
                if (!record.getName().equals(myName)) {
                    assertEquals(ulocale + "\t" + bucketLabel + "\t" +
                            "Record Names (" + recordIndex + ":)", record.getName(), myName);
                }
                // Advance for every record, not only for mismatches, so that the reported position is accurate.
                recordIndex++;
            }
            // Anything left in my copy is a record that the real bucket does not contain.
            while (myBucketIterator.hasNext()) {
                R4 myRecord = myBucketIterator.next();
                String myName = (String) myRecord.get1();
                assertEquals(ulocale + "\t" + bucketLabel + "\t" +
                        "Record Names (" + recordIndex++ + ":)", null, myName);
            }
            index++;
        }
    }
}
/**
 * Placeholder test: the body contains no executable statements. The commented-out lines are scratch code that
 * builds an English AlphabeticIndex with extra Cyrillic labels and prints the label and bucket counts.
 */
public void TestZZZ() {
// int x = 3;
// AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
// UnicodeSet additions = new UnicodeSet();
// additions.add(0x410).add(0x415); // Cyrillic
// // additions.add(0x391).add(0x393); // Greek
// index.addLabels(additions);
// int lc = index.getLabels().size();
// List labels = index.getLabels();
// System.out.println("Label Count = " + lc + "\t" + labels);
// System.out.println("Bucket Count =" + index.getBucketCount());
// int x = 3;
// AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
// UnicodeSet additions = new UnicodeSet();
// additions.add(0x410).add(0x415); // Cyrillic
// // additions.add(0x391).add(0x393); // Greek
// index.addLabels(additions);
// int lc = index.getLabels().size();
// List labels = index.getLabels();
// System.out.println("Label Count = " + lc + "\t" + labels);
// System.out.println("Bucket Count =" + index.getBucketCount());
}
public void TestSimplified() {