mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-7869 added getBucketIndex and a test. It doesn't quite work right yet, but checking in so that Andy can look at it.
X-SVN-Rev: 28540
This commit is contained in:
parent
c91d109999
commit
fe40416511
2 changed files with 185 additions and 80 deletions
|
@ -508,6 +508,52 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the bucket number for the given name. This routine permits callers to implement their own bucket handling
|
||||
* mechanisms, including client-server handling. For example, when a new name is created on the client, it can ask
|
||||
* the server for the bucket for that name, and the sortkey (using getCollator). Once the client has that
|
||||
* information, it can put the name into the right bucket, and sort it within that bucket, without having access to
|
||||
* the index or collator.
|
||||
* <p>
|
||||
* Note that the bucket number (and sort key) are only valid for the settings of the current AlphabeticIndex; if
|
||||
* those are changed, then the bucket number and sort key must be regenerated.
|
||||
* <p>
* The name is compared, using the primary-only collator, against the lower boundary of each bucket in iteration
* order. For the SIMPLIFIED language type, a prefix obtained from hackName (when non-null) is prepended to the
* name before the comparison.
|
||||
*
|
||||
* @param name
|
||||
* Name to find the bucket for, such as a person's name
|
||||
* @return the bucket number for the given name
|
||||
* @draft ICU 4.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public int getBucketIndex(CharSequence name) {
|
||||
// Build the buckets on first use.
if (buckets == null) {
|
||||
buckets = getIndexBuckets();
|
||||
}
|
||||
// For SIMPLIFIED, hackName may supply a prefix (or null for none) that is prepended to the name before it is
// compared against the bucket boundaries.
if (langType == LangType.SIMPLIFIED) {
|
||||
String hackPrefix = hackName(name, collatorPrimaryOnly);
|
||||
if (hackPrefix != null) {
|
||||
name = hackPrefix + name;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO use a binary search
|
||||
// result trails the loop position by one: while the i-th bucket is being examined, result == i - 1.
int result = -1;
|
||||
// Iterates this AlphabeticIndex's own iterator rather than the buckets field assigned above.
for (Bucket<V> bucket : this) {
|
||||
// NOTE(review): on reaching a bucket with a null lower boundary, the index of the previous bucket is
// returned rather than the index of this bucket -- confirm that this is the intended result.
if (bucket.lowerBoundary == null) { // last bucket
|
||||
return result;
|
||||
}
|
||||
int comp = collatorPrimaryOnly.compare(name, bucket.lowerBoundary);
|
||||
// name sorts before this bucket's boundary, so it belongs to the previous bucket.
if (comp < 0) { // the first boundary is always "", and so -1 will never be returned
|
||||
return result;
|
||||
// name is primary-equal to this bucket's boundary, so it belongs to this bucket.
} else if (comp == 0) {
|
||||
return result + 1;
|
||||
}
|
||||
result++;
|
||||
}
|
||||
// name sorts after every boundary seen: return the index of the last bucket iterated.
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the index.
|
||||
*
|
||||
|
@ -705,73 +751,73 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
"[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze();
|
||||
private static final UnicodeSet TO_TRY = new UnicodeSet("[:^nfcqc=no:]").removeAll(IGNORE_SCRIPTS).freeze();
|
||||
|
||||
// /**
|
||||
// * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
|
||||
// * collation.
|
||||
// *
|
||||
// * @param ruleBasedCollator
|
||||
// * TODO
|
||||
// * @param comparator
|
||||
// * @param lowerLimit
|
||||
// * @param testScript
|
||||
// *
|
||||
// * @return
|
||||
// */
|
||||
//
|
||||
// private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
|
||||
// String[] results = new String[UScript.CODE_LIMIT];
|
||||
// for (String current : TO_TRY) {
|
||||
// if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
|
||||
// // symbols.
|
||||
// continue;
|
||||
// }
|
||||
// int script = UScript.getScript(current.codePointAt(0));
|
||||
// if (results[script] == null) {
|
||||
// results[script] = current;
|
||||
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
// results[script] = current;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// UnicodeSet extras = new UnicodeSet();
|
||||
// UnicodeSet expansions = new UnicodeSet();
|
||||
// ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
|
||||
// extras.addAll(expansions).removeAll(TO_TRY);
|
||||
// if (extras.size() != 0) {
|
||||
// Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
|
||||
// for (String current : extras) {
|
||||
// if (!TO_TRY.containsAll(current))
|
||||
// continue;
|
||||
// if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
|
||||
// continue;
|
||||
// }
|
||||
// int script = UScript.getScript(current.codePointAt(0));
|
||||
// if (results[script] == null) {
|
||||
// results[script] = current;
|
||||
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
// results[script] = current;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// } catch (Exception e) {
|
||||
// } // why have a checked exception???
|
||||
//
|
||||
// TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
|
||||
// for (int i = 0; i < results.length; ++i) {
|
||||
// if (results[i] != null) {
|
||||
// sorted.add(results[i]);
|
||||
// }
|
||||
// }
|
||||
// if (true) {
|
||||
// for (String s : sorted) {
|
||||
// System.out.println("\"" + s + "\",");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
|
||||
// return result;
|
||||
// }
|
||||
// /**
|
||||
// * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
|
||||
// * collation.
|
||||
// *
|
||||
// * @param ruleBasedCollator
|
||||
// * TODO
|
||||
// * @param comparator
|
||||
// * @param lowerLimit
|
||||
// * @param testScript
|
||||
// *
|
||||
// * @return
|
||||
// */
|
||||
//
|
||||
// private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
|
||||
// String[] results = new String[UScript.CODE_LIMIT];
|
||||
// for (String current : TO_TRY) {
|
||||
// if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
|
||||
// // symbols.
|
||||
// continue;
|
||||
// }
|
||||
// int script = UScript.getScript(current.codePointAt(0));
|
||||
// if (results[script] == null) {
|
||||
// results[script] = current;
|
||||
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
// results[script] = current;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// UnicodeSet extras = new UnicodeSet();
|
||||
// UnicodeSet expansions = new UnicodeSet();
|
||||
// ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
|
||||
// extras.addAll(expansions).removeAll(TO_TRY);
|
||||
// if (extras.size() != 0) {
|
||||
// Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
|
||||
// for (String current : extras) {
|
||||
// if (!TO_TRY.containsAll(current))
|
||||
// continue;
|
||||
// if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
|
||||
// continue;
|
||||
// }
|
||||
// int script = UScript.getScript(current.codePointAt(0));
|
||||
// if (results[script] == null) {
|
||||
// results[script] = current;
|
||||
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
// results[script] = current;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// } catch (Exception e) {
|
||||
// } // why have a checked exception???
|
||||
//
|
||||
// TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
|
||||
// for (int i = 0; i < results.length; ++i) {
|
||||
// if (results[i] != null) {
|
||||
// sorted.add(results[i]);
|
||||
// }
|
||||
// }
|
||||
// if (true) {
|
||||
// for (String s : sorted) {
|
||||
// System.out.println("\"" + s + "\",");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
|
||||
// return result;
|
||||
// }
|
||||
|
||||
private static final PreferenceComparator PREFERENCE_COMPARATOR = new PreferenceComparator();
|
||||
private int maxLabelCount = 99;
|
||||
|
@ -1001,7 +1047,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
/**
|
||||
* HACKS
|
||||
*/
|
||||
private static CharSequence hackName(CharSequence name, Comparator comparator) {
|
||||
private static String hackName(CharSequence name, Comparator comparator) {
|
||||
if (!UNIHAN.contains(Character.codePointAt(name, 0))) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -13,9 +13,13 @@ import java.util.LinkedHashSet;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.test.util.CollectionUtilities;
|
||||
import com.ibm.icu.impl.Row;
|
||||
import com.ibm.icu.impl.Row.R3;
|
||||
import com.ibm.icu.impl.Row.R4;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.AlphabeticIndex;
|
||||
|
@ -23,6 +27,7 @@ import com.ibm.icu.text.Collator;
|
|||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.AlphabeticIndex.Bucket;
|
||||
import com.ibm.icu.text.AlphabeticIndex.Record;
|
||||
import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -183,7 +188,7 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void TestInflow() {
|
||||
Object[][] tests = {
|
||||
{0, ULocale.ENGLISH},
|
||||
|
@ -311,7 +316,7 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
|
||||
// Join the elements of the list to a string with delimiter ":"
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Iterator iter = indexCharacters.iterator();
|
||||
Iterator<String> iter = indexCharacters.iterator();
|
||||
while (iter.hasNext()) {
|
||||
sb.append(iter.next());
|
||||
if (!iter.hasNext()) {
|
||||
|
@ -396,17 +401,71 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks getBucketIndex by rebuilding the index contents on the "client" side: every name in SimpleTests is
 * added to an AlphabeticIndex, the same names are placed into locally built buckets using getBucketIndex,
 * and the local buckets are then compared, label by label and record name by record name, with the buckets
 * produced by iterating the AlphabeticIndex.
 */
public void TestClientSupport() {
|
||||
for (String localeString : new String[] {"zh"}) { // KEY_LOCALES
|
||||
ULocale ulocale = new ULocale(localeString);
|
||||
AlphabeticIndex<Double> indexCharacters = new AlphabeticIndex<Double>(ulocale).addLabels(ULocale.ENGLISH);
|
||||
RuleBasedCollator collator = indexCharacters.getCollator();
|
||||
// Populate the index; the data stored with each name is its length.
for (String name : SimpleTests) {
|
||||
indexCharacters.addRecord(name, (double)name.length());
|
||||
}
|
||||
// Build a local copy of the bucket structure: one initially empty sorted set per label.
|
||||
List<String> myBucketLabels = indexCharacters.getLabels();
|
||||
ArrayList<Set<R4>> myBucketContents = new ArrayList<Set<R4>>(myBucketLabels.size());
|
||||
for (int i = 0; i < myBucketLabels.size(); ++i) {
|
||||
myBucketContents.add(new TreeSet<R4>());
|
||||
}
|
||||
int counter = 0;
|
||||
// Place each name into the local bucket chosen by getBucketIndex. Each row holds the raw collation key,
// the name, the name length and a running counter; presumably the TreeSet orders rows by their elements
// in that sequence, i.e. by collation key first -- confirm against Row's ordering.
for (String name : SimpleTests) {
|
||||
int bucketIndex = indexCharacters.getBucketIndex(name);
|
||||
Set<R4> myBucket = myBucketContents.get(bucketIndex);
|
||||
myBucket.add(Row.of(collator.getRawCollationKey(name, null), name, name.length(), (double) counter++));
|
||||
}
|
||||
// Compare the index's own buckets with the locally built ones, in iteration order.
|
||||
int index = 0;
|
||||
// NOTE(review): assumes the index iterates no more buckets than getLabels() returned; otherwise the get(index)
// calls below would throw -- confirm.
for (AlphabeticIndex.Bucket<Double> bucket : indexCharacters) {
|
||||
String bucketLabel = bucket.getLabel();
|
||||
String myLabel = myBucketLabels.get(index);
|
||||
if (!bucketLabel.equals(myLabel)) {
|
||||
assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel);
|
||||
}
|
||||
Set<R4> myBucket = myBucketContents.get(index);
|
||||
Iterator<R4> myBucketIterator = myBucket.iterator();
|
||||
// NOTE(review): recordIndex is only incremented while building a failure message below, so it counts
// reported mismatches rather than the position of the record within the bucket.
int recordIndex = 0;
|
||||
for (Record<Double> record : bucket) {
|
||||
// myName stays null when the local bucket has fewer entries than the index's bucket.
String myName = null;
|
||||
if (myBucketIterator.hasNext()) {
|
||||
R4 myRecord = myBucketIterator.next();
|
||||
// Element 1 of the row is the name stored by Row.of above (assuming get1 is zero-based -- confirm).
myName = (String) myRecord.get1();
|
||||
}
|
||||
if (!record.getName().equals(myName)) {
|
||||
assertEquals(ulocale + "\t" + bucketLabel + "\t" +
|
||||
"Record Names (" + recordIndex++ + ":)", record.getName(), myName);
|
||||
}
|
||||
}
|
||||
// Any entries left in the local bucket have no counterpart in the index's bucket: report each one.
while (myBucketIterator.hasNext()) {
|
||||
R4 myRecord = myBucketIterator.next();
|
||||
String myName = (String) myRecord.get1();
|
||||
assertEquals(ulocale + "\t" + bucketLabel + "\t" +
|
||||
"Record Names (" + recordIndex++ + ":)", null, myName);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scratch test: the whole body is commented out, so this method currently performs no checks.
// The commented-out code builds an English AlphabeticIndex, adds two Cyrillic labels, and prints the label
// and bucket counts.
public void TestZZZ() {
|
||||
// int x = 3;
|
||||
// AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
|
||||
// UnicodeSet additions = new UnicodeSet();
|
||||
// additions.add(0x410).add(0x415); // Cyrillic
|
||||
// // additions.add(0x391).add(0x393); // Greek
|
||||
// index.addLabels(additions);
|
||||
// int lc = index.getLabels().size();
|
||||
// List labels = index.getLabels();
|
||||
// System.out.println("Label Count = " + lc + "\t" + labels);
|
||||
// System.out.println("Bucket Count =" + index.getBucketCount());
|
||||
// int x = 3;
|
||||
// AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
|
||||
// UnicodeSet additions = new UnicodeSet();
|
||||
// additions.add(0x410).add(0x415); // Cyrillic
|
||||
// // additions.add(0x391).add(0x393); // Greek
|
||||
// index.addLabels(additions);
|
||||
// int lc = index.getLabels().size();
|
||||
// List labels = index.getLabels();
|
||||
// System.out.println("Label Count = " + lc + "\t" + labels);
|
||||
// System.out.println("Bucket Count =" + index.getBucketCount());
|
||||
}
|
||||
|
||||
public void TestSimplified() {
|
||||
|
|
Loading…
Add table
Reference in a new issue