mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-7869 Added test that first script characters are actually the first ones.
X-SVN-Rev: 28905
This commit is contained in:
parent
7e7750a459
commit
4f9ae7991e
2 changed files with 100 additions and 78 deletions
|
@ -830,75 +830,6 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
|
||||
private static final UnicodeSet IGNORE_SCRIPTS = new UnicodeSet(
|
||||
"[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze();
|
||||
//private static final UnicodeSet TO_TRY = new UnicodeSet("[:^nfcqc=no:]").removeAll(IGNORE_SCRIPTS).freeze();
|
||||
|
||||
// /**
|
||||
// * Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
|
||||
// * collation.
|
||||
// *
|
||||
// * @param ruleBasedCollator
|
||||
// * TODO
|
||||
// * @param comparator
|
||||
// * @param lowerLimit
|
||||
// * @param testScript
|
||||
// *
|
||||
// * @return
|
||||
// */
|
||||
//
|
||||
// private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
|
||||
// String[] results = new String[UScript.CODE_LIMIT];
|
||||
// for (String current : TO_TRY) {
|
||||
// if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
|
||||
// // symbols.
|
||||
// continue;
|
||||
// }
|
||||
// int script = UScript.getScript(current.codePointAt(0));
|
||||
// if (results[script] == null) {
|
||||
// results[script] = current;
|
||||
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
// results[script] = current;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// UnicodeSet extras = new UnicodeSet();
|
||||
// UnicodeSet expansions = new UnicodeSet();
|
||||
// ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
|
||||
// extras.addAll(expansions).removeAll(TO_TRY);
|
||||
// if (extras.size() != 0) {
|
||||
// Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
|
||||
// for (String current : extras) {
|
||||
// if (!TO_TRY.containsAll(current))
|
||||
// continue;
|
||||
// if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
|
||||
// continue;
|
||||
// }
|
||||
// int script = UScript.getScript(current.codePointAt(0));
|
||||
// if (results[script] == null) {
|
||||
// results[script] = current;
|
||||
// } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
// results[script] = current;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// } catch (Exception e) {
|
||||
// } // why have a checked exception???
|
||||
//
|
||||
// TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
|
||||
// for (int i = 0; i < results.length; ++i) {
|
||||
// if (results[i] != null) {
|
||||
// sorted.add(results[i]);
|
||||
// }
|
||||
// }
|
||||
// if (true) {
|
||||
// for (String s : sorted) {
|
||||
// System.out.println("\"" + s + "\",");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
|
||||
// return result;
|
||||
// }
|
||||
|
||||
private static final PreferenceComparator PREFERENCE_COMPARATOR = new PreferenceComparator();
|
||||
private int maxLabelCount = 99;
|
||||
|
@ -1199,4 +1130,13 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
"\u3041", "\u30A1", "\u3105", "\uA000", "\uA4F8", "\uD800\uDE80", "\uD800\uDEA0", "\uD802\uDD20", "\uD800\uDF00", "\uD800\uDF30", "\uD801\uDC28", "\uD801\uDC50", "\uD801\uDC80", "\uD800\uDC00", "\uD802\uDC00", "\uD802\uDE60", "\uD802\uDF00", "\uD802\uDC40",
|
||||
"\uD802\uDF40", "\uD802\uDF60", "\uD800\uDF80", "\uD800\uDFA0", "\uD808\uDC00", "\uD80C\uDC00", "\u4E00"
|
||||
});
|
||||
|
||||
/**
|
||||
* Only for testing...
|
||||
* @internal
|
||||
* @deprecated only for internal testing
|
||||
*/
|
||||
public static List<String> getFirstCharactersInScripts() {
|
||||
return HACK_FIRST_CHARS_IN_SCRIPTS;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ package com.ibm.icu.dev.test.collator;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
|
@ -26,7 +27,9 @@ import com.ibm.icu.text.AlphabeticIndex;
|
|||
import com.ibm.icu.text.AlphabeticIndex.Bucket;
|
||||
import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
|
||||
import com.ibm.icu.text.AlphabeticIndex.Record;
|
||||
import com.ibm.icu.text.Normalizer2.Mode;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
@ -433,15 +436,15 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
// displayPairs(false);
|
||||
// }
|
||||
|
||||
// private void displayPairs(boolean in) {
|
||||
// for (String[] pair : localeAndIndexCharactersLists) {
|
||||
// if (KEY_LOCALES.contains(pair[0]) == in) {
|
||||
// logln("\t"
|
||||
// + "/* " + ULocale.getDisplayName(pair[0], "en") + "*/\t"
|
||||
// + "{\"" + pair[0] + "\", \"" + pair[1] + "\"},");
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// private void displayPairs(boolean in) {
|
||||
// for (String[] pair : localeAndIndexCharactersLists) {
|
||||
// if (KEY_LOCALES.contains(pair[0]) == in) {
|
||||
// logln("\t"
|
||||
// + "/* " + ULocale.getDisplayName(pair[0], "en") + "*/\t"
|
||||
// + "{\"" + pair[0] + "\", \"" + pair[1] + "\"},");
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
public void TestClientSupport() {
|
||||
for (String localeString : KEY_LOCALES) { // KEY_LOCALES, new String[] {"zh"}
|
||||
|
@ -518,6 +521,83 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
public void TestFirstScriptCharacters() {
|
||||
List<String> firstCharacters = AlphabeticIndex.getFirstCharactersInScripts();
|
||||
List<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT));
|
||||
assertEquals("First Characters", expectedFirstCharacters, firstCharacters);
|
||||
}
|
||||
|
||||
// Candidate characters for the first-in-script search: everything whose NFC_Quick_Check
// value is not "No", minus the Common, Inherited and Unknown scripts. Frozen so the
// shared constant cannot be modified after construction.
private static final UnicodeSet TO_TRY = new UnicodeSet("[[:^nfcqc=no:]-[:sc=Common:]-[:sc=Inherited:]-[:sc=Unknown:]]").freeze();
|
||||
|
||||
/**
|
||||
* Returns a list of all the "First" characters of scripts, according to the collation, and sorted according to the
|
||||
* collation.
|
||||
*
|
||||
* @param ruleBasedCollator
|
||||
* TODO
|
||||
* @param comparator
|
||||
* @param lowerLimit
|
||||
* @param testScript
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
|
||||
private static List<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
|
||||
String[] results = new String[UScript.CODE_LIMIT];
|
||||
for (String current : TO_TRY) {
|
||||
if (ruleBasedCollator.compare(current, "a") < 0) { // TODO fix; we only want "real" script characters, not
|
||||
// symbols.
|
||||
continue;
|
||||
}
|
||||
int script = UScript.getScript(current.codePointAt(0));
|
||||
if (results[script] == null) {
|
||||
results[script] = current;
|
||||
} else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
results[script] = current;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
UnicodeSet extras = new UnicodeSet();
|
||||
UnicodeSet expansions = new UnicodeSet();
|
||||
ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
|
||||
extras.addAll(expansions).removeAll(TO_TRY);
|
||||
if (extras.size() != 0) {
|
||||
Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Mode.COMPOSE);
|
||||
for (String current : extras) {
|
||||
if (!TO_TRY.containsAll(current))
|
||||
continue;
|
||||
if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "a") < 0) {
|
||||
continue;
|
||||
}
|
||||
int script = UScript.getScript(current.codePointAt(0));
|
||||
if (results[script] == null) {
|
||||
results[script] = current;
|
||||
} else if (ruleBasedCollator.compare(current, results[script]) < 0) {
|
||||
results[script] = current;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
} // why have a checked exception???
|
||||
|
||||
TreeSet<String> sorted = new TreeSet<String>(ruleBasedCollator);
|
||||
for (int i = 0; i < results.length; ++i) {
|
||||
if (results[i] != null) {
|
||||
sorted.add(results[i]);
|
||||
}
|
||||
}
|
||||
if (false) {
|
||||
for (String s : sorted) {
|
||||
System.out.println("\"" + s + "\",");
|
||||
}
|
||||
}
|
||||
|
||||
List<String> result = Collections.unmodifiableList(new ArrayList<String>(sorted));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public void TestZZZ() {
|
||||
// int x = 3;
|
||||
// AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
|
||||
|
@ -628,4 +708,6 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
"\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6", "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1", "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580",
|
||||
"\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a", "\u5e74", "\u611b", "\u967d", "\u4f5f"
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue