mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-13331 fix Java AlphabeticIndex.addIndexExemplars() for missing index exemplars (test for empty, not just null)
X-SVN-Rev: 40941
This commit is contained in:
parent
1d3a9958df
commit
cf61e9823a
5 changed files with 54 additions and 18 deletions
|
@ -725,7 +725,7 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status
|
|||
}
|
||||
|
||||
// question: should we add auxiliary exemplars?
|
||||
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
|
||||
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) {
|
||||
exemplars.add(0x61, 0x7A);
|
||||
}
|
||||
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
|
||||
|
@ -740,14 +740,9 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status
|
|||
// cut down to small list
|
||||
// make use of the fact that Ethiopic is allocated in 8's, where
|
||||
// the base is 0 mod 8.
|
||||
UnicodeSet ethiopic(
|
||||
UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
|
||||
UnicodeSetIterator it(ethiopic);
|
||||
while (it.next() && !it.isString()) {
|
||||
if ((it.getCodepoint() & 0x7) != 0) {
|
||||
exemplars.remove(it.getCodepoint());
|
||||
}
|
||||
}
|
||||
UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
|
||||
ethiopic.retainAll(exemplars);
|
||||
exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
|
||||
}
|
||||
|
||||
// Upper-case any that aren't already so.
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "unicode/localpointer.h"
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
|
@ -66,6 +67,7 @@ void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char*
|
|||
TESTCASE_AUTO(TestChineseZhuyin);
|
||||
TESTCASE_AUTO(TestJapaneseKanji);
|
||||
TESTCASE_AUTO(TestChineseUnihan);
|
||||
TESTCASE_AUTO(testHasBuckets);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -724,4 +726,23 @@ void AlphabeticIndexTest::TestChineseUnihan() {
|
|||
assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
|
||||
}
|
||||
|
||||
void AlphabeticIndexTest::testHasBuckets() {
|
||||
checkHasBuckets(Locale("am"), USCRIPT_ETHIOPIC);
|
||||
checkHasBuckets(Locale("haw"), USCRIPT_LATIN);
|
||||
checkHasBuckets(Locale("hy"), USCRIPT_ARMENIAN);
|
||||
checkHasBuckets(Locale("vai"), USCRIPT_VAI);
|
||||
}
|
||||
|
||||
void AlphabeticIndexTest::checkHasBuckets(const Locale &locale, UScriptCode script) {
|
||||
IcuTestErrorCode errorCode(*this, "checkHasBuckets");
|
||||
AlphabeticIndex aindex(locale, errorCode);
|
||||
LocalPointer<AlphabeticIndex::ImmutableIndex> index(aindex.buildImmutableIndex(errorCode));
|
||||
UnicodeString loc = locale.getName();
|
||||
assertTrue(loc + u" at least 3 buckets", index->getBucketCount() >= 3);
|
||||
const AlphabeticIndex::Bucket *bucket = index->getBucket(1);
|
||||
assertEquals(loc + u" real bucket", U_ALPHAINDEX_NORMAL, bucket->getLabelType());
|
||||
assertEquals(loc + u" expected script", script,
|
||||
uscript_getScript(bucket->getLabel().char32At(0), errorCode));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#ifndef ALPHAINDEXTST_H
|
||||
#define ALPHAINDEXTST_H
|
||||
|
||||
#include "unicode/uscript.h"
|
||||
#include "intltest.h"
|
||||
|
||||
class AlphabeticIndexTest: public IntlTest {
|
||||
|
@ -49,6 +50,9 @@ public:
|
|||
void TestChineseZhuyin();
|
||||
void TestJapaneseKanji();
|
||||
void TestChineseUnihan();
|
||||
|
||||
void testHasBuckets();
|
||||
void checkHasBuckets(const Locale &locale, UScriptCode script);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -523,7 +523,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
*/
|
||||
private void addIndexExemplars(ULocale locale) {
|
||||
UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX);
|
||||
if (exemplars != null) {
|
||||
if (exemplars != null && !exemplars.isEmpty()) {
|
||||
initialLabels.addAll(exemplars);
|
||||
return;
|
||||
}
|
||||
|
@ -534,7 +534,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
|
||||
exemplars = exemplars.cloneAsThawed();
|
||||
// question: should we add auxiliary exemplars?
|
||||
if (exemplars.containsSome('a', 'z') || exemplars.size() == 0) {
|
||||
if (exemplars.containsSome('a', 'z') || exemplars.isEmpty()) {
|
||||
exemplars.addAll('a', 'z');
|
||||
}
|
||||
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
|
||||
|
@ -549,13 +549,9 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
|
|||
// cut down to small list
|
||||
// make use of the fact that Ethiopic is allocated in 8's, where
|
||||
// the base is 0 mod 8.
|
||||
UnicodeSet ethiopic = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
|
||||
UnicodeSetIterator it = new UnicodeSetIterator(ethiopic);
|
||||
while (it.next() && it.codepoint != UnicodeSetIterator.IS_STRING) {
|
||||
if ((it.codepoint & 0x7) != 0) {
|
||||
exemplars.remove(it.codepoint);
|
||||
}
|
||||
}
|
||||
UnicodeSet ethiopic = new UnicodeSet("[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]");
|
||||
ethiopic.retainAll(exemplars);
|
||||
exemplars.remove('ሀ', 0x137F).addAll(ethiopic);
|
||||
}
|
||||
|
||||
// Upper-case any that aren't already so.
|
||||
|
|
|
@ -1160,4 +1160,24 @@ public class AlphabeticIndexTest extends TestFmwk {
|
|||
assertEquals("Wrong bucket label", "inflow", index.getInflowLabel());
|
||||
assertEquals("Bucket size not 1", 1, inflowBucket.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasBuckets() {
|
||||
checkHasBuckets(new Locale("am"), UScript.ETHIOPIC);
|
||||
checkHasBuckets(new Locale("haw"), UScript.LATIN);
|
||||
checkHasBuckets(new Locale("hy"), UScript.ARMENIAN);
|
||||
checkHasBuckets(new Locale("vai"), UScript.VAI);
|
||||
}
|
||||
|
||||
private void checkHasBuckets(Locale locale, int script) {
|
||||
AlphabeticIndex.ImmutableIndex index =
|
||||
new AlphabeticIndex<String>(locale).buildImmutableIndex();
|
||||
String loc = locale.toString();
|
||||
assertTrue(loc + " at least 3 buckets", index.getBucketCount() >= 3);
|
||||
AlphabeticIndex.Bucket bucket = index.getBucket(1);
|
||||
assertEquals(loc + " real bucket", AlphabeticIndex.Bucket.LabelType.NORMAL,
|
||||
bucket.getLabelType());
|
||||
assertEquals(loc + " expected script", script,
|
||||
UScript.getScript(bucket.getLabel().codePointAt(0)));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue