ICU-13331 fix Java AlphabeticIndex.addIndexExemplars() for missing index exemplars (test for empty, not just null)

X-SVN-Rev: 40941
This commit is contained in:
Markus Scherer 2018-02-16 19:48:49 +00:00
parent 1d3a9958df
commit cf61e9823a
5 changed files with 54 additions and 18 deletions

View file

@ -725,7 +725,7 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status
}
// question: should we add auxiliary exemplars?
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) {
exemplars.add(0x61, 0x7A);
}
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
@ -740,14 +740,9 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSet ethiopic(
UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
UnicodeSetIterator it(ethiopic);
while (it.next() && !it.isString()) {
if ((it.getCodepoint() & 0x7) != 0) {
exemplars.remove(it.getCodepoint());
}
}
UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
ethiopic.retainAll(exemplars);
exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
}
// Upper-case any that aren't already so.

View file

@ -22,6 +22,7 @@
#include "unicode/localpointer.h"
#include "unicode/tblcoll.h"
#include "unicode/uniset.h"
#include "unicode/uscript.h"
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
@ -66,6 +67,7 @@ void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char*
TESTCASE_AUTO(TestChineseZhuyin);
TESTCASE_AUTO(TestJapaneseKanji);
TESTCASE_AUTO(TestChineseUnihan);
TESTCASE_AUTO(testHasBuckets);
TESTCASE_AUTO_END;
}
@ -724,4 +726,23 @@ void AlphabeticIndexTest::TestChineseUnihan() {
assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
}
void AlphabeticIndexTest::testHasBuckets() {
checkHasBuckets(Locale("am"), USCRIPT_ETHIOPIC);
checkHasBuckets(Locale("haw"), USCRIPT_LATIN);
checkHasBuckets(Locale("hy"), USCRIPT_ARMENIAN);
checkHasBuckets(Locale("vai"), USCRIPT_VAI);
}
void AlphabeticIndexTest::checkHasBuckets(const Locale &locale, UScriptCode script) {
IcuTestErrorCode errorCode(*this, "checkHasBuckets");
AlphabeticIndex aindex(locale, errorCode);
LocalPointer<AlphabeticIndex::ImmutableIndex> index(aindex.buildImmutableIndex(errorCode));
UnicodeString loc = locale.getName();
assertTrue(loc + u" at least 3 buckets", index->getBucketCount() >= 3);
const AlphabeticIndex::Bucket *bucket = index->getBucket(1);
assertEquals(loc + u" real bucket", U_ALPHAINDEX_NORMAL, bucket->getLabelType());
assertEquals(loc + u" expected script", script,
uscript_getScript(bucket->getLabel().char32At(0), errorCode));
}
#endif

View file

@ -13,6 +13,7 @@
#ifndef ALPHAINDEXTST_H
#define ALPHAINDEXTST_H
#include "unicode/uscript.h"
#include "intltest.h"
class AlphabeticIndexTest: public IntlTest {
@ -49,6 +50,9 @@ public:
void TestChineseZhuyin();
void TestJapaneseKanji();
void TestChineseUnihan();
void testHasBuckets();
void checkHasBuckets(const Locale &locale, UScriptCode script);
};
#endif

View file

@ -523,7 +523,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
*/
private void addIndexExemplars(ULocale locale) {
UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX);
if (exemplars != null) {
if (exemplars != null && !exemplars.isEmpty()) {
initialLabels.addAll(exemplars);
return;
}
@ -534,7 +534,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
exemplars = exemplars.cloneAsThawed();
// question: should we add auxiliary exemplars?
if (exemplars.containsSome('a', 'z') || exemplars.size() == 0) {
if (exemplars.containsSome('a', 'z') || exemplars.isEmpty()) {
exemplars.addAll('a', 'z');
}
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
@ -549,13 +549,9 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSet ethiopic = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
UnicodeSetIterator it = new UnicodeSetIterator(ethiopic);
while (it.next() && it.codepoint != UnicodeSetIterator.IS_STRING) {
if ((it.codepoint & 0x7) != 0) {
exemplars.remove(it.codepoint);
}
}
UnicodeSet ethiopic = new UnicodeSet("[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]");
ethiopic.retainAll(exemplars);
exemplars.remove('ሀ', 0x137F).addAll(ethiopic);
}
// Upper-case any that aren't already so.

View file

@ -1160,4 +1160,24 @@ public class AlphabeticIndexTest extends TestFmwk {
assertEquals("Wrong bucket label", "inflow", index.getInflowLabel());
assertEquals("Bucket size not 1", 1, inflowBucket.size());
}
/**
 * Runs {@link #checkHasBuckets} for a handful of locales, pairing each language
 * with the script that its first regular index bucket label is expected to use.
 */
@Test
public void testHasBuckets() {
    final String[] languages = { "am", "haw", "hy", "vai" };
    final int[] expectedScripts = {
        UScript.ETHIOPIC, UScript.LATIN, UScript.ARMENIAN, UScript.VAI
    };
    for (int i = 0; i < languages.length; ++i) {
        checkHasBuckets(new Locale(languages[i]), expectedScripts[i]);
    }
}
/**
 * Builds an immutable index for the given locale and verifies that it has at
 * least 3 buckets, that the bucket at index 1 is a NORMAL bucket, and that the
 * first code point of that bucket's label belongs to the expected script.
 *
 * @param locale the locale whose index is built
 * @param script the UScript code expected for the first code point of bucket 1's label
 */
private void checkHasBuckets(Locale locale, int script) {
    // Parameterized types instead of raw ones: the index is built as
    // AlphabeticIndex<String>, so its immutable view and buckets carry String data.
    AlphabeticIndex.ImmutableIndex<String> index =
            new AlphabeticIndex<String>(locale).buildImmutableIndex();
    String loc = locale.toString();
    assertTrue(loc + " at least 3 buckets", index.getBucketCount() >= 3);
    AlphabeticIndex.Bucket<String> bucket = index.getBucket(1);
    assertEquals(loc + " real bucket", AlphabeticIndex.Bucket.LabelType.NORMAL,
            bucket.getLabelType());
    assertEquals(loc + " expected script", script,
            UScript.getScript(bucket.getLabel().codePointAt(0)));
}
}