From cf61e9823a972d85fd8b44a181ac555ad4f6fa9d Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 16 Feb 2018 19:48:49 +0000 Subject: [PATCH] ICU-13331 fix Java AlphabeticIndex.addIndexExemplars() for missing index exemplars (test for empty, not just null) X-SVN-Rev: 40941 --- icu4c/source/i18n/alphaindex.cpp | 13 ++++-------- icu4c/source/test/intltest/alphaindextst.cpp | 21 +++++++++++++++++++ icu4c/source/test/intltest/alphaindextst.h | 4 ++++ .../src/com/ibm/icu/text/AlphabeticIndex.java | 14 +++++-------- .../test/collator/AlphabeticIndexTest.java | 20 ++++++++++++++++++ 5 files changed, 54 insertions(+), 18 deletions(-) diff --git a/icu4c/source/i18n/alphaindex.cpp b/icu4c/source/i18n/alphaindex.cpp index d877cb2a991..d36a2cc6de3 100644 --- a/icu4c/source/i18n/alphaindex.cpp +++ b/icu4c/source/i18n/alphaindex.cpp @@ -725,7 +725,7 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status } // question: should we add auxiliary exemplars? - if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { + if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) { exemplars.add(0x61, 0x7A); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables @@ -740,14 +740,9 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. - UnicodeSet ethiopic( - UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); - UnicodeSetIterator it(ethiopic); - while (it.next() && !it.isString()) { - if ((it.getCodepoint() & 0x7) != 0) { - exemplars.remove(it.getCodepoint()); - } - } + UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status); + ethiopic.retainAll(exemplars); + exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic); } // Upper-case any that aren't already so. 
diff --git a/icu4c/source/test/intltest/alphaindextst.cpp b/icu4c/source/test/intltest/alphaindextst.cpp index a3ebd1114a8..667e0435a86 100644 --- a/icu4c/source/test/intltest/alphaindextst.cpp +++ b/icu4c/source/test/intltest/alphaindextst.cpp @@ -22,6 +22,7 @@ #include "unicode/localpointer.h" #include "unicode/tblcoll.h" #include "unicode/uniset.h" +#include "unicode/uscript.h" #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION @@ -66,6 +67,7 @@ void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char* TESTCASE_AUTO(TestChineseZhuyin); TESTCASE_AUTO(TestJapaneseKanji); TESTCASE_AUTO(TestChineseUnihan); + TESTCASE_AUTO(testHasBuckets); TESTCASE_AUTO_END; } @@ -724,4 +726,23 @@ void AlphabeticIndexTest::TestChineseUnihan() { assertEquals("getBucketIndex(U+7527)", 101, bucketIndex); } +void AlphabeticIndexTest::testHasBuckets() { + checkHasBuckets(Locale("am"), USCRIPT_ETHIOPIC); + checkHasBuckets(Locale("haw"), USCRIPT_LATIN); + checkHasBuckets(Locale("hy"), USCRIPT_ARMENIAN); + checkHasBuckets(Locale("vai"), USCRIPT_VAI); +} + +void AlphabeticIndexTest::checkHasBuckets(const Locale &locale, UScriptCode script) { + IcuTestErrorCode errorCode(*this, "checkHasBuckets"); + AlphabeticIndex aindex(locale, errorCode); + LocalPointer<AlphabeticIndex::ImmutableIndex> index(aindex.buildImmutableIndex(errorCode)); + UnicodeString loc = locale.getName(); + assertTrue(loc + u" at least 3 buckets", index->getBucketCount() >= 3); + const AlphabeticIndex::Bucket *bucket = index->getBucket(1); + assertEquals(loc + u" real bucket", U_ALPHAINDEX_NORMAL, bucket->getLabelType()); + assertEquals(loc + u" expected script", script, + uscript_getScript(bucket->getLabel().char32At(0), errorCode)); +} + #endif diff --git a/icu4c/source/test/intltest/alphaindextst.h b/icu4c/source/test/intltest/alphaindextst.h index 6bbe153f6fa..a785fca08e7 100644 --- a/icu4c/source/test/intltest/alphaindextst.h +++ b/icu4c/source/test/intltest/alphaindextst.h @@ -13,6 +13,7 @@ #ifndef ALPHAINDEXTST_H #define 
ALPHAINDEXTST_H +#include "unicode/uscript.h" #include "intltest.h" class AlphabeticIndexTest: public IntlTest { @@ -49,6 +50,9 @@ public: void TestChineseZhuyin(); void TestJapaneseKanji(); void TestChineseUnihan(); + + void testHasBuckets(); + void checkHasBuckets(const Locale &locale, UScriptCode script); }; #endif diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java index 6bbf99916de..3dbe3c076a5 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java @@ -523,7 +523,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> { */ private void addIndexExemplars(ULocale locale) { UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX); - if (exemplars != null) { + if (exemplars != null && !exemplars.isEmpty()) { initialLabels.addAll(exemplars); return; } @@ -534,7 +534,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> { exemplars = exemplars.cloneAsThawed(); // question: should we add auxiliary exemplars? - if (exemplars.containsSome('a', 'z') || exemplars.size() == 0) { + if (exemplars.containsSome('a', 'z') || exemplars.isEmpty()) { exemplars.addAll('a', 'z'); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables @@ -549,13 +549,9 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> { // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. 
- UnicodeSet ethiopic = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"); - UnicodeSetIterator it = new UnicodeSetIterator(ethiopic); - while (it.next() && it.codepoint != UnicodeSetIterator.IS_STRING) { - if ((it.codepoint & 0x7) != 0) { - exemplars.remove(it.codepoint); - } - } + UnicodeSet ethiopic = new UnicodeSet("[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"); + ethiopic.retainAll(exemplars); + exemplars.remove('ሀ', 0x137F).addAll(ethiopic); } // Upper-case any that aren't already so. diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java index 6cc274a563d..00e3f58e331 100644 --- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java +++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java @@ -1160,4 +1160,24 @@ public class AlphabeticIndexTest extends TestFmwk { assertEquals("Wrong bucket label", "inflow", index.getInflowLabel()); assertEquals("Bucket size not 1", 1, inflowBucket.size()); } + + @Test + public void testHasBuckets() { + checkHasBuckets(new Locale("am"), UScript.ETHIOPIC); + checkHasBuckets(new Locale("haw"), UScript.LATIN); + checkHasBuckets(new Locale("hy"), UScript.ARMENIAN); + checkHasBuckets(new Locale("vai"), UScript.VAI); + } + + private void checkHasBuckets(Locale locale, int script) { + AlphabeticIndex.ImmutableIndex index = + new AlphabeticIndex(locale).buildImmutableIndex(); + String loc = locale.toString(); + assertTrue(loc + " at least 3 buckets", index.getBucketCount() >= 3); + AlphabeticIndex.Bucket bucket = index.getBucket(1); + assertEquals(loc + " real bucket", AlphabeticIndex.Bucket.LabelType.NORMAL, + bucket.getLabelType()); + assertEquals(loc + " expected script", script, + UScript.getScript(bucket.getLabel().codePointAt(0))); + } }