diff --git a/icu4c/source/common/brkiter.cpp b/icu4c/source/common/brkiter.cpp index 464c96f9534..3bfc13adee2 100644 --- a/icu4c/source/common/brkiter.cpp +++ b/icu4c/source/common/brkiter.cpp @@ -60,7 +60,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UResourceBundle *brkRules = &brkRulesStack; UResourceBundle *brkName = &brkNameStack; RuleBasedBreakIterator *result = NULL; - + if (U_FAILURE(status)) return NULL; @@ -96,7 +96,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, uprv_strncpy(actualLocale, ures_getLocale(brkName, &status), sizeof(actualLocale)/sizeof(actualLocale[0])); - + UChar* extStart=u_strchr(brkfname, 0x002e); int len = 0; if(extStart!=NULL){ @@ -110,7 +110,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, ures_close(brkRules); ures_close(brkName); - + UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); if (U_FAILURE(status)) { ures_close(b); @@ -128,7 +128,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, } ures_close(b); - + if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple delete result; return NULL; @@ -189,15 +189,6 @@ BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) // ------------------------------------- -// Creates a break iterator for Extended Grapheme Cluster breaks. -BreakIterator* U_EXPORT2 -BreakIterator::createXGraphemeClusterInstance(const Locale& key, UErrorCode& status) -{ - return createInstance(key, UBRK_X_GRAPHEME_CLUSTER, status); -} - -// ------------------------------------- - // Gets all the available locales that has localized text boundary data. const Locale* U_EXPORT2 BreakIterator::getAvailableLocales(int32_t& count) @@ -266,11 +257,11 @@ public: UErrorCode status = U_ZERO_ERROR; registerFactory(new ICUBreakIteratorFactory(), status); } - + virtual UObject* cloneInstance(UObject* instance) const { return ((BreakIterator*)instance)->clone(); } - + virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { LocaleKey& lkey = (LocaleKey&)key; int32_t kind = lkey.kind(); @@ -278,7 +269,7 @@ public: lkey.currentLocale(loc); return BreakIterator::makeInstance(loc, kind, status); } - + virtual UBool isDefault() const { return countFactories() == 1; } @@ -293,7 +284,7 @@ U_NAMESPACE_END static U_NAMESPACE_QUALIFIER ICULocaleService* gService = NULL; /** - * Release all static memory held by breakiterator. + * Release all static memory held by breakiterator. */ U_CDECL_BEGIN static UBool U_CALLCONV breakiterator_cleanup(void) { @@ -308,12 +299,12 @@ static UBool U_CALLCONV breakiterator_cleanup(void) { U_CDECL_END U_NAMESPACE_BEGIN -static ICULocaleService* +static ICULocaleService* getService(void) { UBool needsInit; UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit); - + if (needsInit) { ICULocaleService *tService = new ICUBreakIteratorService(); umtx_lock(NULL); @@ -331,7 +322,7 @@ getService(void) // ------------------------------------- static inline UBool -hasService(void) +hasService(void) { UBool retVal; UMTX_CHECK(NULL, gService != NULL, retVal); @@ -341,7 +332,7 @@ hasService(void) // ------------------------------------- URegistryKey U_EXPORT2 -BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) +BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) { return getService()->registerInstance(toAdopt, locale, kind, status); } @@ -349,7 +340,7 @@ BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UB // ------------------------------------- UBool U_EXPORT2 -BreakIterator::unregister(URegistryKey key, UErrorCode& status) +BreakIterator::unregister(URegistryKey key, UErrorCode& status) { if (U_SUCCESS(status)) { if (hasService()) { @@ -377,7 +368,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu if (U_FAILURE(status)) { return NULL; } - + u_init(&status); #if !UCONFIG_NO_SERVICE if (hasService()) { @@ -408,7 +399,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu // ------------------------------------- -BreakIterator* +BreakIterator* BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) { @@ -418,7 +409,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) BreakIterator *result = NULL; switch (kind) { - case UBRK_CHARACTER: + case UBRK_CHARACTER: result = BreakIterator::buildInstance(loc, "grapheme", kind, status); break; case UBRK_WORD: @@ -433,9 +424,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) case UBRK_TITLE: result = BreakIterator::buildInstance(loc, "title", kind, status); break; - case UBRK_X_GRAPHEME_CLUSTER: - result = BreakIterator::buildInstance(loc, "xgc", kind, status); - break; default: status = U_ILLEGAL_ARGUMENT_ERROR; } @@ -447,7 +435,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) return result; } -Locale +Locale BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { U_LOCALE_BASED(locBased, *this); return locBased.getLocale(type, status); diff --git a/icu4c/source/common/unicode/brkiter.h b/icu4c/source/common/unicode/brkiter.h index 63fcf7562a1..a9d7a67e81b 100644 --- a/icu4c/source/common/unicode/brkiter.h +++ b/icu4c/source/common/unicode/brkiter.h @@ -26,7 +26,7 @@ * \file * \brief C++ API: Break Iterator. */ - + #if UCONFIG_NO_BREAK_ITERATION U_NAMESPACE_BEGIN @@ -92,7 +92,7 @@ U_NAMESPACE_BEGIN * file ubrk.h *

* Code snippits illustrating the use of the Break Iterator APIs - * are available in the ICU User Guide, + * are available in the ICU User Guide, * http://icu-project.org/userguide/boundaryAnalysis.html * and in the sample program icu/source/samples/break/break.cpp" * @@ -174,7 +174,7 @@ public: virtual void setText(const UnicodeString &text) = 0; /** - * Reset the break iterator to operate over the text represented by + * Reset the break iterator to operate over the text represented by * the UText. The iterator position is reset to the start. * * This function makes a shallow clone of the supplied UText. This means @@ -397,22 +397,6 @@ public: static BreakIterator* U_EXPORT2 createTitleInstance(const Locale& where, UErrorCode& status); - /** - * Create BreakIterator for Extended Grapheme Clusters using specified locale - * Returns an instance of a BreakIterator for locating XGC booundaries - * Extended Grapheme Clusters are combining character sequences and other - * sequences that should remain unbroken when iterating over - * "characters" from a user perspective. - * @param loc the locale. - * @param status Receive information regarding any errors or warnings that - * occurred in creating the break iterator. - * @return A BreakIterator for Extended Grapheme Clusters. - * The caller owns the returned object and is responsible for deleting it. - * @draft ICU 3.8 - */ - static BreakIterator* U_EXPORT2 - createXGraphemeClusterInstance(const Locale& loc, UErrorCode& status); - /** * Get the set of Locales for which TextBoundaries are installed. *

Note: this will not return locales added through the register diff --git a/icu4c/source/common/unicode/ubrk.h b/icu4c/source/common/unicode/ubrk.h index 728eafa75c9..765b0a84f5c 100644 --- a/icu4c/source/common/unicode/ubrk.h +++ b/icu4c/source/common/unicode/ubrk.h @@ -106,8 +106,7 @@ typedef enum UBreakIteratorType { UBRK_TITLE = 4, #endif /* U_HIDE_DEPRECATED_API */ /** Extended Grapheme Cluster breaks @draft ICU 3.8 */ - UBRK_X_GRAPHEME_CLUSTER=5, - UBRK_COUNT = 6 + UBRK_COUNT = 5 } UBreakIteratorType; /** Value indicating all text boundaries have been returned. diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp index b59542040f5..36305991862 100644 --- a/icu4c/source/test/intltest/rbbiapts.cpp +++ b/icu4c/source/test/intltest/rbbiapts.cpp @@ -68,7 +68,7 @@ void RBBIAPITest::TestCloneEquals() errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); - // Quick test of RulesBasedBreakIterator assignment - + // Quick test of RulesBasedBreakIterator assignment - // Check that // two different iterators are != // they are == after assignment @@ -122,16 +122,16 @@ void RBBIAPITest::TestCloneEquals() RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); - if(*bi1clone != *bi1 || *bi1clone != *biequal || + if(*bi1clone != *bi1 || *bi1clone != *biequal || *bi1clone == *bi3 || *bi1clone == *bi2) errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); - if(*bi2clone == *bi1 || *bi2clone == *biequal || + if(*bi2clone == *bi1 || *bi2clone == *biequal || *bi2clone == *bi3 || *bi2clone != *bi2) errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); if(bi1->getText() != bi1clone->getText() || - bi2clone->getText() != bi2->getText() || + bi2clone->getText() != bi2->getText() || *bi2clone == *bi1clone ) errln((UnicodeString)"ERROR: RBBI's clone() method failed"); @@ -232,7 +232,7 @@ void RBBIAPITest::TestHashCode() errln((UnicodeString)"ERROR: different objects have same hashcodes"); delete bi1clone; - delete bi2clone; + delete bi2clone; delete bi1; delete bi2; delete bi3; @@ -256,7 +256,7 @@ void RBBIAPITest::TestGetSetAdoptText() CharacterIterator* text1Clone = text1->clone(); CharacterIterator* text2= new StringCharacterIterator(str2); CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" - + wordIter1->setText(str1); CharacterIterator *tci = &wordIter1->getText(); UnicodeString tstr; @@ -366,9 +366,9 @@ void RBBIAPITest::TestGetSetAdoptText() delete charIter1; delete rb; - } + } + - void RBBIAPITest::TestIteration() { // This test just verifies that the API is present. @@ -409,13 +409,6 @@ void RBBIAPITest::TestIteration() } delete bi; - status=U_ZERO_ERROR; - bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator:: - createXGraphemeClusterInstance(Locale::getDefault(), status); - TEST_ASSERT_SUCCESS(status); - TEST_ASSERT(bi != NULL); - delete bi; - status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { @@ -605,7 +598,7 @@ void RBBIAPITest::TestBuilder() { int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { errln("FAIL : in construction"); @@ -632,7 +625,7 @@ void RBBIAPITest::TestQuoteGrouping() { int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { errln("FAIL : in construction"); @@ -648,7 +641,7 @@ void RBBIAPITest::TestQuoteGrouping() { // Test word break rule status constants. // void RBBIAPITest::TestRuleStatus() { - UChar str[30]; + UChar str[30]; u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", // 012345678901234567 8 9 0 1 2 3 4 5 6 // Ideographic Katakana Hiragana @@ -666,7 +659,7 @@ void RBBIAPITest::TestRuleStatus() { UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; UErrorCode status=U_ZERO_ERROR; - + RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); if(U_FAILURE(status)) { errln("FAIL : in construction"); @@ -688,7 +681,7 @@ void RBBIAPITest::TestRuleStatus() { errln("FAIL: incorrect tag value %d at position %d", tag, pos); break; } - + // Check that we get the same tag values from getRuleStatusVec() int32_t vec[10]; int t = bi->getRuleStatusVec(vec, 10, status); @@ -764,7 +757,7 @@ void RBBIAPITest::TestRuleStatusVec() { UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); TEST_ASSERT_SUCCESS(status); if (U_SUCCESS(status)) { @@ -823,7 +816,7 @@ void RBBIAPITest::TestRuleStatusVec() { TEST_ASSERT(statusVals[0] == 0); // - // Check buffer overflow error handling. Char == A + // Check buffer overflow error handling. Char == A // bi->first(); pos = bi->next(); @@ -867,7 +860,7 @@ void RBBIAPITest::TestBug2190() { int32_t bounds1[] = {0, 4, 8}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { errln("FAIL : in construction"); @@ -883,19 +876,19 @@ void RBBIAPITest::TestRegistration() { #if !UCONFIG_NO_SERVICE UErrorCode status = U_ZERO_ERROR; BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); - + // ok to not delete these if we exit because of error? BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); BreakIterator* root_word = BreakIterator::createWordInstance("", status); BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); - + URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); { if (ja_word && *ja_word == *root_word) { errln("japan not different from root"); } } - + { BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); UBool fail = TRUE; @@ -907,7 +900,7 @@ void RBBIAPITest::TestRegistration() { errln("bad result for xx_XX/word"); } } - + { BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); UBool fail = TRUE; @@ -919,7 +912,7 @@ void RBBIAPITest::TestRegistration() { errln("bad result for ja_JP/char"); } } - + { BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); UBool fail = TRUE; @@ -931,7 +924,7 @@ void RBBIAPITest::TestRegistration() { errln("bad result for xx_XX/char"); } } - + { StringEnumeration* avail = BreakIterator::getAvailableLocales(); UBool found = FALSE; @@ -947,14 +940,14 @@ void RBBIAPITest::TestRegistration() { errln("did not find test locale"); } } - + { UBool unreg = BreakIterator::unregister(key, status); if (!unreg) { errln("unable to unregister"); } } - + { BreakIterator* result = BreakIterator::createWordInstance("en_US", status); BreakIterator* root = BreakIterator::createWordInstance("", status); @@ -968,7 +961,7 @@ void RBBIAPITest::TestRegistration() { errln("did not get root break"); } } - + { StringEnumeration* avail = BreakIterator::getAvailableLocales(); UBool found = FALSE; @@ -984,7 +977,7 @@ void RBBIAPITest::TestRegistration() { errln("found test locale"); } } - + { int32_t count; UBool foundLocale = FALSE; @@ -999,8 +992,8 @@ void RBBIAPITest::TestRegistration() { errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); } } - - + + // ja_word was adopted by factory delete ja_char; delete root_word; @@ -1111,7 +1104,7 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof if(gotoffset != expectedOffset) errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); if(start <= gotoffset){ - testString.extractBetween(start, gotoffset, selected); + testString.extractBetween(start, gotoffset, selected); } else{ testString.extractBetween(gotoffset, start, selected); diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index db5894aaa13..574c0ac65d2 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -1194,8 +1194,8 @@ void RBBITest::TestBug5775() { TEST_ASSERT(pos == 6); delete bi; } - - + + /** * Test Japanese Line Break @@ -1534,13 +1534,7 @@ void RBBITest::TestExtended() { charIdx += 6; break; } - if (testString.compare(charIdx-1, 5, "") == 0) { - delete tp.bi; - tp.bi = BreakIterator::createXGraphemeClusterInstance(locale, status); - charIdx += 4; - break; - } - + // localeMatcher.reset(testString); if (localeMatcher.lookingAt(charIdx-1, status)) { @@ -2090,7 +2084,7 @@ void RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber, pos = bi->next(); expectedI++; } - + if (pos==BreakIterator::DONE && expectedIsize()) { errln("Test file \"%s\", line %d, failed to find break at position %d", testFileName, lineNumber, breakPositions->elementAti(expectedI)); diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index 030496ec36a..9cb3147341c 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -94,23 +94,23 @@ ######################################################################################## # # -# Extended G r a p h e m e C l u s t e r T e s t s +# E x t e n d e d G r a p h e m e C l u s t e r T e s t s # # ########################################################################################## - +# # Plain Vanilla grapheme clusters -•a•b•c• -•a\u0301\u0302• •b\u0303\u0304• +#•a•b•c• +#•a\u0301\u0302• •b\u0303\u0304• # Assorted Hindi combining marks -•\u0904\u0903• •\u0937\u093E• •\u0904\u093F• •\u0937\u0940• •\u0937\u0949• •\u0937\u094A• •\u0937\u094B• •\u0937\u094C• +#•\u0904\u0903• •\u0937\u093E• •\u0904\u093F• •\u0937\u0940• •\u0937\u0949• •\u0937\u094A• •\u0937\u094B• •\u0937\u094C• # Thai Clusters # $Prepend $Extend* $PrependBase $Extend*; # -•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• • +#•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• • ########################################################################################