ICU-5766 Remove Extended Grapheme Cluster from Break Iteration

X-SVN-Rev: 22412
This commit is contained in:
Andy Heninger 2007-08-16 23:14:06 +00:00
parent f7f687e073
commit 3c035f1d12
6 changed files with 60 additions and 102 deletions

View file

@ -60,7 +60,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
UResourceBundle *brkRules = &brkRulesStack;
UResourceBundle *brkName = &brkNameStack;
RuleBasedBreakIterator *result = NULL;
if (U_FAILURE(status))
return NULL;
@ -96,7 +96,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
uprv_strncpy(actualLocale,
ures_getLocale(brkName, &status),
sizeof(actualLocale)/sizeof(actualLocale[0]));
UChar* extStart=u_strchr(brkfname, 0x002e);
int len = 0;
if(extStart!=NULL){
@ -110,7 +110,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
ures_close(brkRules);
ures_close(brkName);
UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
if (U_FAILURE(status)) {
ures_close(b);
@ -128,7 +128,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
}
ures_close(b);
if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple
delete result;
return NULL;
@ -189,15 +189,6 @@ BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
// -------------------------------------
// Creates a break iterator for Extended Grapheme Cluster breaks.
BreakIterator* U_EXPORT2
BreakIterator::createXGraphemeClusterInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_X_GRAPHEME_CLUSTER, status);
}
// -------------------------------------
// Gets all the available locales that have localized text boundary data.
const Locale* U_EXPORT2
BreakIterator::getAvailableLocales(int32_t& count)
@ -266,11 +257,11 @@ public:
UErrorCode status = U_ZERO_ERROR;
registerFactory(new ICUBreakIteratorFactory(), status);
}
virtual UObject* cloneInstance(UObject* instance) const {
return ((BreakIterator*)instance)->clone();
}
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
LocaleKey& lkey = (LocaleKey&)key;
int32_t kind = lkey.kind();
@ -278,7 +269,7 @@ public:
lkey.currentLocale(loc);
return BreakIterator::makeInstance(loc, kind, status);
}
virtual UBool isDefault() const {
return countFactories() == 1;
}
@ -293,7 +284,7 @@ U_NAMESPACE_END
static U_NAMESPACE_QUALIFIER ICULocaleService* gService = NULL;
/**
* Release all static memory held by breakiterator.
* Release all static memory held by breakiterator.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV breakiterator_cleanup(void) {
@ -308,12 +299,12 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
U_CDECL_END
U_NAMESPACE_BEGIN
static ICULocaleService*
static ICULocaleService*
getService(void)
{
UBool needsInit;
UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit);
if (needsInit) {
ICULocaleService *tService = new ICUBreakIteratorService();
umtx_lock(NULL);
@ -331,7 +322,7 @@ getService(void)
// -------------------------------------
static inline UBool
hasService(void)
hasService(void)
{
UBool retVal;
UMTX_CHECK(NULL, gService != NULL, retVal);
@ -341,7 +332,7 @@ hasService(void)
// -------------------------------------
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
return getService()->registerInstance(toAdopt, locale, kind, status);
}
@ -349,7 +340,7 @@ BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UB
// -------------------------------------
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
if (U_SUCCESS(status)) {
if (hasService()) {
@ -377,7 +368,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
if (U_FAILURE(status)) {
return NULL;
}
u_init(&status);
#if !UCONFIG_NO_SERVICE
if (hasService()) {
@ -408,7 +399,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
// -------------------------------------
BreakIterator*
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
@ -418,7 +409,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
case UBRK_CHARACTER:
result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
break;
case UBRK_WORD:
@ -433,9 +424,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
case UBRK_TITLE:
result = BreakIterator::buildInstance(loc, "title", kind, status);
break;
case UBRK_X_GRAPHEME_CLUSTER:
result = BreakIterator::buildInstance(loc, "xgc", kind, status);
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
}
@ -447,7 +435,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
return result;
}
Locale
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);

View file

@ -26,7 +26,7 @@
* \file
* \brief C++ API: Break Iterator.
*/
#if UCONFIG_NO_BREAK_ITERATION
U_NAMESPACE_BEGIN
@ -92,7 +92,7 @@ U_NAMESPACE_BEGIN
* file ubrk.h
* <p>
* Code snippets illustrating the use of the Break Iterator APIs
* are available in the ICU User Guide,
* are available in the ICU User Guide,
* http://icu-project.org/userguide/boundaryAnalysis.html
* and in the sample program icu/source/samples/break/break.cpp
*
@ -174,7 +174,7 @@ public:
virtual void setText(const UnicodeString &text) = 0;
/**
* Reset the break iterator to operate over the text represented by
* Reset the break iterator to operate over the text represented by
* the UText. The iterator position is reset to the start.
*
* This function makes a shallow clone of the supplied UText. This means
@ -397,22 +397,6 @@ public:
static BreakIterator* U_EXPORT2
createTitleInstance(const Locale& where, UErrorCode& status);
/**
* Create BreakIterator for Extended Grapheme Clusters using specified locale
* Returns an instance of a BreakIterator for locating XGC boundaries
* Extended Grapheme Clusters are combining character sequences and other
* sequences that should remain unbroken when iterating over
* "characters" from a user perspective.
* @param loc the locale.
* @param status Receive information regarding any errors or warnings that
* occurred in creating the break iterator.
* @return A BreakIterator for Extended Grapheme Clusters.
* The caller owns the returned object and is responsible for deleting it.
* @draft ICU 3.8
*/
static BreakIterator* U_EXPORT2
createXGraphemeClusterInstance(const Locale& loc, UErrorCode& status);
/**
* Get the set of Locales for which TextBoundaries are installed.
* <p><b>Note:</b> this will not return locales added through the register

View file

@ -106,8 +106,7 @@ typedef enum UBreakIteratorType {
UBRK_TITLE = 4,
#endif /* U_HIDE_DEPRECATED_API */
/** Extended Grapheme Cluster breaks @draft ICU 3.8 */
UBRK_X_GRAPHEME_CLUSTER=5,
UBRK_COUNT = 6
UBRK_COUNT = 5
} UBreakIteratorType;
/** Value indicating all text boundaries have been returned.

View file

@ -68,7 +68,7 @@ void RBBIAPITest::TestCloneEquals()
errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
// Quick test of RuleBasedBreakIterator assignment -
// Quick test of RuleBasedBreakIterator assignment -
// Check that
// two different iterators are !=
// they are == after assignment
@ -122,16 +122,16 @@ void RBBIAPITest::TestCloneEquals()
RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
if(*bi1clone != *bi1 || *bi1clone != *biequal ||
if(*bi1clone != *bi1 || *bi1clone != *biequal ||
*bi1clone == *bi3 || *bi1clone == *bi2)
errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
if(*bi2clone == *bi1 || *bi2clone == *biequal ||
if(*bi2clone == *bi1 || *bi2clone == *biequal ||
*bi2clone == *bi3 || *bi2clone != *bi2)
errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
if(bi1->getText() != bi1clone->getText() ||
bi2clone->getText() != bi2->getText() ||
bi2clone->getText() != bi2->getText() ||
*bi2clone == *bi1clone )
errln((UnicodeString)"ERROR: RBBI's clone() method failed");
@ -232,7 +232,7 @@ void RBBIAPITest::TestHashCode()
errln((UnicodeString)"ERROR: different objects have same hashcodes");
delete bi1clone;
delete bi2clone;
delete bi2clone;
delete bi1;
delete bi2;
delete bi3;
@ -256,7 +256,7 @@ void RBBIAPITest::TestGetSetAdoptText()
CharacterIterator* text1Clone = text1->clone();
CharacterIterator* text2= new StringCharacterIterator(str2);
CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
wordIter1->setText(str1);
CharacterIterator *tci = &wordIter1->getText();
UnicodeString tstr;
@ -366,9 +366,9 @@ void RBBIAPITest::TestGetSetAdoptText()
delete charIter1;
delete rb;
}
}
void RBBIAPITest::TestIteration()
{
// This test just verifies that the API is present.
@ -409,13 +409,6 @@ void RBBIAPITest::TestIteration()
}
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::
createXGraphemeClusterInstance(Locale::getDefault(), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(bi != NULL);
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
@ -605,7 +598,7 @@ void RBBIAPITest::TestBuilder() {
int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
errln("FAIL : in construction");
@ -632,7 +625,7 @@ void RBBIAPITest::TestQuoteGrouping() {
int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
errln("FAIL : in construction");
@ -648,7 +641,7 @@ void RBBIAPITest::TestQuoteGrouping() {
// Test word break rule status constants.
//
void RBBIAPITest::TestRuleStatus() {
UChar str[30];
UChar str[30];
u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
// 012345678901234567 8 9 0 1 2 3 4 5 6
// Ideographic Katakana Hiragana
@ -666,7 +659,7 @@ void RBBIAPITest::TestRuleStatus() {
UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
UErrorCode status=U_ZERO_ERROR;
RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
if(U_FAILURE(status)) {
errln("FAIL : in construction");
@ -688,7 +681,7 @@ void RBBIAPITest::TestRuleStatus() {
errln("FAIL: incorrect tag value %d at position %d", tag, pos);
break;
}
// Check that we get the same tag values from getRuleStatusVec()
int32_t vec[10];
int t = bi->getRuleStatusVec(vec, 10, status);
@ -764,7 +757,7 @@ void RBBIAPITest::TestRuleStatusVec() {
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
TEST_ASSERT_SUCCESS(status);
if (U_SUCCESS(status)) {
@ -823,7 +816,7 @@ void RBBIAPITest::TestRuleStatusVec() {
TEST_ASSERT(statusVals[0] == 0);
//
// Check buffer overflow error handling. Char == A
// Check buffer overflow error handling. Char == A
//
bi->first();
pos = bi->next();
@ -867,7 +860,7 @@ void RBBIAPITest::TestBug2190() {
int32_t bounds1[] = {0, 4, 8};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
errln("FAIL : in construction");
@ -883,19 +876,19 @@ void RBBIAPITest::TestRegistration() {
#if !UCONFIG_NO_SERVICE
UErrorCode status = U_ZERO_ERROR;
BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
// ok to not delete these if we exit because of error?
BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
BreakIterator* root_word = BreakIterator::createWordInstance("", status);
BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
{
if (ja_word && *ja_word == *root_word) {
errln("japan not different from root");
}
}
{
BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
UBool fail = TRUE;
@ -907,7 +900,7 @@ void RBBIAPITest::TestRegistration() {
errln("bad result for xx_XX/word");
}
}
{
BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
UBool fail = TRUE;
@ -919,7 +912,7 @@ void RBBIAPITest::TestRegistration() {
errln("bad result for ja_JP/char");
}
}
{
BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
UBool fail = TRUE;
@ -931,7 +924,7 @@ void RBBIAPITest::TestRegistration() {
errln("bad result for xx_XX/char");
}
}
{
StringEnumeration* avail = BreakIterator::getAvailableLocales();
UBool found = FALSE;
@ -947,14 +940,14 @@ void RBBIAPITest::TestRegistration() {
errln("did not find test locale");
}
}
{
UBool unreg = BreakIterator::unregister(key, status);
if (!unreg) {
errln("unable to unregister");
}
}
{
BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
BreakIterator* root = BreakIterator::createWordInstance("", status);
@ -968,7 +961,7 @@ void RBBIAPITest::TestRegistration() {
errln("did not get root break");
}
}
{
StringEnumeration* avail = BreakIterator::getAvailableLocales();
UBool found = FALSE;
@ -984,7 +977,7 @@ void RBBIAPITest::TestRegistration() {
errln("found test locale");
}
}
{
int32_t count;
UBool foundLocale = FALSE;
@ -999,8 +992,8 @@ void RBBIAPITest::TestRegistration() {
errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
}
}
// ja_word was adopted by factory
delete ja_char;
delete root_word;
@ -1111,7 +1104,7 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof
if(gotoffset != expectedOffset)
errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
if(start <= gotoffset){
testString.extractBetween(start, gotoffset, selected);
testString.extractBetween(start, gotoffset, selected);
}
else{
testString.extractBetween(gotoffset, start, selected);

View file

@ -1194,8 +1194,8 @@ void RBBITest::TestBug5775() {
TEST_ASSERT(pos == 6);
delete bi;
}
/**
* Test Japanese Line Break
@ -1534,13 +1534,7 @@ void RBBITest::TestExtended() {
charIdx += 6;
break;
}
if (testString.compare(charIdx-1, 5, "<xgc>") == 0) {
delete tp.bi;
tp.bi = BreakIterator::createXGraphemeClusterInstance(locale, status);
charIdx += 4;
break;
}
// <locale loc_name>
localeMatcher.reset(testString);
if (localeMatcher.lookingAt(charIdx-1, status)) {
@ -2090,7 +2084,7 @@ void RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber,
pos = bi->next();
expectedI++;
}
if (pos==BreakIterator::DONE && expectedI<breakPositions->size()) {
errln("Test file \"%s\", line %d, failed to find break at position %d",
testFileName, lineNumber, breakPositions->elementAti(expectedI));

View file

@ -94,23 +94,23 @@
########################################################################################
#
#
# Extended G r a p h e m e C l u s t e r T e s t s
# E x t e n d e d G r a p h e m e C l u s t e r T e s t s
#
#
##########################################################################################
<xgc>
#<xgc>
# Plain Vanilla grapheme clusters
<data>•a•b•c•</data>
<data>•a\u0301\u0302• •b\u0303\u0304•</data>
#<data>•a•b•c•</data>
#<data>•a\u0301\u0302• •b\u0303\u0304•</data>
# Assorted Hindi combining marks
<data>•\u0904\u0903• •\u0937\u093E• •\u0904\u093F• •\u0937\u0940• •\u0937\u0949• •\u0937\u094A• •\u0937\u094B• •\u0937\u094C•</data>
#<data>•\u0904\u0903• •\u0937\u093E• •\u0904\u093F• •\u0937\u0940• •\u0937\u0949• •\u0937\u094A• •\u0937\u094B• •\u0937\u094C•</data>
# Thai Clusters
# $Prepend $Extend* $PrependBase $Extend*;
#
<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
#<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
########################################################################################