ICU-12071 RuleBasedBreakIterator, make internal protected functions be private and delete unused ones.

X-SVN-Rev: 38666
This commit is contained in:
Andy Heninger 2016-04-29 00:02:10 +00:00
parent 14c2ede1d8
commit 0cbac47c4e
4 changed files with 65 additions and 198 deletions

View file

@ -72,21 +72,6 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
}
}
/**
* Same as above but does not adopt memory
*/
RuleBasedBreakIterator::RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
{
    // Run the common initialization shared by the constructors.
    init();

    // Wrap the caller's header in non-adopting mode; the wrapper constructor
    // reports any problem it finds through status.
    fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status);

    // A failure already recorded in status is left untouched. Only when no
    // failure has been recorded is a null wrapper reported as an
    // out-of-memory condition.
    if (!U_FAILURE(status) && fData == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
//
// Construct from precompiled binary rules (tables). This constructor is public API,
// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().

View file

@ -56,15 +56,11 @@ struct RBBIStateTable;
*
* <p>See the ICU User Guide for information on Break Iterator Rules.</p>
*
* <p>This class is not intended to be subclassed. (Class DictionaryBasedBreakIterator
* is a subclass, but that relationship is effectively internal to the ICU
* implementation. The subclassing interface to RuleBasedBreakIterator is
* not part of the ICU API, and may not remain stable.)</p>
*
* <p>This class is not intended to be subclassed.</p>
*/
class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
protected:
private:
/**
* The UText through which this BreakIterator accesses the text
* @internal
@ -139,7 +135,7 @@ protected:
* @internal
*/
int32_t fPositionInCache;
/**
*
* If present, UStack of LanguageBreakEngine objects that might handle
@ -148,7 +144,7 @@ protected:
* @internal
*/
UStack *fLanguageBreakEngines;
/**
*
* If present, the special LanguageBreakEngine used for handling
@ -157,32 +153,18 @@ protected:
* @internal
*/
UnhandledEngine *fUnhandledBreakEngine;
/**
*
* The type of the break iterator, or -1 if it has not been set.
* @internal
*/
int32_t fBreakType;
protected:
//=======================================================================
// constructors
//=======================================================================
#ifndef U_HIDE_INTERNAL_API
/**
* Constant to be used in the constructor
* RuleBasedBreakIterator(const RBBIDataHeader*, EDontAdopt, UErrorCode &);
* which does not adopt the memory indicated by the RBBIDataHeader*
* parameter.
*
* @internal
*/
enum EDontAdopt {
kDontAdopt
};
/**
* Constructor from a flattened set of RBBI data in malloced memory.
* RuleBasedBreakIterators built from a custom set of rules
@ -195,17 +177,6 @@ protected:
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
/**
* Constructor from a flattened set of RBBI data in memory which need not
* be malloced (e.g. it may be a memory-mapped file, etc.).
*
* This version does not adopt the memory, and does not
* free it when done.
* @internal
*/
RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
#endif /* U_HIDE_INTERNAL_API */
friend class RBBIRuleBuilder;
/** @internal */
@ -248,7 +219,7 @@ public:
* construction from source rules.
*
* Ownership of the storage containing the compiled rules remains with the
* caller of this function. The compiled rules must not be modified or
* caller of this function. The compiled rules must not be modified or
* deleted during the life of the break iterator.
*
* The compiled rules are not compatible across different major versions of ICU.
@ -661,7 +632,7 @@ public:
virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
protected:
private:
//=======================================================================
// implementation
//=======================================================================
@ -670,41 +641,19 @@ protected:
* in text or iteration position.
* @internal
*/
virtual void reset(void);
#if 0
/**
* Return true if the category lookup for this char
* indicates that it is in the set of dictionary lookup chars.
* This function is intended for use by dictionary based break iterators.
* @return true if the category lookup for this char
* indicates that it is in the set of dictionary lookup chars.
* @internal
*/
virtual UBool isDictionaryChar(UChar32);
/**
* Get the type of the break iterator.
* @internal
*/
virtual int32_t getBreakType() const;
#endif
void reset(void);
/**
* Set the type of the break iterator.
* @internal
*/
virtual void setBreakType(int32_t type);
void setBreakType(int32_t type);
#ifndef U_HIDE_INTERNAL_API
/**
* Common initialization function, used by constructors and bufferClone.
* @internal
*/
void init();
#endif /* U_HIDE_INTERNAL_API */
private:
/**
* This method backs the iterator back up to a "safe position" in the text.
@ -728,9 +677,7 @@ private:
*/
int32_t handleNext(const RBBIStateTable *statetable);
protected:
#ifndef U_HIDE_INTERNAL_API
/**
* This is the function that actually implements dictionary-based
* breaking. Covering at least the range from startPos to endPos,
@ -746,9 +693,7 @@ protected:
* @internal
*/
int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
#endif /* U_HIDE_INTERNAL_API */
private:
/**
* This function returns the appropriate LanguageBreakEngine for a

View file

@ -1058,75 +1058,47 @@ void RBBIAPITest::TestRoundtripRules() {
}
}
// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
// This is just a sanity check, not a thorough test (e.g. we don't check that the
// first delete actually frees rulesCopy).
void RBBIAPITest::TestCreateFromRBBIData() {
// Get some handy RBBIData
const char *brkName = "word"; // or "sent", "line", "char", etc.
UErrorCode status = U_ZERO_ERROR;
LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
if ( U_SUCCESS(status) ) {
const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
uint32_t length = builtRules->fLength;
RBBIWithProtectedFunctions * brkItr;
// Try the memory-adopting constructor, need to copy the data first
RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
if ( rulesCopy ) {
uprv_memcpy( rulesCopy, builtRules, length );
// Check getBinaryRules() and construction of a break iterator from those rules.
brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
if ( U_SUCCESS(status) ) {
delete brkItr; // this should free rulesCopy
} else {
errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
status = U_ZERO_ERROR;// reset for the next test
uprv_free( rulesCopy );
}
}
void RBBIAPITest::TestGetBinaryRules() {
UErrorCode status=U_ZERO_ERROR;
LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
TEST_ASSERT_SUCCESS(status);
RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
TEST_ASSERT(rbbi != NULL);
// Now try the non-adopting constructor
brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
if ( U_SUCCESS(status) ) {
delete brkItr; // this should NOT attempt to free builtRules
if (builtRules->fLength != length) { // sanity check
errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
}
} else {
errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
}
// Check that the new line break iterator is nominally functional.
UnicodeString helloWorld("Hello, World!");
rbbi->setText(helloWorld);
int n = 0;
while (bi->next() != UBRK_DONE) {
++n;
}
TEST_ASSERT(n == 2);
// getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
//
status = U_ZERO_ERROR;
RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
if (rb == NULL || U_FAILURE(status)) {
dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
} else {
uint32_t length;
const uint8_t *rules = rb->getBinaryRules(length);
RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(*rb == *rb2);
UnicodeString words = "one two three ";
rb2->setText(words);
int wordCounter = 0;
while (rb2->next() != UBRK_DONE) {
wordCounter++;
}
TEST_ASSERT(wordCounter == 6);
// Extract the binary rules as a uint8_t blob.
uint32_t ruleLength;
const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
TEST_ASSERT(ruleLength > 0);
TEST_ASSERT(binRules != NULL);
status = U_ZERO_ERROR;
RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
delete rb;
delete rb2;
delete rb3;
// Clone the binary rules, and create a break iterator from that.
// The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
uint8_t *clonedRules = new uint8_t[ruleLength];
memcpy(clonedRules, binRules, ruleLength);
RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
TEST_ASSERT_SUCCESS(status);
// Check that the cloned line break iterator is nominally alive.
clonedBI.setText(helloWorld);
n = 0;
while (clonedBI.next() != UBRK_DONE) {
++n;
}
TEST_ASSERT(n == 2);
delete[] clonedRules;
}
@ -1428,41 +1400,33 @@ void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
switch (index) {
// case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
TESTCASE_AUTO_BEGIN;
#if !UCONFIG_NO_FILE_IO
case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
case 4: name = "TestIteration"; if (exec) TestIteration(); break;
#else
case 0: case 1: case 2: case 3: case 4: name = "skip"; break;
TESTCASE_AUTO(TestCloneEquals);
TESTCASE_AUTO(TestgetRules);
TESTCASE_AUTO(TestHashCode);
TESTCASE_AUTO(TestGetSetAdoptText);
TESTCASE_AUTO(TestIteration);
#endif
case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;
TESTCASE_AUTO(TestBuilder);
TESTCASE_AUTO(TestQuoteGrouping);
TESTCASE_AUTO(TestRuleStatusVec);
TESTCASE_AUTO(TestBug2190);
#if !UCONFIG_NO_FILE_IO
case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;
case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
#else
case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
TESTCASE_AUTO(TestRegistration);
TESTCASE_AUTO(TestBoilerPlate);
TESTCASE_AUTO(TestRuleStatus);
TESTCASE_AUTO(TestRoundtripRules);
TESTCASE_AUTO(TestGetBinaryRules);
#endif
case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
TESTCASE_AUTO(TestRefreshInputText);
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
#else
case 15: name="skip"; break;
TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
#endif
default: name = ""; break; // needed to end loop
}
TESTCASE_AUTO_END;
}
//---------------------------------------------
//Internal subroutines
//---------------------------------------------
@ -1504,18 +1468,4 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof
logln(prettify("****selected \"" + selected + "\""));
}
//---------------------------------------------
//RBBIWithProtectedFunctions class functions
//---------------------------------------------
// Test-only forwarding constructor: passes data and status straight through to
// the protected RuleBasedBreakIterator(RBBIDataHeader*, UErrorCode&) constructor
// (the memory-adopting variant) and does no work of its own.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
: RuleBasedBreakIterator(data, status)
{
}
// Test-only forwarding constructor for the non-adopting variant. The unnamed
// EDontAdopt parameter only selects this overload; the base class is always
// given RuleBasedBreakIterator::kDontAdopt.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
: RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
{
}
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View file

@ -65,10 +65,9 @@ public:
void RoundtripRule(const char *dataFile);
/**
* Test creating RuleBasedBreakIterator from RBBIData.
* Test getting and using binary (compiled) rules.
**/
void TestCreateFromRBBIData(void);
void TestGetBinaryRules(void);
/**
* Tests grouping effect of 'single quotes' in rules.
@ -101,18 +100,6 @@ public:
};
/**
* Special class to enable testing of protected functions in RuleBasedBreakIterator
*/
// Thin public wrapper over RuleBasedBreakIterator: it re-exposes the base
// class's RBBIDataHeader-based constructors so test code can call them.
class RBBIWithProtectedFunctions: public RuleBasedBreakIterator {
public:
// Tag type used to select the non-adopting constructor overload below.
enum EDontAdopt {
kDontAdopt
};
// Construct from RBBI data; forwards to the base-class constructor taking
// (RBBIDataHeader*, UErrorCode&).
RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status);
// Construct from const RBBI data via the base-class kDontAdopt constructor.
RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
};
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif