mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-2077 RBBI: review comments incorporated. (incomplete, more to come.)
X-SVN-Rev: 9612
This commit is contained in:
parent
48acf9f6dd
commit
6df1676310
9 changed files with 287 additions and 197 deletions
|
@ -37,7 +37,7 @@ const int32_t BreakIterator::DONE = (int32_t)-1;
|
|||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a simple text boundary for word breaks.
|
||||
// Creates a break iterator for word breaks.
|
||||
BreakIterator*
|
||||
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
|
@ -49,31 +49,32 @@ BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
|||
|
||||
if (U_FAILURE(status))
|
||||
return NULL;
|
||||
|
||||
if (!uprv_strcmp(key.getLanguage(), "th"))
|
||||
{
|
||||
filename = "word_th";
|
||||
}
|
||||
|
||||
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
// The UDataMemory is adopted by the break iterator.
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
if(!uprv_strcmp(filename, "word_th")) {
|
||||
filename = "thaidict.brk";
|
||||
result = new DictionaryBasedBreakIterator(file, filename, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if(!uprv_strcmp(filename, "word_th")) {
|
||||
filename = "thaidict.brk";
|
||||
result = new DictionaryBasedBreakIterator(file, filename, status);
|
||||
}
|
||||
else {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
}
|
||||
if (result == NULL) {
|
||||
udata_close(file);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
||||
delete result;
|
||||
result = NULL;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -81,7 +82,7 @@ BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
|||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a simple text boundary for line breaks.
|
||||
// Creates a break iterator for line breaks.
|
||||
BreakIterator*
|
||||
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
|
@ -93,39 +94,39 @@ BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
|
|||
|
||||
if (U_FAILURE(status))
|
||||
return NULL;
|
||||
|
||||
if (!uprv_strcmp(key.getLanguage(), "th"))
|
||||
{
|
||||
filename = "line_th";
|
||||
}
|
||||
|
||||
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
||||
|
||||
if (!U_FAILURE(status)) {
|
||||
if (!uprv_strcmp(key.getLanguage(), "th")) {
|
||||
filename = "thaidict.brk";
|
||||
result = new DictionaryBasedBreakIterator(file, filename, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
// The UDataMemory is adopted by the break iterator.
|
||||
|
||||
if (!uprv_strcmp(key.getLanguage(), "th")) {
|
||||
filename = "thaidict.brk";
|
||||
result = new DictionaryBasedBreakIterator(file, filename, status);
|
||||
}
|
||||
else {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
}
|
||||
if (result == NULL) {
|
||||
udata_close(file);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
||||
delete result;
|
||||
result = NULL;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a simple text boundary for character breaks.
|
||||
// Creates a break iterator for character breaks.
|
||||
BreakIterator*
|
||||
BreakIterator::createCharacterInstance(const Locale& /* key */, UErrorCode& status)
|
||||
{
|
||||
|
@ -138,22 +139,26 @@ BreakIterator::createCharacterInstance(const Locale& /* key */, UErrorCode& stat
|
|||
if (U_FAILURE(status))
|
||||
return NULL;
|
||||
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
||||
|
||||
if (!U_FAILURE(status)) {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
// The UDataMemory is adopted by the break iterator.
|
||||
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
if (result == NULL) {
|
||||
udata_close(file);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
||||
delete result;
|
||||
result = NULL;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a simple text boundary for sentence breaks.
|
||||
// Creates a break iterator for sentence breaks.
|
||||
BreakIterator*
|
||||
BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status)
|
||||
{
|
||||
|
@ -166,14 +171,19 @@ BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status
|
|||
if (U_FAILURE(status))
|
||||
return NULL;
|
||||
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
// The UDataMemory is adopted by the break iterator.
|
||||
|
||||
if (!U_FAILURE(status)) {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
if (result == NULL) {
|
||||
udata_close(file);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
||||
delete result;
|
||||
result = NULL;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -181,7 +191,7 @@ BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status
|
|||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a simple text boundary for title casing breaks.
|
||||
// Creates a break iterator for title casing breaks.
|
||||
BreakIterator*
|
||||
BreakIterator::createTitleInstance(const Locale& /* key */, UErrorCode& status)
|
||||
{
|
||||
|
@ -194,14 +204,19 @@ BreakIterator::createTitleInstance(const Locale& /* key */, UErrorCode& status)
|
|||
if (U_FAILURE(status))
|
||||
return NULL;
|
||||
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
// The UDataMemory is adopted by the break iterator.
|
||||
|
||||
if (!U_FAILURE(status)) {
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
/* test for NULL */
|
||||
if(result == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
if (result == NULL) {
|
||||
udata_close(file);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
||||
delete result;
|
||||
result = NULL;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -234,11 +249,11 @@ BreakIterator::getDisplayName(const Locale& objectLocale,
|
|||
return objectLocale.getDisplayName(displayLocale, name);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Needed because we declare the copy constructor (in order to prevent synthesizing one) and
|
||||
// so the default constructor is no longer synthesized.
|
||||
|
||||
// ------------------------------------------
|
||||
//
|
||||
// Default constructor and destructor
|
||||
//
|
||||
//-------------------------------------------
|
||||
BreakIterator::BreakIterator()
|
||||
{
|
||||
fBufferClone = FALSE;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "rbbirb.h"
|
||||
#include "filestrm.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
#include "uassert.h"
|
||||
|
||||
|
@ -25,8 +26,7 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
|
||||
static const int16_t START_STATE = 1; // The state number of the starting state
|
||||
|
||||
static const int16_t STOP_STATE = 0; // The state-transition value indicating "stop"
|
||||
static const int16_t STOP_STATE = 0; // The state-transition value indicating "stop"
|
||||
|
||||
/**
|
||||
* Class ID. (value is irrelevant; address is important)
|
||||
|
@ -86,6 +86,10 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
|
|||
if (U_FAILURE(status)) {return;};
|
||||
RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)
|
||||
RBBIRuleBuilder::createRuleBasedBreakIterator(rules, parseError, status);
|
||||
// Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that
|
||||
// creates and returns a complete RBBI. From here, in a constructor, we
|
||||
// can't just return the object created by the builder factory, hence
|
||||
// the assignment of the factory created object to "this".
|
||||
if (U_SUCCESS(status)) {
|
||||
*this = *bi;
|
||||
delete bi;
|
||||
|
@ -118,16 +122,15 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth
|
|||
}
|
||||
|
||||
|
||||
//=======================================================================
|
||||
// boilerplate
|
||||
//=======================================================================
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
RuleBasedBreakIterator::~RuleBasedBreakIterator() {
|
||||
delete fText;
|
||||
fText = NULL;
|
||||
if (fData != NULL) {
|
||||
fData->removeReference();
|
||||
fData = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -163,6 +166,7 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
|
|||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// init() Shared initialization routine. Used by all the constructors.
|
||||
// Initializes all fields, leaving the object in a consistent state.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
UBool RuleBasedBreakIterator::fTrace = FALSE;
|
||||
|
@ -179,7 +183,7 @@ void RuleBasedBreakIterator::init() {
|
|||
if (debugInitDone == FALSE) {
|
||||
#ifdef RBBI_DEBUG
|
||||
char *debugEnv = getenv("U_RBBIDEBUG");
|
||||
if (debugEnv && strstr(debugEnv, "trace")) {
|
||||
if (debugEnv && uprv_strstr(debugEnv, "trace")) {
|
||||
fTrace = TRUE;
|
||||
}
|
||||
#endif
|
||||
|
@ -268,7 +272,7 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
|
|||
reset();
|
||||
delete fText;
|
||||
fText = newText;
|
||||
fText->first();
|
||||
this->first();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -286,8 +290,8 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
|
|||
else {
|
||||
delete fText;
|
||||
fText = new StringCharacterIterator(newText);
|
||||
fText->first();
|
||||
}
|
||||
this->first();
|
||||
}
|
||||
|
||||
|
||||
|
@ -435,11 +439,14 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
|
|||
fLastBreakTagValid = TRUE;
|
||||
if (fText == NULL || offset >= fText->endIndex()) {
|
||||
// fText->setToEnd();
|
||||
return BreakIterator::DONE;
|
||||
// return BreakIterator::DONE;
|
||||
last();
|
||||
return next();
|
||||
}
|
||||
else if (offset < fText->startIndex()) {
|
||||
// fText->setToStart();
|
||||
return fText->startIndex();
|
||||
// return fText->startIndex();
|
||||
return first();
|
||||
}
|
||||
|
||||
// otherwise, set our internal iteration position (temporarily)
|
||||
|
@ -476,10 +483,11 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
|||
// just return DONE; if it's before the beginning, return the
|
||||
// text's starting offset
|
||||
if (fText == NULL || offset > fText->endIndex()) {
|
||||
return BreakIterator::DONE;
|
||||
// return BreakIterator::DONE;
|
||||
return last();
|
||||
}
|
||||
else if (offset < fText->startIndex()) {
|
||||
return fText->startIndex();
|
||||
return first();
|
||||
}
|
||||
|
||||
// if we start by updating the current iteration position to the
|
||||
|
@ -499,19 +507,25 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
|||
UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
|
||||
// the beginning index of the iterator is always a boundary position by definition
|
||||
if (fText == NULL || offset == fText->startIndex()) {
|
||||
first(); // For side effects on current position, tag values.
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// out-of-range indexes are never boundary positions
|
||||
else if (offset < fText->startIndex() || offset > fText->endIndex()) {
|
||||
if (offset < fText->startIndex()) {
|
||||
first(); // For side effects on current position, tag values.
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (offset > fText->endIndex()) {
|
||||
last(); // For side effects on current position, tag values.
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
// otherwise, we can use following() on the position before the specified
|
||||
// one and return true of the position we get back is the one the user
|
||||
// one and return true if the position we get back is the one the user
|
||||
// specified
|
||||
else
|
||||
return following(offset - 1) == offset;
|
||||
return following(offset - 1) == offset;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -555,7 +569,7 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
|
|||
int32_t result = fText->getIndex() + 1;
|
||||
int32_t lookaheadResult = 0;
|
||||
|
||||
// begin in state 1
|
||||
// Initialize the state machine. Begin in state 1
|
||||
int32_t state = START_STATE;
|
||||
int16_t category;
|
||||
UChar32 c = fText->current32();
|
||||
|
@ -565,16 +579,19 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
|
|||
|
||||
fLastBreakTag = 0;
|
||||
|
||||
row = (RBBIStateTableRow *)
|
||||
row = (RBBIStateTableRow *) // Point to starting row of state table.
|
||||
(fData->fForwardTable->fTableData + (fData->fForwardTable->fRowLen * state));
|
||||
|
||||
// Character Category fetch for starting character.
|
||||
// See comments on character category code within loop, below.
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
if ((category & 0x4000) != 0) {
|
||||
fDictionaryCharCount++;
|
||||
category &= ~0x4000;
|
||||
}
|
||||
|
||||
// loop until we reach the end of the text or transition to state 0
|
||||
for (;;) {
|
||||
// loop until we reach the end of the text or transition to state 0
|
||||
for (;;) {
|
||||
if (c == CharacterIterator::DONE && fText->hasNext()==FALSE) {
|
||||
// Note: CharacterIterator::DONE is 0xffff, which is also a legal
|
||||
// character value. Check for DONE first, because it's quicker,
|
||||
|
@ -586,15 +603,16 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
|
|||
// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
|
||||
// not the size of the character going in.
|
||||
//
|
||||
// And off bit 14, which flags use of a dictionary for dictionary based
|
||||
// iterators, but should be ignored here.
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
|
||||
// Check the dictionary bit in the character's category.
|
||||
// Counter is only used by dictionary based iterators.
|
||||
// Counter is only used by dictionary based iterators (subclasses).
|
||||
// Chars that need to be handled by a dictionary have a flag bit set
|
||||
// in their category values.
|
||||
//
|
||||
if ((category & 0x4000) != 0) {
|
||||
fDictionaryCharCount++;
|
||||
// And off the dictionary flag bit.
|
||||
category &= ~0x4000;
|
||||
}
|
||||
|
||||
|
@ -616,6 +634,8 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
|
|||
// Get the next character. Doing it here positions the iterator
|
||||
// to the correct position for recording matches in the code that
|
||||
// follows.
|
||||
// TODO: 16 bit next, and a 16 bit TRIE lookup, with escape code
|
||||
// for non-BMP chars, would be faster.
|
||||
c = fText->next32();
|
||||
|
||||
if (row->fAccepting == 0 && row->fLookAhead == 0) {
|
||||
|
@ -636,7 +656,7 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
|
|||
if (row->fAccepting == 0 && row->fLookAhead != 0) {
|
||||
// Lookahead match point. Remember it, but only if no other rule has
|
||||
// unconditionally matched up to this point.
|
||||
// TODO: handle case where there's a pending match from a different rule
|
||||
// TODO: handle case where there's a pending match from a different rule -
|
||||
// where lookaheadStatus != 0 && lookaheadStatus != row->fLookAhead.
|
||||
int32_t r = fText->getIndex();
|
||||
if (r > result) {
|
||||
|
@ -672,6 +692,7 @@ continueOn:
|
|||
// a lookahead state, advance the break position to the lookahead position
|
||||
// (the theory here is that if there are no characters at all after the lookahead
|
||||
// position, that always matches the lookahead criteria)
|
||||
// TODO: is this really the right behavior?
|
||||
if (c == CharacterIterator::DONE &&
|
||||
fText->hasNext()==FALSE &&
|
||||
lookaheadResult == fText->endIndex()) {
|
||||
|
@ -694,8 +715,9 @@ continueOn:
|
|||
// This method backs the iterator back up to a "safe position" in the text.
|
||||
// This is a position that we know, without any context, must be a break position.
|
||||
// The various calling methods then iterate forward from this safe position to
|
||||
// the appropriate position to return. (For more information, see the description
|
||||
// of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
|
||||
// the appropriate position to return.
|
||||
//
|
||||
// The logic of this function is very similar to handleNext(), above.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
int32_t RuleBasedBreakIterator::handlePrevious(void) {
|
||||
|
@ -833,18 +855,27 @@ RuleBasedBreakIterator::reset()
|
|||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// getRuleStatus()
|
||||
// getRuleStatus() Return the break rule tag associated with the current
|
||||
// iterator position. If the iterator arrived at its current
|
||||
// position by iterating forwards, the value will have been
|
||||
// cached by the handleNext() function.
|
||||
//
|
||||
// If no cached status value is available, the status is
|
||||
// found by doing a previous() followed by a next(), which
|
||||
// leaves the iterator where it started, and computes the
|
||||
// status while doing the next().
|
||||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
int32_t RuleBasedBreakIterator::getRuleStatus() const {
|
||||
// If the break tag value is unknown, back the iterator up, then move
|
||||
// forward again. Moving forward will set the fLastBreakTag value correctly.
|
||||
RuleBasedBreakIterator *nonConstThis = (RuleBasedBreakIterator *)this;
|
||||
if (fLastBreakTagValid == FALSE) {
|
||||
if (current() == fText->startIndex()) {
|
||||
// No cached status is available.
|
||||
if (fText == NULL || current() == fText->startIndex()) {
|
||||
// At start of text, or there is no text. Status is always zero.
|
||||
nonConstThis->fLastBreakTag = 0;
|
||||
nonConstThis->fLastBreakTagValid = TRUE;
|
||||
} else {
|
||||
// Not at start of text. Find status the tedious way.
|
||||
int32_t pa = current();
|
||||
nonConstThis->previous();
|
||||
int32_t pb = nonConstThis->next();
|
||||
|
@ -857,7 +888,7 @@ int32_t RuleBasedBreakIterator::getRuleStatus() const {
|
|||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// getFlattenedData Access to the compiled form of the rules,
|
||||
// getBinaryRules Access to the compiled form of the rules,
|
||||
// for use by build system tools that save the data
|
||||
// for standard iterator types.
|
||||
//
|
||||
|
@ -868,7 +899,7 @@ const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
|
|||
|
||||
if (fData != NULL) {
|
||||
retPtr = (const uint8_t *)fData->fHeader;
|
||||
length = fData->fHeader->fLength;
|
||||
length = fData->fHeader->fLength;
|
||||
}
|
||||
return retPtr;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2002 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
**********************************************************************
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
@ -156,7 +156,7 @@ int32_t RBBIDataWrapper::hashCode() {
|
|||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
void RBBIDataWrapper::removeReference() {
|
||||
if (umtx_atomic_dec(&fRefCount) == 0) {
|
||||
if (umtx_atomic_dec(&fRefCount) == 0) {
|
||||
delete this;
|
||||
}
|
||||
};
|
||||
|
@ -221,9 +221,4 @@ void RBBIDataWrapper::printData() {
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -35,7 +35,8 @@ U_NAMESPACE_BEGIN
|
|||
// All of them are strung together in a linked list, which is kept in order
|
||||
// (by character)
|
||||
//
|
||||
struct RangeDescriptor : public UObject {
|
||||
class RangeDescriptor : public UObject {
|
||||
public:
|
||||
UChar32 fStartChar; // Start of range, unicode 32 bit value.
|
||||
UChar32 fEndChar; // End of range, unicode 32 bit value.
|
||||
int32_t fNum; // runtime-mapped input value for this range.
|
||||
|
|
|
@ -94,22 +94,27 @@ ubrk_openRules( const UChar *rules,
|
|||
UParseError *parseErr,
|
||||
UErrorCode *status) {
|
||||
|
||||
BreakIterator *result = 0;
|
||||
if (status == NULL || U_FAILURE(*status)){
|
||||
return 0;
|
||||
}
|
||||
|
||||
BreakIterator *result = 0;
|
||||
UnicodeString ruleString(rules, rulesLength);
|
||||
result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, *parseErr, *status);
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
UCharCharacterIterator *iter = 0;
|
||||
iter = new UCharCharacterIterator(text, textLength);
|
||||
if(iter == 0) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
delete result;
|
||||
return 0;
|
||||
if (text != NULL) {
|
||||
UCharCharacterIterator *iter = 0;
|
||||
iter = new UCharCharacterIterator(text, textLength);
|
||||
if(iter == 0) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
delete result;
|
||||
return 0;
|
||||
}
|
||||
result->adoptText(iter);
|
||||
}
|
||||
result->adoptText(iter);
|
||||
return (UBreakIterator *)result;
|
||||
}
|
||||
|
||||
|
@ -243,7 +248,7 @@ ubrk_countAvailable()
|
|||
}
|
||||
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
|
||||
{
|
||||
return ((BreakIterator *)bi)->isBoundary(offset);
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
*****************************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines
|
||||
* Copyright (C) 1997-2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*****************************************************************************************
|
||||
*
|
||||
* File BRKITER.H
|
||||
* File brkiter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
|
@ -65,13 +65,13 @@ U_NAMESPACE_BEGIN
|
|||
* <P>
|
||||
* Helper function to output text
|
||||
* <pre>
|
||||
* \code
|
||||
* \code
|
||||
* void printTextRange( BreakIterator& iterator, int32_t start, int32_t end )
|
||||
* {
|
||||
* UnicodeString textBuffer, temp;
|
||||
* CharacterIterator *strIter = iterator.createText();
|
||||
* strIter->getText(temp);
|
||||
* cout << " " << start << " " << end << " |"
|
||||
* cout << " " << start << " " << end << " |"
|
||||
* << temp.extractBetween(start, end, textBuffer)
|
||||
* << "|" << endl;
|
||||
* delete strIter;
|
||||
|
@ -149,7 +149,7 @@ U_NAMESPACE_BEGIN
|
|||
* BreakIterator* boundary;
|
||||
* UnicodeString stringToExamine("Aaa bbb ccc. Ddd eee fff.");
|
||||
* cout << "Examining: " << stringToExamine << endl;
|
||||
*
|
||||
*
|
||||
* //print each sentence in forward and reverse order
|
||||
* boundary = BreakIterator::createSentenceInstance( Locale::US );
|
||||
* boundary->setText(stringToExamine);
|
||||
|
@ -158,7 +158,7 @@ U_NAMESPACE_BEGIN
|
|||
* cout << "----- backward: ----------" << endl;
|
||||
* printEachBackward(*boundary);
|
||||
* delete boundary;
|
||||
*
|
||||
*
|
||||
* //print each word in order
|
||||
* boundary = BreakIterator::createWordInstance();
|
||||
* boundary->setText(stringToExamine);
|
||||
|
@ -173,7 +173,7 @@ U_NAMESPACE_BEGIN
|
|||
* //print word at charpos 10
|
||||
* cout << "----- at pos 10: ---------" << endl;
|
||||
* printAt(*boundary, 10 );
|
||||
*
|
||||
*
|
||||
* delete boundary;
|
||||
* }
|
||||
* \endcode
|
||||
|
@ -222,6 +222,8 @@ public:
|
|||
|
||||
/**
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* Changing the state of the returned iterator can have undefined consequences
|
||||
* on the operation of the break iterator. If you need to change it, clone it first.
|
||||
* @stable
|
||||
*/
|
||||
virtual const CharacterIterator& getText(void) const = 0;
|
||||
|
@ -278,8 +280,7 @@ public:
|
|||
virtual int32_t next(void) = 0;
|
||||
|
||||
/**
|
||||
* Return character index of the text boundary that was most recently
|
||||
* returned by next(), previous(), first(), or last()
|
||||
* Return character index of the current iterator position within the text.
|
||||
* @return The boundary most recently returned.
|
||||
* @stable
|
||||
*/
|
||||
|
@ -304,9 +305,11 @@ public:
|
|||
* @stable
|
||||
*/
|
||||
virtual int32_t preceding(int32_t offset) = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Return true if the specified position is a boundary position.
|
||||
* As a side effect, the current position of the iterator is set
|
||||
* to the first boundary position at or following the specified offset.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable
|
||||
|
@ -328,22 +331,22 @@ public:
|
|||
* Create BreakIterator for word-breaks using the given locale.
|
||||
* Returns an instance of a BreakIterator implementing word breaks.
|
||||
* WordBreak is useful for word selection (ex. double click)
|
||||
* @param where the locale.
|
||||
* @param where the locale.
|
||||
* @param status the error code
|
||||
* @return A BreakIterator for word-breaks. The UErrorCode& status
|
||||
* @return A BreakIterator for word-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_ERROR indicates that the default locale data was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable
|
||||
*/
|
||||
static BreakIterator* createWordInstance(const Locale& where,
|
||||
static BreakIterator* createWordInstance(const Locale& where,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
|
@ -354,84 +357,84 @@ public:
|
|||
* LineBreak is useful for word wrapping text.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for line-breaks. The UErrorCode& status
|
||||
* @return A BreakIterator for line-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_ERROR indicates that the default locale data was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable
|
||||
*/
|
||||
static BreakIterator* createLineInstance(const Locale& where,
|
||||
static BreakIterator* createLineInstance(const Locale& where,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for character-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing character breaks.
|
||||
* Character breaks are boundaries of combining character sequences.
|
||||
* @param where the locale.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for character-breaks. The UErrorCode& status
|
||||
* @return A BreakIterator for character-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_ERROR indicates that the default locale data was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable
|
||||
*/
|
||||
static BreakIterator* createCharacterInstance(const Locale& where,
|
||||
static BreakIterator* createCharacterInstance(const Locale& where,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for sentence-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing sentence breaks.
|
||||
* @param where the locale.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for sentence-breaks. The UErrorCode& status
|
||||
* @return A BreakIterator for sentence-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_ERROR indicates that the default locale data was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable
|
||||
*/
|
||||
static BreakIterator* createSentenceInstance(const Locale& where,
|
||||
static BreakIterator* createSentenceInstance(const Locale& where,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for title-casing breaks using the specified locale
|
||||
* Returns an instance of a BreakIterator implementing title breaks.
|
||||
* @param where the locale.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for title-breaks. The UErrorCode& status
|
||||
* @return A BreakIterator for title-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_ERROR indicates that the default locale data was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable
|
||||
* @draft ICU 2.1
|
||||
*/
|
||||
static BreakIterator* createTitleInstance(const Locale& where,
|
||||
static BreakIterator* createTitleInstance(const Locale& where,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
|
@ -469,24 +472,30 @@ public:
|
|||
/**
|
||||
* Thread safe client-buffer-based cloning operation
|
||||
* Do NOT call delete on a safeclone, since 'new' is not used to create it.
|
||||
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* @param BufferSize reference to size of allocated space.
|
||||
* If BufferSize == 0, a sufficient size for use in cloning will
|
||||
* @param BufferSize reference to size of allocated space.
|
||||
* If BufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If BufferSize is not enough for a stack-based safe clone,
|
||||
* If BufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
|
||||
* necessary.
|
||||
* @return pointer to the new clone
|
||||
*
|
||||
* @draft ICU 1.8
|
||||
*
|
||||
* @stable
|
||||
*/
|
||||
virtual BreakIterator * createBufferClone(void *stackBuffer,
|
||||
int32_t &BufferSize,
|
||||
UErrorCode &status) = 0;
|
||||
|
||||
/**
|
||||
* Determine whether the BreakIterator was created in user memory by
|
||||
* createBufferClone(), and thus should not be deleted. Such objects
|
||||
* must be closed by an explicit call to the destructor (not delete).
|
||||
* @stable
|
||||
*/
|
||||
inline UBool isBufferClone(void);
|
||||
|
||||
|
||||
|
|
|
@ -24,9 +24,9 @@ struct UTrie;
|
|||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct RBBIDataHeader;
|
||||
class RuleBasedBreakIteratorTables;
|
||||
class BreakIterator;
|
||||
class RBBIDataWrapper;
|
||||
class RuleBasedBreakIteratorTables;
|
||||
class BreakIterator;
|
||||
class RBBIDataWrapper;
|
||||
|
||||
|
||||
|
||||
|
@ -37,10 +37,6 @@ class RBBIDataWrapper;
|
|||
* <p>See the ICU User Guide for information on Break Iterator Rules.</p>
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
|
||||
|
||||
protected:
|
||||
|
@ -74,7 +70,7 @@ protected:
|
|||
uint32_t fDictionaryCharCount;
|
||||
|
||||
//
|
||||
// Debugging flag.
|
||||
// Debugging flag. Trace operation of state machine when true.
|
||||
//
|
||||
static UBool fTrace;
|
||||
|
||||
|
@ -117,7 +113,8 @@ protected:
|
|||
public:
|
||||
|
||||
/** Default constructor. Creates an empty shell of an iterator, with no
|
||||
* rules or text to iterate over. Object can subsequently be assigned.
|
||||
* rules or text to iterate over. Object can subsequently be assigned to.
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
RuleBasedBreakIterator();
|
||||
|
||||
|
@ -134,12 +131,14 @@ public:
|
|||
* @param parseError In the event of a syntax error in the rules, provides the location
|
||||
* within the rules of the problem.
|
||||
* @param status Information on any errors encountered.
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
RuleBasedBreakIterator( const UnicodeString &rules,
|
||||
UParseError &parseError,
|
||||
UErrorCode &status);
|
||||
/**
|
||||
* Destructor
|
||||
* @stable
|
||||
*/
|
||||
virtual ~RuleBasedBreakIterator();
|
||||
|
||||
|
@ -148,6 +147,7 @@ public:
|
|||
* and iterate over the same text, as the one passed in.
|
||||
* @param that The RuleBasedBreakIterator passed in
|
||||
* @return the newly created RuleBasedBreakIterator
|
||||
* @stable
|
||||
*/
|
||||
RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
|
||||
|
||||
|
@ -157,6 +157,7 @@ public:
|
|||
* @param that The BreakIterator to be compared for equality
|
||||
* @return TRUE if both BreakIterators are of the
|
||||
* same class, have the same behavior, and iterate over the same text.
|
||||
* @stable
|
||||
*/
|
||||
virtual UBool operator==(const BreakIterator& that) const;
|
||||
|
||||
|
@ -165,6 +166,7 @@ public:
|
|||
* and vice versa.
|
||||
* @param that The BreakIterator to be compared for inequality
|
||||
* @return TRUE if both BreakIterators are not the same.
|
||||
* @stable
|
||||
*/
|
||||
UBool operator!=(const BreakIterator& that) const;
|
||||
|
||||
|
@ -175,18 +177,21 @@ public:
|
|||
* will correctly clone (copy) a derived class.
|
||||
* clone() is thread safe. Multiple threads may simultaneously
|
||||
* clone the same source break iterator.
|
||||
* @stable
|
||||
*/
|
||||
virtual BreakIterator* clone() const;
|
||||
|
||||
/**
|
||||
* Compute a hash code for this BreakIterator
|
||||
* @return A hash code
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns the description used to create this iterator
|
||||
* @return the description used to create this iterator
|
||||
* @stable
|
||||
*/
|
||||
virtual const UnicodeString& getRules(void) const;
|
||||
|
||||
|
@ -200,6 +205,7 @@ public:
|
|||
* Changing the state of this iterator can have undefined consequences. If
|
||||
* you need to change it, clone it first.
|
||||
* @return An iterator over the text being analyzed.
|
||||
* @stable
|
||||
*/
|
||||
virtual const CharacterIterator& getText(void) const;
|
||||
|
||||
|
@ -209,6 +215,7 @@ public:
|
|||
* the current iteration position to the beginning of the text.
|
||||
* @param newText An iterator over the text to analyze. The BreakIterator
|
||||
* takes ownership of the character iterator. The caller MUST NOT delete it!
|
||||
* @stable
|
||||
*/
|
||||
virtual void adoptText(CharacterIterator* newText);
|
||||
|
||||
|
@ -216,6 +223,7 @@ public:
|
|||
* Set the iterator to analyze a new piece of text. This function resets
|
||||
* the current iteration position to the beginning of the text.
|
||||
* @param newText The text to analyze.
|
||||
* @stable
|
||||
*/
|
||||
virtual void setText(const UnicodeString& newText);
|
||||
|
||||
|
@ -223,6 +231,7 @@ public:
|
|||
* Sets the current iteration position to the beginning of the text.
|
||||
* (i.e., the CharacterIterator's starting offset).
|
||||
* @return The offset of the beginning of the text.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t first(void);
|
||||
|
||||
|
@ -230,6 +239,7 @@ public:
|
|||
* Sets the current iteration position to the end of the text.
|
||||
* (i.e., the CharacterIterator's ending offset).
|
||||
* @return The text's past-the-end offset.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t last(void);
|
||||
|
||||
|
@ -241,18 +251,21 @@ public:
|
|||
* (negative is backwards, and positive is forwards).
|
||||
* @return The character offset of the boundary position n boundaries away from
|
||||
* the current one.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t next(int32_t n);
|
||||
|
||||
/**
|
||||
* Advances the iterator to the next boundary position.
|
||||
* @return The position of the first boundary after this one.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t next(void);
|
||||
|
||||
/**
|
||||
* Moves the iterator backwards, to the last boundary preceding this one.
|
||||
* @return The position of the last boundary position preceding this one.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t previous(void);
|
||||
|
||||
|
@ -261,6 +274,7 @@ public:
|
|||
* the specified position.
|
||||
* @param offset The position from which to begin searching for a break position.
|
||||
* @return The position of the first break after the current position.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t following(int32_t offset);
|
||||
|
||||
|
@ -269,6 +283,7 @@ public:
|
|||
* specified position.
|
||||
* @param offset The position to begin searching for a break from.
|
||||
* @return The position of the last boundary before the starting position.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t preceding(int32_t offset);
|
||||
|
||||
|
@ -278,12 +293,14 @@ public:
|
|||
* or after "offset".
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable
|
||||
*/
|
||||
virtual UBool isBoundary(int32_t offset);
|
||||
|
||||
/**
|
||||
* Returns the current iteration position.
|
||||
* @return The current iteration position.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t current(void) const;
|
||||
|
||||
|
@ -295,6 +312,7 @@ public:
|
|||
* status, a default value of 0 is returned.
|
||||
* @return the status from the break rule that determined the most recently
|
||||
* returned break position.
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
virtual int32_t getRuleStatus() const;
|
||||
|
||||
|
@ -336,7 +354,7 @@ public:
|
|||
* buffer size, but do not clone the object. If the
|
||||
* size was too small (but not zero), allocate heap
|
||||
* storage for the cloned object.
|
||||
*
|
||||
*
|
||||
* @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
|
||||
* returned if the provided buffer was too small, and
|
||||
* the clone was therefore put on the heap.
|
||||
|
@ -344,6 +362,7 @@ public:
|
|||
* @return Pointer to the clone object. This may differ from the stackBuffer
|
||||
* address if the byte alignment of the stack buffer was not suitable
|
||||
* or if the stackBuffer was too small to hold the clone.
|
||||
* @draft stable
|
||||
*/
|
||||
virtual BreakIterator * createBufferClone(void *stackBuffer,
|
||||
int32_t &BufferSize,
|
||||
|
@ -365,6 +384,7 @@ public:
|
|||
* @return A pointer to the binary (compiled) rule data. The storage
|
||||
* belongs to the RuleBasedBreakIterator object, not the
|
||||
* caller, and must not be modified or deleted.
|
||||
* @internal
|
||||
*/
|
||||
virtual const uint8_t *getBinaryRules(uint32_t &length);
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
* typically starts of words, that should be set to Title Case
|
||||
* when title casing the text.
|
||||
* <P>
|
||||
*
|
||||
*
|
||||
* This is the interface for all text boundaries.
|
||||
* <P>
|
||||
* Examples:
|
||||
|
@ -204,15 +204,27 @@ typedef enum UBreakIteratorType UBreakIteratorType;
|
|||
* than for single individual values.
|
||||
*/
|
||||
enum UWordBreak {
|
||||
/** Tag value for "words" that do not fit into any of other categories.
|
||||
* Includes spaces and most punctuation. */
|
||||
UBRK_WORD_NONE = 0,
|
||||
/** Upper bound for tags for uncategorized words. */
|
||||
UBRK_WORD_NONE_LIMIT = 100,
|
||||
/** Tag value for words that appear to be numbers, lower limit. */
|
||||
UBRK_WORD_NUMBER = 100,
|
||||
/** Tag value for words that appear to be numbers, upper limit. */
|
||||
UBRK_WORD_NUMBER_LIMIT = 200,
|
||||
/** Tag value for words that contain letters, excluding
|
||||
* hiragana, katakana or ideographic characters, lower limit. */
|
||||
UBRK_WORD_LETTER = 200,
|
||||
/** Tag value for words containing letters, upper limit */
|
||||
UBRK_WORD_LETTER_LIMIT = 300,
|
||||
UBRK_WORD_HIRAKATA = 300,
|
||||
UBRK_WORD_HIRAKATA_LIMIT = 400,
|
||||
/** Tag value for words containing kana characters, lower limit */
|
||||
UBRK_WORD_KANA = 300,
|
||||
/** Tag value for words containing kana characters, upper limit */
|
||||
UBRK_WORD_KANA_LIMIT = 400,
|
||||
/** Tag value for words containing ideographic characters, lower limit */
|
||||
UBRK_WORD_IDEO = 400,
|
||||
/** Tag value for words containing ideographic characters, upper limit */
|
||||
UBRK_WORD_IDEO_LIMIT = 500
|
||||
};
|
||||
typedef enum UWordBreak UWordBreak;
|
||||
|
@ -232,7 +244,7 @@ typedef enum UWordBreak UWordBreak;
|
|||
* @see ubrk_openRules
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_open(UBreakIteratorType type,
|
||||
const char *locale,
|
||||
const UChar *text,
|
||||
|
@ -252,9 +264,9 @@ ubrk_open(UBreakIteratorType type,
|
|||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified rules.
|
||||
* @see ubrk_open
|
||||
* @draft
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openRules(const UChar *rules,
|
||||
int32_t rulesLength,
|
||||
const UChar *text,
|
||||
|
@ -276,9 +288,9 @@ ubrk_openRules(const UChar *rules,
|
|||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
|
||||
* @return pointer to the new clone
|
||||
* @draft ICU 1.8
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
const UBreakIterator *bi,
|
||||
void *stackBuffer,
|
||||
|
@ -293,7 +305,7 @@ ubrk_safeClone(
|
|||
* @param bi The break iterator to close.
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_close(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
|
@ -304,7 +316,7 @@ ubrk_close(UBreakIterator *bi);
|
|||
* @param status The error code
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setText(UBreakIterator* bi,
|
||||
const UChar* text,
|
||||
int32_t textLength,
|
||||
|
@ -318,7 +330,7 @@ ubrk_setText(UBreakIterator* bi,
|
|||
* \Ref{ubrk_first}, or \Ref{ubrk_last}.
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_current(const UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
|
@ -330,7 +342,7 @@ ubrk_current(const UBreakIterator *bi);
|
|||
* @see ubrk_previous
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_next(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
|
@ -342,7 +354,7 @@ ubrk_next(UBreakIterator *bi);
|
|||
* @see ubrk_next
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_previous(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
|
@ -353,7 +365,7 @@ ubrk_previous(UBreakIterator *bi);
|
|||
* @see ubrk_last
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_first(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
|
@ -366,7 +378,7 @@ ubrk_first(UBreakIterator *bi);
|
|||
* @see ubrk_first
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_last(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
|
@ -378,7 +390,7 @@ ubrk_last(UBreakIterator *bi);
|
|||
* @see ubrk_following
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_preceding(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
|
@ -391,7 +403,7 @@ ubrk_preceding(UBreakIterator *bi,
|
|||
* @see ubrk_preceding
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_following(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
|
@ -404,7 +416,7 @@ ubrk_following(UBreakIterator *bi,
|
|||
* @see ubrk_countAvailable
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getAvailable(int32_t index);
|
||||
|
||||
/**
|
||||
|
@ -415,7 +427,7 @@ ubrk_getAvailable(int32_t index);
|
|||
* @see ubrk_getAvailable
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_countAvailable(void);
|
||||
|
||||
|
||||
|
@ -426,8 +438,9 @@ ubrk_countAvailable(void);
|
|||
* @param bi The break iterator to use.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
|
||||
|
||||
/**
|
||||
|
@ -437,6 +450,7 @@ ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
|
|||
* status, a default value of 0 is returned.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatus(UBreakIterator *bi);
|
||||
|
|
|
@ -654,12 +654,12 @@ void RBBIAPITest::TestWordStatus() {
|
|||
int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
|
||||
UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
|
||||
UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
|
||||
UBRK_WORD_HIRAKATA, UBRK_WORD_NONE, UBRK_WORD_HIRAKATA};
|
||||
UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA};
|
||||
|
||||
int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
|
||||
UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
|
||||
UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
|
||||
UBRK_WORD_HIRAKATA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_HIRAKATA_LIMIT};
|
||||
int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
|
||||
UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
|
||||
UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
|
||||
UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT};
|
||||
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue