mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-10015 Dictionary Break, sync between ICU4C & J. Most changes for this ticket are in ICU4J.
X-SVN-Rev: 35115
This commit is contained in:
parent
53ababf5a7
commit
059f862c4a
3 changed files with 12 additions and 8 deletions
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2013, International Business Machines Corporation
|
||||
* Copyright (C) 2006-2014, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -49,6 +49,9 @@ DictionaryBreakEngine::findBreaks( UText *text,
|
|||
int32_t result = 0;
|
||||
|
||||
// Find the span of characters included in the set.
|
||||
// The span to break begins at the current position in the text, and
|
||||
// extends towards the start or end of the text, depending on 'reverse'.
|
||||
|
||||
int32_t start = (int32_t)utext_getNativeIndex(text);
|
||||
int32_t current;
|
||||
int32_t rangeStart;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2013, International Business Machines
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* dictionarydata.h
|
||||
|
@ -118,7 +118,7 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
|
|||
if (count < limit) {
|
||||
if (values != NULL) {
|
||||
values[count] = bt.getValue();
|
||||
}
|
||||
}
|
||||
lengths[count++] = numChars;
|
||||
}
|
||||
if (result == USTRINGTRIE_FINAL_VALUE) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2013 International Business Machines Corporation
|
||||
* Copyright (C) 1999-2014 International Business Machines Corporation
|
||||
* and others. All rights reserved.
|
||||
***************************************************************************
|
||||
*/
|
||||
|
@ -592,6 +592,7 @@ int32_t RuleBasedBreakIterator::next(void) {
|
|||
}
|
||||
|
||||
int32_t startPos = current();
|
||||
fDictionaryCharCount = 0;
|
||||
int32_t result = handleNext(fData->fForwardTable);
|
||||
if (fDictionaryCharCount > 0) {
|
||||
result = checkDictionary(startPos, result, FALSE);
|
||||
|
@ -646,7 +647,6 @@ int32_t RuleBasedBreakIterator::previous(void) {
|
|||
// break position before the current position (we back our internal
|
||||
// iterator up one step to prevent handlePrevious() from returning
|
||||
// the current position), but not necessarily the last one before
|
||||
|
||||
// where we started
|
||||
|
||||
int32_t start = current();
|
||||
|
@ -679,11 +679,11 @@ int32_t RuleBasedBreakIterator::previous(void) {
|
|||
// the result position that we are to return (in lastResult.) If
|
||||
// the backwards rules overshot and the above loop had to do two or more
|
||||
// next()s to move up to the desired return position, we will have a valid
|
||||
// tag value. But, if handlePrevious() took us to exactly the correct result positon,
|
||||
// tag value. But, if handlePrevious() took us to exactly the correct result position,
|
||||
// we won't have a tag value for that position, which is only set by handleNext().
|
||||
|
||||
// set the current iteration position to be the last break position
|
||||
// before where we started, and then return that value
|
||||
// Set the current iteration position to be the last break position
|
||||
// before where we started, and then return that value.
|
||||
utext_setNativeIndex(fText, lastResult);
|
||||
fLastRuleStatusIndex = lastTag; // for use by getRuleStatus()
|
||||
fLastStatusIndexValid = breakTagValid;
|
||||
|
@ -1703,6 +1703,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
// If we found breaks, build a new break cache. The first and last entries must
|
||||
// be the original starting and ending position.
|
||||
if (foundBreakCount > 0) {
|
||||
U_ASSERT(foundBreakCount == breaks.size());
|
||||
int32_t totalBreaks = foundBreakCount;
|
||||
if (startPos < breaks.elementAti(0)) {
|
||||
totalBreaks += 1;
|
||||
|
|
Loading…
Add table
Reference in a new issue