ICU-21578 Merge maint/maint-69 into main

This commit is contained in:
Shane F. Carr 2021-04-08 11:54:56 -05:00 committed by GitHub
commit 2ea794d35b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 192 additions and 104 deletions

View file

@ -14,6 +14,8 @@
* created by: Markus W. Scherer
*/
#include <cstdlib>
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "charstr.h"

View file

@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
}
Edits &Edits::operator=(const Edits &other) {
if (this == &other) { return *this; } // self-assignment: no-op
length = other.length;
delta = other.delta;
numChanges = other.numChanges;

View file

@ -20,6 +20,7 @@
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
#include "uvector.h"
#include "cmemory.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
@ -139,13 +140,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
public:
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
virtual ~SimpleFilteredSentenceBreakData();
// Takes one more reference to this shared data object: bumps refcount with
// umtx_atomic_inc() and returns this object.
SimpleFilteredSentenceBreakData *incr() {
umtx_atomic_inc(&refcount);
return this;
}
// Releases one reference to this shared data object.
// The counter is decremented with umtx_atomic_dec(); when the resulting count
// is zero or less, the object deletes itself.
// Always returns a null pointer (presumably so a caller can overwrite its own
// pointer with the result -- confirm against the call sites).
SimpleFilteredSentenceBreakData *decr() {
    if(umtx_atomic_dec(&refcount) <= 0) {
        // Last reference released; *this must not be touched after this line.
        delete this;
    }
    return nullptr;
}
virtual ~SimpleFilteredSentenceBreakData();
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
int32_t refcount;
// Read-only access to the two shared tries.
// The has...() queries report whether the corresponding LocalPointer holds a trie.
// The get...() accessors dereference the pointer without checking it, so call the
// matching has...() first.
bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
private:
// These tries own their data arrays.
// They are shared and must therefore not be modified.
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
u_atomic_int32_t refcount;
};
// Out-of-line definition of the virtual destructor. The body is empty; the tries are
// held in LocalPointer members, which release them when the object is destroyed.
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
@ -244,7 +262,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
fDelegate(adopt)
{
// all set..
if (fData == nullptr) {
delete forwards;
delete backwards;
if (U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
}
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
@ -261,59 +285,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t bestValue = -1;
// loops while 'n' points to an exception.
utext_setNativeIndex(fText.getAlias(), n); // from n..
fData->fBackwardsTrie->reset();
UChar32 uch;
//if(debug2) u_printf(" n@ %d\n", n);
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
// TODO only do this the 1st time?
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
} else {
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
uch = utext_next32(fText.getAlias());
utext_next32(fText.getAlias());
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
}
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = fData->fBackwardsTrie->getValue();
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
{
// Do not modify the shared trie!
UCharsTrie iter(fData->getBackwardsTrie());
UChar32 uch;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
UStringTrieResult r = iter.nextForCodePoint(uch);
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = iter.getValue();
}
if(!USTRINGTRIE_HAS_NEXT(r)) {
break;
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
}
}
if(USTRINGTRIE_MATCHES(r)) { // exact match?
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
bestValue = fData->fBackwardsTrie->getValue();
bestPosn = utext_getNativeIndex(fText.getAlias());
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
}
//if(bestValue >= 0) {
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//}
if(bestPosn>=0) {
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
//int32_t bestValue = fBackwardsTrie->getValue();
//int32_t bestValue = iter.getValue();
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
if(bestValue == kMATCH) { // exact match!
//if(debug2) u_printf(" exact backward match\n");
return kExceptionHere; // See if the next is another exception.
} else if(bestValue == kPARTIAL
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
&& fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
//if(debug2) u_printf(" partial backward match\n");
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
// to see if it matches something going forward.
fData->fForwardsPartialTrie->reset();
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
// Do not modify the shared trie!
UCharsTrie iter(fData->getForwardsPartialTrie());
UChar32 uch;
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
}
if(USTRINGTRIE_MATCHES(rfwd)) {
@ -339,7 +366,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
if(n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@ -369,7 +396,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
if(n == 0 || n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@ -420,7 +447,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
UErrorCode status = U_ZERO_ERROR;
resetState(status);

View file

@ -1544,7 +1544,7 @@ AliasReplacer::replaceTransformedExtensions(
const char* str = transformedExtensions.data();
const char* tkey = ultag_getTKeyStart(str);
int32_t tlangLen = (tkey == str) ? 0 :
((tkey == nullptr) ? len : (tkey - str - 1));
((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
CharStringByteSink sink(&output);
if (tlangLen > 0) {
Locale tlang = LocaleBuilder()

View file

@ -651,7 +651,7 @@ ultag_getTKeyStart(const char *localeID) {
const char *result = localeID;
const char *sep;
while((sep = uprv_strchr(result, SEP)) != nullptr) {
if (_isTKey(result, sep - result)) {
if (_isTKey(result, static_cast<int32_t>(sep - result))) {
return result;
}
result = ++sep;

View file

@ -282,6 +282,7 @@ void CompoundTransliterator::freeTransliterators(void) {
CompoundTransliterator& CompoundTransliterator::operator=(
const CompoundTransliterator& t)
{
if (this == &t) { return *this; } // self-assignment: no-op
Transliterator::operator=(t);
int32_t i = 0;
UBool failed = FALSE;

View file

@ -450,6 +450,7 @@ DateFormatSymbols::copyData(const DateFormatSymbols& other) {
*/
DateFormatSymbols& DateFormatSymbols::operator=(const DateFormatSymbols& other)
{
if (this == &other) { return *this; } // self-assignment: no-op
dispose();
copyData(other);

View file

@ -154,7 +154,10 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
if (i > fString.fZero && prevIsSpan) {
int64_t si = cfpos.getInt64IterationContext() - 1;
U_ASSERT(si >= 0);
if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
int32_t previ = i - spanIndices[si].length;
U_ASSERT(previ >= fString.fZero);
Field prevField = fString.getFieldPtr()[previ];
if (prevField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
// Special handling for ULISTFMT_ELEMENT_FIELD
if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
fieldStart = i - fString.fZero - spanIndices[si].length;
@ -165,12 +168,13 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
fieldStart,
end);
return true;
} else {
prevIsSpan = false;
}
} else {
// Re-wind, since there may be multiple fields in the span.
i -= spanIndices[si].length;
U_ASSERT(i >= fString.fZero);
_field = fString.getFieldPtr()[i];
i = previ;
_field = prevField;
}
}
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.

View file

@ -20,22 +20,6 @@ class LongNameHandler;
}
} // namespace number
// Export an explicit template instantiation of the LocalPointer that is used as a
// data member of MeasureUnitImpl.
// (When building DLLs for Windows this is required.)
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
#if defined(_MSC_VER)
// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!=
#pragma warning(push)
#pragma warning(disable : 4661)
#endif
template class U_I18N_API LocalPointerBase<MeasureUnitImpl>;
template class U_I18N_API LocalPointer<MeasureUnitImpl>;
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#endif
static const char16_t kDefaultCurrency[] = u"XXX";
static const char kDefaultCurrency8[] = "XXX";
@ -194,9 +178,6 @@ struct MeasureUnitImplWithIndex;
template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>;
template class U_I18N_API MemoryPool<SingleUnitImpl, 8>;
template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>;
template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>;
template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>;
template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>;
#endif
/**
@ -330,6 +311,29 @@ struct U_I18N_API MeasureUnitImplWithIndex : public UMemory {
}
};
// Export explicit template instantiations of MaybeStackArray, MemoryPool and
// MaybeStackVector. This is required when building DLLs for Windows. (See
// datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>;
template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>;
template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>;
// Export an explicit template instantiation of the LocalPointer that is used as a
// data member of MeasureUnitImpl.
// (When building DLLs for Windows this is required.)
#if defined(_MSC_VER)
// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!=
#pragma warning(push)
#pragma warning(disable : 4661)
#endif
template class U_I18N_API LocalPointerBase<MeasureUnitImpl>;
template class U_I18N_API LocalPointer<MeasureUnitImpl>;
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#endif
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -38,7 +38,7 @@ namespace impl {
// Exported as U_I18N_API because it is a public member field of exported DecimalFormatProperties
// Using this wrapper is rather unfortunate, but is needed on Windows platforms in order to allow
// for DLL-exporting an fully specified template instantiation.
// for DLL-exporting a fully specified template instantiation.
class U_I18N_API CurrencyPluralInfoWrapper {
public:
LocalPointer<CurrencyPluralInfo> fPtr;
@ -52,7 +52,8 @@ public:
}
CurrencyPluralInfoWrapper& operator=(const CurrencyPluralInfoWrapper& other) {
if (!other.fPtr.isNull()) {
if (this != &other && // self-assignment: no-op
!other.fPtr.isNull()) {
fPtr.adoptInstead(new CurrencyPluralInfo(*other.fPtr));
}
return *this;

View file

@ -442,6 +442,7 @@ LocalizedNumberFormatter::LocalizedNumberFormatter(NFS<LNF>&& src) U_NOEXCEPT
}
LocalizedNumberFormatter& LocalizedNumberFormatter::operator=(const LNF& other) {
if (this == &other) { return *this; } // self-assignment: no-op
NFS<LNF>::operator=(static_cast<const NFS<LNF>&>(other));
UErrorCode localStatus = U_ZERO_ERROR; // Can't bubble up the error
lnfCopyHelper(other, localStatus);

View file

@ -5,6 +5,8 @@
#if !UCONFIG_NO_FORMATTING
#include <cstdlib>
#include "unicode/simpleformatter.h"
#include "unicode/ures.h"
#include "ureslocs.h"

View file

@ -46,6 +46,7 @@ Scale::Scale(const Scale& other)
}
Scale& Scale::operator=(const Scale& other) {
if (this == &other) { return *this; } // self-assignment: no-op
fMagnitude = other.fMagnitude;
if (other.fArbitrary != nullptr) {
UErrorCode localStatus = U_ZERO_ERROR;

View file

@ -34,6 +34,7 @@ StringProp::StringProp(const StringProp &other) : StringProp() {
// Copy assignment operator
StringProp &StringProp::operator=(const StringProp &other) {
if (this == &other) { return *this; } // self-assignment: no-op
fLength = 0;
fError = other.fError;
if (fValue != nullptr) {

View file

@ -245,6 +245,7 @@ LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(NFS<LNF>&& src) U_N
}
LocalizedNumberRangeFormatter& LocalizedNumberRangeFormatter::operator=(const LNF& other) {
if (this == &other) { return *this; } // self-assignment: no-op
NFS<LNF>::operator=(static_cast<const NFS<LNF>&>(other));
// Do not steal; just clear
delete fAtomicFormatter.exchange(nullptr);

View file

@ -274,6 +274,7 @@ OlsonTimeZone::OlsonTimeZone(const OlsonTimeZone& other) :
* Assignment operator
*/
OlsonTimeZone& OlsonTimeZone::operator=(const OlsonTimeZone& other) {
if (this == &other) { return *this; } // self-assignment: no-op
canonicalID = other.canonicalID;
transitionTimesPre32 = other.transitionTimesPre32;

View file

@ -184,7 +184,7 @@ StringSearch::clone() const {
// operator overloading ---------------------------------------------
StringSearch & StringSearch::operator=(const StringSearch &that)
{
if ((*this) != that) {
if (this != &that) {
UErrorCode status = U_ZERO_ERROR;
m_text_ = that.m_text_;
m_breakiterator_ = that.m_breakiterator_;

View file

@ -170,6 +170,7 @@ Transliterator* Transliterator::clone() const {
* Assignment operator.
*/
Transliterator& Transliterator::operator=(const Transliterator& other) {
if (this == &other) { return *this; } // self-assignment: no-op
ID = other.ID;
// NUL-terminate the ID string
ID.getTerminatedBuffer();

View file

@ -193,6 +193,7 @@ Win32DateFormat::~Win32DateFormat()
Win32DateFormat &Win32DateFormat::operator=(const Win32DateFormat &other)
{
if (this == &other) { return *this; } // self-assignment: no-op
// The following handles fCalendar
DateFormat::operator=(other);

View file

@ -268,6 +268,7 @@ Win32NumberFormat::~Win32NumberFormat()
Win32NumberFormat &Win32NumberFormat::operator=(const Win32NumberFormat &other)
{
if (this == &other) { return *this; } // self-assignment: no-op
NumberFormat::operator=(other);
this->fCurrency = other.fCurrency;

View file

@ -242,7 +242,7 @@ void IntlTestWithFieldPosition::checkMixedFormattedValue(
// Check nextPosition constrained over each category one at a time
for (int32_t category=0; category<UFIELD_CATEGORY_COUNT+1; category++) {
if (category == UFIELD_CATEGORY_COUNT+1) {
if (category == UFIELD_CATEGORY_COUNT) {
category = UFIELD_CATEGORY_LIST_SPAN;
}
cfpos.reset();

View file

@ -1209,6 +1209,7 @@ void NumberFormatterApiTest::unitArbitraryMeasureUnits() {
.unit(MeasureUnit::forIdentifier("pow4-mile", status))
.unitWidth(UNUM_UNIT_WIDTH_FULL_NAME)
.locale("en-ZA");
lnf.operator=(lnf); // self-assignment should be a no-op
lnf.formatInt(1, status);
status.expectErrorAndReset(U_RESOURCE_TYPE_MISMATCH);

View file

@ -149,6 +149,7 @@ void IntlTestDateFormatSymbols::TestGetSetSpecificItems()
dataerrln("ERROR: Couldn't create English DateFormatSymbols " + (UnicodeString)u_errorName(status));
return;
}
symbol->operator=(*symbol); // self-assignment should be a no-op
int32_t cntFmtAbbrev, cntFmtShort, cntStdAloneShort;
const UnicodeString * wdFmtAbbrev = symbol->getWeekdays(cntFmtAbbrev,DateFormatSymbols::FORMAT,DateFormatSymbols::ABBREVIATED);
const UnicodeString * wdFmtShort = symbol->getWeekdays(cntFmtShort,DateFormatSymbols::FORMAT,DateFormatSymbols::SHORT);

View file

@ -745,9 +745,9 @@ void TimeZoneFormatTest::RunAdoptDefaultThreadSafeTests(int32_t threadNumber) {
date += 6000 * i;
std::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createDefault());
status = U_ZERO_ERROR;
tz->getOffset(date, TRUE, rawOffset, dstOffset, status);
tz->getOffset(static_cast<UDate>(date), TRUE, rawOffset, dstOffset, status);
status = U_ZERO_ERROR;
tz->getOffset(date, FALSE, rawOffset, dstOffset, status);
tz->getOffset(static_cast<UDate>(date), FALSE, rawOffset, dstOffset, status);
}
}
}

View file

@ -1171,8 +1171,10 @@ void TimeZoneTest::TestCustomParse()
TimeZone *zone = TimeZone::createTimeZone(id);
UnicodeString itsID, temp;
if (dynamic_cast<OlsonTimeZone *>(zone) != NULL) {
OlsonTimeZone *ozone = dynamic_cast<OlsonTimeZone *>(zone);
if (ozone != nullptr) {
logln(id + " -> Olson time zone");
ozone->operator=(*ozone); // self-assignment should be a no-op
} else {
zone->getID(itsID);
int32_t ioffset = zone->getRawOffset()/1000;

View file

@ -62,6 +62,11 @@
<data>\
•Doctor with a D. •As in, Ph.D., you know.•</data>
# ICU-21459 logic error.
<locale en@ss=standard>
<sent>
<data>•on. •But after a day in the arena sun, the metal feels hot enough to blister my hands.•</data>
# same as root (unless some exceptions are added!)
<locale tfg@ss=standard>
<sent>

View file

@ -20,6 +20,14 @@ This is prerequisite for the icu layoutex (Paragraph Layout) project, which is s
cd icu4c/source
svn export https://github.com/behdad/icu-le-hb/trunk/src layout
(As an alternative to the above handling of layout engine header files, you can do the following:
1. In the Makefile in this directory, temporarily delete $(LEX) from the list of objects for LIBS
before running make install-header
2. After running make install-header, restore the deleted $(LEX) in the Makefile
3. Then when comparing the old urename.h to the newly generated one, copy all of the lines beginning
"#define pl_" from the old version to the new one.
- Peter E)
- Regenerate urename.h
cd icu4c/source/tools/genren

View file

@ -6,7 +6,7 @@
#*******************************************************************************
api.report.version = 69
api.report.prev.version = 68
release.file.ver = 69rc
api.doc.version = 69.1 Release Candidate
maven.pom.ver = 69.1-SNAPSHOT
release.file.ver = 69_1
api.doc.version = 69.1
maven.pom.ver = 69.1

View file

@ -19,6 +19,7 @@ import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.CharsTrie;
import com.ibm.icu.util.CharsTrieBuilder;
import com.ibm.icu.util.ICUCloneNotSupportedException;
import com.ibm.icu.util.StringTrieBuilder;
import com.ibm.icu.util.ULocale;
@ -72,8 +73,6 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
backwardsTrie.reset();
int uch;
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here??
// TODO only do this the 1st time?
@ -81,20 +80,17 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
uch = text.nextCodePoint();
}
BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;
while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and..
((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie
while ((uch = text.previousCodePoint()) >= 0) { // more to consume backwards
BytesTrie.Result r = backwardsTrie.nextForCodePoint(uch);
if (r.hasValue()) { // remember the best match so far
bestPosn = text.getIndex();
bestValue = backwardsTrie.getValue();
}
if (!r.hasNext()) {
break;
}
}
if (r.matches()) { // exact match?
bestValue = backwardsTrie.getValue();
bestPosn = text.getIndex();
}
backwardsTrie.reset(); // for equals() & hashCode()
if (bestPosn >= 0) {
if (bestValue == Builder.MATCH) { // exact match!
@ -110,6 +106,7 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
while ((uch = text.nextCodePoint()) != BreakIterator.DONE
&& ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) {
}
forwardsPartialTrie.reset(); // for equals() & hashCode()
if (rfwd.matches()) {
// Exception here
return true;
@ -186,18 +183,39 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
if (getClass() != obj.getClass())
return false;
SimpleFilteredSentenceBreakIterator other = (SimpleFilteredSentenceBreakIterator) obj;
return delegate.equals(other.delegate) && text.equals(other.text) && backwardsTrie.equals(other.backwardsTrie)
// TODO(ICU-21575): CharsTrie.equals() is not defined.
// Should compare the underlying data, and can then stop resetting after iteration.
return delegate.equals(other.delegate) && text.equals(other.text)
&& backwardsTrie.equals(other.backwardsTrie)
&& forwardsPartialTrie.equals(other.forwardsPartialTrie);
}
@Override
public int hashCode() {
return (forwardsPartialTrie.hashCode() * 39) + (backwardsTrie.hashCode() * 11) + delegate.hashCode();
// TODO(ICU-21575): CharsTrie.hashCode() is not defined.
return (forwardsPartialTrie.hashCode() * 39) + (backwardsTrie.hashCode() * 11)
+ delegate.hashCode();
}
/**
 * Returns a copy of this iterator. Each non-null member (delegate, text and the two
 * tries) is replaced in the copy by the result of its own clone() call, so the copy
 * does not reuse this iterator's member objects.
 *
 * @throws ICUCloneNotSupportedException if cloning one of the members fails
 */
@Override
public Object clone() {
    final SimpleFilteredSentenceBreakIterator copy =
            (SimpleFilteredSentenceBreakIterator) super.clone();
    try {
        // A null member stays null in the copy; everything else is cloned.
        copy.delegate = (delegate == null) ? null : (BreakIterator) delegate.clone();
        copy.text = (text == null) ? null : (UCharacterIterator) text.clone();
        copy.backwardsTrie = (backwardsTrie == null) ? null : backwardsTrie.clone();
        copy.forwardsPartialTrie =
                (forwardsPartialTrie == null) ? null : forwardsPartialTrie.clone();
    } catch (CloneNotSupportedException e) {
        throw new ICUCloneNotSupportedException(e);
    }
    return copy;
}
@ -273,7 +291,7 @@ public class SimpleFilteredSentenceBreakIterator extends BreakIterator {
/**
* filter set to store all exceptions
*/
private HashSet<CharSequence> filterSet = new HashSet<CharSequence>();
private HashSet<CharSequence> filterSet = new HashSet<>();
static final int PARTIAL = (1 << 0); // < partial - need to run through forward trie
static final int MATCH = (1 << 1); // < exact match - skip this one.

View file

@ -202,23 +202,13 @@ public class StringSegment implements CharSequence {
}
/**
* Equals any CharSequence with the same chars as this segment.
* Returns true if this segment contains the same characters as the other CharSequence.
*
* <p>
* This method does not perform case folding; if you want case-insensitive equality, use
* <p>This method does not perform case folding; if you want case-insensitive equality, use
* {@link #getCommonPrefixLength}.
*/
@Override
public boolean equals(Object other) {
if (!(other instanceof CharSequence))
return false;
return Utility.charSequenceEquals(this, (CharSequence) other);
}
/** Returns a hash code equivalent to calling .toString().hashCode() */
@Override
public int hashCode() {
return Utility.charSequenceHashCode(this);
public boolean contentEquals(CharSequence other) {
return Utility.charSequenceEquals(this, other);
}
/** Returns a string representation useful for debugging. */

View file

@ -1368,7 +1368,7 @@ class NumberSkeletonImpl {
/** @return Whether we successfully found and parsed a trailing zero option. */
private static boolean parseTrailingZeroOption(StringSegment segment, MacroProps macros) {
if (segment.equals("w")) {
if (segment.contentEquals("w")) {
macros.precision = macros.precision.trailingZeroDisplay(TrailingZeroDisplay.HIDE_IF_WHOLE);
return true;
}

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f668e5db6c9c9e8b2fcf5feaeb1bc16bb7b036466b0e045778ddd9dc00ad13a
size 13383795
oid sha256:8f02ab2967eaf73b6d28c8340d70b20d5f194f6c0ac24fe8464b25fd56763b04
size 13383786

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ecb74957c99d65a50729cd1d72e7e92e7254560381362980d73675d246281371
size 95105
oid sha256:da82185ad36c6b747848c409fff8661892c0ed5d5ebc0cdf8be9d29f4e3f65ef
size 95096

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:382f6e65ed9855715493eb48fb11e3bba62ff84754bf36fbe11370479a4c27bc
size 723620
oid sha256:26a032e0c9492cd986546eefb5ba54687598eb431caed531ccb00b12469421ca
size 726547

View file

@ -3,6 +3,8 @@
package com.ibm.icu.dev.test.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
@ -47,10 +49,15 @@ public class StringSegmentTest {
public void testCharAt() {
// A fresh segment must match the full sample string, both char-by-char and via contentEquals().
StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertCharSequenceEquals(SAMPLE_STRING, segment);
assertTrue(segment.contentEquals(SAMPLE_STRING));
// After moving the start forward by 3, the segment reads "radio 📻" and no longer equals the full sample.
segment.adjustOffset(3);
assertCharSequenceEquals("radio 📻", segment);
assertTrue(segment.contentEquals("radio 📻"));
assertFalse(segment.contentEquals(SAMPLE_STRING));
// After limiting the length to 5, only "radio" remains.
segment.setLength(5);
assertCharSequenceEquals("radio", segment);
assertTrue(segment.contentEquals("radio"));
assertFalse(segment.contentEquals(SAMPLE_STRING));
}
@Test

View file

@ -62,6 +62,11 @@
<data>\
•Doctor with a D. •As in, Ph.D., you know.•</data>
# ICU-21459 logic error.
<locale en@ss=standard>
<sent>
<data>•on. •But after a day in the arena sun, the metal feels hot enough to blister my hands.•</data>
# same as root (unless some exceptions are added!)
<locale tfg@ss=standard>
<sent>

View file

@ -83,7 +83,7 @@
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j-for-cldr</artifactId>
<version>69.1-SNAPSHOT-cldr-2021-03-09</version>
<version>69.1-SNAPSHOT-release-69-rc</version>
<!-- Note: see https://github.com/unicode-org/icu/packages/411079/versions
for the icu4j-for-cldr version tag to use -->
</dependency>