diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp
index a0f92da619f..99c8e5dd5af 100644
--- a/icu4c/source/common/rbbirb.cpp
+++ b/icu4c/source/common/rbbirb.cpp
@@ -367,6 +367,9 @@ void RBBIRuleBuilder::optimizeTables() {
         fSafeRevTables->removeColumn(rightClass);
     }
+    fForwardTables->removeDuplicateStates();
+
+
 }
diff --git a/icu4c/source/common/rbbitblb.cpp b/icu4c/source/common/rbbitblb.cpp
index b71921a9dee..68e9ffb666d 100644
--- a/icu4c/source/common/rbbitblb.cpp
+++ b/icu4c/source/common/rbbitblb.cpp
@@ -1120,8 +1120,79 @@ void RBBITableBuilder::removeColumn(int32_t column) {
     }
 }
+/*
+ *  findDuplicateState.  Scans from firstState for two states whose table rows are equivalent.
+ */
+bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+
+    for (; firstState<numStates-1; ++firstState) {
+        RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
+        for (duplState=firstState+1; duplState<numStates; ++duplState) {
+            RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+            if (firstSD->fAccepting != duplSD->fAccepting ||
+                firstSD->fLookAhead != duplSD->fLookAhead ||
+                firstSD->fTagsIdx   != duplSD->fTagsIdx) {
+                continue;
+            }
+            bool rowsMatch = true;
+            for (int32_t col=0; col < numCols; ++col) {
+                int32_t firstVal = firstSD->fDtran->elementAti(col);
+                int32_t duplVal = duplSD->fDtran->elementAti(col);
+                if (!((firstVal == duplVal) ||    // same target, or both targets lie within the pair
+                        ((firstVal == firstState || firstVal == duplState) &&
+                        (duplVal  == firstState || duplVal  == duplState)))) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
+    U_ASSERT(keepState < duplState);
+    U_ASSERT(duplState < fDStates->size());
+
+    RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+    fDStates->removeElementAt(duplState);
+    delete duplSD;
+
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+    for (int32_t state=0; state<numStates; ++state) {
+        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+        for (int32_t col=0; col<numCols; col++) {
+            int32_t existingVal = sd->fDtran->elementAti(col);
+            int32_t newVal = existingVal;
+            if (existingVal == duplState) {
+                newVal = keepState;            // redirect transitions into the removed state
+            } else if (existingVal > duplState) {
+                newVal = existingVal - 1;      // later states moved down by one row
+            }
+            sd->fDtran->setElementAt(newVal, col);
+        }
+    }
+}
+/*
+ *  RemoveDuplicateStates.  Merges equivalent states until no duplicate pair remains.
+ */
+void RBBITableBuilder::removeDuplicateStates() {
+    int32_t firstState = 0;
+    int32_t duplicateState = 0;
+    while (findDuplicateState(firstState, duplicateState)) {
+        // No tracing to stdout from library code; the next search resumes at firstState (in/out).
+        removeState(firstState, duplicateState);
+    }
+
+}
 //-----------------------------------------------------------------------------
 //
diff --git a/icu4c/source/common/rbbitblb.h b/icu4c/source/common/rbbitblb.h
index 375ed6edd27..09b57b5cf0f 100644
--- a/icu4c/source/common/rbbitblb.h
+++ b/icu4c/source/common/rbbitblb.h
@@ -59,7 +59,8 @@ public:
      */
     void removeColumn(int32_t column);
-
+    /** Check for, and remove duplicate states (table rows). */
+    void removeDuplicateStates();
 private:
@@ -83,6 +84,21 @@ private:
     void addRuleRootNodes(UVector *dest, RBBINode *node);
+    /** Find the next duplicate state. An iterator function.
+     *  @param firstState (in/out) begin looking at this state, return the first of the
+     *                    pair of duplicates.
+     *  @param duplicateState returns the duplicate state of firstState
+     *  @return true if a duplicate pair of states was found.
+     */
+    bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
+
+    /** Remove a duplicate state.
+     * @param keepState First of the duplicate pair. Keep it.
+     * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
+     *                  to refer to keepState instead.
+     */
+    void removeState(int32_t keepState, int32_t duplState);
+
     // Set functions for UVector.
// TODO: make a USet subclass of UVector diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index d60450b5a56..b7796cb7dda 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -539,6 +539,7 @@ typedef enum UErrorCode { U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp index 8f5791be160..5d6a0504ba6 100644 --- a/icu4c/source/common/utypes.cpp +++ b/icu4c/source/common/utypes.cpp @@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_UNDEFINED_KEYWORD", "U_DEFAULT_KEYWORD_MISSING", "U_DECIMAL_NUMBER_SYNTAX_ERROR", - "U_FORMAT_INEXACT_ERROR" + "U_FORMAT_INEXACT_ERROR", + "U_NUMBER_ARG_OUTOFBOUNDS_ERROR" }; static const char * const diff --git a/icu4c/source/i18n/dcfmtsym.cpp b/icu4c/source/i18n/dcfmtsym.cpp index d321a82f8a9..02c0e077f43 100644 --- a/icu4c/source/i18n/dcfmtsym.cpp +++ b/icu4c/source/i18n/dcfmtsym.cpp @@ -38,6 +38,7 @@ #include "uresimp.h" #include "ureslocs.h" #include "charstr.h" +#include "uassert.h" // ***************************************************************************** // class DecimalFormatSymbols @@ -165,6 +166,7 @@ DecimalFormatSymbols::operator=(const DecimalFormatSymbols& rhs) uprv_strcpy(actualLocale, rhs.actualLocale); fIsCustomCurrencySymbol = rhs.fIsCustomCurrencySymbol; fIsCustomIntlCurrencySymbol = rhs.fIsCustomIntlCurrencySymbol; + fCodePointZero = rhs.fCodePointZero; } return *this; 
}
@@ -196,6 +198,7 @@ DecimalFormatSymbols::operator==(const DecimalFormatSymbols& that) const
             return FALSE;
         }
     }
+    // No need to check fCodePointZero since it is based on fSymbols
     return locale == that.locale &&
         uprv_strcmp(validLocale, that.validLocale) == 0 &&
         uprv_strcmp(actualLocale, that.actualLocale) == 0;
@@ -433,6 +436,20 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status,
     // Let the monetary number separators equal the default number separators if necessary.
     sink.resolveMissingMonetarySeparators(fSymbols);
+    // Resolve codePointZero: keep it only if the ten digit symbols are single, consecutive code points.
+    const UnicodeString& stringZero = getConstDigitSymbol(0);
+    UChar32 tempCodePointZero = stringZero.char32At(0);
+    if (!u_isdigit(tempCodePointZero)) {
+        tempCodePointZero = -1;  // zero symbol is not a digit: the digit strings must be used directly
+    }
+    for (int32_t i=0; tempCodePointZero != -1 && i<=9; i++) {
+        const UnicodeString& stringDigit = getConstDigitSymbol(i);
+        if (stringDigit.char32At(0) != tempCodePointZero + i || stringDigit.countChar32() != 1) {
+            tempCodePointZero = -1;  // mismatch (i == 0 covers a multi-code-point zero); ends the loop
+        }
+    }
+    fCodePointZero = tempCodePointZero;
+
     // Obtain currency data from the currency API. This is strictly
     // for backward compatibility; we don't use DecimalFormatSymbols
     // for currency data anymore.
@@ -530,6 +547,8 @@ DecimalFormatSymbols::initialize() {
     fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents
     fIsCustomCurrencySymbol = FALSE;
     fIsCustomIntlCurrencySymbol = FALSE;
+    fCodePointZero = 0x30;
+    U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0));
 }
diff --git a/icu4c/source/i18n/number_integerwidth.cpp b/icu4c/source/i18n/number_integerwidth.cpp
index 10dacfc4acb..4a612273f5e 100644
--- a/icu4c/source/i18n/number_integerwidth.cpp
+++ b/icu4c/source/i18n/number_integerwidth.cpp
@@ -13,25 +13,28 @@ using namespace icu;
 using namespace icu::number;
 using namespace icu::number::impl;
-IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) {
+IntegerWidth::IntegerWidth(digits_t minInt, digits_t maxInt) {
     fUnion.minMaxInt.fMinInt = minInt;
     fUnion.minMaxInt.fMaxInt = maxInt;
 }
 IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) {
     if (minInt >= 0 && minInt <= kMaxIntFracSig) {
-        return {static_cast<int8_t>(minInt), -1};
+        return {static_cast<digits_t>(minInt), -1};
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) {
     if (fHasError) { return *this; } // No-op on error
-    if (maxInt >= 0 && maxInt <= kMaxIntFracSig) {
-        return {fUnion.minMaxInt.fMinInt, static_cast<int8_t>(maxInt)};
+    digits_t minInt = fUnion.minMaxInt.fMinInt;
+    if (maxInt >= 0 && maxInt <= kMaxIntFracSig && minInt <= maxInt) {
+        return {minInt, static_cast<digits_t>(maxInt)};
+    } else if (maxInt == -1) {  // -1 is passed through unchanged as the maximum
+        return {minInt, -1};
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
diff --git a/icu4c/source/i18n/number_notation.cpp b/icu4c/source/i18n/number_notation.cpp
index ff0cd9505de..f4ad333354d 100644
--- a/icu4c/source/i18n/number_notation.cpp
+++ b/icu4c/source/i18n/number_notation.cpp
@@ -54,13 +54,13 @@ Notation Notation::simple() {
 ScientificNotation
ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const {
-    if (minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig) {
+    if (minExponentDigits >= 1 && minExponentDigits <= kMaxIntFracSig) {
         ScientificSettings settings = fUnion.scientific;
-        settings.fMinExponentDigits = (int8_t) minExponentDigits;
+        settings.fMinExponentDigits = static_cast<digits_t>(minExponentDigits);
         NotationUnion union_ = {settings};
         return {NTN_SCIENTIFIC, union_};
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
diff --git a/icu4c/source/i18n/number_padding.cpp b/icu4c/source/i18n/number_padding.cpp
index a478af60541..b1db3490cd4 100644
--- a/icu4c/source/i18n/number_padding.cpp
+++ b/icu4c/source/i18n/number_padding.cpp
@@ -43,7 +43,7 @@ Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosit
     if (targetWidth >= 0) {
         return {cp, targetWidth, position};
     } else {
-        return {U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
diff --git a/icu4c/source/i18n/number_rounding.cpp b/icu4c/source/i18n/number_rounding.cpp
index 2f31727e994..fd4dafdf983 100644
--- a/icu4c/source/i18n/number_rounding.cpp
+++ b/icu4c/source/i18n/number_rounding.cpp
@@ -58,7 +58,7 @@ FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) {
     if (minMaxFractionPlaces >= 0 && minMaxFractionPlaces <= kMaxIntFracSig) {
         return constructFraction(minMaxFractionPlaces, minMaxFractionPlaces);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
@@ -66,7 +66,7 @@ FractionRounder Rounder::minFraction(int32_t minFractionPlaces) {
     if (minFractionPlaces >= 0 && minFractionPlaces <= kMaxIntFracSig) {
         return constructFraction(minFractionPlaces, -1);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
@@ -74,7 +74,7 @@ FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) {
if (maxFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig) {
         return constructFraction(0, maxFractionPlaces);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
@@ -83,40 +83,40 @@ FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFr
         minFractionPlaces <= maxFractionPlaces) {
         return constructFraction(minFractionPlaces, maxFractionPlaces);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) {
-    if (minMaxSignificantDigits >= 0 && minMaxSignificantDigits <= kMaxIntFracSig) {
+    if (minMaxSignificantDigits >= 1 && minMaxSignificantDigits <= kMaxIntFracSig) {
         return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 Rounder Rounder::minDigits(int32_t minSignificantDigits) {
-    if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
+    if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) {
         return constructSignificant(minSignificantDigits, -1);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 Rounder Rounder::maxDigits(int32_t maxSignificantDigits) {
-    if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
-        return constructSignificant(0, maxSignificantDigits);
+    if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) {
+        return constructSignificant(1, maxSignificantDigits);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) {
-    if (minSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig &&
+    if (minSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig &&
         minSignificantDigits <= maxSignificantDigits) {
         return constructSignificant(minSignificantDigits, maxSignificantDigits);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
@@ -124,7 +124,7 @@ IncrementRounder Rounder::increment(double roundingIncrement) {
     if (roundingIncrement > 0.0) {
         return constructIncrement(roundingIncrement, 0);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
@@ -139,19 +139,19 @@ Rounder Rounder::withMode(RoundingMode roundingMode) const {
 Rounder FractionRounder::withMinDigits(int32_t minSignificantDigits) const {
     if (fType == RND_ERROR) { return *this; } // no-op in error state
-    if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
+    if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) {
         return constructFractionSignificant(*this, minSignificantDigits, -1);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const {
     if (fType == RND_ERROR) { return *this; } // no-op in error state
-    if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
+    if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) {
         return constructFractionSignificant(*this, -1, maxSignificantDigits);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
@@ -185,14 +185,14 @@ Rounder IncrementRounder::withMinFraction(int32_t minFrac) const {
     if (minFrac >= 0 && minFrac <= kMaxIntFracSig) {
         return constructIncrement(fUnion.increment.fIncrement, minFrac);
     } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
     }
 }
 FractionRounder Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) {
     FractionSignificantSettings settings;
-    settings.fMinFrac = static_cast<int8_t> (minFrac);
-    settings.fMaxFrac = static_cast<int8_t> (maxFrac);
+    settings.fMinFrac = static_cast<digits_t>(minFrac);
+    settings.fMaxFrac = static_cast<digits_t>(maxFrac);
     settings.fMinSig = -1;
     settings.fMaxSig = -1;
     RounderUnion union_;
@@ -204,8 +204,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
     FractionSignificantSettings settings;
     settings.fMinFrac = -1;
     settings.fMaxFrac = -1;
-    settings.fMinSig = static_cast<int8_t>(minSig);
-    settings.fMaxSig = static_cast<int8_t>(maxSig);
+    settings.fMinSig = static_cast<digits_t>(minSig);
+    settings.fMaxSig = static_cast<digits_t>(maxSig);
     RounderUnion union_;
     union_.fracSig = settings;
     return {RND_SIGNIFICANT, union_, kDefaultMode};
@@ -214,8 +214,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
 Rounder
 Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) {
     FractionSignificantSettings settings = base.fUnion.fracSig;
-    settings.fMinSig = static_cast<int8_t>(minSig);
-    settings.fMaxSig = static_cast<int8_t>(maxSig);
+    settings.fMinSig = static_cast<digits_t>(minSig);
+    settings.fMaxSig = static_cast<digits_t>(maxSig);
     RounderUnion union_;
     union_.fracSig = settings;
     return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode};
@@ -224,7 +224,7 @@ Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSi
 IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) {
     IncrementSettings settings;
     settings.fIncrement = increment;
-    settings.fMinFrac = minFrac;
+    settings.fMinFrac = static_cast<digits_t>(minFrac);
     RounderUnion union_;
     union_.increment = settings;
     return {RND_INCREMENT, union_, kDefaultMode};
diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h
index e914ef71ac0..c01765e2cea 100644
--- a/icu4c/source/i18n/number_types.h
+++ b/icu4c/source/i18n/number_types.h
@@ -31,7 +31,7 @@ typedef UNumberFormatPadPosition PadPosition;
 typedef UNumberCompactStyle CompactStyle;
 // ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG
-static constexpr int32_t kMaxIntFracSig = 100;
+static constexpr int32_t kMaxIntFracSig = 999;
 // ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE
 static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_FOUND_HALFEVEN;
@@ -42,10 +42,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" ";
 // ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY
 static constexpr char16_t kDefaultCurrency[] = u"XXX";
-// FIXME: New error codes:
-static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-
 // Forward declarations:
 class Modifier;
diff --git a/icu4c/source/i18n/unicode/dcfmtsym.h b/icu4c/source/i18n/unicode/dcfmtsym.h
index 4dc6f950f29..86f5c198269 100644
--- a/icu4c/source/i18n/unicode/dcfmtsym.h
+++ b/icu4c/source/i18n/unicode/dcfmtsym.h
@@ -80,10 +80,6 @@ U_NAMESPACE_BEGIN
  * If you supply a pattern with multiple grouping characters, the interval
  * between the last one and the end of the integer is the one that is
  * used. So "#,##,###,####" == "######,####" == "##,####,####".
- *

- * This class only handles localized digits where the 10 digits are - * contiguous in Unicode, from 0 to 9. Other digits sets (such as - * superscripts) would need a different subclass. */ class U_I18N_API DecimalFormatSymbols : public UObject { public: @@ -396,6 +392,13 @@ public: inline UBool isCustomIntlCurrencySymbol() const { return fIsCustomIntlCurrencySymbol; } + + /** + * @internal For ICU use only + */ + inline UChar32 getCodePointZero() const { + return fCodePointZero; + } #endif /* U_HIDE_INTERNAL_API */ /** @@ -408,11 +411,24 @@ public: * * @param symbol Constant to indicate a number format symbol. * @return the format symbol by the param 'symbol' - * @internal + * @draft ICU 61 */ - inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const; + inline const UnicodeString& getConstSymbol(ENumberFormatSymbol symbol) const; #ifndef U_HIDE_INTERNAL_API + /** + * Returns the const UnicodeString reference, like getConstSymbol, + * corresponding to the digit with the given value. This is equivalent + * to accessing the symbol from getConstSymbol with the corresponding + * key, such as kZeroDigitSymbol or kOneDigitSymbol. + * + * @param digit The digit, an integer between 0 and 9 inclusive. + * If outside the range 0 to 9, the zero digit is returned. + * @return the format symbol for the given digit. + * @internal This API is currently for ICU use only. + */ + inline const UnicodeString& getConstDigitSymbol(int32_t digit) const; + /** * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API. * @internal @@ -444,6 +460,22 @@ private: */ UnicodeString fNoSymbol; + /** + * Dealing with code points is faster than dealing with strings when formatting. Because of + * this, we maintain a value containing the zero code point that is used whenever digitStrings + * represents a sequence of ten code points in order. + * + *

If the value stored here is positive, it means that the code point stored in this value + * corresponds to the digitStrings array, and codePointZero can be used instead of the + * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does + * *not* contain a sequence of code points, and it must be used directly. + * + *

It is assumed that codePointZero always shadows the value in digitStrings. codePointZero + * should never be set directly; rather, it should be updated only when digitStrings mutates. + * That is, the flow of information is digitStrings -> codePointZero, not the other way. + */ + UChar32 fCodePointZero; + Locale locale; char actualLocale[ULOC_FULLNAME_CAPACITY]; @@ -481,6 +513,17 @@ DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const { return *strPtr; } +inline const UnicodeString& DecimalFormatSymbols::getConstDigitSymbol(int32_t digit) const { + if (digit < 0 || digit > 9) { + digit = 0; + } + if (digit == 0) { + return fSymbols[kZeroDigitSymbol]; + } + ENumberFormatSymbol key = static_cast(kOneDigitSymbol + digit - 1); + return fSymbols[key]; +} + // ------------------------------------- inline void @@ -497,14 +540,20 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString // If the zero digit is being set to a known zero digit according to Unicode, // then we automatically set the corresponding 1-9 digits - if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) { + // Also record updates to fCodePointZero. Be conservative if in doubt. 
+ if (symbol == kZeroDigitSymbol) { UChar32 sym = value.char32At(0); - if ( u_charDigitValue(sym) == 0 ) { + if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) { + fCodePointZero = sym; for ( int8_t i = 1 ; i<= 9 ; i++ ) { sym++; fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym); } + } else { + fCodePointZero = -1; } + } else if (symbol >= kOneDigitSymbol && symbol <= kNineDigitSymbol) { + fCodePointZero = -1; } } diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index 4c4f542b4dd..ac852f27e8e 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -253,16 +253,17 @@ typedef enum UGroupingStrategy { } UGroupingStrategy; /** - * An enum declaring how to denote positive and negative numbers. Example outputs when formatting 123 and -123 in - * en-US: + * An enum declaring how to denote positive and negative numbers. Example outputs when formatting + * 123, 0, and -123 in en-US: * - *

*

    - *
  • AUTO: "123", "-123" - *
  • ALWAYS: "+123", "-123" - *
  • NEVER: "123", "123" - *
  • ACCOUNTING: "$123", "($123)" - *
  • ACCOUNTING_ALWAYS: "+$123", "($123)" + *
  • AUTO: "123", "0", and "-123" + *
  • ALWAYS: "+123", "+0", and "-123" + *
  • NEVER: "123", "0", and "123" + *
  • ACCOUNTING: "$123", "$0", and "($123)" + *
  • ACCOUNTING_ALWAYS: "+$123", "+$0", and "($123)" + *
  • EXCEPT_ZERO: "+123", "0", and "-123" + *
  • ACCOUNTING_EXCEPT_ZERO: "+$123", "$0", and "($123)" *
* *

@@ -394,6 +395,21 @@ class IntegerWidth; namespace impl { +/** + * Datatype for minimum/maximum fraction digits. Must be able to hold kMaxIntFracSig. + * + * @internal + */ +typedef int16_t digits_t; + +/** + * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built + * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path. + * + * @internal + */ +static constexpr int32_t DEFAULT_THRESHOLD = 3; + // Forward declarations: class Padder; struct MacroProps; @@ -577,7 +593,7 @@ class U_I18N_API Notation : public UMemory { struct ScientificSettings { int8_t fEngineeringInterval; bool fRequireMinInt; - int8_t fMinExponentDigits; + impl::digits_t fMinExponentDigits; UNumberSignDisplay fExponentSignDisplay; } scientific; @@ -892,14 +908,14 @@ class U_I18N_API Rounder : public UMemory { union RounderUnion { struct FractionSignificantSettings { // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT - int8_t fMinFrac; - int8_t fMaxFrac; - int8_t fMinSig; - int8_t fMaxSig; + impl::digits_t fMinFrac; + impl::digits_t fMaxFrac; + impl::digits_t fMinSig; + impl::digits_t fMaxSig; } fracSig; struct IncrementSettings { double fIncrement; - int32_t fMinFrac; + impl::digits_t fMinFrac; } increment; // For RND_INCREMENT UCurrencyUsage currencyUsage; // For RND_CURRENCY UErrorCode errorCode; // For RND_ERROR @@ -1153,7 +1169,8 @@ class U_I18N_API IntegerWidth : public UMemory { * For example, with maxInt=3, the number 1234 will get printed as "234". * * @param maxInt - * The maximum number of places before the decimal separator. + * The maximum number of places before the decimal separator. maxInt == -1 means no + * truncation. * @return An IntegerWidth for passing to the NumberFormatter integerWidth() setter. 
* @draft ICU 60 * @see NumberFormatter @@ -1163,14 +1180,14 @@ class U_I18N_API IntegerWidth : public UMemory { private: union { struct { - int8_t fMinInt; - int8_t fMaxInt; + impl::digits_t fMinInt; + impl::digits_t fMaxInt; } minMaxInt; UErrorCode errorCode; } fUnion; bool fHasError = false; - IntegerWidth(int8_t minInt, int8_t maxInt); + IntegerWidth(impl::digits_t minInt, impl::digits_t maxInt); IntegerWidth(UErrorCode errorCode) { // NOLINT fUnion.errorCode = errorCode; @@ -1205,14 +1222,6 @@ class U_I18N_API IntegerWidth : public UMemory { namespace impl { -/** - * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built - * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path. - * - * @internal - */ -static constexpr int32_t DEFAULT_THRESHOLD = 3; - /** @internal */ class U_I18N_API SymbolsWrapper : public UMemory { public: diff --git a/icu4c/source/i18n/unicode/utrans.h b/icu4c/source/i18n/unicode/utrans.h index d0f05cf2b60..697681aef85 100644 --- a/icu4c/source/i18n/unicode/utrans.h +++ b/icu4c/source/i18n/unicode/utrans.h @@ -382,7 +382,7 @@ utrans_openIDs(UErrorCode *pErrorCode); U_STABLE void U_EXPORT2 utrans_trans(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, int32_t start, int32_t* limit, UErrorCode* status); @@ -433,7 +433,7 @@ utrans_trans(const UTransliterator* trans, U_STABLE void U_EXPORT2 utrans_transIncremental(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, UTransPosition* pos, UErrorCode* status); diff --git a/icu4c/source/i18n/utrans.cpp b/icu4c/source/i18n/utrans.cpp index 5124833ac33..29013ead125 100644 --- a/icu4c/source/i18n/utrans.cpp +++ b/icu4c/source/i18n/utrans.cpp @@ -41,12 +41,12 @@ U_NAMESPACE_BEGIN class ReplaceableGlue : public Replaceable { UReplaceable *rep; - UReplaceableCallbacks 
*func; + const UReplaceableCallbacks *func; public: ReplaceableGlue(UReplaceable *replaceable, - UReplaceableCallbacks *funcCallback); + const UReplaceableCallbacks *funcCallback); virtual ~ReplaceableGlue(); @@ -88,7 +88,7 @@ protected: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, - UReplaceableCallbacks *funcCallback) + const UReplaceableCallbacks *funcCallback) : Replaceable() { this->rep = replaceable; @@ -398,7 +398,7 @@ utrans_openIDs(UErrorCode *pErrorCode) { U_CAPI void U_EXPORT2 utrans_trans(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, int32_t start, int32_t* limit, UErrorCode* status) { @@ -418,7 +418,7 @@ utrans_trans(const UTransliterator* trans, U_CAPI void U_EXPORT2 utrans_transIncremental(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, UTransPosition* pos, UErrorCode* status) { diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index 5edf872d3b7..c45913796a4 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -2030,6 +2030,25 @@ UBool IntlTest::assertEquals(const char* message, return TRUE; } + +UBool IntlTest::assertEquals(const char* message, + UErrorCode expected, + UErrorCode actual) { + if (expected != actual) { + errln((UnicodeString)"FAIL: " + message + "; got " + + u_errorName(actual) + + "; expected " + u_errorName(expected)); + return FALSE; + } +#ifdef VERBOSE_ASSERTIONS + else { + logln((UnicodeString)"Ok: " + message + "; got " + u_errorName(actual)); + } +#endif + return TRUE; +} + + #if !UCONFIG_NO_FORMATTING UBool IntlTest::assertEquals(const char* message, const Formattable& expected, @@ -2105,6 +2124,16 @@ UBool IntlTest::assertEquals(const UnicodeString& message, int64_t actual) { return 
assertEquals(extractToAssertBuf(message), expected, actual); } +UBool IntlTest::assertEquals(const UnicodeString& message, + double expected, + double actual) { + return assertEquals(extractToAssertBuf(message), expected, actual); +} +UBool IntlTest::assertEquals(const UnicodeString& message, + UErrorCode expected, + UErrorCode actual) { + return assertEquals(extractToAssertBuf(message), expected, actual); +} #if !UCONFIG_NO_FORMATTING UBool IntlTest::assertEquals(const UnicodeString& message, diff --git a/icu4c/source/test/intltest/intltest.h b/icu4c/source/test/intltest/intltest.h index 1f7c80d4794..08765b707d0 100644 --- a/icu4c/source/test/intltest/intltest.h +++ b/icu4c/source/test/intltest/intltest.h @@ -289,13 +289,12 @@ public: UBool assertSuccess(const char* message, UErrorCode ec, UBool possibleDataError=FALSE, const char *file=NULL, int line=0); UBool assertEquals(const char* message, const UnicodeString& expected, const UnicodeString& actual, UBool possibleDataError=FALSE); - UBool assertEquals(const char* message, const char* expected, - const char* actual); - UBool assertEquals(const char* message, UBool expected, - UBool actual); + UBool assertEquals(const char* message, const char* expected, const char* actual); + UBool assertEquals(const char* message, UBool expected, UBool actual); UBool assertEquals(const char* message, int32_t expected, int32_t actual); UBool assertEquals(const char* message, int64_t expected, int64_t actual); UBool assertEquals(const char* message, double expected, double actual); + UBool assertEquals(const char* message, UErrorCode expected, UErrorCode actual); #if !UCONFIG_NO_FORMATTING UBool assertEquals(const char* message, const Formattable& expected, const Formattable& actual, UBool possibleDataError=FALSE); @@ -307,11 +306,12 @@ public: UBool assertSuccess(const UnicodeString& message, UErrorCode ec); UBool assertEquals(const UnicodeString& message, const UnicodeString& expected, const UnicodeString& actual, UBool 
possibleDataError=FALSE); - UBool assertEquals(const UnicodeString& message, const char* expected, - const char* actual); + UBool assertEquals(const UnicodeString& message, const char* expected, const char* actual); UBool assertEquals(const UnicodeString& message, UBool expected, UBool actual); UBool assertEquals(const UnicodeString& message, int32_t expected, int32_t actual); UBool assertEquals(const UnicodeString& message, int64_t expected, int64_t actual); + UBool assertEquals(const UnicodeString& message, double expected, double actual); + UBool assertEquals(const UnicodeString& message, UErrorCode expected, UErrorCode actual); virtual void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); // overide ! diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 9d4ffb7cef0..5b4030a9494 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -63,6 +63,7 @@ class NumberFormatterApiTest : public IntlTest { void locale(); void formatTypes(); void errors(); + void validRanges(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 62db705eac7..2d625877f30 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -76,6 +76,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha TESTCASE_AUTO(locale); TESTCASE_AUTO(formatTypes); TESTCASE_AUTO(errors); + TESTCASE_AUTO(validRanges); TESTCASE_AUTO_END; } @@ -1748,27 +1749,83 @@ void NumberFormatterApiTest::errors() { UErrorCode status2 = U_ZERO_ERROR; FormattedNumber fn = lnf.formatInt(1, status1); assertEquals( - "Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal", - U_ILLEGAL_ARGUMENT_ERROR, - status1); + "Should fail since rounder is not legal", + (UBool) 
TRUE, + (UBool) U_FAILURE(status1)); FieldPosition fp; fn.populateFieldPosition(fp, status2); assertEquals( - "Should fail with U_ILLEGAL_ARGUMENT_ERROR on terminal method", - U_ILLEGAL_ARGUMENT_ERROR, - status2); + "Should fail on terminal method", + (UBool) TRUE, + (UBool) U_FAILURE(status2)); } { UErrorCode status = U_ZERO_ERROR; lnf.copyErrorTo(status); assertEquals( - "Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal", - U_ILLEGAL_ARGUMENT_ERROR, - status); + "Should fail since rounder is not legal", + (UBool) TRUE, + (UBool) U_FAILURE(status)); } } +void NumberFormatterApiTest::validRanges() { + +#define EXPECTED_MAX_INT_FRAC_SIG 999 + +#define VALID_RANGE_ASSERT(status, method, lowerBound, argument) { \ + UErrorCode expectedStatus = ((lowerBound <= argument) && (argument <= EXPECTED_MAX_INT_FRAC_SIG)) \ + ? U_ZERO_ERROR \ + : U_NUMBER_ARG_OUTOFBOUNDS_ERROR; \ + assertEquals( \ + UnicodeString(u"Incorrect status for " #method " on input ") \ + + Int64ToUnicodeString(argument), \ + expectedStatus, \ + status); \ +} + +#define VALID_RANGE_ONEARG(setting, method, lowerBound) { \ + for (int32_t argument = -2; argument <= EXPECTED_MAX_INT_FRAC_SIG + 2; argument++) { \ + UErrorCode status = U_ZERO_ERROR; \ + NumberFormatter::with().setting(method(argument)).copyErrorTo(status); \ + VALID_RANGE_ASSERT(status, method, lowerBound, argument); \ + } \ +} + +#define VALID_RANGE_TWOARGS(setting, method, lowerBound) { \ + for (int32_t argument = -2; argument <= EXPECTED_MAX_INT_FRAC_SIG + 2; argument++) { \ + UErrorCode status = U_ZERO_ERROR; \ + /* Pass EXPECTED_MAX_INT_FRAC_SIG as the second argument so arg1 <= arg2 in expected cases */ \ + NumberFormatter::with().setting(method(argument, EXPECTED_MAX_INT_FRAC_SIG)).copyErrorTo(status); \ + VALID_RANGE_ASSERT(status, method, lowerBound, argument); \ + status = U_ZERO_ERROR; \ + /* Pass lowerBound as the first argument so arg1 <= arg2 in expected cases */ \ + 
NumberFormatter::with().setting(method(lowerBound, argument)).copyErrorTo(status); \ + VALID_RANGE_ASSERT(status, method, lowerBound, argument); \ + /* Check that first argument must be less than or equal to second argument */ \ + NumberFormatter::with().setting(method(argument, argument - 1)).copyErrorTo(status); \ + assertEquals("Incorrect status for " #method " on max < min input", \ + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, \ + status); \ + } \ +} + + VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction, 0); + VALID_RANGE_ONEARG(rounding, Rounder::minFraction, 0); + VALID_RANGE_ONEARG(rounding, Rounder::maxFraction, 0); + VALID_RANGE_TWOARGS(rounding, Rounder::minMaxFraction, 0); + VALID_RANGE_ONEARG(rounding, Rounder::fixedDigits, 1); + VALID_RANGE_ONEARG(rounding, Rounder::minDigits, 1); + VALID_RANGE_ONEARG(rounding, Rounder::maxDigits, 1); + VALID_RANGE_TWOARGS(rounding, Rounder::minMaxDigits, 1); + VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction(1).withMinDigits, 1); + VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction(1).withMaxDigits, 1); + VALID_RANGE_ONEARG(notation, Notation::scientific().withMinExponentDigits, 1); + VALID_RANGE_ONEARG(integerWidth, IntegerWidth::zeroFillTo, 0); + VALID_RANGE_ONEARG(integerWidth, IntegerWidth::zeroFillTo(0).truncateAt, -1); +} + void NumberFormatterApiTest::assertFormatDescending(const UnicodeString &message, const UnlocalizedNumberFormatter &f, diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index c65859b873c..78b1029cf28 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -8923,20 +8923,23 @@ void NumberFormatTest::checkExceptionIssue11735() { } void NumberFormatTest::Test11035_FormatCurrencyAmount() { - UErrorCode status; + UErrorCode status = U_ZERO_ERROR; double amount = 12345.67; const char16_t* expected = u"12,345$67 ​"; // Test two ways to set a currency via API Locale loc1 = Locale("pt_PT"); - NumberFormat* fmt1 = 
NumberFormat::createCurrencyInstance(loc1, status); + LocalPointer fmt1(NumberFormat::createCurrencyInstance(loc1, status)); + assertSuccess("Creating fmt1", status); fmt1->setCurrency(u"PTE", status); + assertSuccess("Setting currency on fmt1", status); UnicodeString actualSetCurrency; fmt1->format(amount, actualSetCurrency); Locale loc2 = Locale("pt_PT@currency=PTE"); - NumberFormat* fmt2 = NumberFormat::createCurrencyInstance(loc2, status); + LocalPointer fmt2(NumberFormat::createCurrencyInstance(loc2, status)); + assertSuccess("Creating fmt2", status); UnicodeString actualLocaleString; fmt2->format(amount, actualLocaleString); diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index b28723f4564..fd150617a79 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -4469,16 +4469,15 @@ void RBBITest::TestTableRedundancies() { "!!forward; \n" "($s0 | '?')*; \n" "($s1 | $s2 | $s3)*; \n" }; - RuleBasedBreakIterator *lbi = + + RuleBasedBreakIterator *lbi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); //lbi->dumpTables(); - rules = lbi->getRules(); + UnicodeString lbRules = lbi->getRules(); delete lbi; UParseError pe {}; - RuleBasedBreakIterator *bi = - // (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); - new RuleBasedBreakIterator(rules, pe, status); + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status); assertSuccess(WHERE, status); if (U_FAILURE(status)) return; bi->dumpTables(); diff --git a/icu4c/source/test/intltest/tsdcfmsy.cpp b/icu4c/source/test/intltest/tsdcfmsy.cpp index 90198e070f4..1ed6d760e67 100644 --- a/icu4c/source/test/intltest/tsdcfmsy.cpp +++ b/icu4c/source/test/intltest/tsdcfmsy.cpp @@ -23,6 +23,7 @@ void IntlTestDecimalFormatSymbols::runIndexedTest( int32_t index, UBool exec, co TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(testSymbols); 
TESTCASE_AUTO(testLastResortData); + TESTCASE_AUTO(testDigitSymbols); TESTCASE_AUTO(testNumberingSystem); TESTCASE_AUTO_END; } @@ -249,6 +250,102 @@ void IntlTestDecimalFormatSymbols::testLastResortData() { Verify(1234567.25, "#,##0.##", *lastResort, "1,234,567.25"); } +void IntlTestDecimalFormatSymbols::testDigitSymbols() { + // This test does more in ICU4J than in ICU4C right now. + // In ICU4C, it is basically just a test for codePointZero and getConstDigitSymbol. + UChar defZero = u'0'; + UChar32 osmanyaZero = U'\U000104A0'; + static const UChar* osmanyaDigitStrings[] = { + u"\U000104A0", u"\U000104A1", u"\U000104A2", u"\U000104A3", u"\U000104A4", + u"\U000104A5", u"\U000104A6", u"\U000104A7", u"\U000104A8", u"\U000104A9" + }; + + IcuTestErrorCode status(*this, "testDigitSymbols()"); + DecimalFormatSymbols symbols(Locale("en"), status); + + if (defZero != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be ASCII 0"); + } + for (int32_t i=0; i<=9; i++) { + assertEquals(UnicodeString("i. ASCII Digit at index ") + Int64ToUnicodeString(i), + UnicodeString(u'0' + i), + symbols.getConstDigitSymbol(i)); + } + + for (int32_t i=0; i<=9; i++) { + DecimalFormatSymbols::ENumberFormatSymbol key = + i == 0 + ? DecimalFormatSymbols::kZeroDigitSymbol + : static_cast + (DecimalFormatSymbols::kOneDigitSymbol + i - 1); + symbols.setSymbol(key, UnicodeString(osmanyaDigitStrings[i]), FALSE); + } + // NOTE: in ICU4J, the calculation of codePointZero is smarter; + // in ICU4C, it is more conservative and is only set if propogateDigits is true. + if (-1 != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be invalid"); + } + for (int32_t i=0; i<=9; i++) { + assertEquals(UnicodeString("ii. 
Osmanya digit at index ") + Int64ToUnicodeString(i), + UnicodeString(osmanyaDigitStrings[i]), + symbols.getConstDigitSymbol(i)); + } + + // Check Osmanya codePointZero + symbols.setSymbol( + DecimalFormatSymbols::kZeroDigitSymbol, + UnicodeString(osmanyaDigitStrings[0]), TRUE); + if (osmanyaZero != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be Osmanya code point zero"); + } + for (int32_t i=0; i<=9; i++) { + assertEquals(UnicodeString("iii. Osmanya digit at index ") + Int64ToUnicodeString(i), + UnicodeString(osmanyaDigitStrings[i]), + symbols.getConstDigitSymbol(i)); + } + + // Check after copy + DecimalFormatSymbols copy(symbols); + if (osmanyaZero != copy.getCodePointZero()) { + errln("ERROR: Code point zero be Osmanya code point zero"); + } + for (int32_t i=0; i<=9; i++) { + assertEquals(UnicodeString("iv. After copy at index ") + Int64ToUnicodeString(i), + UnicodeString(osmanyaDigitStrings[i]), + copy.getConstDigitSymbol(i)); + } + + // Check when loaded from resource bundle + DecimalFormatSymbols fromData(Locale("en@numbers=osma"), status); + if (osmanyaZero != fromData.getCodePointZero()) { + errln("ERROR: Code point zero be Osmanya code point zero"); + } + for (int32_t i=0; i<=9; i++) { + assertEquals(UnicodeString("v. Resource bundle at index ") + Int64ToUnicodeString(i), + UnicodeString(osmanyaDigitStrings[i]), + fromData.getConstDigitSymbol(i)); + } + + // Setting a digit somewhere in the middle should invalidate codePointZero + symbols.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"foo", FALSE); + if (-1 != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be invalid"); + } + + // Reset digits to Latin + symbols.setSymbol( + DecimalFormatSymbols::kZeroDigitSymbol, + UnicodeString(defZero)); + if (defZero != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be ASCII 0"); + } + for (int32_t i=0; i<=9; i++) { + assertEquals(UnicodeString("vi. 
ASCII Digit at index ") + Int64ToUnicodeString(i), + UnicodeString(u'0' + i), + symbols.getConstDigitSymbol(i)); + } +} + void IntlTestDecimalFormatSymbols::testNumberingSystem() { IcuTestErrorCode errorCode(*this, "testNumberingSystem"); struct testcase { diff --git a/icu4c/source/test/intltest/tsdcfmsy.h b/icu4c/source/test/intltest/tsdcfmsy.h index 1fd1dfdfba3..1922941b847 100644 --- a/icu4c/source/test/intltest/tsdcfmsy.h +++ b/icu4c/source/test/intltest/tsdcfmsy.h @@ -28,6 +28,7 @@ private: */ void testSymbols(/*char *par*/); void testLastResortData(); + void testDigitSymbols(); void testNumberingSystem(); /** helper functions**/ diff --git a/icu4c/source/tools/escapesrc/escapesrc.cpp b/icu4c/source/tools/escapesrc/escapesrc.cpp index 5e9648476e9..53f6a40db48 100644 --- a/icu4c/source/tools/escapesrc/escapesrc.cpp +++ b/icu4c/source/tools/escapesrc/escapesrc.cpp @@ -4,39 +4,76 @@ #include #include #include -#include #include #include #include #include -// with caution: +// Include this even though we aren't linking against it. #include "unicode/utf8.h" +// Include this here, to avoid needing to compile and link part of common lib +// (bootstrapping problem) +#include "utf_impl.cpp" + +/** + * What is this? + * or even: + * what IS this?? + * + * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code + * in utf-8 into.. something else. Something consumable by certain compilers (Solaris, xlC) + * which aren't quite there. + * + * - u"" or u'' gets converted to u"\uNNNN" or u'\uNNNN' + * - u8"" gets converted to "\xAA\xBB\xCC\xDD" etc. + * - if the system is EBCDIC-based, well, that's taken into account. + * + * Usage: + * escapesrc infile.cpp outfile.cpp + * Normally this is invoked by the build stage, with a rule such as: + * + * _%.cpp: $(srcdir)/%.cpp + * @$(BINDIR)/escapesrc$(EXEEXT) $< $@ + * %.o: _%.cpp + * $(COMPILE.cc) ... $@ $< + * + * Naturally, 'escapesrc' has to be excluded from said build rule. 
+ + */ + + static const char kSPACE = 0x20, kTAB = 0x09, kLF = 0x0A, kCR = 0x0D; - // kHASH = 0x23, - // kSLASH = 0x2f, - // kSTAR = 0x2A, +// This contains a codepage and ISO 14882:1998 illegality table. +// Use "make gen-table" to rebuild it. # include "cptbl.h" +// For convenience # define cp1047_to_8859(c) cp1047_8859_1[c] +// Our app's name std::string prog; +/** + * Give the usual 1-line documentation and exit + */ void usage() { fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str()); } - +/** + * Delete the output file (if any) + * We want to delete even if we didn't generate, because it might be stale. + */ int cleanup(const std::string &outfile) { const char *outstr = outfile.c_str(); if(outstr && *outstr) { - int rc = unlink(outstr); + int rc = std::remove(outstr); if(rc == 0) { fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr); return 0; @@ -44,7 +81,7 @@ int cleanup(const std::string &outfile) { if( errno == ENOENT ) { return 0; // File did not exist - no error. } else { - perror("unlink"); + perror("std::remove"); return 1; } } @@ -52,16 +89,12 @@ int cleanup(const std::string &outfile) { return 0; } -// inline bool hasNonAscii(const char *line, size_t len) { -// const unsigned char *uline = reinterpret_cast(line); -// for(size_t i=0;i 0x7F) { -// return true; -// } -// } -// return false; -// } - +/** + * Skip across any known whitespace. 
+ * @param p startpoint + * @param e limit + * @return first non-whitespace char + */ inline const char *skipws(const char *p, const char *e) { for(;p0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { @@ -345,6 +360,12 @@ bool fixLine(int /*no*/, std::string &linestr) { return false; } +/** + * Convert a whole file + * @param infile + * @param outfile + * @return 1 on err, 0 otherwise + */ int convert(const std::string &infile, const std::string &outfile) { fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str()); @@ -386,6 +407,9 @@ int convert(const std::string &infile, const std::string &outfile) { return 0; } +/** + * Main function + */ int main(int argc, const char *argv[]) { prog = argv[0]; @@ -399,6 +423,3 @@ int main(int argc, const char *argv[]) { return convert(infile, outfile); } - - -#include "utf_impl.cpp"