ICU-13569 rbbi, refresh from trunk, plus table state compaction, work in progress.

X-SVN-Rev: 40888
This commit is contained in:
Andy Heninger 2018-02-10 01:31:35 +00:00
parent 0e3d103630
commit 1036ed52e3
24 changed files with 542 additions and 166 deletions

View file

@ -367,6 +367,9 @@ void RBBIRuleBuilder::optimizeTables() {
fSafeRevTables->removeColumn(rightClass);
}
fForwardTables->removeDuplicateStates();
}

View file

@ -1120,8 +1120,79 @@ void RBBITableBuilder::removeColumn(int32_t column) {
}
}
/*
* findDuplicateState
*/
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (; firstState<numStates-1; ++firstState) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
for (duplState=firstState+1; duplState<numStates; ++duplState) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
if (firstSD->fAccepting != duplSD->fAccepting ||
firstSD->fLookAhead != duplSD->fLookAhead ||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
continue;
}
bool rowsMatch = true;
for (int32_t col=0; col < numCols; ++col) {
int32_t firstVal = firstSD->fDtran->elementAti(col);
int32_t duplVal = duplSD->fDtran->elementAti(col);
if (!((firstVal == duplVal) ||
((firstVal == firstState || firstVal == duplState) &&
(duplVal == firstState || duplVal == duplState)))) {
rowsMatch = false;
break;
}
}
if (rowsMatch) {
return true;
}
}
}
return false;
}
/*
 *  removeState
 *
 *  Delete the state duplState from the state table and fix up every remaining
 *  transition so that the table stays consistent:
 *    - transitions that targeted duplState are redirected to keepState;
 *    - transitions that targeted a state after duplState are decremented by one,
 *      because removing the row shifts all later states down by one index.
 *
 *  @param keepState  Index of the state that stays. Must be less than duplState.
 *  @param duplState  Index of the state to remove. Must be a valid index into fDStates.
 */
void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
    U_ASSERT(keepState < duplState);
    U_ASSERT(duplState < fDStates->size());

    // Take the descriptor out of the vector first, then delete it explicitly.
    RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
    fDStates->removeElementAt(duplState);
    delete duplSD;

    const int32_t numStates = fDStates->size();
    const int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
    for (int32_t state=0; state<numStates; ++state) {
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
        for (int32_t col=0; col<numCols; col++) {
            const int32_t existingVal = sd->fDtran->elementAti(col);
            int32_t newVal = existingVal;
            if (existingVal == duplState) {
                // Redirect references to the removed state. The value written back is newVal,
                // so that is the variable that has to receive keepState.
                newVal = keepState;
            } else if (existingVal > duplState) {
                // States after the removed one have moved down by one row.
                newVal = existingVal - 1;
            }
            sd->fDtran->setElementAt(newVal, col);
        }
    }
}
/*
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t firstState = 0;
int32_t duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
}
}
//-----------------------------------------------------------------------------
//

View file

@ -59,7 +59,8 @@ public:
*/
void removeColumn(int32_t column);
/** Check for, and remove duplicate states (table rows). */
void removeDuplicateStates();
private:
@ -83,6 +84,21 @@ private:
void addRuleRootNodes(UVector *dest, RBBINode *node);
/** Find the next duplicate state. An iterator function.
 * @param firstState (in/out) begin looking at this state, return the first of the
 * pair of duplicates.
 * @param duplicateState (out) returns the duplicate state of firstState
 * @return true if a duplicate pair of states was found.
 */
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
/** Remove a duplicate state.
 * @param keepState First of the duplicate pair. Keep it.
 * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
 * to refer to keepState instead.
 */
void removeState(int32_t keepState, int32_t duplState);
// Set functions for UVector.
// TODO: make a USet subclass of UVector

View file

@ -539,6 +539,7 @@ typedef enum UErrorCode {
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.

View file

@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_UNDEFINED_KEYWORD",
"U_DEFAULT_KEYWORD_MISSING",
"U_DECIMAL_NUMBER_SYNTAX_ERROR",
"U_FORMAT_INEXACT_ERROR"
"U_FORMAT_INEXACT_ERROR",
"U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
};
static const char * const

View file

@ -38,6 +38,7 @@
#include "uresimp.h"
#include "ureslocs.h"
#include "charstr.h"
#include "uassert.h"
// *****************************************************************************
// class DecimalFormatSymbols
@ -165,6 +166,7 @@ DecimalFormatSymbols::operator=(const DecimalFormatSymbols& rhs)
uprv_strcpy(actualLocale, rhs.actualLocale);
fIsCustomCurrencySymbol = rhs.fIsCustomCurrencySymbol;
fIsCustomIntlCurrencySymbol = rhs.fIsCustomIntlCurrencySymbol;
fCodePointZero = rhs.fCodePointZero;
}
return *this;
}
@ -196,6 +198,7 @@ DecimalFormatSymbols::operator==(const DecimalFormatSymbols& that) const
return FALSE;
}
}
// No need to check fCodePointZero since it is based on fSymbols
return locale == that.locale &&
uprv_strcmp(validLocale, that.validLocale) == 0 &&
uprv_strcmp(actualLocale, that.actualLocale) == 0;
@ -433,6 +436,20 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status,
// Let the monetary number separators equal the default number separators if necessary.
sink.resolveMissingMonetarySeparators(fSymbols);
// Resolve codePointZero.
// It may only hold a code point when all ten digit symbols are single code points
// forming a consecutive run that starts at a Unicode digit. In every other case,
// including a zero symbol that is empty, multi-code-point, or not a digit, it must
// be -1 so that the digit strings are used directly.
const UnicodeString& stringZero = getConstDigitSymbol(0);
UChar32 tempCodePointZero = -1;
if (stringZero.countChar32() == 1 && u_isdigit(stringZero.char32At(0))) {
    tempCodePointZero = stringZero.char32At(0);
    // Digit 0 was just validated; check that 1..9 follow it contiguously.
    for (int32_t i=1; i<=9; i++) {
        const UnicodeString& stringDigit = getConstDigitSymbol(i);
        if (stringDigit.countChar32() != 1 || stringDigit.char32At(0) != tempCodePointZero + i) {
            tempCodePointZero = -1;
            break;
        }
    }
}
fCodePointZero = tempCodePointZero;
// Obtain currency data from the currency API. This is strictly
// for backward compatibility; we don't use DecimalFormatSymbols
// for currency data anymore.
@ -530,6 +547,8 @@ DecimalFormatSymbols::initialize() {
fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents
fIsCustomCurrencySymbol = FALSE;
fIsCustomIntlCurrencySymbol = FALSE;
fCodePointZero = 0x30;
U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0));
}

View file

@ -13,25 +13,28 @@ using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) {
IntegerWidth::IntegerWidth(digits_t minInt, digits_t maxInt) {
fUnion.minMaxInt.fMinInt = minInt;
fUnion.minMaxInt.fMaxInt = maxInt;
}
IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) {
if (minInt >= 0 && minInt <= kMaxIntFracSig) {
return {static_cast<int8_t>(minInt), -1};
return {static_cast<digits_t>(minInt), -1};
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) {
if (fHasError) { return *this; } // No-op on error
if (maxInt >= 0 && maxInt <= kMaxIntFracSig) {
return {fUnion.minMaxInt.fMinInt, static_cast<int8_t>(maxInt)};
digits_t minInt = fUnion.minMaxInt.fMinInt;
if (maxInt >= 0 && maxInt <= kMaxIntFracSig && minInt <= maxInt) {
return {minInt, static_cast<digits_t>(maxInt)};
} else if (maxInt == -1) {
return {minInt, -1};
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}

View file

@ -54,13 +54,13 @@ Notation Notation::simple() {
ScientificNotation
ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const {
if (minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig) {
if (minExponentDigits >= 1 && minExponentDigits <= kMaxIntFracSig) {
ScientificSettings settings = fUnion.scientific;
settings.fMinExponentDigits = (int8_t) minExponentDigits;
settings.fMinExponentDigits = static_cast<digits_t>(minExponentDigits);
NotationUnion union_ = {settings};
return {NTN_SCIENTIFIC, union_};
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}

View file

@ -43,7 +43,7 @@ Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosit
if (targetWidth >= 0) {
return {cp, targetWidth, position};
} else {
return {U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}

View file

@ -58,7 +58,7 @@ FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) {
if (minMaxFractionPlaces >= 0 && minMaxFractionPlaces <= kMaxIntFracSig) {
return constructFraction(minMaxFractionPlaces, minMaxFractionPlaces);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
@ -66,7 +66,7 @@ FractionRounder Rounder::minFraction(int32_t minFractionPlaces) {
if (minFractionPlaces >= 0 && minFractionPlaces <= kMaxIntFracSig) {
return constructFraction(minFractionPlaces, -1);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
@ -74,7 +74,7 @@ FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) {
if (maxFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig) {
return constructFraction(0, maxFractionPlaces);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
@ -83,40 +83,40 @@ FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFr
minFractionPlaces <= maxFractionPlaces) {
return constructFraction(minFractionPlaces, maxFractionPlaces);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) {
if (minMaxSignificantDigits >= 0 && minMaxSignificantDigits <= kMaxIntFracSig) {
if (minMaxSignificantDigits >= 1 && minMaxSignificantDigits <= kMaxIntFracSig) {
return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
Rounder Rounder::minDigits(int32_t minSignificantDigits) {
if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) {
return constructSignificant(minSignificantDigits, -1);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
Rounder Rounder::maxDigits(int32_t maxSignificantDigits) {
if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
return constructSignificant(0, maxSignificantDigits);
if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) {
return constructSignificant(1, maxSignificantDigits);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) {
if (minSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig &&
if (minSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig &&
minSignificantDigits <= maxSignificantDigits) {
return constructSignificant(minSignificantDigits, maxSignificantDigits);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
@ -124,7 +124,7 @@ IncrementRounder Rounder::increment(double roundingIncrement) {
if (roundingIncrement > 0.0) {
return constructIncrement(roundingIncrement, 0);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
@ -139,19 +139,19 @@ Rounder Rounder::withMode(RoundingMode roundingMode) const {
Rounder FractionRounder::withMinDigits(int32_t minSignificantDigits) const {
if (fType == RND_ERROR) { return *this; } // no-op in error state
if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) {
return constructFractionSignificant(*this, minSignificantDigits, -1);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const {
if (fType == RND_ERROR) { return *this; } // no-op in error state
if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) {
return constructFractionSignificant(*this, -1, maxSignificantDigits);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
@ -185,14 +185,14 @@ Rounder IncrementRounder::withMinFraction(int32_t minFrac) const {
if (minFrac >= 0 && minFrac <= kMaxIntFracSig) {
return constructIncrement(fUnion.increment.fIncrement, minFrac);
} else {
return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
}
}
FractionRounder Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) {
FractionSignificantSettings settings;
settings.fMinFrac = static_cast<int8_t> (minFrac);
settings.fMaxFrac = static_cast<int8_t> (maxFrac);
settings.fMinFrac = static_cast<digits_t>(minFrac);
settings.fMaxFrac = static_cast<digits_t>(maxFrac);
settings.fMinSig = -1;
settings.fMaxSig = -1;
RounderUnion union_;
@ -204,8 +204,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
FractionSignificantSettings settings;
settings.fMinFrac = -1;
settings.fMaxFrac = -1;
settings.fMinSig = static_cast<int8_t>(minSig);
settings.fMaxSig = static_cast<int8_t>(maxSig);
settings.fMinSig = static_cast<digits_t>(minSig);
settings.fMaxSig = static_cast<digits_t>(maxSig);
RounderUnion union_;
union_.fracSig = settings;
return {RND_SIGNIFICANT, union_, kDefaultMode};
@ -214,8 +214,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
Rounder
Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) {
FractionSignificantSettings settings = base.fUnion.fracSig;
settings.fMinSig = static_cast<int8_t>(minSig);
settings.fMaxSig = static_cast<int8_t>(maxSig);
settings.fMinSig = static_cast<digits_t>(minSig);
settings.fMaxSig = static_cast<digits_t>(maxSig);
RounderUnion union_;
union_.fracSig = settings;
return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode};
@ -224,7 +224,7 @@ Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSi
IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) {
IncrementSettings settings;
settings.fIncrement = increment;
settings.fMinFrac = minFrac;
settings.fMinFrac = static_cast<digits_t>(minFrac);
RounderUnion union_;
union_.increment = settings;
return {RND_INCREMENT, union_, kDefaultMode};

View file

@ -31,7 +31,7 @@ typedef UNumberFormatPadPosition PadPosition;
typedef UNumberCompactStyle CompactStyle;
// ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG
static constexpr int32_t kMaxIntFracSig = 100;
static constexpr int32_t kMaxIntFracSig = 999;
// ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE
static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_FOUND_HALFEVEN;
@ -42,10 +42,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" ";
// ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY
static constexpr char16_t kDefaultCurrency[] = u"XXX";
// FIXME: New error codes:
static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
// Forward declarations:
class Modifier;

View file

@ -80,10 +80,6 @@ U_NAMESPACE_BEGIN
* If you supply a pattern with multiple grouping characters, the interval
* between the last one and the end of the integer is the one that is
* used. So "#,##,###,####" == "######,####" == "##,####,####".
* <P>
* This class only handles localized digits where the 10 digits are
* contiguous in Unicode, from 0 to 9. Other digits sets (such as
* superscripts) would need a different subclass.
*/
class U_I18N_API DecimalFormatSymbols : public UObject {
public:
@ -396,6 +392,13 @@ public:
inline UBool isCustomIntlCurrencySymbol() const {
return fIsCustomIntlCurrencySymbol;
}
/**
 * Returns the value currently stored in fCodePointZero.
 * @return the stored zero-digit code point value.
 * @internal For ICU use only
 */
inline UChar32 getCodePointZero() const {
return fCodePointZero;
}
#endif /* U_HIDE_INTERNAL_API */
/**
@ -408,11 +411,24 @@ public:
*
* @param symbol Constant to indicate a number format symbol.
* @return the format symbol by the param 'symbol'
* @internal
* @draft ICU 61
*/
inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const;
inline const UnicodeString& getConstSymbol(ENumberFormatSymbol symbol) const;
#ifndef U_HIDE_INTERNAL_API
/**
* Returns the const UnicodeString reference, like getConstSymbol,
* corresponding to the digit with the given value. This is equivalent
* to accessing the symbol from getConstSymbol with the corresponding
* key, such as kZeroDigitSymbol or kOneDigitSymbol.
*
* @param digit The digit, an integer between 0 and 9 inclusive.
* If outside the range 0 to 9, the zero digit is returned.
* @return the format symbol for the given digit.
* @internal This API is currently for ICU use only.
*/
inline const UnicodeString& getConstDigitSymbol(int32_t digit) const;
/**
* Returns the pattern stored in currency info. Internal API for use by NumberFormat API.
* @internal
@ -444,6 +460,22 @@ private:
*/
UnicodeString fNoSymbol;
/**
* Dealing with code points is faster than dealing with strings when formatting. Because of
* this, we maintain a value containing the zero code point that is used whenever digitStrings
* represents a sequence of ten code points in order.
*
* <p>If the value stored here is positive, it means that the code point stored in this value
* corresponds to the digitStrings array, and codePointZero can be used instead of the
* digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does
* *not* contain a sequence of code points, and it must be used directly.
*
* <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
* should never be set directly; rather, it should be updated only when digitStrings mutates.
* That is, the flow of information is digitStrings -> codePointZero, not the other way.
*/
UChar32 fCodePointZero;
Locale locale;
char actualLocale[ULOC_FULLNAME_CAPACITY];
@ -481,6 +513,17 @@ DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const {
return *strPtr;
}
// Look up the symbol string for a single digit value.
// Values 1 through 9 are found by offsetting from kOneDigitSymbol; zero, and any
// value outside 0..9, yields the kZeroDigitSymbol entry.
inline const UnicodeString& DecimalFormatSymbols::getConstDigitSymbol(int32_t digit) const {
    const bool isOneThroughNine = (digit >= 1 && digit <= 9);
    if (!isOneThroughNine) {
        // Covers digit == 0 as well as out-of-range input.
        return fSymbols[kZeroDigitSymbol];
    }
    const int32_t offsetFromOne = digit - 1;
    return fSymbols[static_cast<ENumberFormatSymbol>(kOneDigitSymbol + offsetFromOne)];
}
// -------------------------------------
inline void
@ -497,14 +540,20 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString
// If the zero digit is being set to a known zero digit according to Unicode,
// then we automatically set the corresponding 1-9 digits
if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) {
// Also record updates to fCodePointZero. Be conservative if in doubt.
if (symbol == kZeroDigitSymbol) {
UChar32 sym = value.char32At(0);
if ( u_charDigitValue(sym) == 0 ) {
if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) {
fCodePointZero = sym;
for ( int8_t i = 1 ; i<= 9 ; i++ ) {
sym++;
fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym);
}
} else {
fCodePointZero = -1;
}
} else if (symbol >= kOneDigitSymbol && symbol <= kNineDigitSymbol) {
fCodePointZero = -1;
}
}

View file

@ -253,16 +253,17 @@ typedef enum UGroupingStrategy {
} UGroupingStrategy;
/**
* An enum declaring how to denote positive and negative numbers. Example outputs when formatting 123 and -123 in
* <em>en-US</em>:
* An enum declaring how to denote positive and negative numbers. Example outputs when formatting
* 123, 0, and -123 in <em>en-US</em>:
*
* <p>
* <ul>
* <li>AUTO: "123", "-123"
* <li>ALWAYS: "+123", "-123"
* <li>NEVER: "123", "123"
* <li>ACCOUNTING: "$123", "($123)"
* <li>ACCOUNTING_ALWAYS: "+$123", "($123)"
* <li>AUTO: "123", "0", and "-123"
* <li>ALWAYS: "+123", "+0", and "-123"
* <li>NEVER: "123", "0", and "123"
* <li>ACCOUNTING: "$123", "$0", and "($123)"
* <li>ACCOUNTING_ALWAYS: "+$123", "+$0", and "($123)"
* <li>EXCEPT_ZERO: "+123", "0", and "-123"
* <li>ACCOUNTING_EXCEPT_ZERO: "+$123", "$0", and "($123)"
* </ul>
*
* <p>
@ -394,6 +395,21 @@ class IntegerWidth;
namespace impl {
/**
* Datatype for minimum/maximum fraction digits. Must be able to hold kMaxIntFracSig.
*
* @internal
*/
typedef int16_t digits_t;
/**
* Use a default threshold of 3. This means that the third time .format() is called, the data structures get built
* using the "safe" code path. The first two calls to .format() will trigger the unsafe code path.
*
* @internal
*/
static constexpr int32_t DEFAULT_THRESHOLD = 3;
// Forward declarations:
class Padder;
struct MacroProps;
@ -577,7 +593,7 @@ class U_I18N_API Notation : public UMemory {
struct ScientificSettings {
int8_t fEngineeringInterval;
bool fRequireMinInt;
int8_t fMinExponentDigits;
impl::digits_t fMinExponentDigits;
UNumberSignDisplay fExponentSignDisplay;
} scientific;
@ -892,14 +908,14 @@ class U_I18N_API Rounder : public UMemory {
union RounderUnion {
struct FractionSignificantSettings {
// For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT
int8_t fMinFrac;
int8_t fMaxFrac;
int8_t fMinSig;
int8_t fMaxSig;
impl::digits_t fMinFrac;
impl::digits_t fMaxFrac;
impl::digits_t fMinSig;
impl::digits_t fMaxSig;
} fracSig;
struct IncrementSettings {
double fIncrement;
int32_t fMinFrac;
impl::digits_t fMinFrac;
} increment; // For RND_INCREMENT
UCurrencyUsage currencyUsage; // For RND_CURRENCY
UErrorCode errorCode; // For RND_ERROR
@ -1153,7 +1169,8 @@ class U_I18N_API IntegerWidth : public UMemory {
* For example, with maxInt=3, the number 1234 will get printed as "234".
*
* @param maxInt
* The maximum number of places before the decimal separator.
* The maximum number of places before the decimal separator. maxInt == -1 means no
* truncation.
* @return An IntegerWidth for passing to the NumberFormatter integerWidth() setter.
* @draft ICU 60
* @see NumberFormatter
@ -1163,14 +1180,14 @@ class U_I18N_API IntegerWidth : public UMemory {
private:
union {
struct {
int8_t fMinInt;
int8_t fMaxInt;
impl::digits_t fMinInt;
impl::digits_t fMaxInt;
} minMaxInt;
UErrorCode errorCode;
} fUnion;
bool fHasError = false;
IntegerWidth(int8_t minInt, int8_t maxInt);
IntegerWidth(impl::digits_t minInt, impl::digits_t maxInt);
IntegerWidth(UErrorCode errorCode) { // NOLINT
fUnion.errorCode = errorCode;
@ -1205,14 +1222,6 @@ class U_I18N_API IntegerWidth : public UMemory {
namespace impl {
/**
* Use a default threshold of 3. This means that the third time .format() is called, the data structures get built
* using the "safe" code path. The first two calls to .format() will trigger the unsafe code path.
*
* @internal
*/
static constexpr int32_t DEFAULT_THRESHOLD = 3;
/** @internal */
class U_I18N_API SymbolsWrapper : public UMemory {
public:

View file

@ -382,7 +382,7 @@ utrans_openIDs(UErrorCode *pErrorCode);
U_STABLE void U_EXPORT2
utrans_trans(const UTransliterator* trans,
UReplaceable* rep,
UReplaceableCallbacks* repFunc,
const UReplaceableCallbacks* repFunc,
int32_t start,
int32_t* limit,
UErrorCode* status);
@ -433,7 +433,7 @@ utrans_trans(const UTransliterator* trans,
U_STABLE void U_EXPORT2
utrans_transIncremental(const UTransliterator* trans,
UReplaceable* rep,
UReplaceableCallbacks* repFunc,
const UReplaceableCallbacks* repFunc,
UTransPosition* pos,
UErrorCode* status);

View file

@ -41,12 +41,12 @@ U_NAMESPACE_BEGIN
class ReplaceableGlue : public Replaceable {
UReplaceable *rep;
UReplaceableCallbacks *func;
const UReplaceableCallbacks *func;
public:
ReplaceableGlue(UReplaceable *replaceable,
UReplaceableCallbacks *funcCallback);
const UReplaceableCallbacks *funcCallback);
virtual ~ReplaceableGlue();
@ -88,7 +88,7 @@ protected:
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue)
ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable,
UReplaceableCallbacks *funcCallback)
const UReplaceableCallbacks *funcCallback)
: Replaceable()
{
this->rep = replaceable;
@ -398,7 +398,7 @@ utrans_openIDs(UErrorCode *pErrorCode) {
U_CAPI void U_EXPORT2
utrans_trans(const UTransliterator* trans,
UReplaceable* rep,
UReplaceableCallbacks* repFunc,
const UReplaceableCallbacks* repFunc,
int32_t start,
int32_t* limit,
UErrorCode* status) {
@ -418,7 +418,7 @@ utrans_trans(const UTransliterator* trans,
U_CAPI void U_EXPORT2
utrans_transIncremental(const UTransliterator* trans,
UReplaceable* rep,
UReplaceableCallbacks* repFunc,
const UReplaceableCallbacks* repFunc,
UTransPosition* pos,
UErrorCode* status) {

View file

@ -2030,6 +2030,25 @@ UBool IntlTest::assertEquals(const char* message,
return TRUE;
}
// Compare two UErrorCode values. A mismatch is reported through errln() using the
// symbolic names from u_errorName(), and FALSE is returned. A match returns TRUE and,
// when VERBOSE_ASSERTIONS is defined, is also reported through logln().
UBool IntlTest::assertEquals(const char* message,
                             UErrorCode expected,
                             UErrorCode actual) {
    if (expected == actual) {
#ifdef VERBOSE_ASSERTIONS
        logln(UnicodeString("Ok: ") + message + "; got " + u_errorName(actual));
#endif
        return TRUE;
    }

    UnicodeString failureReport = UnicodeString("FAIL: ") + message;
    failureReport = failureReport + "; got " + u_errorName(actual);
    failureReport = failureReport + "; expected " + u_errorName(expected);
    errln(failureReport);
    return FALSE;
}
#if !UCONFIG_NO_FORMATTING
UBool IntlTest::assertEquals(const char* message,
const Formattable& expected,
@ -2105,6 +2124,16 @@ UBool IntlTest::assertEquals(const UnicodeString& message,
int64_t actual) {
return assertEquals(extractToAssertBuf(message), expected, actual);
}
// UnicodeString-message variant for double values: passes the message through
// extractToAssertBuf() and forwards to another assertEquals overload
// (presumably the const char* one; confirm against extractToAssertBuf's return type).
UBool IntlTest::assertEquals(const UnicodeString& message,
double expected,
double actual) {
return assertEquals(extractToAssertBuf(message), expected, actual);
}
// UnicodeString-message variant for UErrorCode values: passes the message through
// extractToAssertBuf() and forwards to another assertEquals overload
// (presumably the const char* one; confirm against extractToAssertBuf's return type).
UBool IntlTest::assertEquals(const UnicodeString& message,
UErrorCode expected,
UErrorCode actual) {
return assertEquals(extractToAssertBuf(message), expected, actual);
}
#if !UCONFIG_NO_FORMATTING
UBool IntlTest::assertEquals(const UnicodeString& message,

View file

@ -289,13 +289,12 @@ public:
UBool assertSuccess(const char* message, UErrorCode ec, UBool possibleDataError=FALSE, const char *file=NULL, int line=0);
UBool assertEquals(const char* message, const UnicodeString& expected,
const UnicodeString& actual, UBool possibleDataError=FALSE);
UBool assertEquals(const char* message, const char* expected,
const char* actual);
UBool assertEquals(const char* message, UBool expected,
UBool actual);
UBool assertEquals(const char* message, const char* expected, const char* actual);
UBool assertEquals(const char* message, UBool expected, UBool actual);
UBool assertEquals(const char* message, int32_t expected, int32_t actual);
UBool assertEquals(const char* message, int64_t expected, int64_t actual);
UBool assertEquals(const char* message, double expected, double actual);
UBool assertEquals(const char* message, UErrorCode expected, UErrorCode actual);
#if !UCONFIG_NO_FORMATTING
UBool assertEquals(const char* message, const Formattable& expected,
const Formattable& actual, UBool possibleDataError=FALSE);
@ -307,11 +306,12 @@ public:
UBool assertSuccess(const UnicodeString& message, UErrorCode ec);
UBool assertEquals(const UnicodeString& message, const UnicodeString& expected,
const UnicodeString& actual, UBool possibleDataError=FALSE);
UBool assertEquals(const UnicodeString& message, const char* expected,
const char* actual);
UBool assertEquals(const UnicodeString& message, const char* expected, const char* actual);
UBool assertEquals(const UnicodeString& message, UBool expected, UBool actual);
UBool assertEquals(const UnicodeString& message, int32_t expected, int32_t actual);
UBool assertEquals(const UnicodeString& message, int64_t expected, int64_t actual);
UBool assertEquals(const UnicodeString& message, double expected, double actual);
UBool assertEquals(const UnicodeString& message, UErrorCode expected, UErrorCode actual);
virtual void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); // overide !

View file

@ -63,6 +63,7 @@ class NumberFormatterApiTest : public IntlTest {
void locale();
void formatTypes();
void errors();
void validRanges();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);

View file

@ -76,6 +76,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
TESTCASE_AUTO(locale);
TESTCASE_AUTO(formatTypes);
TESTCASE_AUTO(errors);
TESTCASE_AUTO(validRanges);
TESTCASE_AUTO_END;
}
@ -1748,27 +1749,83 @@ void NumberFormatterApiTest::errors() {
UErrorCode status2 = U_ZERO_ERROR;
FormattedNumber fn = lnf.formatInt(1, status1);
assertEquals(
"Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal",
U_ILLEGAL_ARGUMENT_ERROR,
status1);
"Should fail since rounder is not legal",
(UBool) TRUE,
(UBool) U_FAILURE(status1));
FieldPosition fp;
fn.populateFieldPosition(fp, status2);
assertEquals(
"Should fail with U_ILLEGAL_ARGUMENT_ERROR on terminal method",
U_ILLEGAL_ARGUMENT_ERROR,
status2);
"Should fail on terminal method",
(UBool) TRUE,
(UBool) U_FAILURE(status2));
}
{
UErrorCode status = U_ZERO_ERROR;
lnf.copyErrorTo(status);
assertEquals(
"Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal",
U_ILLEGAL_ARGUMENT_ERROR,
status);
"Should fail since rounder is not legal",
(UBool) TRUE,
(UBool) U_FAILURE(status));
}
}
/**
 * Checks the argument ranges accepted by the NumberFormatter helper factory methods.
 *
 * Each method is called with every argument from -2 through EXPECTED_MAX_INT_FRAC_SIG + 2.
 * Arguments inside [lowerBound, EXPECTED_MAX_INT_FRAC_SIG] are expected to leave the status
 * at U_ZERO_ERROR; all others are expected to report U_NUMBER_ARG_OUTOFBOUNDS_ERROR.
 * Two-argument methods are additionally expected to reject min > max.
 */
void NumberFormatterApiTest::validRanges() {

#define EXPECTED_MAX_INT_FRAC_SIG 999

/* Asserts that `status` is what the bounds [lowerBound, EXPECTED_MAX_INT_FRAC_SIG] predict for `argument`. */
#define VALID_RANGE_ASSERT(status, method, lowerBound, argument) { \
    UErrorCode expectedStatus = ((lowerBound <= argument) && (argument <= EXPECTED_MAX_INT_FRAC_SIG)) \
        ? U_ZERO_ERROR \
        : U_NUMBER_ARG_OUTOFBOUNDS_ERROR; \
    assertEquals( \
        UnicodeString(u"Incorrect status for " #method " on input ") \
            + Int64ToUnicodeString(argument), \
        expectedStatus, \
        status); \
}

/* Sweeps a one-argument factory method across the whole test range. */
#define VALID_RANGE_ONEARG(setting, method, lowerBound) { \
    for (int32_t argument = -2; argument <= EXPECTED_MAX_INT_FRAC_SIG + 2; argument++) { \
        UErrorCode status = U_ZERO_ERROR; \
        NumberFormatter::with().setting(method(argument)).copyErrorTo(status); \
        VALID_RANGE_ASSERT(status, method, lowerBound, argument); \
    } \
}

/* Sweeps each argument of a two-argument (min, max) factory method across the whole test range. */
#define VALID_RANGE_TWOARGS(setting, method, lowerBound) { \
    for (int32_t argument = -2; argument <= EXPECTED_MAX_INT_FRAC_SIG + 2; argument++) { \
        UErrorCode status = U_ZERO_ERROR; \
        /* Pass EXPECTED_MAX_INT_FRAC_SIG as the second argument so arg1 <= arg2 in expected cases */ \
        NumberFormatter::with().setting(method(argument, EXPECTED_MAX_INT_FRAC_SIG)).copyErrorTo(status); \
        VALID_RANGE_ASSERT(status, method, lowerBound, argument); \
        status = U_ZERO_ERROR; \
        /* Pass lowerBound as the first argument so arg1 <= arg2 in expected cases */ \
        NumberFormatter::with().setting(method(lowerBound, argument)).copyErrorTo(status); \
        VALID_RANGE_ASSERT(status, method, lowerBound, argument); \
        /* Start from a clean status so a failure left over from the previous check */ \
        /* cannot satisfy the assertion below on its own. */ \
        status = U_ZERO_ERROR; \
        /* Check that first argument must be less than or equal to second argument */ \
        NumberFormatter::with().setting(method(argument, argument - 1)).copyErrorTo(status); \
        assertEquals("Incorrect status for " #method " on max < min input", \
            U_NUMBER_ARG_OUTOFBOUNDS_ERROR, \
            status); \
    } \
}

    // Fraction-based rounders accept zero places.
    VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction, 0);
    VALID_RANGE_ONEARG(rounding, Rounder::minFraction, 0);
    VALID_RANGE_ONEARG(rounding, Rounder::maxFraction, 0);
    VALID_RANGE_TWOARGS(rounding, Rounder::minMaxFraction, 0);

    // Significant-digit rounders require at least one digit.
    VALID_RANGE_ONEARG(rounding, Rounder::fixedDigits, 1);
    VALID_RANGE_ONEARG(rounding, Rounder::minDigits, 1);
    VALID_RANGE_ONEARG(rounding, Rounder::maxDigits, 1);
    VALID_RANGE_TWOARGS(rounding, Rounder::minMaxDigits, 1);
    VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction(1).withMinDigits, 1);
    VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction(1).withMaxDigits, 1);

    VALID_RANGE_ONEARG(notation, Notation::scientific().withMinExponentDigits, 1);

    // truncateAt is swept with a lower bound of -1.
    VALID_RANGE_ONEARG(integerWidth, IntegerWidth::zeroFillTo, 0);
    VALID_RANGE_ONEARG(integerWidth, IntegerWidth::zeroFillTo(0).truncateAt, -1);
}
void NumberFormatterApiTest::assertFormatDescending(const UnicodeString &message,
const UnlocalizedNumberFormatter &f,

View file

@ -8923,20 +8923,23 @@ void NumberFormatTest::checkExceptionIssue11735() {
}
void NumberFormatTest::Test11035_FormatCurrencyAmount() {
UErrorCode status;
UErrorCode status = U_ZERO_ERROR;
double amount = 12345.67;
const char16_t* expected = u"12,345$67 ";
// Test two ways to set a currency via API
Locale loc1 = Locale("pt_PT");
NumberFormat* fmt1 = NumberFormat::createCurrencyInstance(loc1, status);
LocalPointer<NumberFormat> fmt1(NumberFormat::createCurrencyInstance(loc1, status));
assertSuccess("Creating fmt1", status);
fmt1->setCurrency(u"PTE", status);
assertSuccess("Setting currency on fmt1", status);
UnicodeString actualSetCurrency;
fmt1->format(amount, actualSetCurrency);
Locale loc2 = Locale("pt_PT@currency=PTE");
NumberFormat* fmt2 = NumberFormat::createCurrencyInstance(loc2, status);
LocalPointer<NumberFormat> fmt2(NumberFormat::createCurrencyInstance(loc2, status));
assertSuccess("Creating fmt2", status);
UnicodeString actualLocaleString;
fmt2->format(amount, actualLocaleString);

View file

@ -4469,16 +4469,15 @@ void RBBITest::TestTableRedundancies() {
"!!forward; \n"
"($s0 | '?')*; \n"
"($s1 | $s2 | $s3)*; \n" };
RuleBasedBreakIterator *lbi =
RuleBasedBreakIterator *lbi =
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
//lbi->dumpTables();
rules = lbi->getRules();
UnicodeString lbRules = lbi->getRules();
delete lbi;
UParseError pe {};
RuleBasedBreakIterator *bi =
// (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
new RuleBasedBreakIterator(rules, pe, status);
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status);
assertSuccess(WHERE, status);
if (U_FAILURE(status)) return;
bi->dumpTables();

View file

@ -23,6 +23,7 @@ void IntlTestDecimalFormatSymbols::runIndexedTest( int32_t index, UBool exec, co
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testSymbols);
TESTCASE_AUTO(testLastResortData);
TESTCASE_AUTO(testDigitSymbols);
TESTCASE_AUTO(testNumberingSystem);
TESTCASE_AUTO_END;
}
@ -249,6 +250,102 @@ void IntlTestDecimalFormatSymbols::testLastResortData() {
Verify(1234567.25, "#,##0.##", *lastResort, "1,234,567.25");
}
void IntlTestDecimalFormatSymbols::testDigitSymbols() {
// This test does more in ICU4J than in ICU4C right now.
// In ICU4C, it is basically just a test for codePointZero and getConstDigitSymbol.
UChar defZero = u'0';
UChar32 osmanyaZero = U'\U000104A0';
static const UChar* osmanyaDigitStrings[] = {
u"\U000104A0", u"\U000104A1", u"\U000104A2", u"\U000104A3", u"\U000104A4",
u"\U000104A5", u"\U000104A6", u"\U000104A7", u"\U000104A8", u"\U000104A9"
};
IcuTestErrorCode status(*this, "testDigitSymbols()");
DecimalFormatSymbols symbols(Locale("en"), status);
if (defZero != symbols.getCodePointZero()) {
errln("ERROR: Code point zero be ASCII 0");
}
for (int32_t i=0; i<=9; i++) {
assertEquals(UnicodeString("i. ASCII Digit at index ") + Int64ToUnicodeString(i),
UnicodeString(u'0' + i),
symbols.getConstDigitSymbol(i));
}
for (int32_t i=0; i<=9; i++) {
DecimalFormatSymbols::ENumberFormatSymbol key =
i == 0
? DecimalFormatSymbols::kZeroDigitSymbol
: static_cast<DecimalFormatSymbols::ENumberFormatSymbol>
(DecimalFormatSymbols::kOneDigitSymbol + i - 1);
symbols.setSymbol(key, UnicodeString(osmanyaDigitStrings[i]), FALSE);
}
// NOTE: in ICU4J, the calculation of codePointZero is smarter;
// in ICU4C, it is more conservative and is only set if propogateDigits is true.
if (-1 != symbols.getCodePointZero()) {
errln("ERROR: Code point zero be invalid");
}
for (int32_t i=0; i<=9; i++) {
assertEquals(UnicodeString("ii. Osmanya digit at index ") + Int64ToUnicodeString(i),
UnicodeString(osmanyaDigitStrings[i]),
symbols.getConstDigitSymbol(i));
}
// Check Osmanya codePointZero
symbols.setSymbol(
DecimalFormatSymbols::kZeroDigitSymbol,
UnicodeString(osmanyaDigitStrings[0]), TRUE);
if (osmanyaZero != symbols.getCodePointZero()) {
errln("ERROR: Code point zero be Osmanya code point zero");
}
for (int32_t i=0; i<=9; i++) {
assertEquals(UnicodeString("iii. Osmanya digit at index ") + Int64ToUnicodeString(i),
UnicodeString(osmanyaDigitStrings[i]),
symbols.getConstDigitSymbol(i));
}
// Check after copy
DecimalFormatSymbols copy(symbols);
if (osmanyaZero != copy.getCodePointZero()) {
errln("ERROR: Code point zero be Osmanya code point zero");
}
for (int32_t i=0; i<=9; i++) {
assertEquals(UnicodeString("iv. After copy at index ") + Int64ToUnicodeString(i),
UnicodeString(osmanyaDigitStrings[i]),
copy.getConstDigitSymbol(i));
}
// Check when loaded from resource bundle
DecimalFormatSymbols fromData(Locale("en@numbers=osma"), status);
if (osmanyaZero != fromData.getCodePointZero()) {
errln("ERROR: Code point zero be Osmanya code point zero");
}
for (int32_t i=0; i<=9; i++) {
assertEquals(UnicodeString("v. Resource bundle at index ") + Int64ToUnicodeString(i),
UnicodeString(osmanyaDigitStrings[i]),
fromData.getConstDigitSymbol(i));
}
// Setting a digit somewhere in the middle should invalidate codePointZero
symbols.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"foo", FALSE);
if (-1 != symbols.getCodePointZero()) {
errln("ERROR: Code point zero be invalid");
}
// Reset digits to Latin
symbols.setSymbol(
DecimalFormatSymbols::kZeroDigitSymbol,
UnicodeString(defZero));
if (defZero != symbols.getCodePointZero()) {
errln("ERROR: Code point zero be ASCII 0");
}
for (int32_t i=0; i<=9; i++) {
assertEquals(UnicodeString("vi. ASCII Digit at index ") + Int64ToUnicodeString(i),
UnicodeString(u'0' + i),
symbols.getConstDigitSymbol(i));
}
}
void IntlTestDecimalFormatSymbols::testNumberingSystem() {
IcuTestErrorCode errorCode(*this, "testNumberingSystem");
struct testcase {

View file

@ -28,6 +28,7 @@ private:
*/
void testSymbols(/*char *par*/);
void testLastResortData();
void testDigitSymbols();
void testNumberingSystem();
/** helper functions**/

View file

@ -4,39 +4,76 @@
#include <stdio.h>
#include <string>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <fstream>
// with caution:
// Include this even though we aren't linking against it.
#include "unicode/utf8.h"
// Include this here, to avoid needing to compile and link part of common lib
// (bootstrapping problem)
#include "utf_impl.cpp"
/**
* What is this?
* or even:
* what IS this??
*
* "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code
* in utf-8 into.. something else. Something consumable by certain compilers (Solaris, xlC)
* which aren't quite there.
*
* - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN'
* - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc.
* - if the system is EBCDIC-based, well, that's taken into account.
*
* Usage:
* escapesrc infile.cpp outfile.cpp
* Normally this is invoked by the build stage, with a rule such as:
*
* _%.cpp: $(srcdir)/%.cpp
* @$(BINDIR)/escapesrc$(EXEEXT) $< $@
* %.o: _%.cpp
* $(COMPILE.cc) ... $@ $<
*
* Naturally, 'escapesrc' has to be excluded from said build rule.
*/
// Whitespace characters, given as explicit ASCII code values.
static const char kSPACE = 0x20;  // space
static const char kTAB = 0x09;    // horizontal tab
static const char kLF = 0x0A;     // line feed
static const char kCR = 0x0D;     // carriage return
// kHASH = 0x23,
// kSLASH = 0x2f,
// kSTAR = 0x2A,
// This contains a codepage and ISO 14882:1998 illegality table.
// Use "make gen-table" to rebuild it.
# include "cptbl.h"
// For convenience
# define cp1047_to_8859(c) cp1047_8859_1[c]
// Our app's name: assigned from argv[0] in main() and used as the prefix of the
// messages this tool prints to stderr.
std::string prog;
/**
 * Print the one-line usage summary to stderr, prefixed with the program name.
 * This function only prints; it returns normally and leaves exiting to the caller.
 */
void usage() {
    const char *name = prog.c_str();
    fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", name, name);
}
/**
* Delete the output file (if any)
* We want to delete even if we didn't generate, because it might be stale.
*/
int cleanup(const std::string &outfile) {
const char *outstr = outfile.c_str();
if(outstr && *outstr) {
int rc = unlink(outstr);
int rc = std::remove(outstr);
if(rc == 0) {
fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr);
return 0;
@ -44,7 +81,7 @@ int cleanup(const std::string &outfile) {
if( errno == ENOENT ) {
return 0; // File did not exist - no error.
} else {
perror("unlink");
perror("std::remove");
return 1;
}
}
@ -52,16 +89,12 @@ int cleanup(const std::string &outfile) {
return 0;
}
// inline bool hasNonAscii(const char *line, size_t len) {
// const unsigned char *uline = reinterpret_cast<const unsigned char*>(line);
// for(size_t i=0;i<len; i++) {
// if( uline[i] > 0x7F) {
// return true;
// }
// }
// return false;
// }
/**
* Skip across any known whitespace.
* @param p startpoint
* @param e limit
* @return first non-whitespace char
*/
inline const char *skipws(const char *p, const char *e) {
for(;p<e;p++) {
switch(*p) {
@ -77,30 +110,11 @@ inline const char *skipws(const char *p, const char *e) {
return p;
}
// inline bool isCommentOrEmpty(const char* line, size_t len) {
// const char *p = line;
// const char *e = line+len;
// p = skipws(p,e);
// if(p==e) {
// return true; // whitespace only
// }
// p++;
// switch(*p) {
// case kHASH: return true; // #directive
// case kSLASH:
// p++;
// if(p==e) return false; // single slash
// switch(*p) {
// case kSLASH: // '/ /'
// case kSTAR: // '/ *'
// return true; // start of comment
// default: return false; // something else
// }
// default: return false; // something else
// }
// /*NOTREACHED*/
// }
/**
* Append a byte, hex encoded
* @param outstr string to append to
* @param byte the byte to append
*/
void appendByte(std::string &outstr,
uint8_t byte) {
char tmp2[5];
@ -109,6 +123,11 @@ void appendByte(std::string &outstr,
}
/**
* Append the bytes from 'linestr' into outstr, with escaping
* @param outstr the output buffer
* @param linestr the input buffer
* @param pos in/out: the current char under consideration
* @param chars the number of chars to consider
* @return true on failure
*/
bool appendUtf8(std::string &outstr,
@ -141,6 +160,7 @@ bool appendUtf8(std::string &outstr,
}
/**
* Fixup u8"x"
* @param linestr string to mutate. Already escaped into \u format.
* @param origpos beginning, points to 'u8"'
* @param endpos end, points to "
@ -184,9 +204,11 @@ bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
}
/**
* fix the string at the position
* false = no err
* true = had err
* fix the u"x"/u'x'/u8"x" string at the position
* u8'x' is not supported, sorry.
* @param linestr the input string
* @param pos the position
* @return false = no err, true = had err
*/
bool fixAt(std::string &linestr, size_t pos) {
size_t origpos = pos;
@ -292,8 +314,12 @@ bool fixAt(std::string &linestr, size_t pos) {
}
/**
* Fixup an entire line
* false = no err
* true = had err
* @param no the line number (not used)
* @param linestr the string to fix
* @return true if any err, else false
*/
bool fixLine(int /*no*/, std::string &linestr) {
const char *line = linestr.c_str();
@ -304,17 +330,6 @@ bool fixLine(int /*no*/, std::string &linestr) {
return false; // Nothing to do. No u' or u" detected
}
// lines such as u8"\u0308" are all ASCII.
// // Quick Check: all ascii?
// if(!hasNonAscii(line, len)) {
// return false; // ASCII
// }
// // comment or empty line?
// if(isCommentOrEmpty(line, len)) {
// return false; // Comment or just empty
// }
// start from the end and find all u" cases
size_t pos = len = linestr.size();
while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) {
@ -345,6 +360,12 @@ bool fixLine(int /*no*/, std::string &linestr) {
return false;
}
/**
* Convert a whole file
* @param infile
* @param outfile
* @return 1 on err, 0 otherwise
*/
int convert(const std::string &infile, const std::string &outfile) {
fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());
@ -386,6 +407,9 @@ int convert(const std::string &infile, const std::string &outfile) {
return 0;
}
/**
* Main function
*/
int main(int argc, const char *argv[]) {
prog = argv[0];
@ -399,6 +423,3 @@ int main(int argc, const char *argv[]) {
return convert(infile, outfile);
}
#include "utf_impl.cpp"