From 9eb5fc1b11e72cddf6974ce97900debf014d47c3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 7 Jan 2016 00:20:53 +0000 Subject: [PATCH] ICU-12032 rewrite SimplePatternFormatter: quoting like MessageFormat, shorter, create fewer objects X-SVN-Rev: 38155 --- icu4c/source/common/listformatter.cpp | 16 +- .../source/common/simplepatternformatter.cpp | 679 ++++++------------ icu4c/source/common/simplepatternformatter.h | 347 +++++---- icu4c/source/common/unicode/listformatter.h | 4 +- icu4c/source/i18n/measfmt.cpp | 4 +- .../test/intltest/listformattertest.cpp | 5 +- .../test/intltest/quantityformattertest.cpp | 22 +- .../intltest/simplepatternformattertest.cpp | 111 ++- 8 files changed, 503 insertions(+), 685 deletions(-) diff --git a/icu4c/source/common/listformatter.cpp b/icu4c/source/common/listformatter.cpp index 4941fa2c0a2..622ea59302c 100644 --- a/icu4c/source/common/listformatter.cpp +++ b/icu4c/source/common/listformatter.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2013-2015, International Business Machines +* Copyright (C) 2013-2016, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -43,11 +43,11 @@ ListFormatInternal( middlePattern(middle, 2, 2, errorCode), endPattern(end, 2, 2, errorCode) {} -ListFormatInternal(const ListFormatData &data) : - twoPattern(data.twoPattern), - startPattern(data.startPattern), - middlePattern(data.middlePattern), - endPattern(data.endPattern) { } +ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) : + twoPattern(data.twoPattern, errorCode), + startPattern(data.startPattern, errorCode), + middlePattern(data.middlePattern, errorCode), + endPattern(data.endPattern, errorCode) { } ListFormatInternal(const ListFormatInternal &other) : twoPattern(other.twoPattern), @@ -236,8 +236,8 @@ ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *s return p; } -ListFormatter::ListFormatter(const ListFormatData& listFormatData) { - owned = new ListFormatInternal(listFormatData); +ListFormatter::ListFormatter(const ListFormatData& listFormatData, UErrorCode &errorCode) { + owned = new ListFormatInternal(listFormatData, errorCode); data = owned; } diff --git a/icu4c/source/common/simplepatternformatter.cpp b/icu4c/source/common/simplepatternformatter.cpp index abaaea9dcd5..2e4fb59e045 100644 --- a/icu4c/source/common/simplepatternformatter.cpp +++ b/icu4c/source/common/simplepatternformatter.cpp @@ -1,538 +1,313 @@ /* ****************************************************************************** -* Copyright (C) 2014-2015, International Business Machines +* Copyright (C) 2014-2016, International Business Machines * Corporation and others. All Rights Reserved. 
****************************************************************************** * simplepatternformatter.cpp */ + +#include "unicode/utypes.h" +#include "unicode/unistr.h" #include "simplepatternformatter.h" -#include "cstring.h" #include "uassert.h" U_NAMESPACE_BEGIN -static UBool isInvalidArray(const void *array, int32_t size) { - return (size < 0 || (size > 0 && array == NULL)); -} +namespace { -typedef enum SimplePatternFormatterCompileState { - INIT, - APOSTROPHE, - PLACEHOLDER -} SimplePatternFormatterCompileState; +/** + * Argument numbers must be smaller than this limit. + * Text segment lengths are offset by this much. + * This is currently the only unused char value in compiled patterns, + * except it is the maximum value of the first unit (max arg +1). + */ +const int32_t ARG_NUM_LIMIT = 0x100; +/** + * Initial and maximum char/UChar value set for a text segment. + * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. + * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. + */ +const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff; +/** + * Maximum length of a text segment. Longer segments are split into shorter ones. + */ +const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT; -// Handles parsing placeholders in the pattern string, e.g {4} or {35} -class SimplePatternFormatterIdBuilder { -public: - SimplePatternFormatterIdBuilder() : id(0), idLen(0) { } - ~SimplePatternFormatterIdBuilder() { } - - // Resets so that this object has seen no placeholder ID. - void reset() { id = 0; idLen = 0; } - - // Returns the numeric placeholder ID parsed so far - int32_t getId() const { return id; } - - // Appends the numeric placeholder ID parsed so far back to a - // UChar buffer. Used to recover if parser using this object finds - // no closing curly brace. - void appendTo(UChar *buffer, int32_t *len) const; - - // Returns true if this object has seen a placeholder ID. 
- UBool isValid() const { return (idLen > 0); } - - // Processes a single digit character. Pattern string parser calls this - // as it processes digits after an opening curly brace. - void add(UChar ch); -private: - int32_t id; - int32_t idLen; - SimplePatternFormatterIdBuilder( - const SimplePatternFormatterIdBuilder &other); - SimplePatternFormatterIdBuilder &operator=( - const SimplePatternFormatterIdBuilder &other); +enum { + APOS = 0x27, + DIGIT_ZERO = 0x30, + DIGIT_ONE = 0x31, + DIGIT_NINE = 0x39, + OPEN_BRACE = 0x7b, + CLOSE_BRACE = 0x7d }; -void SimplePatternFormatterIdBuilder::appendTo( - UChar *buffer, int32_t *len) const { - int32_t origLen = *len; - int32_t kId = id; - for (int32_t i = origLen + idLen - 1; i >= origLen; i--) { - int32_t digit = kId % 10; - buffer[i] = digit + 0x30; - kId /= 10; - } - *len = origLen + idLen; +inline UBool isInvalidArray(const void *array, int32_t length) { + return (length < 0 || (array == NULL && length != 0)); } -void SimplePatternFormatterIdBuilder::add(UChar ch) { - id = id * 10 + (ch - 0x30); - idLen++; -} - -// Represents placeholder values. -class SimplePatternFormatterPlaceholderValues : public UMemory { -public: - SimplePatternFormatterPlaceholderValues( - const UnicodeString * const *values, - int32_t valuesCount); - - // Returns TRUE if appendTo value is at any index besides exceptIndex. - UBool isAppendToInAnyIndexExcept( - const UnicodeString &appendTo, int32_t exceptIndex) const; - - // For each appendTo value, stores the snapshot of it in its place. - void snapshotAppendTo(const UnicodeString &appendTo); - - // Returns the placeholder value at index. No range checking performed. - // Returned reference is valid for as long as this object exists. 
- const UnicodeString &get(int32_t index) const; -private: - const UnicodeString * const *fValues; - int32_t fValuesCount; - const UnicodeString *fAppendTo; - UnicodeString fAppendToCopy; - SimplePatternFormatterPlaceholderValues( - const SimplePatternFormatterPlaceholderValues &); - SimplePatternFormatterPlaceholderValues &operator=( - const SimplePatternFormatterPlaceholderValues &); -}; - -SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues( - const UnicodeString * const *values, - int32_t valuesCount) - : fValues(values), - fValuesCount(valuesCount), - fAppendTo(NULL), - fAppendToCopy() { -} - -UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept( - const UnicodeString &appendTo, int32_t exceptIndex) const { - for (int32_t i = 0; i < fValuesCount; ++i) { - if (i != exceptIndex && fValues[i] == &appendTo) { - return TRUE; - } - } - return FALSE; -} - -void SimplePatternFormatterPlaceholderValues::snapshotAppendTo( - const UnicodeString &appendTo) { - fAppendTo = &appendTo; - fAppendToCopy = appendTo; -} - -const UnicodeString &SimplePatternFormatterPlaceholderValues::get( - int32_t index) const { - if (fAppendTo == NULL || fAppendTo != fValues[index]) { - return *fValues[index]; - } - return fAppendToCopy; -} - -SimplePatternFormatter::SimplePatternFormatter() : - noPlaceholders(), - placeholders(), - placeholderSize(0), - placeholderCount(0), - firstPlaceholderReused(FALSE) { -} - -SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) : - noPlaceholders(), - placeholders(), - placeholderSize(0), - placeholderCount(0), - firstPlaceholderReused(FALSE) { - UErrorCode status = U_ZERO_ERROR; - compile(pattern, status); -} - -SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern, - int32_t min, int32_t max, - UErrorCode &errorCode) - : noPlaceholders(), - placeholders(), - placeholderSize(0), - placeholderCount(0), - firstPlaceholderReused(FALSE) { - 
compileMinMaxPlaceholders(pattern, min, max, errorCode); -} - -SimplePatternFormatter::SimplePatternFormatter( - const SimplePatternFormatter &other) : - noPlaceholders(other.noPlaceholders), - placeholders(), - placeholderSize(0), - placeholderCount(other.placeholderCount), - firstPlaceholderReused(other.firstPlaceholderReused) { - placeholderSize = ensureCapacity(other.placeholderSize); - uprv_memcpy( - placeholders.getAlias(), - other.placeholders.getAlias(), - placeholderSize * sizeof(PlaceholderInfo)); -} +} // namespace SimplePatternFormatter &SimplePatternFormatter::operator=( const SimplePatternFormatter& other) { if (this == &other) { return *this; } - noPlaceholders = other.noPlaceholders; - placeholderSize = ensureCapacity(other.placeholderSize); - placeholderCount = other.placeholderCount; - firstPlaceholderReused = other.firstPlaceholderReused; - uprv_memcpy( - placeholders.getAlias(), - other.placeholders.getAlias(), - placeholderSize * sizeof(PlaceholderInfo)); + compiledPattern = other.compiledPattern; return *this; } -SimplePatternFormatter::~SimplePatternFormatter() { -} +SimplePatternFormatter::~SimplePatternFormatter() {} UBool SimplePatternFormatter::compileMinMaxPlaceholders( const UnicodeString &pattern, int32_t min, int32_t max, - UErrorCode &status) { - if (U_FAILURE(status)) { + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + // Parse consistent with MessagePattern, but + // - support only simple numbered arguments + // - build a simple binary structure into the result string const UChar *patternBuffer = pattern.getBuffer(); int32_t patternLength = pattern.length(); - UChar *buffer = noPlaceholders.getBuffer(patternLength); - int32_t len = 0; - placeholderSize = 0; - placeholderCount = 0; - SimplePatternFormatterCompileState state = INIT; - SimplePatternFormatterIdBuilder idBuilder; - for (int32_t i = 0; i < patternLength; ++i) { - UChar ch = patternBuffer[i]; - switch (state) { - case INIT: - if (ch == 0x27) { - 
state = APOSTROPHE; - } else if (ch == 0x7B) { - state = PLACEHOLDER; - idBuilder.reset(); + // Reserve the first char for the number of arguments. + compiledPattern.setTo((UChar)0); + int32_t textLength = 0; + int32_t maxArg = -1; + UBool inQuote = FALSE; + for (int32_t i = 0; i < patternLength;) { + UChar c = patternBuffer[i++]; + if (c == APOS) { + if (i < patternLength && (c = patternBuffer[i]) == APOS) { + // double apostrophe, skip the second one + ++i; + } else if (inQuote) { + // skip the quote-ending apostrophe + inQuote = FALSE; + continue; + } else if (c == OPEN_BRACE || c == CLOSE_BRACE) { + // Skip the quote-starting apostrophe, find the end of the quoted literal text. + ++i; + inQuote = TRUE; } else { - buffer[len++] = ch; + // The apostrophe is part of literal text. + c = APOS; } - break; - case APOSTROPHE: - if (ch == 0x27) { - buffer[len++] = 0x27; - } else if (ch == 0x7B) { - buffer[len++] = 0x7B; + } else if (!inQuote && c == OPEN_BRACE) { + if (textLength > 0) { + compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, + (UChar)(ARG_NUM_LIMIT + textLength)); + textLength = 0; + } + int32_t argNumber; + if ((i + 1) < patternLength && + 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 && + patternBuffer[i + 1] == CLOSE_BRACE) { + i += 2; } else { - buffer[len++] = 0x27; - buffer[len++] = ch; - } - state = INIT; - break; - case PLACEHOLDER: - if (ch >= 0x30 && ch <= 0x39) { - idBuilder.add(ch); - } else if (ch == 0x7D && idBuilder.isValid()) { - if (!addPlaceholder(idBuilder.getId(), len)) { - noPlaceholders.releaseBuffer(0); - status = U_MEMORY_ALLOCATION_ERROR; + // Multi-digit argument number (no leading zero) or syntax error. + // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) + // around the number, but this class does not. 
+ argNumber = -1; + if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { + argNumber = c - DIGIT_ZERO; + while (i < patternLength && + DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { + argNumber = argNumber * 10 + (c - DIGIT_ZERO); + if (argNumber >= ARG_NUM_LIMIT) { + break; + } + } + } + if (argNumber < 0 || c != CLOSE_BRACE) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; return FALSE; } - state = INIT; - } else { - buffer[len++] = 0x7B; - idBuilder.appendTo(buffer, &len); - buffer[len++] = ch; - state = INIT; } - break; - default: - U_ASSERT(FALSE); - break; + if (argNumber > maxArg) { + maxArg = argNumber; + } + compiledPattern.append((UChar)argNumber); + continue; + } // else: c is part of literal text + // Append c and track the literal-text segment length. + if (textLength == 0) { + // Reserve a char for the length of a new text segment, preset the maximum length. + compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR); + } + compiledPattern.append(c); + if (++textLength == MAX_SEGMENT_LENGTH) { + textLength = 0; } } - switch (state) { - case INIT: - break; - case APOSTROPHE: - buffer[len++] = 0x27; - break; - case PLACEHOLDER: - buffer[len++] = 0X7B; - idBuilder.appendTo(buffer, &len); - break; - default: - U_ASSERT(false); - break; + if (textLength > 0) { + compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, + (UChar)(ARG_NUM_LIMIT + textLength)); } - noPlaceholders.releaseBuffer(len); - if (placeholderCount < min || max < placeholderCount) { - status = U_ILLEGAL_ARGUMENT_ERROR; + int32_t argCount = maxArg + 1; + if (argCount < min || max < argCount) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; return FALSE; } + compiledPattern.setCharAt(0, (UChar)argCount); return TRUE; } UnicodeString& SimplePatternFormatter::format( - const UnicodeString &arg0, - UnicodeString &appendTo, - UErrorCode &status) const { - const UnicodeString *params[] = {&arg0}; - return formatAndAppend( - params, - 
UPRV_LENGTHOF(params), - appendTo, - NULL, - 0, - status); + const UnicodeString &value0, + UnicodeString &appendTo, UErrorCode &errorCode) const { + const UnicodeString *values[] = { &value0 }; + return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode); } UnicodeString& SimplePatternFormatter::format( - const UnicodeString &arg0, - const UnicodeString &arg1, - UnicodeString &appendTo, - UErrorCode &status) const { - const UnicodeString *params[] = {&arg0, &arg1}; - return formatAndAppend( - params, - UPRV_LENGTHOF(params), - appendTo, - NULL, - 0, - status); + const UnicodeString &value0, + const UnicodeString &value1, + UnicodeString &appendTo, UErrorCode &errorCode) const { + const UnicodeString *values[] = { &value0, &value1 }; + return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode); } UnicodeString& SimplePatternFormatter::format( - const UnicodeString &arg0, - const UnicodeString &arg1, - const UnicodeString &arg2, - UnicodeString &appendTo, - UErrorCode &status) const { - const UnicodeString *params[] = {&arg0, &arg1, &arg2}; - return formatAndAppend( - params, - UPRV_LENGTHOF(params), - appendTo, - NULL, - 0, - status); -} - -static void updatePlaceholderOffset( - int32_t placeholderId, - int32_t placeholderOffset, - int32_t *offsetArray, - int32_t offsetArrayLength) { - if (placeholderId < offsetArrayLength) { - offsetArray[placeholderId] = placeholderOffset; - } -} - -static void appendRange( - const UnicodeString &src, - int32_t start, - int32_t end, - UnicodeString &dest) { - // This check improves performance significantly. 
- if (start == end) { - return; - } - dest.append(src, start, end - start); + const UnicodeString &value0, + const UnicodeString &value1, + const UnicodeString &value2, + UnicodeString &appendTo, UErrorCode &errorCode) const { + const UnicodeString *values[] = { &value0, &value1, &value2 }; + return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode); } UnicodeString& SimplePatternFormatter::formatAndAppend( - const UnicodeString * const *placeholderValues, - int32_t placeholderValueCount, + const UnicodeString *const *values, int32_t valuesLength, UnicodeString &appendTo, - int32_t *offsetArray, - int32_t offsetArrayLength, - UErrorCode &status) const { - if (U_FAILURE(status)) { + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return appendTo; } - if (isInvalidArray(placeholderValues, placeholderValueCount) - || isInvalidArray(offsetArray, offsetArrayLength)) { - status = U_ILLEGAL_ARGUMENT_ERROR; + if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) || + valuesLength < getPlaceholderCount()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; return appendTo; } - if (placeholderValueCount < placeholderCount) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; - } - - // Since we are disallowing parameter values that are the same as - // appendTo, we have to check all placeholderValues as opposed to - // the first placeholderCount placeholder values. 
- SimplePatternFormatterPlaceholderValues values( - placeholderValues, placeholderValueCount); - if (values.isAppendToInAnyIndexExcept(appendTo, -1)) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; - } - return formatAndAppend( - values, - appendTo, - offsetArray, - offsetArrayLength); + return format(compiledPattern.getBuffer(), compiledPattern.length(), values, + appendTo, NULL, TRUE, + offsets, offsetsLength, errorCode); } -UnicodeString& SimplePatternFormatter::formatAndReplace( - const UnicodeString * const *placeholderValues, - int32_t placeholderValueCount, +UnicodeString &SimplePatternFormatter::formatAndReplace( + const UnicodeString *const *values, int32_t valuesLength, UnicodeString &result, - int32_t *offsetArray, - int32_t offsetArrayLength, - UErrorCode &status) const { - if (U_FAILURE(status)) { + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return result; } - if (isInvalidArray(placeholderValues, placeholderValueCount) - || isInvalidArray(offsetArray, offsetArrayLength)) { - status = U_ILLEGAL_ARGUMENT_ERROR; + if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; return result; } - if (placeholderValueCount < placeholderCount) { - status = U_ILLEGAL_ARGUMENT_ERROR; + const UChar *cp = compiledPattern.getBuffer(); + int32_t cpLength = compiledPattern.length(); + if (valuesLength < getPlaceholderCount(cp, cpLength)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; return result; } - SimplePatternFormatterPlaceholderValues values( - placeholderValues, placeholderCount); - int32_t placeholderAtStart = getUniquePlaceholderAtStart(); - // If pattern starts with a unique placeholder and that placeholder - // value is result, we may be able to optimize by just appending to result. 
- if (placeholderAtStart >= 0 - && placeholderValues[placeholderAtStart] == &result) { - - // If result is the value for other placeholders, call off optimization. - if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) { - values.snapshotAppendTo(result); - result.remove(); - return formatAndAppend( - values, - result, - offsetArray, - offsetArrayLength); - } - - // Otherwise we can optimize - formatAndAppend( - values, - result, - offsetArray, - offsetArrayLength); - - // We have to make the offset for the placeholderAtStart - // placeholder be 0. Otherwise it would be the length of the - // previous value of result. - if (offsetArrayLength > placeholderAtStart) { - offsetArray[placeholderAtStart] = 0; - } - return result; - } - if (values.isAppendToInAnyIndexExcept(result, -1)) { - values.snapshotAppendTo(result); - } - result.remove(); - return formatAndAppend( - values, - result, - offsetArray, - offsetArrayLength); -} - -UnicodeString& SimplePatternFormatter::formatAndAppend( - const SimplePatternFormatterPlaceholderValues &values, - UnicodeString &appendTo, - int32_t *offsetArray, - int32_t offsetArrayLength) const { - for (int32_t i = 0; i < offsetArrayLength; ++i) { - offsetArray[i] = -1; - } - if (placeholderSize == 0) { - appendTo.append(noPlaceholders); - return appendTo; - } - appendRange( - noPlaceholders, - 0, - placeholders[0].offset, - appendTo); - updatePlaceholderOffset( - placeholders[0].id, - appendTo.length(), - offsetArray, - offsetArrayLength); - const UnicodeString *placeholderValue = &values.get(placeholders[0].id); - if (placeholderValue != &appendTo) { - appendTo.append(*placeholderValue); - } - for (int32_t i = 1; i < placeholderSize; ++i) { - appendRange( - noPlaceholders, - placeholders[i - 1].offset, - placeholders[i].offset, - appendTo); - updatePlaceholderOffset( - placeholders[i].id, - appendTo.length(), - offsetArray, - offsetArrayLength); - placeholderValue = &values.get(placeholders[i].id); - if (placeholderValue 
!= &appendTo) { - appendTo.append(*placeholderValue); + // If the pattern starts with an argument whose value is the same object + // as the result, then we keep the result contents and append to it. + // Otherwise we replace its contents. + int32_t firstArg = -1; + // If any non-initial argument value is the same object as the result, + // then we first copy its contents and use that instead while formatting. + UnicodeString resultCopy; + if (getPlaceholderCount(cp, cpLength) > 0) { + for (int32_t i = 1; i < cpLength;) { + int32_t n = cp[i++]; + if (n < ARG_NUM_LIMIT) { + if (values[n] == &result) { + if (i == 2) { + firstArg = n; + } else if (resultCopy.isEmpty() && !result.isEmpty()) { + resultCopy = result; + } + } + } else { + i += n - ARG_NUM_LIMIT; + } } } - appendRange( - noPlaceholders, - placeholders[placeholderSize - 1].offset, - noPlaceholders.length(), - appendTo); - return appendTo; + if (firstArg < 0) { + result.remove(); + } + return format(cp, cpLength, values, + result, &resultCopy, FALSE, + offsets, offsetsLength, errorCode); } -int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const { - if (placeholderSize == 0 - || firstPlaceholderReused || placeholders[0].offset != 0) { - return -1; +UnicodeString SimplePatternFormatter::getTextWithNoPlaceholders( + const UChar *compiledPattern, int32_t compiledPatternLength) { + int32_t capacity = compiledPatternLength - 1 - + getPlaceholderCount(compiledPattern, compiledPatternLength); + UnicodeString sb(capacity, 0, 0); // Java: StringBuilder + for (int32_t i = 1; i < compiledPatternLength;) { + int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT; + if (segmentLength > 0) { + sb.append(compiledPattern + i, segmentLength); + i += segmentLength; + } } - return placeholders[0].id; + return sb; } -int32_t SimplePatternFormatter::ensureCapacity( - int32_t desiredCapacity, int32_t allocationSize) { - if (allocationSize < desiredCapacity) { - allocationSize = desiredCapacity; +UnicodeString 
&SimplePatternFormatter::format( + const UChar *compiledPattern, int32_t compiledPatternLength, + const UnicodeString *const *values, + UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, + int32_t *offsets, int32_t offsetsLength, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return result; } - if (desiredCapacity <= placeholders.getCapacity()) { - return desiredCapacity; + for (int32_t i = 0; i < offsetsLength; i++) { + offsets[i] = -1; } - // allocate new buffer - if (placeholders.resize(allocationSize, placeholderSize) == NULL) { - return placeholders.getCapacity(); + for (int32_t i = 1; i < compiledPatternLength;) { + int32_t n = compiledPattern[i++]; + if (n < ARG_NUM_LIMIT) { + const UnicodeString *value = values[n]; + if (value == NULL) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + if (value == &result) { + if (forbidResultAsValue) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + if (i == 2) { + // We are appending to result which is also the first value object. 
+ if (n < offsetsLength) { + offsets[n] = 0; + } + } else { + if (n < offsetsLength) { + offsets[n] = result.length(); + } + result.append(*resultCopy); + } + } else { + if (n < offsetsLength) { + offsets[n] = result.length(); + } + result.append(*value); + } + } else { + int32_t length = n - ARG_NUM_LIMIT; + result.append(compiledPattern + i, length); + i += length; + } } - return desiredCapacity; + return result; } -UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) { - if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) { - return FALSE; - } - ++placeholderSize; - PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1]; - placeholderEnd->offset = offset; - placeholderEnd->id = id; - if (id >= placeholderCount) { - placeholderCount = id + 1; - } - if (placeholderSize > 1 - && placeholders[placeholderSize - 1].id == placeholders[0].id) { - firstPlaceholderReused = TRUE; - } - return TRUE; -} - U_NAMESPACE_END diff --git a/icu4c/source/common/simplepatternformatter.h b/icu4c/source/common/simplepatternformatter.h index 782a29c09e4..d633e1ea493 100644 --- a/icu4c/source/common/simplepatternformatter.h +++ b/icu4c/source/common/simplepatternformatter.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 2014-2015, International Business Machines +* Copyright (C) 2014-2016, International Business Machines * Corporation and others. All Rights Reserved. 
****************************************************************************** * simplepatternformatter.h @@ -9,71 +9,72 @@ #ifndef __SIMPLEPATTERNFORMATTER_H__ #define __SIMPLEPATTERNFORMATTER_H__ -#define EXPECTED_PLACEHOLDER_COUNT 3 - -#include "cmemory.h" #include "unicode/utypes.h" #include "unicode/unistr.h" U_NAMESPACE_BEGIN -class SimplePatternFormatterPlaceholderValues; - -struct PlaceholderInfo { - int32_t id; - int32_t offset; -}; - /** - * Compiled version of a pattern string such as "{1} was born in {0}". - *

- * Using SimplePatternFormatter is both faster and safer than adhoc replacement. - * They are faster because they are precompiled; they are safer because they - * account for curly braces escaped by apostrophe ('). - * - * Placeholders are of the form \{[0-9]+\}. If a curly brace is preceded - * by a single quote, it becomes a curly brace instead of the start of a - * placeholder. Two single quotes resolve to one single quote. - *

+ * Formats simple patterns like "{1} was born in {0}". + * Minimal subset of MessageFormat; fast, simple, minimal dependencies. + * Supports only numbered arguments with no type nor style parameters, + * and formats only string values. + * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior. + * + * Constructors and compile functions set a failure UErrorCode for syntax errors + * and for too few or too many arguments/placeholders. + * + * SimplePatternFormatter objects change only via compile functions or assignment and can otherwise be cached like strings. + * * Example: *

- * SimplePatternFormatter fmt("{1} '{born} in {0}");
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * SimplePatternFormatter fmt("{1} '{born}' in {0}", errorCode);
  * UnicodeString result;
- * UErrorCode status = U_ZERO_ERROR;
- * // Evaluates to: "paul {born} in england"
- * fmt.format("england", "paul", result, status);
+ *
+ * // Output: "paul {born} in england"
+ * fmt.format("england", "paul", result, errorCode);
  * 
+ * + * @see MessageFormat + * @see UMessagePatternApostropheMode */ class U_COMMON_API SimplePatternFormatter : public UMemory { public: /** - * Default constructor + * Default constructor. */ - SimplePatternFormatter(); + SimplePatternFormatter() : compiledPattern((UChar)0) {} /** - * Constructs from a pattern. Will never fail if pattern has three or - * fewer placeholders in it. - */ - explicit SimplePatternFormatter(const UnicodeString& pattern); - - /** - * Constructs from a pattern. Will never fail if pattern has three or - * fewer placeholders in it. + * Constructs a formatter from the pattern string. * + * @param pattern The pattern string. + */ + explicit SimplePatternFormatter(const UnicodeString& pattern, UErrorCode &errorCode) { + compile(pattern, errorCode); + } + + /** + * Constructs a formatter from the pattern string. + * + * @param pattern The pattern string. * @param min The pattern must have at least this many placeholders. * @param max The pattern must have at most this many placeholders. */ SimplePatternFormatter(const UnicodeString& pattern, int32_t min, int32_t max, - UErrorCode &errorCode); + UErrorCode &errorCode) { + compileMinMaxPlaceholders(pattern, min, max, errorCode); + } /** * Copy constructor. */ - SimplePatternFormatter(const SimplePatternFormatter& other); + SimplePatternFormatter(const SimplePatternFormatter& other) + : compiledPattern(other.compiledPattern) {} /** - * Assignment operator + * Assignment operator. */ SimplePatternFormatter &operator=(const SimplePatternFormatter& other); @@ -83,179 +84,165 @@ public: ~SimplePatternFormatter(); /** - * Compiles pattern and makes this object represent pattern. + * Changes this object according to the new pattern. * - * Returns TRUE on success; FALSE on failure. Will not fail if - * there are three or fewer placeholders in pattern. May fail with - * U_MEMORY_ALLOCATION_ERROR if there are more than three placeholders. + * @param pattern The pattern string. 
+ * @return TRUE if U_SUCCESS(errorCode). */ - UBool compile(const UnicodeString &pattern, UErrorCode &status) { - return compileMinMaxPlaceholders(pattern, 0, INT32_MAX, status); + UBool compile(const UnicodeString &pattern, UErrorCode &errorCode) { + return compileMinMaxPlaceholders(pattern, 0, INT32_MAX, errorCode); } /** - * Compiles pattern and makes this object represent pattern. - * - * Returns TRUE on success; FALSE on failure. Will not fail if - * there are three or fewer placeholders in pattern. May fail with - * U_MEMORY_ALLOCATION_ERROR if there are more than three placeholders. + * Changes this object according to the new pattern. * + * @param pattern The pattern string. * @param min The pattern must have at least this many placeholders. * @param max The pattern must have at most this many placeholders. + * @return TRUE if U_SUCCESS(errorCode). */ UBool compileMinMaxPlaceholders(const UnicodeString &pattern, - int32_t min, int32_t max, UErrorCode &status); + int32_t min, int32_t max, UErrorCode &errorCode); /** - * Returns (maxPlaceholderId + 1). For example - * SimplePatternFormatter("{0} {2}").getPlaceholderCount() - * evaluates to 3. - * Callers use this function to find out how many values this object - * expects when formatting. + * @return The max argument number/placeholder ID + 1. */ int32_t getPlaceholderCount() const { - return placeholderCount; + return getPlaceholderCount(compiledPattern.getBuffer(), compiledPattern.length()); } /** - * Returns this pattern with none of the placeholders. - */ - const UnicodeString &getPatternWithNoPlaceholders() const { - return noPlaceholders; - } - - /** - * Formats given value. arg0 cannot be appendTo. - */ - UnicodeString &format( - const UnicodeString &args0, - UnicodeString &appendTo, - UErrorCode &status) const; - - /** - * Formats given values. Neither arg0 nor arg1 can be appendTo. 
- */ - UnicodeString &format( - const UnicodeString &args0, - const UnicodeString &args1, - UnicodeString &appendTo, - UErrorCode &status) const; - - /** - * Formats given values. Neither arg0, arg1, nor arg2 can be appendTo. - */ - UnicodeString &format( - const UnicodeString &args0, - const UnicodeString &args1, - const UnicodeString &args2, - UnicodeString &appendTo, - UErrorCode &status) const; - - /** - * Formats given values. + * Formats the given value, appending to the appendTo builder. + * The placeholder value must not be the same object as appendTo. + * getPlaceholderCount() must be at most 1. * - * The caller retains ownership of all pointers. - * @param placeholderValues 1st one corresponds to {0}; 2nd to {1}; - * 3rd to {2} etc. If any of these point to appendTo, this method - * sets status to U_ILLEGAL_ARGUMENT_ERROR. - * @param placeholderValueCount the number of placeholder values - * must be at least large enough to provide values for all placeholders - * in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR. - * @param appendTo resulting string appended here. - * @param offsetArray The offset of each placeholder value in appendTo - * stored here. The first value gets the offset of the value for {0}; - * the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding - * placeholder does not exist in this object. If caller is not - * interested in offsets, it may pass NULL and 0 for the length. - * @param offsetArrayLength the size of offsetArray. If less than - * placeholderValueCount only the first offsets get recorded. If - * greater than placeholderValueCount, then extra values in offset - * array are set to -1. - * @param status any error stored here. + * @param value0 Value for argument {0}. + * @param appendTo Gets the formatted pattern and value appended. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. 
+ * @return appendTo + */ + UnicodeString &format( + const UnicodeString &value0, + UnicodeString &appendTo, UErrorCode &errorCode) const; + + /** + * Formats the given values, appending to the appendTo builder. + * A placeholder value must not be the same object as appendTo. + * getPlaceholderCount() must be at most 2. + * + * @param value0 Value for argument {0}. + * @param value1 Value for argument {1}. + * @param appendTo Gets the formatted pattern and values appended. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo + */ + UnicodeString &format( + const UnicodeString &value0, + const UnicodeString &value1, + UnicodeString &appendTo, UErrorCode &errorCode) const; + + /** + * Formats the given values, appending to the appendTo builder. + * A placeholder value must not be the same object as appendTo. + * getPlaceholderCount() must be at most 3. + * + * @param value0 Value for argument {0}. + * @param value1 Value for argument {1}. + * @param value2 Value for argument {2}. + * @param appendTo Gets the formatted pattern and values appended. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo + */ + UnicodeString &format( + const UnicodeString &value0, + const UnicodeString &value1, + const UnicodeString &value2, + UnicodeString &appendTo, UErrorCode &errorCode) const; + + /** + * Formats the given values, appending to the appendTo string. + * + * @param values The placeholder values. + * A placeholder value must not be the same object as appendTo. + * Can be NULL if valuesLength==getPlaceholderCount()==0. + * @param valuesLength The length of the values array. + * Must be at least getPlaceholderCount(). + * @param appendTo Gets the formatted pattern and values appended. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be shorter or longer than values. 
Can be NULL if offsetsLength==0. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param offsetsLength The length of the offsets array. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo */ UnicodeString &formatAndAppend( - const UnicodeString * const *placeholderValues, - int32_t placeholderValueCount, + const UnicodeString *const *values, int32_t valuesLength, UnicodeString &appendTo, - int32_t *offsetArray, - int32_t offsetArrayLength, - UErrorCode &status) const; + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; /** - * Formats given values. + * Formats the given values, replacing the contents of the result string. + * May optimize by actually appending to the result if it is the same object + * as the initial argument's corresponding value. * - * The caller retains ownership of all pointers. - * @param placeholderValues 1st one corresponds to {0}; 2nd to {1}; - * 3rd to {2} etc. May include pointer to result in which case - * the previous value of result is used for the corresponding - * placeholder. - * @param placeholderValueCount the number of placeholder values - * must be at least large enough to provide values for all placeholders - * in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR. - * @param result resulting string stored here overwriting any previous - * value. - * @param offsetArray The offset of each placeholder value in result - * stored here. The first value gets the offset of the value for {0}; - * the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding - * placeholder does not exist in this object. If caller is not - * interested in offsets, it may pass NULL and 0 for the length. - * @param offsetArrayLength the size of offsetArray. If less than - * placeholderValueCount only the first offsets get recorded. 
If - * greater than placeholderValueCount, then extra values in offset - * array are set to -1. - * @param status any error stored here. + * @param values The placeholder values. + * A placeholder value may be the same object as result. + * Can be NULL if valuesLength==getPlaceholderCount()==0. + * @param valuesLength The length of the values array. + * Must be at least getPlaceholderCount(). + * @param result Gets its contents replaced by the formatted pattern and values. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be shorter or longer than values. Can be NULL if offsetsLength==0. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param offsetsLength The length of the offsets array. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return result */ UnicodeString &formatAndReplace( - const UnicodeString * const *placeholderValues, - int32_t placeholderValueCount, + const UnicodeString *const *values, int32_t valuesLength, UnicodeString &result, - int32_t *offsetArray, - int32_t offsetArrayLength, - UErrorCode &status) const; + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; + + /** + * Returns the pattern text with none of the placeholders. + * Like formatting with all-empty string values. + */ + UnicodeString getTextWithNoPlaceholders() const { + return getTextWithNoPlaceholders(compiledPattern.getBuffer(), compiledPattern.length()); + } + private: - UnicodeString noPlaceholders; - MaybeStackArray placeholders; - int32_t placeholderSize; - int32_t placeholderCount; - UBool firstPlaceholderReused; + /** + * Binary representation of the compiled pattern. + * Index 0: One more than the highest argument number. + * Followed by zero or more arguments or literal-text segments. + * + * An argument is stored as its number, less than ARG_NUM_LIMIT. 
+ * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, + * followed by that many chars. + */ + UnicodeString compiledPattern; - // A Placeholder value that is the same as appendTo is treated as the - // empty string. - UnicodeString &formatAndAppend( - const SimplePatternFormatterPlaceholderValues &placeholderValues, - UnicodeString &appendTo, - int32_t *offsetArray, - int32_t offsetArrayLength) const; + static inline int32_t getPlaceholderCount(const UChar *compiledPattern, + int32_t compiledPatternLength) { + return compiledPatternLength == 0 ? 0 : compiledPattern[0]; + } - // Returns the placeholder at the beginning of this pattern - // (e.g 3 for placeholder {3}). Returns -1 if the beginning of pattern - // is text or if the placeholder at the beginning of this pattern - // is used again in the middle of the pattern. - int32_t getUniquePlaceholderAtStart() const; - - // ensureCapacity ensures that the capacity of the placeholders array - // is desiredCapacity. If ensureCapacity must resize the placeholders - // array, the first placeholderSize elements stay in the array. Note - // that ensureCapcity NEVER changes the value of placeholderSize only - // the capacity of the placeholders array. - // If there is no memory allocation error when resizing, this - // function returns desiredCapacity. If there is a memory allocation - // error, this function leaves the placeholders array unchanged and - // returns the smaller, old capacity. ensureCapacity resizes only if - // the current capacity of placeholders array is less than desiredCapacity. - // Otherwise, it leaves the placeholders array unchanged. If caller - // specifies an allocation size, then it must be at least as large as - // desiredCapacity. In that case, if ensureCapacity resizes, it will - // allocate allocationSize spots instead of desiredCapacity spots in - // the array. 
If caller is calling ensureCapacity in a loop while adding - // elements, it is recommended that it use an allocationSize of - // approximately twice desiredCapacity to avoid memory allocation with - // every call to ensureCapacity. - int32_t ensureCapacity(int32_t desiredCapacity, int32_t allocationSize=0); + static UnicodeString getTextWithNoPlaceholders(const UChar *compiledPattern, int32_t compiledPatternLength); - // Records the offset of an individual placeholder in the noPlaceholders - // string. - UBool addPlaceholder(int32_t id, int32_t offset); + static UnicodeString &format( + const UChar *compiledPattern, int32_t compiledPatternLength, + const UnicodeString *const *values, + UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, + int32_t *offsets, int32_t offsetsLength, + UErrorCode &errorCode); }; U_NAMESPACE_END diff --git a/icu4c/source/common/unicode/listformatter.h b/icu4c/source/common/unicode/listformatter.h index e48faaa1276..cb5d50d7a2e 100644 --- a/icu4c/source/common/unicode/listformatter.h +++ b/icu4c/source/common/unicode/listformatter.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2012-2014, International Business Machines +* Copyright (C) 2012-2016, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -145,7 +145,7 @@ class U_COMMON_API ListFormatter : public UObject{ /** * @internal constructor made public for testing. */ - ListFormatter(const ListFormatData &data); + ListFormatter(const ListFormatData &data, UErrorCode &errorCode); /** * @internal constructor made public for testing. 
*/ diff --git a/icu4c/source/i18n/measfmt.cpp b/icu4c/source/i18n/measfmt.cpp index 1af72e9da8d..c0f6b66f37f 100644 --- a/icu4c/source/i18n/measfmt.cpp +++ b/icu4c/source/i18n/measfmt.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2004-2015, International Business Machines +* Copyright (c) 2004-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -1128,7 +1128,7 @@ int32_t MeasureFormat::withPerUnitAndAppend( if (U_FAILURE(status)) { return offset; } - UnicodeString perUnitString = pattern->getPatternWithNoPlaceholders(); + UnicodeString perUnitString = pattern->getTextWithNoPlaceholders(); perUnitString.trim(); const UnicodeString *params[] = {&formatted, &perUnitString}; perFormatter->formatAndAppend( diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp index 25474c798eb..62cdd34c823 100644 --- a/icu4c/source/test/intltest/listformattertest.cpp +++ b/icu4c/source/test/intltest/listformattertest.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2012-2014, International Business Machines +* Copyright (C) 2012-2016, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -197,9 +197,10 @@ void ListFormatterTest::TestOutOfOrderPatterns() { four + " in the last after " + three + " after " + two + " after the first " + one }; + UErrorCode errorCode = U_ZERO_ERROR; ListFormatData data("{1} after {0}", "{1} after the first {0}", "{1} after {0}", "{1} in the last after {0}"); - ListFormatter formatter(data); + ListFormatter formatter(data, errorCode); UnicodeString input1[] = {one}; CheckFormatting(&formatter, input1, 1, results[0]); diff --git a/icu4c/source/test/intltest/quantityformattertest.cpp b/icu4c/source/test/intltest/quantityformattertest.cpp index c641de2a686..79bd29d6321 100644 --- a/icu4c/source/test/intltest/quantityformattertest.cpp +++ b/icu4c/source/test/intltest/quantityformattertest.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2014-2015, International Business Machines Corporation and +* Copyright (C) 2014-2016, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* * @@ -59,19 +59,19 @@ void QuantityFormatterTest::TestBasic() { assertEquals( "getByVariant", - fmt.getByVariant("bad variant")->getPatternWithNoPlaceholders(), + fmt.getByVariant("bad variant")->getTextWithNoPlaceholders(), " pounds"); assertEquals( "getByVariant", - fmt.getByVariant("other")->getPatternWithNoPlaceholders(), + fmt.getByVariant("other")->getTextWithNoPlaceholders(), " pounds"); assertEquals( "getByVariant", - fmt.getByVariant("one")->getPatternWithNoPlaceholders(), + fmt.getByVariant("one")->getTextWithNoPlaceholders(), " pound"); assertEquals( "getByVariant", - fmt.getByVariant("few")->getPatternWithNoPlaceholders(), + fmt.getByVariant("few")->getTextWithNoPlaceholders(), " pounds"); // Test copy constructor @@ -79,15 +79,15 @@ void QuantityFormatterTest::TestBasic() { QuantityFormatter copied(fmt); assertEquals( "copied getByVariant", - copied.getByVariant("other")->getPatternWithNoPlaceholders(), + copied.getByVariant("other")->getTextWithNoPlaceholders(), " pounds"); assertEquals( "copied getByVariant", - copied.getByVariant("one")->getPatternWithNoPlaceholders(), + copied.getByVariant("one")->getTextWithNoPlaceholders(), " pound"); assertEquals( "copied getByVariant", - copied.getByVariant("few")->getPatternWithNoPlaceholders(), + copied.getByVariant("few")->getTextWithNoPlaceholders(), " pounds"); } @@ -97,15 +97,15 @@ void QuantityFormatterTest::TestBasic() { assigned = fmt; assertEquals( "assigned getByVariant", - assigned.getByVariant("other")->getPatternWithNoPlaceholders(), + assigned.getByVariant("other")->getTextWithNoPlaceholders(), " pounds"); assertEquals( "assigned getByVariant", - assigned.getByVariant("one")->getPatternWithNoPlaceholders(), + assigned.getByVariant("one")->getTextWithNoPlaceholders(), " pound"); assertEquals( "assigned getByVariant", - assigned.getByVariant("few")->getPatternWithNoPlaceholders(), + 
assigned.getByVariant("few")->getTextWithNoPlaceholders(), " pounds"); } diff --git a/icu4c/source/test/intltest/simplepatternformattertest.cpp b/icu4c/source/test/intltest/simplepatternformattertest.cpp index 27d3316425f..1e067995691 100644 --- a/icu4c/source/test/intltest/simplepatternformattertest.cpp +++ b/icu4c/source/test/intltest/simplepatternformattertest.cpp @@ -1,13 +1,16 @@ /* ******************************************************************************* -* Copyright (C) 2014, International Business Machines Corporation and * -* others. All Rights Reserved. * +* Copyright (C) 2014-2016, International Business Machines Corporation and +* others. All Rights Reserved. ******************************************************************************* * * File SIMPLEPATTERNFORMATTERTEST.CPP * ******************************************************************************** */ + +#include "unicode/msgfmt.h" +#include "unicode/unistr.h" #include "cstring.h" #include "intltest.h" #include "simplepatternformatter.h" @@ -17,17 +20,20 @@ public: SimplePatternFormatterTest() { } void TestNoPlaceholders(); + void TestSyntaxErrors(); void TestOnePlaceholder(); + void TestBigPlaceholder(); void TestManyPlaceholders(); void TestTooFewPlaceholderValues(); void TestBadArguments(); - void TestGetPatternWithNoPlaceholders(); + void TestTextWithNoPlaceholders(); void TestFormatReplaceNoOptimization(); void TestFormatReplaceNoOptimizationLeadingText(); void TestFormatReplaceOptimization(); void TestFormatReplaceNoOptimizationLeadingPlaceholderUsedTwice(); void TestFormatReplaceOptimizationNoOffsets(); void TestFormatReplaceNoOptimizationNoOffsets(); + void TestQuotingLikeMessageFormat(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); private: void verifyOffsets( @@ -39,39 +45,55 @@ private: void SimplePatternFormatterTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) { TESTCASE_AUTO_BEGIN; 
TESTCASE_AUTO(TestNoPlaceholders); + TESTCASE_AUTO(TestSyntaxErrors); TESTCASE_AUTO(TestOnePlaceholder); + TESTCASE_AUTO(TestBigPlaceholder); TESTCASE_AUTO(TestManyPlaceholders); TESTCASE_AUTO(TestTooFewPlaceholderValues); TESTCASE_AUTO(TestBadArguments); - TESTCASE_AUTO(TestGetPatternWithNoPlaceholders); + TESTCASE_AUTO(TestTextWithNoPlaceholders); TESTCASE_AUTO(TestFormatReplaceNoOptimization); TESTCASE_AUTO(TestFormatReplaceNoOptimizationLeadingText); TESTCASE_AUTO(TestFormatReplaceOptimization); TESTCASE_AUTO(TestFormatReplaceNoOptimizationLeadingPlaceholderUsedTwice); TESTCASE_AUTO(TestFormatReplaceOptimizationNoOffsets); TESTCASE_AUTO(TestFormatReplaceNoOptimizationNoOffsets); + TESTCASE_AUTO(TestQuotingLikeMessageFormat); TESTCASE_AUTO_END; } void SimplePatternFormatterTest::TestNoPlaceholders() { UErrorCode status = U_ZERO_ERROR; - SimplePatternFormatter fmt("This doesn''t have templates '{0}"); - assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount()); + SimplePatternFormatter fmt("This doesn''t have templates '{0}", status); + assertEquals("getPlaceholderCount", 0, fmt.getPlaceholderCount()); UnicodeString appendTo; assertEquals( "format", "This doesn't have templates {0}", fmt.format("unused", appendTo, status)); - fmt.compile("This has {} bad {012d placeholders", status); - assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount()); appendTo.remove(); + int32_t offsets[] = { 0 }; assertEquals( - "format", - "This has {} bad {012d placeholders", - fmt.format("unused", appendTo, status)); + "formatAndAppend", + "This doesn't have templates {0}", + fmt.formatAndAppend(NULL, 0, appendTo, offsets, 1, status)); + assertEquals("formatAndAppend offsets[0]", -1, offsets[0]); + assertEquals( + "formatAndReplace", + "This doesn't have templates {0}", + fmt.formatAndReplace(NULL, 0, appendTo, NULL, 0, status)); assertSuccess("Status", status); } +void SimplePatternFormatterTest::TestSyntaxErrors() { + UErrorCode status = U_ZERO_ERROR; + 
SimplePatternFormatter fmt("{}", status); + assertEquals("syntax error {}", U_ILLEGAL_ARGUMENT_ERROR, status); + status = U_ZERO_ERROR; + fmt.compile("{12d", status); + assertEquals("syntax error {12d", U_ILLEGAL_ARGUMENT_ERROR, status); +} + void SimplePatternFormatterTest::TestOnePlaceholder() { UErrorCode status = U_ZERO_ERROR; SimplePatternFormatter fmt; @@ -105,6 +127,24 @@ void SimplePatternFormatterTest::TestOnePlaceholder() { assertSuccess("Status", status); } +void SimplePatternFormatterTest::TestBigPlaceholder() { + UErrorCode status = U_ZERO_ERROR; + SimplePatternFormatter fmt("a{20}c", status); + if (!assertSuccess("Status", status)) { + return; + } + assertEquals("{20} count", 21, fmt.getPlaceholderCount()); + UnicodeString b("b"); + UnicodeString *values[] = { + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &b + }; + UnicodeString result; + assertEquals("{20}=b", "abc", fmt.formatAndAppend(values, 21, result, NULL, 0, status)); + assertSuccess("Status", status); +} + void SimplePatternFormatterTest::TestManyPlaceholders() { UErrorCode status = U_ZERO_ERROR; SimplePatternFormatter fmt; @@ -203,12 +243,12 @@ void SimplePatternFormatterTest::TestManyPlaceholders() { } void SimplePatternFormatterTest::TestTooFewPlaceholderValues() { - SimplePatternFormatter fmt("{0} and {1}"); + UErrorCode status = U_ZERO_ERROR; + SimplePatternFormatter fmt("{0} and {1}", status); UnicodeString appendTo; UnicodeString firstValue; UnicodeString *params[] = {&firstValue}; - UErrorCode status = U_ZERO_ERROR; fmt.format( firstValue, appendTo, status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { @@ -231,9 +271,9 @@ void SimplePatternFormatterTest::TestTooFewPlaceholderValues() { } void SimplePatternFormatterTest::TestBadArguments() { - SimplePatternFormatter fmt("pickle"); - UnicodeString appendTo; UErrorCode status = U_ZERO_ERROR; + SimplePatternFormatter fmt("pickle", status); + UnicodeString 
appendTo; // These succeed fmt.formatAndAppend( @@ -247,7 +287,7 @@ void SimplePatternFormatterTest::TestBadArguments() { fmt.formatAndAppend( NULL, 1, appendTo, NULL, 0, status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { - errln("Expected U_ILLEGAL_ARGUMENT_ERROR"); + errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndAppend() values=NULL but length=1"); } status = U_ZERO_ERROR; @@ -255,16 +295,17 @@ void SimplePatternFormatterTest::TestBadArguments() { fmt.formatAndAppend( NULL, 0, appendTo, NULL, 1, status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { - errln("Expected U_ILLEGAL_ARGUMENT_ERROR"); + errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndAppend() offsets=NULL but length=1"); } status = U_ZERO_ERROR; // fails because appendTo used as a parameter value - const UnicodeString *params[] = {&appendTo}; - fmt.formatAndAppend( - params, UPRV_LENGTHOF(params), appendTo, NULL, 0, status); + SimplePatternFormatter fmt2("Placeholders {0} and {1}", status); + UnicodeString frog("frog"); + const UnicodeString *params[] = { &appendTo, &frog }; + fmt2.formatAndAppend(params, 2, appendTo, NULL, 0, status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { - errln("Expected U_ILLEGAL_ARGUMENT_ERROR"); + errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndAppend() value=appendTo"); } status = U_ZERO_ERROR; @@ -273,7 +314,7 @@ void SimplePatternFormatterTest::TestBadArguments() { fmt.formatAndReplace( NULL, 1, appendTo, NULL, 0, status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { - errln("Expected U_ILLEGAL_ARGUMENT_ERROR"); + errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndReplace() values=NULL but length=1"); } status = U_ZERO_ERROR; @@ -281,14 +322,15 @@ void SimplePatternFormatterTest::TestBadArguments() { fmt.formatAndReplace( NULL, 0, appendTo, NULL, 1, status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { - errln("Expected U_ILLEGAL_ARGUMENT_ERROR"); + errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndReplace() offsets=NULL but length=1"); } } -void 
SimplePatternFormatterTest::TestGetPatternWithNoPlaceholders() { - SimplePatternFormatter fmt("{0} has no {1} placeholders."); +void SimplePatternFormatterTest::TestTextWithNoPlaceholders() { + UErrorCode status = U_ZERO_ERROR; + SimplePatternFormatter fmt("{0} has no {1} placeholders.", status); assertEquals( - "", " has no placeholders.", fmt.getPatternWithNoPlaceholders()); + "", " has no placeholders.", fmt.getTextWithNoPlaceholders()); } void SimplePatternFormatterTest::TestFormatReplaceNoOptimization() { @@ -438,7 +480,7 @@ void SimplePatternFormatterTest::TestFormatReplaceOptimizationNoOffsets() { void SimplePatternFormatterTest::TestFormatReplaceNoOptimizationNoOffsets() { UErrorCode status = U_ZERO_ERROR; - SimplePatternFormatter fmt("Placeholders {0} and {1}"); + SimplePatternFormatter fmt("Placeholders {0} and {1}", status); UnicodeString result("previous:"); UnicodeString frog("frog"); const UnicodeString *params[] = {&result, &frog}; @@ -455,6 +497,20 @@ void SimplePatternFormatterTest::TestFormatReplaceNoOptimizationNoOffsets() { assertSuccess("Status", status); } +void SimplePatternFormatterTest::TestQuotingLikeMessageFormat() { + UErrorCode status = U_ZERO_ERROR; + UnicodeString pattern = "{0} don't can''t '{5}''}{a' again '}'{1} to the '{end"; + SimplePatternFormatter spf(pattern, status); + MessageFormat mf(pattern, Locale::getRoot(), status); + UnicodeString expected = "X don't can't {5}'}{a again }Y to the {end"; + UnicodeString x("X"), y("Y"); + Formattable values[] = { x, y }; + UnicodeString result; + FieldPosition ignore(FieldPosition::DONT_CARE); + assertEquals("MessageFormat", expected, mf.format(values, 2, result, ignore, status)); + assertEquals("SimplePatternFormatter", expected, spf.format(x, y, result.remove(), status)); +} + void SimplePatternFormatterTest::verifyOffsets( const int32_t *expected, const int32_t *actual, int32_t count) { for (int32_t i = 0; i < count; ++i) { @@ -467,4 +523,3 @@ void 
SimplePatternFormatterTest::verifyOffsets( extern IntlTest *createSimplePatternFormatterTest() { return new SimplePatternFormatterTest(); } -