ICU-12032 rewrite SimplePatternFormatter: quoting like MessageFormat, shorter, create fewer objects

X-SVN-Rev: 38155
This commit is contained in:
Markus Scherer 2016-01-07 00:20:53 +00:00
parent a7119fa972
commit 9eb5fc1b11
8 changed files with 503 additions and 685 deletions

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2013-2015, International Business Machines
* Copyright (C) 2013-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -43,11 +43,11 @@ ListFormatInternal(
middlePattern(middle, 2, 2, errorCode),
endPattern(end, 2, 2, errorCode) {}
// Initializes twoPattern, startPattern, middlePattern and endPattern from the
// same-named fields of data. This overload takes no error code.
ListFormatInternal(const ListFormatData &data) :
twoPattern(data.twoPattern),
startPattern(data.startPattern),
middlePattern(data.middlePattern),
endPattern(data.endPattern) { }
// Initializes twoPattern, startPattern, middlePattern and endPattern from the
// same-named fields of data, handing errorCode to each member's constructor.
ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) :
twoPattern(data.twoPattern, errorCode),
startPattern(data.startPattern, errorCode),
middlePattern(data.middlePattern, errorCode),
endPattern(data.endPattern, errorCode) { }
ListFormatInternal(const ListFormatInternal &other) :
twoPattern(other.twoPattern),
@ -236,8 +236,8 @@ ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *s
return p;
}
ListFormatter::ListFormatter(const ListFormatData& listFormatData) {
owned = new ListFormatInternal(listFormatData);
ListFormatter::ListFormatter(const ListFormatData& listFormatData, UErrorCode &errorCode) {
owned = new ListFormatInternal(listFormatData, errorCode);
data = owned;
}

View file

@ -1,538 +1,313 @@
/*
******************************************************************************
* Copyright (C) 2014-2015, International Business Machines
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* simplepatternformatter.cpp
*/
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "simplepatternformatter.h"
#include "cstring.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
// Tells whether (array, size) cannot describe a usable array:
// either the size is negative, or the size is positive but the pointer is NULL.
// A NULL pointer together with size 0 is accepted.
static UBool isInvalidArray(const void *array, int32_t size) {
    if (size < 0) {
        return TRUE;
    }
    return size != 0 && array == NULL;
}
namespace {
// Parser states used by the switch-based pattern scanner in
// compileMinMaxPlaceholders().
typedef enum SimplePatternFormatterCompileState {
// No apostrophe or open brace is pending.
INIT,
// The previous character was an apostrophe (0x27).
APOSTROPHE,
// An open brace (0x7B) was seen; digits of a placeholder ID are being collected.
PLACEHOLDER
} SimplePatternFormatterCompileState;
/**
* Argument numbers must be smaller than this limit.
* Text segment lengths are offset by this much.
* This is currently the only unused char value in compiled patterns,
* except it is the maximum value of the first unit (max arg +1).
*/
const int32_t ARG_NUM_LIMIT = 0x100;
/**
* Initial and maximum char/UChar value set for a text segment.
* Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
* Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
*/
const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
/**
* Maximum length of a text segment. Longer segments are split into shorter ones.
*/
const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
// Handles parsing placeholders in the pattern string, e.g {4} or {35}
class SimplePatternFormatterIdBuilder {
public:
SimplePatternFormatterIdBuilder() : id(0), idLen(0) { }
~SimplePatternFormatterIdBuilder() { }
// Resets so that this object has seen no placeholder ID.
void reset() { id = 0; idLen = 0; }
// Returns the numeric placeholder ID parsed so far
int32_t getId() const { return id; }
// Appends the numeric placeholder ID parsed so far back to a
// UChar buffer. Used to recover if parser using this object finds
// no closing curly brace.
void appendTo(UChar *buffer, int32_t *len) const;
// Returns true if this object has seen a placeholder ID.
UBool isValid() const { return (idLen > 0); }
// Processes a single digit character. Pattern string parser calls this
// as it processes digits after an opening curly brace.
void add(UChar ch);
private:
int32_t id;
int32_t idLen;
SimplePatternFormatterIdBuilder(
const SimplePatternFormatterIdBuilder &other);
SimplePatternFormatterIdBuilder &operator=(
const SimplePatternFormatterIdBuilder &other);
enum {
APOS = 0x27,
DIGIT_ZERO = 0x30,
DIGIT_ONE = 0x31,
DIGIT_NINE = 0x39,
OPEN_BRACE = 0x7b,
CLOSE_BRACE = 0x7d
};
void SimplePatternFormatterIdBuilder::appendTo(
UChar *buffer, int32_t *len) const {
int32_t origLen = *len;
int32_t kId = id;
for (int32_t i = origLen + idLen - 1; i >= origLen; i--) {
int32_t digit = kId % 10;
buffer[i] = digit + 0x30;
kId /= 10;
}
*len = origLen + idLen;
inline UBool isInvalidArray(const void *array, int32_t length) {
return (length < 0 || (array == NULL && length != 0));
}
// Folds one more decimal digit into the placeholder ID parsed so far and
// counts it. ch is not validated here; the value added is simply (ch - 0x30).
void SimplePatternFormatterIdBuilder::add(UChar ch) {
    const int32_t digitValue = ch - 0x30;
    id = id * 10 + digitValue;
    ++idLen;
}
// Represents placeholder values.
// Represents placeholder values.
// Holds a pointer to a caller-supplied array of UnicodeString pointers (the
// array is not copied) and can hand out a snapshot in place of any entry
// that points at the appendTo string.
class SimplePatternFormatterPlaceholderValues : public UMemory {
public:
// Stores the array pointer and its length; no snapshot exists yet.
SimplePatternFormatterPlaceholderValues(
const UnicodeString * const *values,
int32_t valuesCount);
// Returns TRUE if appendTo value is at any index besides exceptIndex.
// Entries are compared by address, not by content.
UBool isAppendToInAnyIndexExcept(
const UnicodeString &appendTo, int32_t exceptIndex) const;
// For each appendTo value, stores the snapshot of it in its place.
// Remembers the address of appendTo and keeps a copy of its current contents.
void snapshotAppendTo(const UnicodeString &appendTo);
// Returns the placeholder value at index. No range checking performed.
// Returned reference is valid for as long as this object exists.
// If the entry is the string passed to snapshotAppendTo(), the snapshot
// copy is returned instead of the live string.
const UnicodeString &get(int32_t index) const;
private:
// Caller-supplied array of value pointers and its length.
const UnicodeString * const *fValues;
int32_t fValuesCount;
// Address of the snapshotted string, or NULL when no snapshot was taken.
const UnicodeString *fAppendTo;
// Copy of the snapshotted string's contents.
UnicodeString fAppendToCopy;
// Declared private with no definition visible here; presumably to forbid copying.
SimplePatternFormatterPlaceholderValues(
const SimplePatternFormatterPlaceholderValues &);
SimplePatternFormatterPlaceholderValues &operator=(
const SimplePatternFormatterPlaceholderValues &);
};
// Records the caller's values array and its length without copying the array.
// No snapshot exists initially: fAppendTo is NULL and fAppendToCopy is
// default-constructed.
SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues(
const UnicodeString * const *values,
int32_t valuesCount)
: fValues(values),
fValuesCount(valuesCount),
fAppendTo(NULL),
fAppendToCopy() {
}
// Scans the value pointers for one that is the very same object as appendTo
// (address comparison), ignoring the slot at exceptIndex.
// Pass an exceptIndex outside [0, fValuesCount) to check every slot.
UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept(
        const UnicodeString &appendTo, int32_t exceptIndex) const {
    const UnicodeString *target = &appendTo;
    for (int32_t slot = 0; slot < fValuesCount; ++slot) {
        if (slot == exceptIndex) {
            continue;  // the caller asked us not to look at this one
        }
        if (fValues[slot] == target) {
            return TRUE;
        }
    }
    return FALSE;
}
// Captures the current contents of appendTo and remembers its address, so
// that get() can later return the captured copy for any slot pointing at it.
void SimplePatternFormatterPlaceholderValues::snapshotAppendTo(
        const UnicodeString &appendTo) {
    fAppendToCopy = appendTo;
    fAppendTo = &appendTo;
}
// Returns the value for the given slot. index is not range-checked.
// When a snapshot was taken and this slot points at the snapshotted string,
// the snapshot copy is returned instead of the live string.
const UnicodeString &SimplePatternFormatterPlaceholderValues::get(
        int32_t index) const {
    const UnicodeString *value = fValues[index];
    if (fAppendTo != NULL && value == fAppendTo) {
        return fAppendToCopy;
    }
    return *value;
}
// Default constructor: default-constructs noPlaceholders and placeholders,
// sets placeholderSize and placeholderCount to 0, and sets
// firstPlaceholderReused to FALSE.
SimplePatternFormatter::SimplePatternFormatter() :
noPlaceholders(),
placeholders(),
placeholderSize(0),
placeholderCount(0),
firstPlaceholderReused(FALSE) {
}
// Constructs from a pattern string. The members start out in the same empty
// state as after default construction, then the pattern is compiled.
// The error code used for compiling is local and discarded, so a compile
// failure is not reported to the caller.
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern)
        : noPlaceholders(),
          placeholders(),
          placeholderSize(0),
          placeholderCount(0),
          firstPlaceholderReused(FALSE) {
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    compile(pattern, ignoredStatus);
}
// Constructs from a pattern string with placeholder-count limits.
// The members start out in the same empty state as after default
// construction; the pattern, min, max and errorCode are then forwarded
// unchanged to compileMinMaxPlaceholders(), whose return value is ignored
// (failures are visible to the caller through errorCode).
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern,
int32_t min, int32_t max,
UErrorCode &errorCode)
: noPlaceholders(),
placeholders(),
placeholderSize(0),
placeholderCount(0),
firstPlaceholderReused(FALSE) {
compileMinMaxPlaceholders(pattern, min, max, errorCode);
}
// Copy constructor. Copies the literal text, the placeholder count and the
// first-placeholder-reused flag, then duplicates the PlaceholderInfo entries.
SimplePatternFormatter::SimplePatternFormatter(
        const SimplePatternFormatter &other)
        : noPlaceholders(other.noPlaceholders),
          placeholders(),
          placeholderSize(0),
          placeholderCount(other.placeholderCount),
          firstPlaceholderReused(other.firstPlaceholderReused) {
    // Make room first. The number of entries copied below is whatever
    // ensureCapacity() returns, which becomes our placeholderSize.
    placeholderSize = ensureCapacity(other.placeholderSize);
    const PlaceholderInfo *source = other.placeholders.getAlias();
    PlaceholderInfo *destination = placeholders.getAlias();
    uprv_memcpy(destination, source, placeholderSize * sizeof(PlaceholderInfo));
}
} // namespace
// Assignment operator. Returns *this.
SimplePatternFormatter &SimplePatternFormatter::operator=(
const SimplePatternFormatter& other) {
// Self-assignment: nothing to copy.
if (this == &other) {
return *this;
}
noPlaceholders = other.noPlaceholders;
// The number of entries copied below is whatever ensureCapacity() returns.
placeholderSize = ensureCapacity(other.placeholderSize);
placeholderCount = other.placeholderCount;
firstPlaceholderReused = other.firstPlaceholderReused;
// Duplicate the PlaceholderInfo entries into our own array.
uprv_memcpy(
placeholders.getAlias(),
other.placeholders.getAlias(),
placeholderSize * sizeof(PlaceholderInfo));
// Copy the compiled pattern string.
compiledPattern = other.compiledPattern;
return *this;
}
// Destructor; the body is empty.
SimplePatternFormatter::~SimplePatternFormatter() {
}
// Destructor; the body is empty.
SimplePatternFormatter::~SimplePatternFormatter() {}
UBool SimplePatternFormatter::compileMinMaxPlaceholders(
const UnicodeString &pattern,
int32_t min, int32_t max,
UErrorCode &status) {
if (U_FAILURE(status)) {
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return FALSE;
}
// Parse consistent with MessagePattern, but
// - support only simple numbered arguments
// - build a simple binary structure into the result string
const UChar *patternBuffer = pattern.getBuffer();
int32_t patternLength = pattern.length();
UChar *buffer = noPlaceholders.getBuffer(patternLength);
int32_t len = 0;
placeholderSize = 0;
placeholderCount = 0;
SimplePatternFormatterCompileState state = INIT;
SimplePatternFormatterIdBuilder idBuilder;
for (int32_t i = 0; i < patternLength; ++i) {
UChar ch = patternBuffer[i];
switch (state) {
case INIT:
if (ch == 0x27) {
state = APOSTROPHE;
} else if (ch == 0x7B) {
state = PLACEHOLDER;
idBuilder.reset();
// Reserve the first char for the number of arguments.
compiledPattern.setTo((UChar)0);
int32_t textLength = 0;
int32_t maxArg = -1;
UBool inQuote = FALSE;
for (int32_t i = 0; i < patternLength;) {
UChar c = patternBuffer[i++];
if (c == APOS) {
if (i < patternLength && (c = patternBuffer[i]) == APOS) {
// double apostrophe, skip the second one
++i;
} else if (inQuote) {
// skip the quote-ending apostrophe
inQuote = FALSE;
continue;
} else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
// Skip the quote-starting apostrophe, find the end of the quoted literal text.
++i;
inQuote = TRUE;
} else {
buffer[len++] = ch;
// The apostrophe is part of literal text.
c = APOS;
}
break;
case APOSTROPHE:
if (ch == 0x27) {
buffer[len++] = 0x27;
} else if (ch == 0x7B) {
buffer[len++] = 0x7B;
} else if (!inQuote && c == OPEN_BRACE) {
if (textLength > 0) {
compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
(UChar)(ARG_NUM_LIMIT + textLength));
textLength = 0;
}
int32_t argNumber;
if ((i + 1) < patternLength &&
0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
patternBuffer[i + 1] == CLOSE_BRACE) {
i += 2;
} else {
buffer[len++] = 0x27;
buffer[len++] = ch;
}
state = INIT;
break;
case PLACEHOLDER:
if (ch >= 0x30 && ch <= 0x39) {
idBuilder.add(ch);
} else if (ch == 0x7D && idBuilder.isValid()) {
if (!addPlaceholder(idBuilder.getId(), len)) {
noPlaceholders.releaseBuffer(0);
status = U_MEMORY_ALLOCATION_ERROR;
// Multi-digit argument number (no leading zero) or syntax error.
// MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
// around the number, but this class does not.
argNumber = -1;
if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
argNumber = c - DIGIT_ZERO;
while (i < patternLength &&
DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
argNumber = argNumber * 10 + (c - DIGIT_ZERO);
if (argNumber >= ARG_NUM_LIMIT) {
break;
}
}
}
if (argNumber < 0 || c != CLOSE_BRACE) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
state = INIT;
} else {
buffer[len++] = 0x7B;
idBuilder.appendTo(buffer, &len);
buffer[len++] = ch;
state = INIT;
}
break;
default:
U_ASSERT(FALSE);
break;
if (argNumber > maxArg) {
maxArg = argNumber;
}
compiledPattern.append((UChar)argNumber);
continue;
} // else: c is part of literal text
// Append c and track the literal-text segment length.
if (textLength == 0) {
// Reserve a char for the length of a new text segment, preset the maximum length.
compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
}
compiledPattern.append(c);
if (++textLength == MAX_SEGMENT_LENGTH) {
textLength = 0;
}
}
switch (state) {
case INIT:
break;
case APOSTROPHE:
buffer[len++] = 0x27;
break;
case PLACEHOLDER:
buffer[len++] = 0X7B;
idBuilder.appendTo(buffer, &len);
break;
default:
U_ASSERT(false);
break;
if (textLength > 0) {
compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
(UChar)(ARG_NUM_LIMIT + textLength));
}
noPlaceholders.releaseBuffer(len);
if (placeholderCount < min || max < placeholderCount) {
status = U_ILLEGAL_ARGUMENT_ERROR;
int32_t argCount = maxArg + 1;
if (argCount < min || max < argCount) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
compiledPattern.setCharAt(0, (UChar)argCount);
return TRUE;
}
UnicodeString& SimplePatternFormatter::format(
const UnicodeString &arg0,
UnicodeString &appendTo,
UErrorCode &status) const {
const UnicodeString *params[] = {&arg0};
return formatAndAppend(
params,
UPRV_LENGTHOF(params),
appendTo,
NULL,
0,
status);
const UnicodeString &value0,
UnicodeString &appendTo, UErrorCode &errorCode) const {
const UnicodeString *values[] = { &value0 };
return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode);
}
UnicodeString& SimplePatternFormatter::format(
const UnicodeString &arg0,
const UnicodeString &arg1,
UnicodeString &appendTo,
UErrorCode &status) const {
const UnicodeString *params[] = {&arg0, &arg1};
return formatAndAppend(
params,
UPRV_LENGTHOF(params),
appendTo,
NULL,
0,
status);
const UnicodeString &value0,
const UnicodeString &value1,
UnicodeString &appendTo, UErrorCode &errorCode) const {
const UnicodeString *values[] = { &value0, &value1 };
return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode);
}
UnicodeString& SimplePatternFormatter::format(
const UnicodeString &arg0,
const UnicodeString &arg1,
const UnicodeString &arg2,
UnicodeString &appendTo,
UErrorCode &status) const {
const UnicodeString *params[] = {&arg0, &arg1, &arg2};
return formatAndAppend(
params,
UPRV_LENGTHOF(params),
appendTo,
NULL,
0,
status);
}
// Records placeholderOffset in offsetArray[placeholderId], but only when
// placeholderId is below offsetArrayLength; otherwise nothing is written.
static void updatePlaceholderOffset(
        int32_t placeholderId,
        int32_t placeholderOffset,
        int32_t *offsetArray,
        int32_t offsetArrayLength) {
    if (placeholderId >= offsetArrayLength) {
        return;  // the caller's array has no slot for this placeholder
    }
    offsetArray[placeholderId] = placeholderOffset;
}
static void appendRange(
const UnicodeString &src,
int32_t start,
int32_t end,
UnicodeString &dest) {
// This check improves performance significantly.
if (start == end) {
return;
}
dest.append(src, start, end - start);
const UnicodeString &value0,
const UnicodeString &value1,
const UnicodeString &value2,
UnicodeString &appendTo, UErrorCode &errorCode) const {
const UnicodeString *values[] = { &value0, &value1, &value2 };
return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode);
}
UnicodeString& SimplePatternFormatter::formatAndAppend(
const UnicodeString * const *placeholderValues,
int32_t placeholderValueCount,
const UnicodeString *const *values, int32_t valuesLength,
UnicodeString &appendTo,
int32_t *offsetArray,
int32_t offsetArrayLength,
UErrorCode &status) const {
if (U_FAILURE(status)) {
int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) {
return appendTo;
}
if (isInvalidArray(placeholderValues, placeholderValueCount)
|| isInvalidArray(offsetArray, offsetArrayLength)) {
status = U_ILLEGAL_ARGUMENT_ERROR;
if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
valuesLength < getPlaceholderCount()) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
if (placeholderValueCount < placeholderCount) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
// Since we are disallowing parameter values that are the same as
// appendTo, we have to check all placeholderValues as opposed to
// the first placeholderCount placeholder values.
SimplePatternFormatterPlaceholderValues values(
placeholderValues, placeholderValueCount);
if (values.isAppendToInAnyIndexExcept(appendTo, -1)) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
return formatAndAppend(
values,
appendTo,
offsetArray,
offsetArrayLength);
return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
appendTo, NULL, TRUE,
offsets, offsetsLength, errorCode);
}
UnicodeString& SimplePatternFormatter::formatAndReplace(
const UnicodeString * const *placeholderValues,
int32_t placeholderValueCount,
UnicodeString &SimplePatternFormatter::formatAndReplace(
const UnicodeString *const *values, int32_t valuesLength,
UnicodeString &result,
int32_t *offsetArray,
int32_t offsetArrayLength,
UErrorCode &status) const {
if (U_FAILURE(status)) {
int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) {
return result;
}
if (isInvalidArray(placeholderValues, placeholderValueCount)
|| isInvalidArray(offsetArray, offsetArrayLength)) {
status = U_ILLEGAL_ARGUMENT_ERROR;
if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return result;
}
if (placeholderValueCount < placeholderCount) {
status = U_ILLEGAL_ARGUMENT_ERROR;
const UChar *cp = compiledPattern.getBuffer();
int32_t cpLength = compiledPattern.length();
if (valuesLength < getPlaceholderCount(cp, cpLength)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return result;
}
SimplePatternFormatterPlaceholderValues values(
placeholderValues, placeholderCount);
int32_t placeholderAtStart = getUniquePlaceholderAtStart();
// If pattern starts with a unique placeholder and that placeholder
// value is result, we may be able to optimize by just appending to result.
if (placeholderAtStart >= 0
&& placeholderValues[placeholderAtStart] == &result) {
// If result is the value for other placeholders, call off optimization.
if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) {
values.snapshotAppendTo(result);
result.remove();
return formatAndAppend(
values,
result,
offsetArray,
offsetArrayLength);
}
// Otherwise we can optimize
formatAndAppend(
values,
result,
offsetArray,
offsetArrayLength);
// We have to make the offset for the placeholderAtStart
// placeholder be 0. Otherwise it would be the length of the
// previous value of result.
if (offsetArrayLength > placeholderAtStart) {
offsetArray[placeholderAtStart] = 0;
}
return result;
}
if (values.isAppendToInAnyIndexExcept(result, -1)) {
values.snapshotAppendTo(result);
}
result.remove();
return formatAndAppend(
values,
result,
offsetArray,
offsetArrayLength);
}
UnicodeString& SimplePatternFormatter::formatAndAppend(
const SimplePatternFormatterPlaceholderValues &values,
UnicodeString &appendTo,
int32_t *offsetArray,
int32_t offsetArrayLength) const {
for (int32_t i = 0; i < offsetArrayLength; ++i) {
offsetArray[i] = -1;
}
if (placeholderSize == 0) {
appendTo.append(noPlaceholders);
return appendTo;
}
appendRange(
noPlaceholders,
0,
placeholders[0].offset,
appendTo);
updatePlaceholderOffset(
placeholders[0].id,
appendTo.length(),
offsetArray,
offsetArrayLength);
const UnicodeString *placeholderValue = &values.get(placeholders[0].id);
if (placeholderValue != &appendTo) {
appendTo.append(*placeholderValue);
}
for (int32_t i = 1; i < placeholderSize; ++i) {
appendRange(
noPlaceholders,
placeholders[i - 1].offset,
placeholders[i].offset,
appendTo);
updatePlaceholderOffset(
placeholders[i].id,
appendTo.length(),
offsetArray,
offsetArrayLength);
placeholderValue = &values.get(placeholders[i].id);
if (placeholderValue != &appendTo) {
appendTo.append(*placeholderValue);
// If the pattern starts with an argument whose value is the same object
// as the result, then we keep the result contents and append to it.
// Otherwise we replace its contents.
int32_t firstArg = -1;
// If any non-initial argument value is the same object as the result,
// then we first copy its contents and use that instead while formatting.
UnicodeString resultCopy;
if (getPlaceholderCount(cp, cpLength) > 0) {
for (int32_t i = 1; i < cpLength;) {
int32_t n = cp[i++];
if (n < ARG_NUM_LIMIT) {
if (values[n] == &result) {
if (i == 2) {
firstArg = n;
} else if (resultCopy.isEmpty() && !result.isEmpty()) {
resultCopy = result;
}
}
} else {
i += n - ARG_NUM_LIMIT;
}
}
}
appendRange(
noPlaceholders,
placeholders[placeholderSize - 1].offset,
noPlaceholders.length(),
appendTo);
return appendTo;
if (firstArg < 0) {
result.remove();
}
return format(cp, cpLength, values,
result, &resultCopy, FALSE,
offsets, offsetsLength, errorCode);
}
int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const {
if (placeholderSize == 0
|| firstPlaceholderReused || placeholders[0].offset != 0) {
return -1;
UnicodeString SimplePatternFormatter::getTextWithNoPlaceholders(
const UChar *compiledPattern, int32_t compiledPatternLength) {
int32_t capacity = compiledPatternLength - 1 -
getPlaceholderCount(compiledPattern, compiledPatternLength);
UnicodeString sb(capacity, 0, 0); // Java: StringBuilder
for (int32_t i = 1; i < compiledPatternLength;) {
int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT;
if (segmentLength > 0) {
sb.append(compiledPattern + i, segmentLength);
i += segmentLength;
}
}
return placeholders[0].id;
return sb;
}
int32_t SimplePatternFormatter::ensureCapacity(
int32_t desiredCapacity, int32_t allocationSize) {
if (allocationSize < desiredCapacity) {
allocationSize = desiredCapacity;
UnicodeString &SimplePatternFormatter::format(
const UChar *compiledPattern, int32_t compiledPatternLength,
const UnicodeString *const *values,
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
int32_t *offsets, int32_t offsetsLength,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return result;
}
if (desiredCapacity <= placeholders.getCapacity()) {
return desiredCapacity;
for (int32_t i = 0; i < offsetsLength; i++) {
offsets[i] = -1;
}
// allocate new buffer
if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
return placeholders.getCapacity();
for (int32_t i = 1; i < compiledPatternLength;) {
int32_t n = compiledPattern[i++];
if (n < ARG_NUM_LIMIT) {
const UnicodeString *value = values[n];
if (value == NULL) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return result;
}
if (value == &result) {
if (forbidResultAsValue) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return result;
}
if (i == 2) {
// We are appending to result which is also the first value object.
if (n < offsetsLength) {
offsets[n] = 0;
}
} else {
if (n < offsetsLength) {
offsets[n] = result.length();
}
result.append(*resultCopy);
}
} else {
if (n < offsetsLength) {
offsets[n] = result.length();
}
result.append(*value);
}
} else {
int32_t length = n - ARG_NUM_LIMIT;
result.append(compiledPattern + i, length);
i += length;
}
}
return desiredCapacity;
return result;
}
// Appends a (id, offset) entry to the placeholders array.
// Returns FALSE, changing nothing else, if ensureCapacity() cannot provide
// room for one more entry; returns TRUE otherwise.
// Also keeps placeholderCount at (largest id seen + 1) and sets
// firstPlaceholderReused when a later entry has the same id as entry 0.
UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
    const int32_t newSize = placeholderSize + 1;
    // Ask for room for one more entry, suggesting a doubled allocation.
    if (ensureCapacity(newSize, 2 * placeholderSize) < newSize) {
        return FALSE;
    }
    PlaceholderInfo &slot = placeholders[placeholderSize];
    slot.id = id;
    slot.offset = offset;
    placeholderSize = newSize;
    if (placeholderCount <= id) {
        placeholderCount = id + 1;
    }
    if (newSize > 1 && placeholders[0].id == id) {
        firstPlaceholderReused = TRUE;
    }
    return TRUE;
}
U_NAMESPACE_END

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 2014-2015, International Business Machines
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* simplepatternformatter.h
@ -9,71 +9,72 @@
#ifndef __SIMPLEPATTERNFORMATTER_H__
#define __SIMPLEPATTERNFORMATTER_H__
#define EXPECTED_PLACEHOLDER_COUNT 3
#include "cmemory.h"
#include "unicode/utypes.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
class SimplePatternFormatterPlaceholderValues;
// One placeholder occurrence recorded while compiling a pattern.
struct PlaceholderInfo {
// The placeholder's numeric ID.
int32_t id;
// The offset passed to addPlaceholder(); formatAndAppend() uses it as an
// index into the placeholder-free pattern text.
int32_t offset;
};
/**
* Compiled version of a pattern string such as "{1} was born in {0}".
* <p>
* Using SimplePatternFormatter is both faster and safer than adhoc replacement.
* They are faster because they are precompiled; they are safer because they
* account for curly braces escaped by apostrophe (').
*
* Placeholders are of the form \{[0-9]+\}. If a curly brace is preceded
* by a single quote, it becomes a curly brace instead of the start of a
* placeholder. Two single quotes resolve to one single quote.
* <p>
* Formats simple patterns like "{1} was born in {0}".
* Minimal subset of MessageFormat; fast, simple, minimal dependencies.
* Supports only numbered arguments with no type nor style parameters,
* and formats only string values.
* Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
*
* Constructors and compile functions set the error code for syntax errors
* and for too few or too many arguments/placeholders.
*
* SimplePatternFormatter objects are immutable and can be safely cached like strings.
*
* Example:
* <pre>
* SimplePatternFormatter fmt("{1} '{born} in {0}");
* UErrorCode errorCode = U_ZERO_ERROR;
* SimplePatternFormatter fmt("{1} '{born}' in {0}", errorCode);
* UnicodeString result;
* UErrorCode status = U_ZERO_ERROR;
* // Evaluates to: "paul {born} in england"
* fmt.format("england", "paul", result, status);
*
* // Output: "paul {born} in england"
* fmt.format("england", "paul", result, errorCode);
* </pre>
*
* @see MessageFormat
* @see UMessagePatternApostropheMode
*/
class U_COMMON_API SimplePatternFormatter : public UMemory {
public:
/**
* Default constructor
* Default constructor.
*/
SimplePatternFormatter();
SimplePatternFormatter() : compiledPattern((UChar)0) {}
/**
* Constructs from a pattern. Will never fail if pattern has three or
* fewer placeholders in it.
*/
explicit SimplePatternFormatter(const UnicodeString& pattern);
/**
* Constructs from a pattern. Will never fail if pattern has three or
* fewer placeholders in it.
* Constructs a formatter from the pattern string.
*
* @param pattern The pattern string.
*/
explicit SimplePatternFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
compile(pattern, errorCode);
}
/**
* Constructs a formatter from the pattern string.
*
* @param pattern The pattern string.
* @param min The pattern must have at least this many placeholders.
* @param max The pattern must have at most this many placeholders.
*/
SimplePatternFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
UErrorCode &errorCode);
UErrorCode &errorCode) {
compileMinMaxPlaceholders(pattern, min, max, errorCode);
}
/**
* Copy constructor.
*/
SimplePatternFormatter(const SimplePatternFormatter& other);
SimplePatternFormatter(const SimplePatternFormatter& other)
: compiledPattern(other.compiledPattern) {}
/**
* Assignment operator
* Assignment operator.
*/
SimplePatternFormatter &operator=(const SimplePatternFormatter& other);
@ -83,179 +84,165 @@ public:
~SimplePatternFormatter();
/**
* Compiles pattern and makes this object represent pattern.
* Changes this object according to the new pattern.
*
* Returns TRUE on success; FALSE on failure. Will not fail if
* there are three or fewer placeholders in pattern. May fail with
* U_MEMORY_ALLOCATION_ERROR if there are more than three placeholders.
* @param pattern The pattern string.
* @return TRUE if U_SUCCESS(errorCode).
*/
UBool compile(const UnicodeString &pattern, UErrorCode &status) {
return compileMinMaxPlaceholders(pattern, 0, INT32_MAX, status);
UBool compile(const UnicodeString &pattern, UErrorCode &errorCode) {
return compileMinMaxPlaceholders(pattern, 0, INT32_MAX, errorCode);
}
/**
* Compiles pattern and makes this object represent pattern.
*
* Returns TRUE on success; FALSE on failure. Will not fail if
* there are three or fewer placeholders in pattern. May fail with
* U_MEMORY_ALLOCATION_ERROR if there are more than three placeholders.
* Changes this object according to the new pattern.
*
* @param pattern The pattern string.
* @param min The pattern must have at least this many placeholders.
* @param max The pattern must have at most this many placeholders.
* @return TRUE if U_SUCCESS(errorCode).
*/
UBool compileMinMaxPlaceholders(const UnicodeString &pattern,
int32_t min, int32_t max, UErrorCode &status);
int32_t min, int32_t max, UErrorCode &errorCode);
/**
* Returns (maxPlaceholderId + 1). For example
* <code>SimplePatternFormatter("{0} {2}").getPlaceholderCount()
* evaluates to 3.
* Callers use this function to find out how many values this object
* expects when formatting.
* @return The max argument number/placeholder ID + 1.
*/
int32_t getPlaceholderCount() const {
return placeholderCount;
return getPlaceholderCount(compiledPattern.getBuffer(), compiledPattern.length());
}
/**
* Returns this pattern with none of the placeholders.
*/
const UnicodeString &getPatternWithNoPlaceholders() const {
return noPlaceholders;
}
/**
* Formats given value. arg0 cannot be appendTo.
*/
UnicodeString &format(
const UnicodeString &args0,
UnicodeString &appendTo,
UErrorCode &status) const;
/**
* Formats given values. Neither arg0 nor arg1 can be appendTo.
*/
UnicodeString &format(
const UnicodeString &args0,
const UnicodeString &args1,
UnicodeString &appendTo,
UErrorCode &status) const;
/**
* Formats given values. Neither arg0, arg1, nor arg2 can be appendTo.
*/
UnicodeString &format(
const UnicodeString &args0,
const UnicodeString &args1,
const UnicodeString &args2,
UnicodeString &appendTo,
UErrorCode &status) const;
/**
* Formats given values.
* Formats the given value, appending to the appendTo builder.
* The placeholder value must not be the same object as appendTo.
* getPlaceholderCount() must be at most 1.
*
* The caller retains ownership of all pointers.
* @param placeholderValues 1st one corresponds to {0}; 2nd to {1};
* 3rd to {2} etc. If any of these point to appendTo, this method
* sets status to U_ILLEGAL_ARGUMENT_ERROR.
* @param placeholderValueCount the number of placeholder values
* must be at least large enough to provide values for all placeholders
* in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR.
* @param appendTo resulting string appended here.
* @param offsetArray The offset of each placeholder value in appendTo
* stored here. The first value gets the offset of the value for {0};
* the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding
* placeholder does not exist in this object. If caller is not
* interested in offsets, it may pass NULL and 0 for the length.
* @param offsetArrayLength the size of offsetArray. If less than
* placeholderValueCount only the first offsets get recorded. If
* greater than placeholderValueCount, then extra values in offset
* array are set to -1.
* @param status any error stored here.
* @param value0 Value for argument {0}.
* @param appendTo Gets the formatted pattern and value appended.
* @param errorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return appendTo
*/
UnicodeString &format(
const UnicodeString &value0,
UnicodeString &appendTo, UErrorCode &errorCode) const;
/**
* Formats the given values, appending to the appendTo builder.
* A placeholder value must not be the same object as appendTo.
* getPlaceholderCount() must be at most 2.
*
* @param value0 Value for argument {0}.
* @param value1 Value for argument {1}.
* @param appendTo Gets the formatted pattern and values appended.
* @param errorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return appendTo
*/
UnicodeString &format(
const UnicodeString &value0,
const UnicodeString &value1,
UnicodeString &appendTo, UErrorCode &errorCode) const;
/**
* Formats the given values, appending to the appendTo builder.
* A placeholder value must not be the same object as appendTo.
* getPlaceholderCount() must be at most 3.
*
* @param value0 Value for argument {0}.
* @param value1 Value for argument {1}.
* @param value2 Value for argument {2}.
* @param appendTo Gets the formatted pattern and values appended.
* @param errorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return appendTo
*/
UnicodeString &format(
const UnicodeString &value0,
const UnicodeString &value1,
const UnicodeString &value2,
UnicodeString &appendTo, UErrorCode &errorCode) const;
/**
* Formats the given values, appending to the appendTo string.
*
* @param values The placeholder values.
* A placeholder value must not be the same object as appendTo.
* Can be NULL if valuesLength==getPlaceholderCount()==0.
* @param valuesLength The length of the values array.
* Must be at least getPlaceholderCount().
* @param appendTo Gets the formatted pattern and values appended.
* @param offsets offsets[i] receives the offset of where
* values[i] replaced pattern argument {i}.
* Can be shorter or longer than values. Can be NULL if offsetsLength==0.
* If there is no {i} in the pattern, then offsets[i] is set to -1.
* @param offsetsLength The length of the offsets array.
* @param errorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return appendTo
*/
UnicodeString &formatAndAppend(
const UnicodeString * const *placeholderValues,
int32_t placeholderValueCount,
const UnicodeString *const *values, int32_t valuesLength,
UnicodeString &appendTo,
int32_t *offsetArray,
int32_t offsetArrayLength,
UErrorCode &status) const;
int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
/**
* Formats given values.
* Formats the given values, replacing the contents of the result string.
* May optimize by actually appending to the result if it is the same object
* as the initial argument's corresponding value.
*
* The caller retains ownership of all pointers.
* @param placeholderValues 1st one corresponds to {0}; 2nd to {1};
* 3rd to {2} etc. May include pointer to result in which case
* the previous value of result is used for the corresponding
* placeholder.
* @param placeholderValueCount the number of placeholder values
* must be at least large enough to provide values for all placeholders
* in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR.
* @param result resulting string stored here overwriting any previous
* value.
* @param offsetArray The offset of each placeholder value in result
* stored here. The first value gets the offset of the value for {0};
* the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding
* placeholder does not exist in this object. If caller is not
* interested in offsets, it may pass NULL and 0 for the length.
* @param offsetArrayLength the size of offsetArray. If less than
* placeholderValueCount only the first offsets get recorded. If
* greater than placeholderValueCount, then extra values in offset
* array are set to -1.
* @param status any error stored here.
* @param values The placeholder values.
* A placeholder value may be the same object as result.
* Can be NULL if valuesLength==getPlaceholderCount()==0.
* @param valuesLength The length of the values array.
* Must be at least getPlaceholderCount().
* @param result Gets its contents replaced by the formatted pattern and values.
* @param offsets offsets[i] receives the offset of where
* values[i] replaced pattern argument {i}.
* Can be shorter or longer than values. Can be NULL if offsetsLength==0.
* If there is no {i} in the pattern, then offsets[i] is set to -1.
* @param offsetsLength The length of the offsets array.
* @param errorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return result
*/
UnicodeString &formatAndReplace(
const UnicodeString * const *placeholderValues,
int32_t placeholderValueCount,
const UnicodeString *const *values, int32_t valuesLength,
UnicodeString &result,
int32_t *offsetArray,
int32_t offsetArrayLength,
UErrorCode &status) const;
int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
/**
 * Returns only the literal text of the pattern, with every placeholder left out.
 * Like formatting with all-empty string values.
 *
 * @return the pattern text without any placeholders
 */
UnicodeString getTextWithNoPlaceholders() const {
    const UChar *patternChars = compiledPattern.getBuffer();
    int32_t patternLength = compiledPattern.length();
    // Delegate to the static overload that works on the raw compiled pattern.
    return getTextWithNoPlaceholders(patternChars, patternLength);
}
private:
UnicodeString noPlaceholders;
MaybeStackArray<PlaceholderInfo, 3> placeholders;
int32_t placeholderSize;
int32_t placeholderCount;
UBool firstPlaceholderReused;
/**
* Binary representation of the compiled pattern.
* Index 0: One more than the highest argument number.
* Followed by zero or more arguments or literal-text segments.
*
* An argument is stored as its number, less than ARG_NUM_LIMIT.
* A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
* followed by that many chars.
*/
UnicodeString compiledPattern;
// A Placeholder value that is the same as appendTo is treated as the
// empty string.
UnicodeString &formatAndAppend(
const SimplePatternFormatterPlaceholderValues &placeholderValues,
UnicodeString &appendTo,
int32_t *offsetArray,
int32_t offsetArrayLength) const;
// Reads the placeholder count out of a compiled pattern.
// An empty compiled pattern yields 0; otherwise the count is the value
// stored in the first unit (index 0) of the compiled pattern.
static inline int32_t getPlaceholderCount(const UChar *compiledPattern,
                                          int32_t compiledPatternLength) {
    if (compiledPatternLength != 0) {
        return compiledPattern[0];
    }
    return 0;
}
// Returns the placeholder at the beginning of this pattern
// (e.g. 3 for placeholder {3}). Returns -1 if the beginning of the pattern
// is text or if the placeholder at the beginning of this pattern
// is used again in the middle of the pattern.
int32_t getUniquePlaceholderAtStart() const;
// ensureCapacity ensures that the capacity of the placeholders array
// is desiredCapacity. If ensureCapacity must resize the placeholders
// array, the first placeholderSize elements stay in the array. Note
// that ensureCapacity NEVER changes the value of placeholderSize, only
// the capacity of the placeholders array.
// If there is no memory allocation error when resizing, this
// function returns desiredCapacity. If there is a memory allocation
// error, this function leaves the placeholders array unchanged and
// returns the smaller, old capacity. ensureCapacity resizes only if
// the current capacity of placeholders array is less than desiredCapacity.
// Otherwise, it leaves the placeholders array unchanged. If caller
// specifies an allocation size, then it must be at least as large as
// desiredCapacity. In that case, if ensureCapacity resizes, it will
// allocate allocationSize spots instead of desiredCapacity spots in
// the array. If caller is calling ensureCapacity in a loop while adding
// elements, it is recommended that it use an allocationSize of
// approximately twice desiredCapacity to avoid memory allocation with
// every call to ensureCapacity.
int32_t ensureCapacity(int32_t desiredCapacity, int32_t allocationSize=0);
static UnicodeString getTextWithNoPlaceholders(const UChar *compiledPattern, int32_t compiledPatternLength);
// Records the offset of an individual placeholder in the noPlaceholders
// string.
UBool addPlaceholder(int32_t id, int32_t offset);
static UnicodeString &format(
const UChar *compiledPattern, int32_t compiledPatternLength,
const UnicodeString *const *values,
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
int32_t *offsets, int32_t offsetsLength,
UErrorCode &errorCode);
};
U_NAMESPACE_END

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2012-2014, International Business Machines
* Copyright (C) 2012-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -145,7 +145,7 @@ class U_COMMON_API ListFormatter : public UObject{
/**
* @internal constructor made public for testing.
*/
ListFormatter(const ListFormatData &data);
ListFormatter(const ListFormatData &data, UErrorCode &errorCode);
/**
* @internal constructor made public for testing.
*/

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2004-2015, International Business Machines
* Copyright (c) 2004-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -1128,7 +1128,7 @@ int32_t MeasureFormat::withPerUnitAndAppend(
if (U_FAILURE(status)) {
return offset;
}
UnicodeString perUnitString = pattern->getPatternWithNoPlaceholders();
UnicodeString perUnitString = pattern->getTextWithNoPlaceholders();
perUnitString.trim();
const UnicodeString *params[] = {&formatted, &perUnitString};
perFormatter->formatAndAppend(

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2012-2014, International Business Machines
* Copyright (C) 2012-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -197,9 +197,10 @@ void ListFormatterTest::TestOutOfOrderPatterns() {
four + " in the last after " + three + " after " + two + " after the first " + one
};
UErrorCode errorCode = U_ZERO_ERROR;
ListFormatData data("{1} after {0}", "{1} after the first {0}",
"{1} after {0}", "{1} in the last after {0}");
ListFormatter formatter(data);
ListFormatter formatter(data, errorCode);
UnicodeString input1[] = {one};
CheckFormatting(&formatter, input1, 1, results[0]);

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2014-2015, International Business Machines Corporation and
* Copyright (C) 2014-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
@ -59,19 +59,19 @@ void QuantityFormatterTest::TestBasic() {
assertEquals(
"getByVariant",
fmt.getByVariant("bad variant")->getPatternWithNoPlaceholders(),
fmt.getByVariant("bad variant")->getTextWithNoPlaceholders(),
" pounds");
assertEquals(
"getByVariant",
fmt.getByVariant("other")->getPatternWithNoPlaceholders(),
fmt.getByVariant("other")->getTextWithNoPlaceholders(),
" pounds");
assertEquals(
"getByVariant",
fmt.getByVariant("one")->getPatternWithNoPlaceholders(),
fmt.getByVariant("one")->getTextWithNoPlaceholders(),
" pound");
assertEquals(
"getByVariant",
fmt.getByVariant("few")->getPatternWithNoPlaceholders(),
fmt.getByVariant("few")->getTextWithNoPlaceholders(),
" pounds");
// Test copy constructor
@ -79,15 +79,15 @@ void QuantityFormatterTest::TestBasic() {
QuantityFormatter copied(fmt);
assertEquals(
"copied getByVariant",
copied.getByVariant("other")->getPatternWithNoPlaceholders(),
copied.getByVariant("other")->getTextWithNoPlaceholders(),
" pounds");
assertEquals(
"copied getByVariant",
copied.getByVariant("one")->getPatternWithNoPlaceholders(),
copied.getByVariant("one")->getTextWithNoPlaceholders(),
" pound");
assertEquals(
"copied getByVariant",
copied.getByVariant("few")->getPatternWithNoPlaceholders(),
copied.getByVariant("few")->getTextWithNoPlaceholders(),
" pounds");
}
@ -97,15 +97,15 @@ void QuantityFormatterTest::TestBasic() {
assigned = fmt;
assertEquals(
"assigned getByVariant",
assigned.getByVariant("other")->getPatternWithNoPlaceholders(),
assigned.getByVariant("other")->getTextWithNoPlaceholders(),
" pounds");
assertEquals(
"assigned getByVariant",
assigned.getByVariant("one")->getPatternWithNoPlaceholders(),
assigned.getByVariant("one")->getTextWithNoPlaceholders(),
" pound");
assertEquals(
"assigned getByVariant",
assigned.getByVariant("few")->getPatternWithNoPlaceholders(),
assigned.getByVariant("few")->getTextWithNoPlaceholders(),
" pounds");
}

View file

@ -1,13 +1,16 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2014-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File SIMPLEPATTERNFORMATTERTEST.CPP
*
********************************************************************************
*/
#include "unicode/msgfmt.h"
#include "unicode/unistr.h"
#include "cstring.h"
#include "intltest.h"
#include "simplepatternformatter.h"
@ -17,17 +20,20 @@ public:
SimplePatternFormatterTest() {
}
void TestNoPlaceholders();
void TestSyntaxErrors();
void TestOnePlaceholder();
void TestBigPlaceholder();
void TestManyPlaceholders();
void TestTooFewPlaceholderValues();
void TestBadArguments();
void TestGetPatternWithNoPlaceholders();
void TestTextWithNoPlaceholders();
void TestFormatReplaceNoOptimization();
void TestFormatReplaceNoOptimizationLeadingText();
void TestFormatReplaceOptimization();
void TestFormatReplaceNoOptimizationLeadingPlaceholderUsedTwice();
void TestFormatReplaceOptimizationNoOffsets();
void TestFormatReplaceNoOptimizationNoOffsets();
void TestQuotingLikeMessageFormat();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
private:
void verifyOffsets(
@ -39,39 +45,55 @@ private:
// Test dispatcher for this suite: lists every test method of
// SimplePatternFormatterTest through the TESTCASE_AUTO macros.
// Presumably the macros select the test matching `index`, report its name
// through `name`, and run it only when `exec` is true — confirm against the
// macro definitions in the intltest framework. The `par` argument is unused.
// NOTE(review): this span comes from a rendered diff; the
// TestGetPatternWithNoPlaceholders entry appears to be the pre-rename form of
// the TestTextWithNoPlaceholders entry that follows it — confirm.
void SimplePatternFormatterTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(TestNoPlaceholders);
TESTCASE_AUTO(TestSyntaxErrors);
TESTCASE_AUTO(TestOnePlaceholder);
TESTCASE_AUTO(TestBigPlaceholder);
TESTCASE_AUTO(TestManyPlaceholders);
TESTCASE_AUTO(TestTooFewPlaceholderValues);
TESTCASE_AUTO(TestBadArguments);
TESTCASE_AUTO(TestGetPatternWithNoPlaceholders);
TESTCASE_AUTO(TestTextWithNoPlaceholders);
TESTCASE_AUTO(TestFormatReplaceNoOptimization);
TESTCASE_AUTO(TestFormatReplaceNoOptimizationLeadingText);
TESTCASE_AUTO(TestFormatReplaceOptimization);
TESTCASE_AUTO(TestFormatReplaceNoOptimizationLeadingPlaceholderUsedTwice);
TESTCASE_AUTO(TestFormatReplaceOptimizationNoOffsets);
TESTCASE_AUTO(TestFormatReplaceNoOptimizationNoOffsets);
TESTCASE_AUTO(TestQuotingLikeMessageFormat);
TESTCASE_AUTO_END;
}
// Exercises a pattern that contains no real placeholders: the doubled
// apostrophe is expected to format as a single literal apostrophe, and the
// quoted '{0} is expected to format as the literal text "{0}".
// NOTE(review): this span comes from a rendered diff, so statements from the
// pre-change and post-change versions of the test appear interleaved below.
void SimplePatternFormatterTest::TestNoPlaceholders() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("This doesn''t have templates '{0}");
assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
SimplePatternFormatter fmt("This doesn''t have templates '{0}", status);
// The quoted {0} must not be counted as a placeholder.
assertEquals("getPlaceholderCount", 0, fmt.getPlaceholderCount());
UnicodeString appendTo;
// The "unused" value has no placeholder to go into, so it must not appear
// in the expected output.
assertEquals(
"format",
"This doesn't have templates {0}",
fmt.format("unused", appendTo, status));
fmt.compile("This has {} bad {012d placeholders", status);
assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
// Clear the output string before it is reused for the next formatting call.
appendTo.remove();
// Seeded with 0 so the check further down can tell that formatAndAppend()
// overwrote offsets[0] with -1.
int32_t offsets[] = { 0 };
assertEquals(
"format",
"This has {} bad {012d placeholders",
fmt.format("unused", appendTo, status));
"formatAndAppend",
"This doesn't have templates {0}",
fmt.formatAndAppend(NULL, 0, appendTo, offsets, 1, status));
assertEquals("formatAndAppend offsets[0]", -1, offsets[0]);
// formatAndReplace() is called with no values and no offsets array.
assertEquals(
"formatAndReplace",
"This doesn't have templates {0}",
fmt.formatAndReplace(NULL, 0, appendTo, NULL, 0, status));
// None of the calls above is expected to leave a failure in status.
assertSuccess("Status", status);
}
void SimplePatternFormatterTest::TestSyntaxErrors() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("{}", status);
assertEquals("syntax error {}", U_ILLEGAL_ARGUMENT_ERROR, status);
status = U_ZERO_ERROR;
fmt.compile("{12d", status);
assertEquals("syntax error {12d", U_ILLEGAL_ARGUMENT_ERROR, status);
}
void SimplePatternFormatterTest::TestOnePlaceholder() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt;
@ -105,6 +127,24 @@ void SimplePatternFormatterTest::TestOnePlaceholder() {
assertSuccess("Status", status);
}
void SimplePatternFormatterTest::TestBigPlaceholder() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("a{20}c", status);
if (!assertSuccess("Status", status)) {
return;
}
assertEquals("{20} count", 21, fmt.getPlaceholderCount());
UnicodeString b("b");
UnicodeString *values[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
&b
};
UnicodeString result;
assertEquals("{20}=b", "abc", fmt.formatAndAppend(values, 21, result, NULL, 0, status));
assertSuccess("Status", status);
}
void SimplePatternFormatterTest::TestManyPlaceholders() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt;
@ -203,12 +243,12 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
}
void SimplePatternFormatterTest::TestTooFewPlaceholderValues() {
SimplePatternFormatter fmt("{0} and {1}");
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("{0} and {1}", status);
UnicodeString appendTo;
UnicodeString firstValue;
UnicodeString *params[] = {&firstValue};
UErrorCode status = U_ZERO_ERROR;
fmt.format(
firstValue, appendTo, status);
if (status != U_ILLEGAL_ARGUMENT_ERROR) {
@ -231,9 +271,9 @@ void SimplePatternFormatterTest::TestTooFewPlaceholderValues() {
}
void SimplePatternFormatterTest::TestBadArguments() {
SimplePatternFormatter fmt("pickle");
UnicodeString appendTo;
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("pickle", status);
UnicodeString appendTo;
// These succeed
fmt.formatAndAppend(
@ -247,7 +287,7 @@ void SimplePatternFormatterTest::TestBadArguments() {
fmt.formatAndAppend(
NULL, 1, appendTo, NULL, 0, status);
if (status != U_ILLEGAL_ARGUMENT_ERROR) {
errln("Expected U_ILLEGAL_ARGUMENT_ERROR");
errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndAppend() values=NULL but length=1");
}
status = U_ZERO_ERROR;
@ -255,16 +295,17 @@ void SimplePatternFormatterTest::TestBadArguments() {
fmt.formatAndAppend(
NULL, 0, appendTo, NULL, 1, status);
if (status != U_ILLEGAL_ARGUMENT_ERROR) {
errln("Expected U_ILLEGAL_ARGUMENT_ERROR");
errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndAppend() offsets=NULL but length=1");
}
status = U_ZERO_ERROR;
// fails because appendTo used as a parameter value
const UnicodeString *params[] = {&appendTo};
fmt.formatAndAppend(
params, UPRV_LENGTHOF(params), appendTo, NULL, 0, status);
SimplePatternFormatter fmt2("Placeholders {0} and {1}", status);
UnicodeString frog("frog");
const UnicodeString *params[] = { &appendTo, &frog };
fmt2.formatAndAppend(params, 2, appendTo, NULL, 0, status);
if (status != U_ILLEGAL_ARGUMENT_ERROR) {
errln("Expected U_ILLEGAL_ARGUMENT_ERROR");
errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndAppend() value=appendTo");
}
status = U_ZERO_ERROR;
@ -273,7 +314,7 @@ void SimplePatternFormatterTest::TestBadArguments() {
fmt.formatAndReplace(
NULL, 1, appendTo, NULL, 0, status);
if (status != U_ILLEGAL_ARGUMENT_ERROR) {
errln("Expected U_ILLEGAL_ARGUMENT_ERROR");
errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndReplace() values=NULL but length=1");
}
status = U_ZERO_ERROR;
@ -281,14 +322,15 @@ void SimplePatternFormatterTest::TestBadArguments() {
fmt.formatAndReplace(
NULL, 0, appendTo, NULL, 1, status);
if (status != U_ILLEGAL_ARGUMENT_ERROR) {
errln("Expected U_ILLEGAL_ARGUMENT_ERROR");
errln("Expected U_ILLEGAL_ARGUMENT_ERROR: formatAndReplace() offsets=NULL but length=1");
}
}
// Checks the text returned by the "no placeholders" accessor for a pattern
// that contains the placeholders {0} and {1}: only the literal text around
// them is expected back.
// NOTE(review): this span comes from a rendered diff; the
// TestGetPatternWithNoPlaceholders / getPatternWithNoPlaceholders() lines
// appear to be the pre-rename form of the TestTextWithNoPlaceholders /
// getTextWithNoPlaceholders() lines — confirm.
void SimplePatternFormatterTest::TestGetPatternWithNoPlaceholders() {
SimplePatternFormatter fmt("{0} has no {1} placeholders.");
void SimplePatternFormatterTest::TestTextWithNoPlaceholders() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("{0} has no {1} placeholders.", status);
assertEquals(
"", " has no placeholders.", fmt.getPatternWithNoPlaceholders());
"", " has no placeholders.", fmt.getTextWithNoPlaceholders());
}
void SimplePatternFormatterTest::TestFormatReplaceNoOptimization() {
@ -438,7 +480,7 @@ void SimplePatternFormatterTest::TestFormatReplaceOptimizationNoOffsets() {
void SimplePatternFormatterTest::TestFormatReplaceNoOptimizationNoOffsets() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt("Placeholders {0} and {1}");
SimplePatternFormatter fmt("Placeholders {0} and {1}", status);
UnicodeString result("previous:");
UnicodeString frog("frog");
const UnicodeString *params[] = {&result, &frog};
@ -455,6 +497,20 @@ void SimplePatternFormatterTest::TestFormatReplaceNoOptimizationNoOffsets() {
assertSuccess("Status", status);
}
void SimplePatternFormatterTest::TestQuotingLikeMessageFormat() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString pattern = "{0} don't can''t '{5}''}{a' again '}'{1} to the '{end";
SimplePatternFormatter spf(pattern, status);
MessageFormat mf(pattern, Locale::getRoot(), status);
UnicodeString expected = "X don't can't {5}'}{a again }Y to the {end";
UnicodeString x("X"), y("Y");
Formattable values[] = { x, y };
UnicodeString result;
FieldPosition ignore(FieldPosition::DONT_CARE);
assertEquals("MessageFormat", expected, mf.format(values, 2, result, ignore, status));
assertEquals("SimplePatternFormatter", expected, spf.format(x, y, result.remove(), status));
}
void SimplePatternFormatterTest::verifyOffsets(
const int32_t *expected, const int32_t *actual, int32_t count) {
for (int32_t i = 0; i < count; ++i) {
@ -467,4 +523,3 @@ void SimplePatternFormatterTest::verifyOffsets(
// Factory for this test suite: returns a newly allocated
// SimplePatternFormatterTest as an IntlTest pointer.
// Presumably the caller takes ownership of the returned object — confirm
// against the code that calls this factory.
extern IntlTest *createSimplePatternFormatterTest() {
    IntlTest *suite = new SimplePatternFormatterTest();
    return suite;
}