ICU-10793 Add optimization to ListFormatter.

X-SVN-Rev: 35579
This commit is contained in:
Travis Keep 2014-04-02 21:43:11 +00:00
parent 0d3dedc0a5
commit c5fcd47b4f
4 changed files with 152 additions and 71 deletions

View file

@ -355,9 +355,14 @@ UnicodeString& ListFormatter::format(
errorCode);
int32_t i;
int32_t pos = 0;
int32_t npos = 1;
int32_t npos = 0;
UBool startsWithZeroPlaceholder =
data->middlePattern.startsWithPlaceholder(0);
for (i = 2; i < nItems - 1; ++i) {
temp[npos].remove();
if (!startsWithZeroPlaceholder) {
npos = (pos + 1) & 1;
temp[npos].remove();
}
joinStrings(
data->middlePattern,
temp[pos],
@ -367,9 +372,11 @@ UnicodeString& ListFormatter::format(
offset,
errorCode);
pos = npos;
npos = (pos + 1) & 1;
}
temp[npos].remove();
if (!data->endPattern.startsWithPlaceholder(0)) {
npos = (pos + 1) & 1;
temp[npos].remove();
}
joinStrings(
data->endPattern,
temp[pos],

View file

@ -56,17 +56,15 @@ void SimplePatternFormatterIdBuilder::add(UChar ch) {
SimplePatternFormatter::SimplePatternFormatter() :
noPlaceholders(),
placeholdersByOffset(placeholderBuffer),
placeholders(),
placeholderSize(0),
placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
placeholderCount(0) {
}
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
noPlaceholders(),
placeholdersByOffset(placeholderBuffer),
placeholders(),
placeholderSize(0),
placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
placeholderCount(0) {
UErrorCode status = U_ZERO_ERROR;
compile(pattern, status);
@ -75,15 +73,14 @@ SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
SimplePatternFormatter::SimplePatternFormatter(
const SimplePatternFormatter &other) :
noPlaceholders(other.noPlaceholders),
placeholdersByOffset(placeholderBuffer),
placeholders(),
placeholderSize(0),
placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
placeholderCount(other.placeholderCount) {
placeholderSize = ensureCapacity(other.placeholderSize);
uprv_memcpy(
placeholdersByOffset,
other.placeholdersByOffset,
placeholderSize * 2 * sizeof(int32_t));
placeholders.getAlias(),
other.placeholders.getAlias(),
placeholderSize * sizeof(PlaceholderInfo));
}
SimplePatternFormatter &SimplePatternFormatter::operator=(
@ -92,19 +89,16 @@ SimplePatternFormatter &SimplePatternFormatter::operator=(
return *this;
}
noPlaceholders = other.noPlaceholders;
placeholderCount = other.placeholderCount;
placeholderSize = ensureCapacity(other.placeholderSize);
placeholderCount = other.placeholderCount;
uprv_memcpy(
placeholdersByOffset,
other.placeholdersByOffset,
placeholderSize * 2 * sizeof(int32_t));
placeholders.getAlias(),
other.placeholders.getAlias(),
placeholderSize * sizeof(PlaceholderInfo));
return *this;
}
SimplePatternFormatter::~SimplePatternFormatter() {
if (placeholdersByOffset != placeholderBuffer) {
uprv_free(placeholdersByOffset);
}
}
UBool SimplePatternFormatter::compile(
@ -183,6 +177,13 @@ UBool SimplePatternFormatter::compile(
return TRUE;
}
UBool SimplePatternFormatter::startsWithPlaceholder(int32_t id) const {
if (placeholderSize == 0) {
return FALSE;
}
return (placeholders[0].offset == 0 && placeholders[0].id == id);
}
UnicodeString& SimplePatternFormatter::format(
const UnicodeString &arg0,
UnicodeString &appendTo,
@ -267,72 +268,70 @@ UnicodeString& SimplePatternFormatter::format(
appendTo.append(noPlaceholders);
return appendTo;
}
appendRange(
noPlaceholders,
0,
placeholdersByOffset[0],
appendTo);
updatePlaceholderOffset(
placeholdersByOffset[1],
appendTo.length(),
offsetArray,
offsetArrayLength);
appendTo.append(*placeholderValues[placeholdersByOffset[1]]);
for (int32_t i = 1; i < placeholderSize; ++i) {
if (placeholders[0].offset > 0 ||
placeholderValues[placeholders[0].id] != &appendTo) {
appendRange(
noPlaceholders,
placeholdersByOffset[2 * i - 2],
placeholdersByOffset[2 * i],
0,
placeholders[0].offset,
appendTo);
updatePlaceholderOffset(
placeholdersByOffset[2 * i + 1],
placeholders[0].id,
appendTo.length(),
offsetArray,
offsetArrayLength);
appendTo.append(*placeholderValues[placeholdersByOffset[2 * i + 1]]);
appendTo.append(*placeholderValues[placeholders[0].id]);
} else {
updatePlaceholderOffset(
placeholders[0].id,
0,
offsetArray,
offsetArrayLength);
}
for (int32_t i = 1; i < placeholderSize; ++i) {
appendRange(
noPlaceholders,
placeholders[i - 1].offset,
placeholders[i].offset,
appendTo);
updatePlaceholderOffset(
placeholders[i].id,
appendTo.length(),
offsetArray,
offsetArrayLength);
appendTo.append(*placeholderValues[placeholders[i].id]);
}
appendRange(
noPlaceholders,
placeholdersByOffset[2 * placeholderSize - 2],
placeholders[placeholderSize - 1].offset,
noPlaceholders.length(),
appendTo);
return appendTo;
}
int32_t SimplePatternFormatter::ensureCapacity(int32_t atLeast) {
if (atLeast <= placeholderCapacity) {
return atLeast;
int32_t SimplePatternFormatter::ensureCapacity(
int32_t desiredCapacity, int32_t allocationSize) {
if (allocationSize < desiredCapacity) {
allocationSize = desiredCapacity;
}
if (desiredCapacity <= placeholders.getCapacity()) {
return desiredCapacity;
}
// aim to double capacity each time
int32_t newCapacity = 2*atLeast - 2;
// allocate new buffer
int32_t *newBuffer = (int32_t *) uprv_malloc(2 * newCapacity * sizeof(int32_t));
if (newBuffer == NULL) {
return placeholderCapacity;
if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
return placeholders.getCapacity();
}
// Copy contents of old buffer to new buffer
uprv_memcpy(newBuffer, placeholdersByOffset, 2 * placeholderSize * sizeof(int32_t));
// free old buffer
if (placeholdersByOffset != placeholderBuffer) {
uprv_free(placeholdersByOffset);
}
// Use new buffer
placeholdersByOffset = newBuffer;
placeholderCapacity = newCapacity;
return atLeast;
return desiredCapacity;
}
UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
if (ensureCapacity(placeholderSize + 1) < placeholderSize + 1) {
if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {
return FALSE;
}
++placeholderSize;
placeholdersByOffset[2 * placeholderSize - 2] = offset;
placeholdersByOffset[2 * placeholderSize - 1] = id;
PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];
placeholderEnd->offset = offset;
placeholderEnd->id = id;
if (id >= placeholderCount) {
placeholderCount = id + 1;
}

View file

@ -11,11 +11,17 @@
#define EXPECTED_PLACEHOLDER_COUNT 3
#include "cmemory.h"
#include "unicode/utypes.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
struct PlaceholderInfo {
int32_t id;
int32_t offset;
};
/**
* Compiled version of a pattern string such as "{1} was born in {0}".
* <p>
@ -84,6 +90,12 @@ public:
return placeholderCount;
}
/**
* Returns true if the pattern this object represents starts with
* placeholder id; otherwise, returns false.
*/
UBool startsWithPlaceholder(int32_t id) const;
/**
* Formats given value.
*/
@ -120,7 +132,12 @@ public:
* @param placeholderValueCount the number of placeholder values
* must be at least large enough to provide values for all placeholders
* in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR.
* @param appendTo resulting string appended here.
* @param appendTo resulting string appended here. Optimization: If
* the pattern this object represents starts with a placeholder AND
* appendTo references the value of that same placeholder, then that
* placeholder value is not copied to appendTo (Its already there).
* If the value of the starting placeholder is a very large string,
* this optimization can offer huge savings.
* @param offsetArray The offset of each placeholder value in appendTo
* stored here. The first value gets the offset of the value for {0};
* the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding
@ -139,12 +156,32 @@ public:
UErrorCode &status) const;
private:
UnicodeString noPlaceholders;
int32_t placeholderBuffer[EXPECTED_PLACEHOLDER_COUNT * 2];
int32_t *placeholdersByOffset;
MaybeStackArray<PlaceholderInfo, 3> placeholders;
int32_t placeholderSize;
int32_t placeholderCapacity;
int32_t placeholderCount;
int32_t ensureCapacity(int32_t size);
// ensureCapacity ensures that the capacity of the placeholders array
// is desiredCapacity. If ensureCapacity must resize the placeholders
// array, the first placeholderSize elements stay in the array. Note
// that ensureCapcity NEVER changes the value of placeholderSize only
// the capacity of the placeholders array.
// If there is no memory allocation error when resizing, this
// function returns desiredCapacity. If there is a memory allocation
// error, this function leaves the placeholders array unchanged and
// returns the smaller, old capacity. ensureCapacity resizes only if
// the current capacity of placeholders array is less than desiredCapacity.
// Otherwise, it leaves the placeholders array unchanged. If caller
// specifies an allocation size, then it must be at least as large as
// desiredCapacity. In that case, if ensureCapacity resizes, it will
// allocate allocationSize spots instead of desiredCapacity spots in
// the array. If caller is calling ensureCapacity in a loop while adding
// elements, it is recommended that it use an allocationSize of
// approximately twice desiredCapacity to avoid memory allocation with
// every call to ensureCapacity.
int32_t ensureCapacity(int32_t desiredCapacity, int32_t allocationSize=0);
// Records the offset of an individual placeholder in the noPlaceholders
// string.
UBool addPlaceholder(int32_t id, int32_t offset);
};

View file

@ -21,6 +21,7 @@ public:
void TestNoPlaceholders();
void TestOnePlaceholder();
void TestManyPlaceholders();
void TestOptimization();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
private:
};
@ -30,6 +31,7 @@ void SimplePatternFormatterTest::runIndexedTest(int32_t index, UBool exec, const
TESTCASE_AUTO(TestNoPlaceholders);
TESTCASE_AUTO(TestOnePlaceholder);
TESTCASE_AUTO(TestManyPlaceholders);
TESTCASE_AUTO(TestOptimization);
TESTCASE_AUTO_END;
}
@ -39,14 +41,14 @@ void SimplePatternFormatterTest::TestNoPlaceholders() {
assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
UnicodeString appendTo;
assertEquals(
"Evaluate",
"format",
"This doesn't have templates {0}",
fmt.format("unused", appendTo, status));
fmt.compile("This has {} bad {012d placeholders", status);
assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
appendTo.remove();
assertEquals(
"Evaluate",
"format",
"This has {} bad {012d placeholders",
fmt.format("unused", appendTo, status));
assertSuccess("Status", status);
@ -59,7 +61,7 @@ void SimplePatternFormatterTest::TestOnePlaceholder() {
assertEquals("PlaceholderCount", 1, fmt.getPlaceholderCount());
UnicodeString appendTo;
assertEquals(
"Evaluate",
"format",
"1 meter",
fmt.format("1", appendTo, status));
assertSuccess("Status", status);
@ -88,6 +90,8 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
SimplePatternFormatter fmt;
fmt.compile(
"Templates {2}{1}{5} and {4} are out of order.", status);
assertSuccess("Status", status);
assertFalse("startsWithPlaceholder", fmt.startsWithPlaceholder(2));
assertEquals("PlaceholderCount", 6, fmt.getPlaceholderCount());
UnicodeString values[] = {
"freddy", "tommy", "frog", "billy", "leg", "{0}"};
@ -97,7 +101,7 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
int32_t expectedOffsets[6] = {-1, 22, 18, -1, 35, 27};
UnicodeString appendTo("Prefix: ");
assertEquals(
"Evaluate",
"format",
"Prefix: Templates frogtommy{0} and leg are out of order.",
fmt.format(
params,
@ -187,6 +191,40 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
assertSuccess("Status", status);
}
void SimplePatternFormatterTest::TestOptimization() {
UErrorCode status = U_ZERO_ERROR;
SimplePatternFormatter fmt;
fmt.compile("{2}, {0}, {1} and {3}", status);
assertSuccess("Status", status);
assertTrue("startsWithPlaceholder", fmt.startsWithPlaceholder(2));
assertFalse("startsWithPlaceholder", fmt.startsWithPlaceholder(0));
UnicodeString values[] = {
"freddy", "frog", "leg", "by"};
UnicodeString *params[] = {
&values[0], &values[1], &values[2], &values[3]};
int32_t offsets[4];
int32_t expectedOffsets[4] = {5, 13, 0, 22};
// The pattern starts with {2}, so format should append the result of
// the rest of the pattern to values[2], the value for {2}.
assertEquals(
"format",
"leg, freddy, frog and by",
fmt.format(
params,
LENGTHOF(params),
values[2],
offsets,
LENGTHOF(offsets),
status));
assertSuccess("Status", status);
for (int32_t i = 0; i < LENGTHOF(expectedOffsets); ++i) {
if (expectedOffsets[i] != offsets[i]) {
errln("Expected %d, got %d", expectedOffsets[i], offsets[i]);
}
}
}
extern IntlTest *createSimplePatternFormatterTest() {
return new SimplePatternFormatterTest();
}