mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-10793 Add optimization to ListFormatter.
X-SVN-Rev: 35579
This commit is contained in:
parent
0d3dedc0a5
commit
c5fcd47b4f
4 changed files with 152 additions and 71 deletions
|
@ -355,9 +355,14 @@ UnicodeString& ListFormatter::format(
|
|||
errorCode);
|
||||
int32_t i;
|
||||
int32_t pos = 0;
|
||||
int32_t npos = 1;
|
||||
int32_t npos = 0;
|
||||
UBool startsWithZeroPlaceholder =
|
||||
data->middlePattern.startsWithPlaceholder(0);
|
||||
for (i = 2; i < nItems - 1; ++i) {
|
||||
temp[npos].remove();
|
||||
if (!startsWithZeroPlaceholder) {
|
||||
npos = (pos + 1) & 1;
|
||||
temp[npos].remove();
|
||||
}
|
||||
joinStrings(
|
||||
data->middlePattern,
|
||||
temp[pos],
|
||||
|
@ -367,9 +372,11 @@ UnicodeString& ListFormatter::format(
|
|||
offset,
|
||||
errorCode);
|
||||
pos = npos;
|
||||
npos = (pos + 1) & 1;
|
||||
}
|
||||
temp[npos].remove();
|
||||
if (!data->endPattern.startsWithPlaceholder(0)) {
|
||||
npos = (pos + 1) & 1;
|
||||
temp[npos].remove();
|
||||
}
|
||||
joinStrings(
|
||||
data->endPattern,
|
||||
temp[pos],
|
||||
|
|
|
@ -56,17 +56,15 @@ void SimplePatternFormatterIdBuilder::add(UChar ch) {
|
|||
|
||||
SimplePatternFormatter::SimplePatternFormatter() :
|
||||
noPlaceholders(),
|
||||
placeholdersByOffset(placeholderBuffer),
|
||||
placeholders(),
|
||||
placeholderSize(0),
|
||||
placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
|
||||
placeholderCount(0) {
|
||||
}
|
||||
|
||||
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
|
||||
noPlaceholders(),
|
||||
placeholdersByOffset(placeholderBuffer),
|
||||
placeholders(),
|
||||
placeholderSize(0),
|
||||
placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
|
||||
placeholderCount(0) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
compile(pattern, status);
|
||||
|
@ -75,15 +73,14 @@ SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
|
|||
SimplePatternFormatter::SimplePatternFormatter(
|
||||
const SimplePatternFormatter &other) :
|
||||
noPlaceholders(other.noPlaceholders),
|
||||
placeholdersByOffset(placeholderBuffer),
|
||||
placeholders(),
|
||||
placeholderSize(0),
|
||||
placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
|
||||
placeholderCount(other.placeholderCount) {
|
||||
placeholderSize = ensureCapacity(other.placeholderSize);
|
||||
uprv_memcpy(
|
||||
placeholdersByOffset,
|
||||
other.placeholdersByOffset,
|
||||
placeholderSize * 2 * sizeof(int32_t));
|
||||
placeholders.getAlias(),
|
||||
other.placeholders.getAlias(),
|
||||
placeholderSize * sizeof(PlaceholderInfo));
|
||||
}
|
||||
|
||||
SimplePatternFormatter &SimplePatternFormatter::operator=(
|
||||
|
@ -92,19 +89,16 @@ SimplePatternFormatter &SimplePatternFormatter::operator=(
|
|||
return *this;
|
||||
}
|
||||
noPlaceholders = other.noPlaceholders;
|
||||
placeholderCount = other.placeholderCount;
|
||||
placeholderSize = ensureCapacity(other.placeholderSize);
|
||||
placeholderCount = other.placeholderCount;
|
||||
uprv_memcpy(
|
||||
placeholdersByOffset,
|
||||
other.placeholdersByOffset,
|
||||
placeholderSize * 2 * sizeof(int32_t));
|
||||
placeholders.getAlias(),
|
||||
other.placeholders.getAlias(),
|
||||
placeholderSize * sizeof(PlaceholderInfo));
|
||||
return *this;
|
||||
}
|
||||
|
||||
SimplePatternFormatter::~SimplePatternFormatter() {
|
||||
if (placeholdersByOffset != placeholderBuffer) {
|
||||
uprv_free(placeholdersByOffset);
|
||||
}
|
||||
}
|
||||
|
||||
UBool SimplePatternFormatter::compile(
|
||||
|
@ -183,6 +177,13 @@ UBool SimplePatternFormatter::compile(
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
UBool SimplePatternFormatter::startsWithPlaceholder(int32_t id) const {
|
||||
if (placeholderSize == 0) {
|
||||
return FALSE;
|
||||
}
|
||||
return (placeholders[0].offset == 0 && placeholders[0].id == id);
|
||||
}
|
||||
|
||||
UnicodeString& SimplePatternFormatter::format(
|
||||
const UnicodeString &arg0,
|
||||
UnicodeString &appendTo,
|
||||
|
@ -267,72 +268,70 @@ UnicodeString& SimplePatternFormatter::format(
|
|||
appendTo.append(noPlaceholders);
|
||||
return appendTo;
|
||||
}
|
||||
appendRange(
|
||||
noPlaceholders,
|
||||
0,
|
||||
placeholdersByOffset[0],
|
||||
appendTo);
|
||||
updatePlaceholderOffset(
|
||||
placeholdersByOffset[1],
|
||||
appendTo.length(),
|
||||
offsetArray,
|
||||
offsetArrayLength);
|
||||
appendTo.append(*placeholderValues[placeholdersByOffset[1]]);
|
||||
for (int32_t i = 1; i < placeholderSize; ++i) {
|
||||
if (placeholders[0].offset > 0 ||
|
||||
placeholderValues[placeholders[0].id] != &appendTo) {
|
||||
appendRange(
|
||||
noPlaceholders,
|
||||
placeholdersByOffset[2 * i - 2],
|
||||
placeholdersByOffset[2 * i],
|
||||
0,
|
||||
placeholders[0].offset,
|
||||
appendTo);
|
||||
updatePlaceholderOffset(
|
||||
placeholdersByOffset[2 * i + 1],
|
||||
placeholders[0].id,
|
||||
appendTo.length(),
|
||||
offsetArray,
|
||||
offsetArrayLength);
|
||||
appendTo.append(*placeholderValues[placeholdersByOffset[2 * i + 1]]);
|
||||
appendTo.append(*placeholderValues[placeholders[0].id]);
|
||||
} else {
|
||||
updatePlaceholderOffset(
|
||||
placeholders[0].id,
|
||||
0,
|
||||
offsetArray,
|
||||
offsetArrayLength);
|
||||
}
|
||||
for (int32_t i = 1; i < placeholderSize; ++i) {
|
||||
appendRange(
|
||||
noPlaceholders,
|
||||
placeholders[i - 1].offset,
|
||||
placeholders[i].offset,
|
||||
appendTo);
|
||||
updatePlaceholderOffset(
|
||||
placeholders[i].id,
|
||||
appendTo.length(),
|
||||
offsetArray,
|
||||
offsetArrayLength);
|
||||
appendTo.append(*placeholderValues[placeholders[i].id]);
|
||||
}
|
||||
appendRange(
|
||||
noPlaceholders,
|
||||
placeholdersByOffset[2 * placeholderSize - 2],
|
||||
placeholders[placeholderSize - 1].offset,
|
||||
noPlaceholders.length(),
|
||||
appendTo);
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
int32_t SimplePatternFormatter::ensureCapacity(int32_t atLeast) {
|
||||
if (atLeast <= placeholderCapacity) {
|
||||
return atLeast;
|
||||
int32_t SimplePatternFormatter::ensureCapacity(
|
||||
int32_t desiredCapacity, int32_t allocationSize) {
|
||||
if (allocationSize < desiredCapacity) {
|
||||
allocationSize = desiredCapacity;
|
||||
}
|
||||
if (desiredCapacity <= placeholders.getCapacity()) {
|
||||
return desiredCapacity;
|
||||
}
|
||||
// aim to double capacity each time
|
||||
int32_t newCapacity = 2*atLeast - 2;
|
||||
|
||||
// allocate new buffer
|
||||
int32_t *newBuffer = (int32_t *) uprv_malloc(2 * newCapacity * sizeof(int32_t));
|
||||
if (newBuffer == NULL) {
|
||||
return placeholderCapacity;
|
||||
if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
|
||||
return placeholders.getCapacity();
|
||||
}
|
||||
|
||||
// Copy contents of old buffer to new buffer
|
||||
uprv_memcpy(newBuffer, placeholdersByOffset, 2 * placeholderSize * sizeof(int32_t));
|
||||
|
||||
// free old buffer
|
||||
if (placeholdersByOffset != placeholderBuffer) {
|
||||
uprv_free(placeholdersByOffset);
|
||||
}
|
||||
|
||||
// Use new buffer
|
||||
placeholdersByOffset = newBuffer;
|
||||
placeholderCapacity = newCapacity;
|
||||
return atLeast;
|
||||
return desiredCapacity;
|
||||
}
|
||||
|
||||
UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
|
||||
if (ensureCapacity(placeholderSize + 1) < placeholderSize + 1) {
|
||||
if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {
|
||||
return FALSE;
|
||||
}
|
||||
++placeholderSize;
|
||||
placeholdersByOffset[2 * placeholderSize - 2] = offset;
|
||||
placeholdersByOffset[2 * placeholderSize - 1] = id;
|
||||
PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];
|
||||
placeholderEnd->offset = offset;
|
||||
placeholderEnd->id = id;
|
||||
if (id >= placeholderCount) {
|
||||
placeholderCount = id + 1;
|
||||
}
|
||||
|
|
|
@ -11,11 +11,17 @@
|
|||
|
||||
#define EXPECTED_PLACEHOLDER_COUNT 3
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct PlaceholderInfo {
|
||||
int32_t id;
|
||||
int32_t offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* Compiled version of a pattern string such as "{1} was born in {0}".
|
||||
* <p>
|
||||
|
@ -84,6 +90,12 @@ public:
|
|||
return placeholderCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the pattern this object represents starts with
|
||||
* placeholder id; otherwise, returns false.
|
||||
*/
|
||||
UBool startsWithPlaceholder(int32_t id) const;
|
||||
|
||||
/**
|
||||
* Formats given value.
|
||||
*/
|
||||
|
@ -120,7 +132,12 @@ public:
|
|||
* @param placeholderValueCount the number of placeholder values
|
||||
* must be at least large enough to provide values for all placeholders
|
||||
* in this object. Otherwise status set to U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @param appendTo resulting string appended here.
|
||||
* @param appendTo resulting string appended here. Optimization: If
|
||||
* the pattern this object represents starts with a placeholder AND
|
||||
* appendTo references the value of that same placeholder, then that
|
||||
* placeholder value is not copied to appendTo (Its already there).
|
||||
* If the value of the starting placeholder is a very large string,
|
||||
* this optimization can offer huge savings.
|
||||
* @param offsetArray The offset of each placeholder value in appendTo
|
||||
* stored here. The first value gets the offset of the value for {0};
|
||||
* the 2nd for {1}; the 3rd for {2} etc. -1 means that the corresponding
|
||||
|
@ -139,12 +156,32 @@ public:
|
|||
UErrorCode &status) const;
|
||||
private:
|
||||
UnicodeString noPlaceholders;
|
||||
int32_t placeholderBuffer[EXPECTED_PLACEHOLDER_COUNT * 2];
|
||||
int32_t *placeholdersByOffset;
|
||||
MaybeStackArray<PlaceholderInfo, 3> placeholders;
|
||||
int32_t placeholderSize;
|
||||
int32_t placeholderCapacity;
|
||||
int32_t placeholderCount;
|
||||
int32_t ensureCapacity(int32_t size);
|
||||
|
||||
// ensureCapacity ensures that the capacity of the placeholders array
|
||||
// is desiredCapacity. If ensureCapacity must resize the placeholders
|
||||
// array, the first placeholderSize elements stay in the array. Note
|
||||
// that ensureCapcity NEVER changes the value of placeholderSize only
|
||||
// the capacity of the placeholders array.
|
||||
// If there is no memory allocation error when resizing, this
|
||||
// function returns desiredCapacity. If there is a memory allocation
|
||||
// error, this function leaves the placeholders array unchanged and
|
||||
// returns the smaller, old capacity. ensureCapacity resizes only if
|
||||
// the current capacity of placeholders array is less than desiredCapacity.
|
||||
// Otherwise, it leaves the placeholders array unchanged. If caller
|
||||
// specifies an allocation size, then it must be at least as large as
|
||||
// desiredCapacity. In that case, if ensureCapacity resizes, it will
|
||||
// allocate allocationSize spots instead of desiredCapacity spots in
|
||||
// the array. If caller is calling ensureCapacity in a loop while adding
|
||||
// elements, it is recommended that it use an allocationSize of
|
||||
// approximately twice desiredCapacity to avoid memory allocation with
|
||||
// every call to ensureCapacity.
|
||||
int32_t ensureCapacity(int32_t desiredCapacity, int32_t allocationSize=0);
|
||||
|
||||
// Records the offset of an individual placeholder in the noPlaceholders
|
||||
// string.
|
||||
UBool addPlaceholder(int32_t id, int32_t offset);
|
||||
};
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ public:
|
|||
void TestNoPlaceholders();
|
||||
void TestOnePlaceholder();
|
||||
void TestManyPlaceholders();
|
||||
void TestOptimization();
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
|
||||
private:
|
||||
};
|
||||
|
@ -30,6 +31,7 @@ void SimplePatternFormatterTest::runIndexedTest(int32_t index, UBool exec, const
|
|||
TESTCASE_AUTO(TestNoPlaceholders);
|
||||
TESTCASE_AUTO(TestOnePlaceholder);
|
||||
TESTCASE_AUTO(TestManyPlaceholders);
|
||||
TESTCASE_AUTO(TestOptimization);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -39,14 +41,14 @@ void SimplePatternFormatterTest::TestNoPlaceholders() {
|
|||
assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
|
||||
UnicodeString appendTo;
|
||||
assertEquals(
|
||||
"Evaluate",
|
||||
"format",
|
||||
"This doesn't have templates {0}",
|
||||
fmt.format("unused", appendTo, status));
|
||||
fmt.compile("This has {} bad {012d placeholders", status);
|
||||
assertEquals("PlaceholderCount", 0, fmt.getPlaceholderCount());
|
||||
appendTo.remove();
|
||||
assertEquals(
|
||||
"Evaluate",
|
||||
"format",
|
||||
"This has {} bad {012d placeholders",
|
||||
fmt.format("unused", appendTo, status));
|
||||
assertSuccess("Status", status);
|
||||
|
@ -59,7 +61,7 @@ void SimplePatternFormatterTest::TestOnePlaceholder() {
|
|||
assertEquals("PlaceholderCount", 1, fmt.getPlaceholderCount());
|
||||
UnicodeString appendTo;
|
||||
assertEquals(
|
||||
"Evaluate",
|
||||
"format",
|
||||
"1 meter",
|
||||
fmt.format("1", appendTo, status));
|
||||
assertSuccess("Status", status);
|
||||
|
@ -88,6 +90,8 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
|
|||
SimplePatternFormatter fmt;
|
||||
fmt.compile(
|
||||
"Templates {2}{1}{5} and {4} are out of order.", status);
|
||||
assertSuccess("Status", status);
|
||||
assertFalse("startsWithPlaceholder", fmt.startsWithPlaceholder(2));
|
||||
assertEquals("PlaceholderCount", 6, fmt.getPlaceholderCount());
|
||||
UnicodeString values[] = {
|
||||
"freddy", "tommy", "frog", "billy", "leg", "{0}"};
|
||||
|
@ -97,7 +101,7 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
|
|||
int32_t expectedOffsets[6] = {-1, 22, 18, -1, 35, 27};
|
||||
UnicodeString appendTo("Prefix: ");
|
||||
assertEquals(
|
||||
"Evaluate",
|
||||
"format",
|
||||
"Prefix: Templates frogtommy{0} and leg are out of order.",
|
||||
fmt.format(
|
||||
params,
|
||||
|
@ -187,6 +191,40 @@ void SimplePatternFormatterTest::TestManyPlaceholders() {
|
|||
assertSuccess("Status", status);
|
||||
}
|
||||
|
||||
void SimplePatternFormatterTest::TestOptimization() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
SimplePatternFormatter fmt;
|
||||
fmt.compile("{2}, {0}, {1} and {3}", status);
|
||||
assertSuccess("Status", status);
|
||||
assertTrue("startsWithPlaceholder", fmt.startsWithPlaceholder(2));
|
||||
assertFalse("startsWithPlaceholder", fmt.startsWithPlaceholder(0));
|
||||
UnicodeString values[] = {
|
||||
"freddy", "frog", "leg", "by"};
|
||||
UnicodeString *params[] = {
|
||||
&values[0], &values[1], &values[2], &values[3]};
|
||||
int32_t offsets[4];
|
||||
int32_t expectedOffsets[4] = {5, 13, 0, 22};
|
||||
|
||||
// The pattern starts with {2}, so format should append the result of
|
||||
// the rest of the pattern to values[2], the value for {2}.
|
||||
assertEquals(
|
||||
"format",
|
||||
"leg, freddy, frog and by",
|
||||
fmt.format(
|
||||
params,
|
||||
LENGTHOF(params),
|
||||
values[2],
|
||||
offsets,
|
||||
LENGTHOF(offsets),
|
||||
status));
|
||||
assertSuccess("Status", status);
|
||||
for (int32_t i = 0; i < LENGTHOF(expectedOffsets); ++i) {
|
||||
if (expectedOffsets[i] != offsets[i]) {
|
||||
errln("Expected %d, got %d", expectedOffsets[i], offsets[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern IntlTest *createSimplePatternFormatterTest() {
|
||||
return new SimplePatternFormatterTest();
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue