mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-19 11:45:45 +00:00
ICU-13247 Java: String CaseMap.apply(CharSequence); fix omitUnchangedText() without Edits
X-SVN-Rev: 40417
This commit is contained in:
parent
cf7b342a1d
commit
ee7fb909bf
7 changed files with 364 additions and 131 deletions
|
@ -165,9 +165,9 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
|
|||
/* (not) original code point */
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(cpLength);
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
}
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
c=~result;
|
||||
if(destIndex<destCapacity && c<=0x7f) { // ASCII slightly-fastpath
|
||||
|
@ -283,9 +283,9 @@ appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
|
|||
if(length>0) {
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(length);
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
}
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
return -1; // integer overflow
|
||||
|
@ -628,8 +628,10 @@ int32_t toUpper(uint32_t options,
|
|||
}
|
||||
}
|
||||
|
||||
UBool change = TRUE;
|
||||
if (edits != NULL) {
|
||||
UBool change;
|
||||
if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
|
||||
change = TRUE; // common, simple usage
|
||||
} else {
|
||||
// Find out first whether we are changing the text.
|
||||
U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
|
||||
change = (i + 2) > nextIndex ||
|
||||
|
|
|
@ -73,9 +73,9 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
|||
/* (not) original code point */
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(cpLength);
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
}
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
c=~result;
|
||||
if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath
|
||||
|
@ -150,9 +150,9 @@ appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
|||
if(length>0) {
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(length);
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
}
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
return -1; // integer overflow
|
||||
|
@ -934,8 +934,10 @@ int32_t toUpper(uint32_t options,
|
|||
}
|
||||
}
|
||||
|
||||
UBool change = TRUE;
|
||||
if (edits != NULL) {
|
||||
UBool change;
|
||||
if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
|
||||
change = TRUE; // common, simple usage
|
||||
} else {
|
||||
// Find out first whether we are changing the text.
|
||||
change = src[i] != upper || numYpogegrammeni > 0;
|
||||
int32_t i2 = i + 1;
|
||||
|
|
|
@ -62,6 +62,8 @@ public:
|
|||
void TestMergeEdits();
|
||||
void TestCaseMapWithEdits();
|
||||
void TestCaseMapUTF8WithEdits();
|
||||
void TestCaseMapToString();
|
||||
void TestCaseMapUTF8ToString();
|
||||
void TestLongUnicodeString();
|
||||
void TestBug13127();
|
||||
void TestInPlaceTitle();
|
||||
|
@ -102,6 +104,8 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||
TESTCASE_AUTO(TestMergeEdits);
|
||||
TESTCASE_AUTO(TestCaseMapWithEdits);
|
||||
TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
|
||||
TESTCASE_AUTO(TestCaseMapToString);
|
||||
TESTCASE_AUTO(TestCaseMapUTF8ToString);
|
||||
TESTCASE_AUTO(TestLongUnicodeString);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
TESTCASE_AUTO(TestBug13127);
|
||||
|
@ -1216,7 +1220,7 @@ void StringCaseTest::TestMergeEdits() {
|
|||
}
|
||||
|
||||
void StringCaseTest::TestCaseMapWithEdits() {
|
||||
IcuTestErrorCode errorCode(*this, "TestEdits");
|
||||
IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
|
||||
UChar dest[20];
|
||||
Edits edits;
|
||||
|
||||
|
@ -1258,7 +1262,7 @@ void StringCaseTest::TestCaseMapWithEdits() {
|
|||
U_OMIT_UNCHANGED_TEXT |
|
||||
U_TITLECASE_NO_BREAK_ADJUSTMENT |
|
||||
U_TITLECASE_NO_LOWERCASE,
|
||||
NULL, u"IjssEL IglOo", 12,
|
||||
nullptr, u"IjssEL IglOo", 12,
|
||||
dest, UPRV_LENGTHOF(dest), &edits, errorCode);
|
||||
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
|
||||
static const EditChange titleExpectedChanges[] = {
|
||||
|
@ -1338,7 +1342,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
|
|||
U_OMIT_UNCHANGED_TEXT |
|
||||
U_TITLECASE_NO_BREAK_ADJUSTMENT |
|
||||
U_TITLECASE_NO_LOWERCASE,
|
||||
NULL, u8"IjssEL IglOo", 12,
|
||||
nullptr, u8"IjssEL IglOo", 12,
|
||||
dest, UPRV_LENGTHOF(dest), &edits, errorCode);
|
||||
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
|
@ -1377,6 +1381,114 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
|
|||
TRUE, errorCode);
|
||||
}
|
||||
|
||||
void StringCaseTest::TestCaseMapToString() {
|
||||
// This test function name is parallel with one in UCharacterCaseTest.java.
|
||||
// It is a bit of a misnomer until we have CaseMap API that writes to
|
||||
// a UnicodeString, at which point we should change this code here.
|
||||
IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
|
||||
UChar dest[20];
|
||||
|
||||
// Omit unchanged text.
|
||||
int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
|
||||
u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toLower(IstanBul)",
|
||||
UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
|
||||
length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
|
||||
u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toUpper(Πατάτα)",
|
||||
UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
length = CaseMap::toTitle("nl",
|
||||
U_OMIT_UNCHANGED_TEXT |
|
||||
U_TITLECASE_NO_BREAK_ADJUSTMENT |
|
||||
U_TITLECASE_NO_LOWERCASE,
|
||||
nullptr, u"IjssEL IglOo", 12,
|
||||
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toTitle(IjssEL IglOo)",
|
||||
UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
|
||||
#endif
|
||||
length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"foldCase(IßtanBul)",
|
||||
UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
|
||||
|
||||
// Return the whole result string.
|
||||
length = CaseMap::toLower("tr", 0,
|
||||
u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toLower(IstanBul)",
|
||||
UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
|
||||
length = CaseMap::toUpper("el", 0,
|
||||
u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toUpper(Πατάτα)",
|
||||
UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
length = CaseMap::toTitle("nl",
|
||||
U_TITLECASE_NO_BREAK_ADJUSTMENT |
|
||||
U_TITLECASE_NO_LOWERCASE,
|
||||
nullptr, u"IjssEL IglOo", 12,
|
||||
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toTitle(IjssEL IglOo)",
|
||||
UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
|
||||
#endif
|
||||
length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"foldCase(IßtanBul)",
|
||||
UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
|
||||
}
|
||||
|
||||
void StringCaseTest::TestCaseMapUTF8ToString() {
|
||||
IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
|
||||
// TODO: Change this to writing to string via ByteSink when that is available.
|
||||
char dest[50];
|
||||
|
||||
// Omit unchanged text.
|
||||
int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
|
||||
u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
|
||||
u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
length = CaseMap::utf8ToTitle("nl",
|
||||
U_OMIT_UNCHANGED_TEXT |
|
||||
U_TITLECASE_NO_BREAK_ADJUSTMENT |
|
||||
U_TITLECASE_NO_LOWERCASE,
|
||||
nullptr, u8"IjssEL IglOo", 12,
|
||||
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
#endif
|
||||
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
|
||||
// Return the whole result string.
|
||||
length = CaseMap::utf8ToLower("tr", 0,
|
||||
u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
length = CaseMap::utf8ToUpper("el", 0,
|
||||
u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
length = CaseMap::utf8ToTitle("nl",
|
||||
U_TITLECASE_NO_BREAK_ADJUSTMENT |
|
||||
U_TITLECASE_NO_LOWERCASE,
|
||||
nullptr, u8"IjssEL IglOo", 12,
|
||||
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
#endif
|
||||
length = CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
|
||||
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
}
|
||||
|
||||
void StringCaseTest::TestLongUnicodeString() {
|
||||
// Code coverage for UnicodeString case mapping code handling
|
||||
// long strings or many changes in a string.
|
||||
|
|
|
@ -318,6 +318,11 @@ public final class CaseMapImpl {
|
|||
length = newText.getEndIndex();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setText(CharSequence newText) {
|
||||
length = newText.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setText(String newText) {
|
||||
length = newText.length();
|
||||
|
@ -346,9 +351,9 @@ public final class CaseMapImpl {
|
|||
// (not) original code point
|
||||
if (edits != null) {
|
||||
edits.addUnchanged(cpLength);
|
||||
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
|
||||
return;
|
||||
}
|
||||
appendCodePoint(dest, ~result);
|
||||
} else if (result <= UCaseProps.MAX_STRING_LENGTH) {
|
||||
|
@ -370,14 +375,31 @@ public final class CaseMapImpl {
|
|||
if (length > 0) {
|
||||
if (edits != null) {
|
||||
edits.addUnchanged(length);
|
||||
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
|
||||
return;
|
||||
}
|
||||
dest.append(src, start, start + length);
|
||||
}
|
||||
}
|
||||
|
||||
private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) {
|
||||
if (!edits.hasChanges()) {
|
||||
return src.toString();
|
||||
}
|
||||
StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta());
|
||||
for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
|
||||
if (ei.hasChange()) {
|
||||
int i = ei.replacementIndex();
|
||||
result.append(replacementChars, i, i + ei.newLength());
|
||||
} else {
|
||||
int i = ei.sourceIndex();
|
||||
result.append(src, i, i + ei.oldLength());
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
|
||||
Appendable dest, Edits edits) throws IOException {
|
||||
int c;
|
||||
|
@ -387,6 +409,23 @@ public final class CaseMapImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static String toLower(int caseLocale, int options, CharSequence src) {
|
||||
if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
|
||||
if (src.length() == 0) {
|
||||
return src.toString();
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = toLower(
|
||||
caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
|
||||
return applyEdits(src, replacementChars, edits);
|
||||
} else {
|
||||
return toLower(caseLocale, options, src,
|
||||
new StringBuilder(src.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
public static <A extends Appendable> A toLower(int caseLocale, int options,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
try {
|
||||
|
@ -401,6 +440,23 @@ public final class CaseMapImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static String toUpper(int caseLocale, int options, CharSequence src) {
|
||||
if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
|
||||
if (src.length() == 0) {
|
||||
return src.toString();
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = toUpper(
|
||||
caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
|
||||
return applyEdits(src, replacementChars, edits);
|
||||
} else {
|
||||
return toUpper(caseLocale, options, src,
|
||||
new StringBuilder(src.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
public static <A extends Appendable> A toUpper(int caseLocale, int options,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
try {
|
||||
|
@ -422,6 +478,24 @@ public final class CaseMapImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
|
||||
if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
|
||||
if (src.length() == 0) {
|
||||
return src.toString();
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = toTitle(
|
||||
caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
|
||||
new StringBuilder(), edits);
|
||||
return applyEdits(src, replacementChars, edits);
|
||||
} else {
|
||||
return toTitle(caseLocale, options, iter, src,
|
||||
new StringBuilder(src.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
public static <A extends Appendable> A toTitle(
|
||||
int caseLocale, int options, BreakIterator titleIter,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
|
@ -533,6 +607,22 @@ public final class CaseMapImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static String fold(int options, CharSequence src) {
|
||||
if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
|
||||
if (src.length() == 0) {
|
||||
return src.toString();
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = fold(
|
||||
options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
|
||||
return applyEdits(src, replacementChars, edits);
|
||||
} else {
|
||||
return fold(options, src, new StringBuilder(src.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
public static <A extends Appendable> A fold(int options,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
try {
|
||||
|
@ -1131,7 +1221,7 @@ public final class CaseMapImpl {
|
|||
}
|
||||
|
||||
boolean change;
|
||||
if (edits == null) {
|
||||
if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
|
||||
change = true; // common, simple usage
|
||||
} else {
|
||||
// Find out first whether we are changing the text.
|
||||
|
|
|
@ -28,7 +28,6 @@ import com.ibm.icu.impl.UPropertyAliases;
|
|||
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
|
||||
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.Edits;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
@ -4937,7 +4936,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static String toUpperCase(String str)
|
||||
{
|
||||
return toUpperCase(getDefaultCaseLocale(), str);
|
||||
return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4949,7 +4948,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static String toLowerCase(String str)
|
||||
{
|
||||
return toLowerCase(getDefaultCaseLocale(), str);
|
||||
return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4993,75 +4992,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
return UCaseProps.getCaseLocale(locale);
|
||||
}
|
||||
|
||||
private static String toLowerCase(int caseLocale, String str) {
|
||||
if (str.length() <= 100) {
|
||||
if (str.isEmpty()) {
|
||||
return str;
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = CaseMapImpl.toLower(
|
||||
caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
|
||||
return applyEdits(str, replacementChars, edits);
|
||||
} else {
|
||||
return CaseMapImpl.toLower(caseLocale, 0, str,
|
||||
new StringBuilder(str.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
private static String toUpperCase(int caseLocale, String str) {
|
||||
if (str.length() <= 100) {
|
||||
if (str.isEmpty()) {
|
||||
return str;
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = CaseMapImpl.toUpper(
|
||||
caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
|
||||
return applyEdits(str, replacementChars, edits);
|
||||
} else {
|
||||
return CaseMapImpl.toUpper(caseLocale, 0, str,
|
||||
new StringBuilder(str.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
|
||||
if (str.length() <= 100) {
|
||||
if (str.isEmpty()) {
|
||||
return str;
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = CaseMapImpl.toTitle(
|
||||
caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str,
|
||||
new StringBuilder(), edits);
|
||||
return applyEdits(str, replacementChars, edits);
|
||||
} else {
|
||||
return CaseMapImpl.toTitle(caseLocale, options, titleIter, str,
|
||||
new StringBuilder(str.length()), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
|
||||
if (!edits.hasChanges()) {
|
||||
return str;
|
||||
}
|
||||
StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
|
||||
for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
|
||||
if (ei.hasChange()) {
|
||||
int i = ei.replacementIndex();
|
||||
result.append(replacementChars, i, i + ei.newLength());
|
||||
} else {
|
||||
int i = ei.sourceIndex();
|
||||
result.append(str, i, i + ei.oldLength());
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the uppercase version of the argument string.
|
||||
* Casing is dependent on the argument locale and context-sensitive.
|
||||
|
@ -5072,7 +5002,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static String toUpperCase(Locale locale, String str)
|
||||
{
|
||||
return toUpperCase(getCaseLocale(locale), str);
|
||||
return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5084,7 +5014,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* @stable ICU 3.2
|
||||
*/
|
||||
public static String toUpperCase(ULocale locale, String str) {
|
||||
return toUpperCase(getCaseLocale(locale), str);
|
||||
return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5097,7 +5027,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static String toLowerCase(Locale locale, String str)
|
||||
{
|
||||
return toLowerCase(getCaseLocale(locale), str);
|
||||
return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5109,7 +5039,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* @stable ICU 3.2
|
||||
*/
|
||||
public static String toLowerCase(ULocale locale, String str) {
|
||||
return toLowerCase(getCaseLocale(locale), str);
|
||||
return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5190,7 +5120,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
}
|
||||
titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
|
||||
titleIter.setText(str);
|
||||
return toTitleCase(getCaseLocale(locale), options, titleIter, str);
|
||||
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5217,16 +5147,13 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
@Deprecated
|
||||
public static String toTitleFirst(ULocale locale, String str) {
|
||||
return toTitleCase(locale, str, null,
|
||||
CaseMapImpl.TITLECASE_WHOLE_STRING|TITLECASE_NO_LOWERCASE);
|
||||
// TODO: Remove this function.
|
||||
// Move something like the following helper function into CLDR.
|
||||
// private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
|
||||
// CaseMap.toTitle().wholeString().noLowercase();
|
||||
// return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(
|
||||
// locale.toLocale(), null, str, new StringBuilder(), null).toString();
|
||||
// TODO: Remove this function. Inline it where it is called in CLDR.
|
||||
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
|
||||
}
|
||||
|
||||
private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
|
||||
com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase();
|
||||
|
||||
/**
|
||||
* {@icu} <p>Returns the titlecase version of the argument string.
|
||||
* <p>Position for titlecasing is determined by the argument break
|
||||
|
@ -5257,7 +5184,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
}
|
||||
titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
|
||||
titleIter.setText(str);
|
||||
return toTitleCase(getCaseLocale(locale), options, titleIter, str);
|
||||
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5374,19 +5301,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* @stable ICU 2.6
|
||||
*/
|
||||
public static final String foldCase(String str, int options) {
|
||||
if (str.length() <= 100) {
|
||||
if (str.isEmpty()) {
|
||||
return str;
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes. Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = CaseMapImpl.fold(
|
||||
options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
|
||||
return applyEdits(str, replacementChars, edits);
|
||||
} else {
|
||||
return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
|
||||
}
|
||||
return CaseMapImpl.fold(options, str);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -92,6 +92,24 @@ public abstract class CaseMap {
|
|||
return OMIT_UNCHANGED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lowercases a string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
|
||||
* (See {@link ULocale#toLocale}.)
|
||||
* @param src The original string.
|
||||
* @return the result string.
|
||||
*
|
||||
* @see UCharacter#toLowerCase(Locale, String)
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public String apply(Locale locale, CharSequence src) {
|
||||
return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
|
@ -138,6 +156,24 @@ public abstract class CaseMap {
|
|||
return OMIT_UNCHANGED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uppercases a string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
|
||||
* (See {@link ULocale#toLocale}.)
|
||||
* @param src The original string.
|
||||
* @return the result string.
|
||||
*
|
||||
* @see UCharacter#toUpperCase(Locale, String)
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public String apply(Locale locale, CharSequence src) {
|
||||
return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
|
@ -288,6 +324,38 @@ public abstract class CaseMap {
|
|||
internalOptions, CaseMapImpl.TITLECASE_ADJUST_TO_CASED));
|
||||
}
|
||||
|
||||
/**
|
||||
* Titlecases a string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* <p>Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
|
||||
* (See {@link ULocale#toLocale}.)
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If null, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @return the result string.
|
||||
*
|
||||
* @see UCharacter#toTitleCase(Locale, String, BreakIterator)
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public String apply(Locale locale, BreakIterator iter, CharSequence src) {
|
||||
if (iter == null && locale == null) {
|
||||
locale = Locale.getDefault();
|
||||
}
|
||||
iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter);
|
||||
iter.setText(src);
|
||||
return CaseMapImpl.toTitle(getCaseLocale(locale), internalOptions, iter, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
|
@ -321,7 +389,7 @@ public abstract class CaseMap {
|
|||
locale = Locale.getDefault();
|
||||
}
|
||||
iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter);
|
||||
iter.setText(src.toString());
|
||||
iter.setText(src);
|
||||
return CaseMapImpl.toTitle(
|
||||
getCaseLocale(locale), internalOptions, iter, src, dest, edits);
|
||||
}
|
||||
|
@ -372,13 +440,31 @@ public abstract class CaseMap {
|
|||
}
|
||||
|
||||
/**
|
||||
* Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Case-folds a string.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* <p>Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* <p>The result may be longer or shorter than the original.
|
||||
* @param src The original string.
|
||||
* @return the result string.
|
||||
*
|
||||
* @see UCharacter#foldCase(String, int)
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public String apply(CharSequence src) {
|
||||
return CaseMapImpl.fold(internalOptions, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* <p>Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
|
|
|
@ -420,14 +420,13 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));
|
||||
|
||||
// Also check the behavior using Java Locale
|
||||
Locale JAVALOC_DUTCH = new Locale("nl");
|
||||
assertEquals("Dutch titlecase check in English (Java Locale)",
|
||||
"Ijssel Igloo Ijmuiden",
|
||||
UCharacter.toTitleCase(Locale.ENGLISH, "ijssel igloo IJMUIDEN", null));
|
||||
|
||||
assertEquals("Dutch titlecase check in Dutch (Java Locale)",
|
||||
"IJssel Igloo IJmuiden",
|
||||
UCharacter.toTitleCase(JAVALOC_DUTCH, "ijssel igloo IJMUIDEN", null));
|
||||
UCharacter.toTitleCase(DUTCH_LOCALE_, "ijssel igloo IJMUIDEN", null));
|
||||
|
||||
iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
|
||||
assertEquals("Dutch titlecase check in Dutch with nolowercase option",
|
||||
|
@ -1238,7 +1237,7 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
sb.delete(0, sb.length());
|
||||
edits.reset();
|
||||
sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
|
||||
new Locale("nl"), null, "IjssEL IglOo", sb, edits);
|
||||
DUTCH_LOCALE_, null, "IjssEL IglOo", sb, edits);
|
||||
assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString());
|
||||
EditChange[] titleExpectedChanges = new EditChange[] {
|
||||
new EditChange(false, 1, 1),
|
||||
|
@ -1265,6 +1264,32 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
foldExpectedChanges, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestCaseMapToString() {
|
||||
// String apply(..., CharSequence)
|
||||
// Omit unchanged text.
|
||||
assertEquals("toLower(Istanbul)", "ıb",
|
||||
CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul"));
|
||||
assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ",
|
||||
CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα"));
|
||||
assertEquals("toTitle(IjssEL IglOo)", "J",
|
||||
CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
|
||||
DUTCH_LOCALE_, null, "IjssEL IglOo"));
|
||||
assertEquals("fold(IßtanBul)", "ıssb",
|
||||
CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul"));
|
||||
|
||||
// Return the whole result string.
|
||||
assertEquals("toLower(Istanbul)", "ıstanbul",
|
||||
CaseMap.toLower().apply(TURKISH_LOCALE_, "IstanBul"));
|
||||
assertEquals("toUpper(Πατάτα)", "ΠΑΤΑΤΑ",
|
||||
CaseMap.toUpper().apply(GREEK_LOCALE_, "Πατάτα"));
|
||||
assertEquals("toTitle(IjssEL IglOo)", "IJssEL IglOo",
|
||||
CaseMap.toTitle().noBreakAdjustment().noLowercase().apply(
|
||||
DUTCH_LOCALE_, null, "IjssEL IglOo"));
|
||||
assertEquals("fold(IßtanBul)", "ısstanbul",
|
||||
CaseMap.fold().turkic().apply("IßtanBul"));
|
||||
}
|
||||
|
||||
// private data members - test data --------------------------------------
|
||||
|
||||
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
|
||||
|
@ -1272,6 +1297,7 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
|
||||
private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
|
||||
private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
|
||||
private static final Locale DUTCH_LOCALE_ = new Locale("nl");
|
||||
|
||||
private static final int CHARACTER_UPPER_[] =
|
||||
{0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
|
|
Loading…
Add table
Reference in a new issue