ICU-13247 Java: String CaseMap.apply(CharSequence); fix omitUnchangedText() without Edits

X-SVN-Rev: 40417
This commit is contained in:
Markus Scherer 2017-09-14 22:26:13 +00:00
parent cf7b342a1d
commit ee7fb909bf
7 changed files with 364 additions and 131 deletions

View file

@ -165,9 +165,9 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
/* (not) original code point */
if(edits!=NULL) {
edits->addUnchanged(cpLength);
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
c=~result;
if(destIndex<destCapacity && c<=0x7f) { // ASCII slightly-fastpath
@ -283,9 +283,9 @@ appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
@ -628,8 +628,10 @@ int32_t toUpper(uint32_t options,
}
}
UBool change = TRUE;
if (edits != NULL) {
UBool change;
if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
change = TRUE; // common, simple usage
} else {
// Find out first whether we are changing the text.
U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
change = (i + 2) > nextIndex ||

View file

@ -73,9 +73,9 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
/* (not) original code point */
if(edits!=NULL) {
edits->addUnchanged(cpLength);
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
c=~result;
if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath
@ -150,9 +150,9 @@ appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
@ -934,8 +934,10 @@ int32_t toUpper(uint32_t options,
}
}
UBool change = TRUE;
if (edits != NULL) {
UBool change;
if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
change = TRUE; // common, simple usage
} else {
// Find out first whether we are changing the text.
change = src[i] != upper || numYpogegrammeni > 0;
int32_t i2 = i + 1;

View file

@ -62,6 +62,8 @@ public:
void TestMergeEdits();
void TestCaseMapWithEdits();
void TestCaseMapUTF8WithEdits();
void TestCaseMapToString();
void TestCaseMapUTF8ToString();
void TestLongUnicodeString();
void TestBug13127();
void TestInPlaceTitle();
@ -102,6 +104,8 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestMergeEdits);
TESTCASE_AUTO(TestCaseMapWithEdits);
TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
TESTCASE_AUTO(TestCaseMapToString);
TESTCASE_AUTO(TestCaseMapUTF8ToString);
TESTCASE_AUTO(TestLongUnicodeString);
#if !UCONFIG_NO_BREAK_ITERATION
TESTCASE_AUTO(TestBug13127);
@ -1216,7 +1220,7 @@ void StringCaseTest::TestMergeEdits() {
}
void StringCaseTest::TestCaseMapWithEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
UChar dest[20];
Edits edits;
@ -1258,7 +1262,7 @@ void StringCaseTest::TestCaseMapWithEdits() {
U_OMIT_UNCHANGED_TEXT |
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
NULL, u"IjssEL IglOo", 12,
nullptr, u"IjssEL IglOo", 12,
dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
static const EditChange titleExpectedChanges[] = {
@ -1338,7 +1342,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
U_OMIT_UNCHANGED_TEXT |
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
NULL, u8"IjssEL IglOo", 12,
nullptr, u8"IjssEL IglOo", 12,
dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
@ -1377,6 +1381,114 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
TRUE, errorCode);
}
void StringCaseTest::TestCaseMapToString() {
// This test function name is parallel with one in UCharacterCaseTest.java.
// It is a bit of a misnomer until we have CaseMap API that writes to
// a UnicodeString, at which point we should change this code here.
IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
UChar dest[20];
// Omit unchanged text.
int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toLower(IstanBul)",
UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toUpper(Πατάτα)",
UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
#if !UCONFIG_NO_BREAK_ITERATION
length = CaseMap::toTitle("nl",
U_OMIT_UNCHANGED_TEXT |
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
nullptr, u"IjssEL IglOo", 12,
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)",
UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
#endif
length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"foldCase(IßtanBul)",
UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
// Return the whole result string.
length = CaseMap::toLower("tr", 0,
u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toLower(IstanBul)",
UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
length = CaseMap::toUpper("el", 0,
u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toUpper(Πατάτα)",
UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
#if !UCONFIG_NO_BREAK_ITERATION
length = CaseMap::toTitle("nl",
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
nullptr, u"IjssEL IglOo", 12,
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)",
UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
#endif
length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"foldCase(IßtanBul)",
UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
}
void StringCaseTest::TestCaseMapUTF8ToString() {
IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
// TODO: Change this to writing to string via ByteSink when that is available.
char dest[50];
// Omit unchanged text.
int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
#if !UCONFIG_NO_BREAK_ITERATION
length = CaseMap::utf8ToTitle("nl",
U_OMIT_UNCHANGED_TEXT |
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
nullptr, u8"IjssEL IglOo", 12,
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
#endif
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
// Return the whole result string.
length = CaseMap::utf8ToLower("tr", 0,
u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
length = CaseMap::utf8ToUpper("el", 0,
u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
#if !UCONFIG_NO_BREAK_ITERATION
length = CaseMap::utf8ToTitle("nl",
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
nullptr, u8"IjssEL IglOo", 12,
dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
#endif
length = CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
}
void StringCaseTest::TestLongUnicodeString() {
// Code coverage for UnicodeString case mapping code handling
// long strings or many changes in a string.

View file

@ -318,6 +318,11 @@ public final class CaseMapImpl {
length = newText.getEndIndex();
}
// CharSequence overload of setText(): only the length of the new text is
// recorded in this method; the text itself is not stored here.
@Override
public void setText(CharSequence newText) {
length = newText.length();
}
@Override
public void setText(String newText) {
length = newText.length();
@ -346,9 +351,9 @@ public final class CaseMapImpl {
// (not) original code point
if (edits != null) {
edits.addUnchanged(cpLength);
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
return;
}
}
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
return;
}
appendCodePoint(dest, ~result);
} else if (result <= UCaseProps.MAX_STRING_LENGTH) {
@ -370,14 +375,31 @@ public final class CaseMapImpl {
if (length > 0) {
if (edits != null) {
edits.addUnchanged(length);
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
return;
}
}
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
return;
}
dest.append(src, start, start + length);
}
}
/**
 * Builds the full result string from the original text, the collected
 * replacement text, and the recorded edits.
 *
 * @param src the original text
 * @param replacementChars the replacement text for the changed spans
 * @param edits the edits that describe changed and unchanged spans
 * @return src.toString() if the edits contain no changes, otherwise the merged result
 */
private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) {
    if (edits.hasChanges()) {
        StringBuilder merged = new StringBuilder(src.length() + edits.lengthDelta());
        Edits.Iterator span = edits.getCoarseIterator();
        while (span.next()) {
            if (!span.hasChange()) {
                // Unchanged span: copy from the original text.
                int srcStart = span.sourceIndex();
                merged.append(src, srcStart, srcStart + span.oldLength());
            } else {
                // Changed span: copy from the replacement text.
                int replStart = span.replacementIndex();
                merged.append(replacementChars, replStart, replStart + span.newLength());
            }
        }
        return merged.toString();
    }
    return src.toString();
}
private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
Appendable dest, Edits edits) throws IOException {
int c;
@ -387,6 +409,23 @@ public final class CaseMapImpl {
}
}
/**
 * Lowercases src and returns the result as a String.
 *
 * @param caseLocale the case locale, passed through to the Appendable-based toLower()
 * @param options option bits; OMIT_UNCHANGED_TEXT selects the direct path
 * @param src the original text
 * @return the result string
 */
public static String toLower(int caseLocale, int options, CharSequence src) {
    final int srcLength = src.length();
    final boolean omitUnchanged = (options & OMIT_UNCHANGED_TEXT) != 0;
    if (srcLength > 100 || omitUnchanged) {
        // Long input, or the caller asked for changed text only: map directly.
        return toLower(caseLocale, options, src,
                new StringBuilder(srcLength), null).toString();
    }
    if (srcLength == 0) {
        return src.toString();
    }
    // Short input: record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = toLower(
            caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    return applyEdits(src, replacementChars, edits);
}
public static <A extends Appendable> A toLower(int caseLocale, int options,
CharSequence src, A dest, Edits edits) {
try {
@ -401,6 +440,23 @@ public final class CaseMapImpl {
}
}
/**
 * Uppercases src and returns the result as a String.
 *
 * @param caseLocale the case locale, passed through to the Appendable-based toUpper()
 * @param options option bits; OMIT_UNCHANGED_TEXT selects the direct path
 * @param src the original text
 * @return the result string
 */
public static String toUpper(int caseLocale, int options, CharSequence src) {
    final int srcLength = src.length();
    final boolean omitUnchanged = (options & OMIT_UNCHANGED_TEXT) != 0;
    if (srcLength > 100 || omitUnchanged) {
        // Long input, or the caller asked for changed text only: map directly.
        return toUpper(caseLocale, options, src,
                new StringBuilder(srcLength), null).toString();
    }
    if (srcLength == 0) {
        return src.toString();
    }
    // Short input: record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = toUpper(
            caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    return applyEdits(src, replacementChars, edits);
}
public static <A extends Appendable> A toUpper(int caseLocale, int options,
CharSequence src, A dest, Edits edits) {
try {
@ -422,6 +478,24 @@ public final class CaseMapImpl {
}
}
/**
 * Titlecases src and returns the result as a String.
 *
 * @param caseLocale the case locale, passed through to the Appendable-based toTitle()
 * @param options option bits; OMIT_UNCHANGED_TEXT selects the direct path
 * @param iter the break iterator, passed through to the Appendable-based toTitle()
 * @param src the original text
 * @return the result string
 */
public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
    final int srcLength = src.length();
    final boolean omitUnchanged = (options & OMIT_UNCHANGED_TEXT) != 0;
    if (srcLength > 100 || omitUnchanged) {
        // Long input, or the caller asked for changed text only: map directly.
        return toTitle(caseLocale, options, iter, src,
                new StringBuilder(srcLength), null).toString();
    }
    if (srcLength == 0) {
        return src.toString();
    }
    // Short input: record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = toTitle(
            caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
            new StringBuilder(), edits);
    return applyEdits(src, replacementChars, edits);
}
public static <A extends Appendable> A toTitle(
int caseLocale, int options, BreakIterator titleIter,
CharSequence src, A dest, Edits edits) {
@ -533,6 +607,22 @@ public final class CaseMapImpl {
}
}
/**
 * Case-folds src and returns the result as a String.
 *
 * @param options option bits; OMIT_UNCHANGED_TEXT selects the direct path
 * @param src the original text
 * @return the result string
 */
public static String fold(int options, CharSequence src) {
    final int srcLength = src.length();
    final boolean omitUnchanged = (options & OMIT_UNCHANGED_TEXT) != 0;
    if (srcLength > 100 || omitUnchanged) {
        // Long input, or the caller asked for changed text only: map directly.
        return fold(options, src, new StringBuilder(srcLength), null).toString();
    }
    if (srcLength == 0) {
        return src.toString();
    }
    // Short input: record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = fold(
            options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    return applyEdits(src, replacementChars, edits);
}
public static <A extends Appendable> A fold(int options,
CharSequence src, A dest, Edits edits) {
try {
@ -1131,7 +1221,7 @@ public final class CaseMapImpl {
}
boolean change;
if (edits == null) {
if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
change = true; // common, simple usage
} else {
// Find out first whether we are changing the text.

View file

@ -28,7 +28,6 @@ import com.ibm.icu.impl.UPropertyAliases;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Edits;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ULocale;
@ -4937,7 +4936,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static String toUpperCase(String str)
{
return toUpperCase(getDefaultCaseLocale(), str);
return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
}
/**
@ -4949,7 +4948,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static String toLowerCase(String str)
{
return toLowerCase(getDefaultCaseLocale(), str);
return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
}
/**
@ -4993,75 +4992,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
return UCaseProps.getCaseLocale(locale);
}
/**
 * Lowercases str for the given case locale.
 * Strings of up to 100 chars go through the edits-based path,
 * longer strings are mapped directly into a StringBuilder.
 */
private static String toLowerCase(int caseLocale, String str) {
    final int strLength = str.length();
    if (strLength > 100) {
        return CaseMapImpl.toLower(caseLocale, 0, str,
                new StringBuilder(strLength), null).toString();
    }
    if (strLength == 0) {
        return str;
    }
    // Record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = CaseMapImpl.toLower(
            caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
    return applyEdits(str, replacementChars, edits);
}
/**
 * Uppercases str for the given case locale.
 * Strings of up to 100 chars go through the edits-based path,
 * longer strings are mapped directly into a StringBuilder.
 */
private static String toUpperCase(int caseLocale, String str) {
    final int strLength = str.length();
    if (strLength > 100) {
        return CaseMapImpl.toUpper(caseLocale, 0, str,
                new StringBuilder(strLength), null).toString();
    }
    if (strLength == 0) {
        return str;
    }
    // Record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = CaseMapImpl.toUpper(
            caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
    return applyEdits(str, replacementChars, edits);
}
/**
 * Titlecases str for the given case locale, options and break iterator.
 * Strings of up to 100 chars go through the edits-based path,
 * longer strings are mapped directly into a StringBuilder.
 */
private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
    final int strLength = str.length();
    if (strLength > 100) {
        return CaseMapImpl.toTitle(caseLocale, options, titleIter, str,
                new StringBuilder(strLength), null).toString();
    }
    if (strLength == 0) {
        return str;
    }
    // Record only the changes, then merge them with the original.
    // This is good with no or few changes, and slow with many changes.
    Edits edits = new Edits();
    StringBuilder replacementChars = CaseMapImpl.toTitle(
            caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str,
            new StringBuilder(), edits);
    return applyEdits(str, replacementChars, edits);
}
/**
 * Builds the full result string from the original string, the collected
 * replacement text, and the recorded edits.
 * Returns str itself if the edits contain no changes.
 */
private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
    if (edits.hasChanges()) {
        StringBuilder merged = new StringBuilder(str.length() + edits.lengthDelta());
        Edits.Iterator span = edits.getCoarseIterator();
        while (span.next()) {
            if (!span.hasChange()) {
                // Unchanged span: copy from the original string.
                int srcStart = span.sourceIndex();
                merged.append(str, srcStart, srcStart + span.oldLength());
            } else {
                // Changed span: copy from the replacement text.
                int replStart = span.replacementIndex();
                merged.append(replacementChars, replStart, replStart + span.newLength());
            }
        }
        return merged.toString();
    }
    return str;
}
/**
* Returns the uppercase version of the argument string.
* Casing is dependent on the argument locale and context-sensitive.
@ -5072,7 +5002,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static String toUpperCase(Locale locale, String str)
{
return toUpperCase(getCaseLocale(locale), str);
return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
}
/**
@ -5084,7 +5014,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 3.2
*/
public static String toUpperCase(ULocale locale, String str) {
return toUpperCase(getCaseLocale(locale), str);
return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
}
/**
@ -5097,7 +5027,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
public static String toLowerCase(Locale locale, String str)
{
return toLowerCase(getCaseLocale(locale), str);
return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
}
/**
@ -5109,7 +5039,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 3.2
*/
public static String toLowerCase(ULocale locale, String str) {
return toLowerCase(getCaseLocale(locale), str);
return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
}
/**
@ -5190,7 +5120,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
}
titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
titleIter.setText(str);
return toTitleCase(getCaseLocale(locale), options, titleIter, str);
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
}
/**
@ -5217,16 +5147,13 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/
@Deprecated
public static String toTitleFirst(ULocale locale, String str) {
return toTitleCase(locale, str, null,
CaseMapImpl.TITLECASE_WHOLE_STRING|TITLECASE_NO_LOWERCASE);
// TODO: Remove this function.
// Move something like the following helper function into CLDR.
// private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
// CaseMap.toTitle().wholeString().noLowercase();
// return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(
// locale.toLocale(), null, str, new StringBuilder(), null).toString();
// TODO: Remove this function. Inline it where it is called in CLDR.
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
}
private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase();
/**
* {@icu} <p>Returns the titlecase version of the argument string.
* <p>Position for titlecasing is determined by the argument break
@ -5257,7 +5184,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
}
titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
titleIter.setText(str);
return toTitleCase(getCaseLocale(locale), options, titleIter, str);
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
}
/**
@ -5374,19 +5301,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 2.6
*/
public static final String foldCase(String str, int options) {
if (str.length() <= 100) {
if (str.isEmpty()) {
return str;
}
// Collect and apply only changes.
// Good if no or few changes. Bad (slow) if many changes.
Edits edits = new Edits();
StringBuilder replacementChars = CaseMapImpl.fold(
options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
return applyEdits(str, replacementChars, edits);
} else {
return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
}
return CaseMapImpl.fold(options, str);
}
/**

View file

@ -92,6 +92,24 @@ public abstract class CaseMap {
return OMIT_UNCHANGED;
}
/**
 * Converts a string to lowercase and returns the result as a String.
 * The mapping depends on the locale and on the context of each character.
 * The length of the result may differ from the length of the original.
 *
 * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
 *        (See {@link ULocale#toLocale}.)
 * @param src The original string.
 * @return the result string.
 *
 * @see UCharacter#toLowerCase(Locale, String)
 * @draft ICU 60
 * @provisional This API might change or be removed in a future release.
 */
public String apply(Locale locale, CharSequence src) {
    int caseLocale = getCaseLocale(locale);
    return CaseMapImpl.toLower(caseLocale, internalOptions, src);
}
/**
* Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
* Casing is locale-dependent and context-sensitive.
@ -138,6 +156,24 @@ public abstract class CaseMap {
return OMIT_UNCHANGED;
}
/**
 * Converts a string to uppercase and returns the result as a String.
 * The mapping depends on the locale and on the context of each character.
 * The length of the result may differ from the length of the original.
 *
 * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
 *        (See {@link ULocale#toLocale}.)
 * @param src The original string.
 * @return the result string.
 *
 * @see UCharacter#toUpperCase(Locale, String)
 * @draft ICU 60
 * @provisional This API might change or be removed in a future release.
 */
public String apply(Locale locale, CharSequence src) {
    int caseLocale = getCaseLocale(locale);
    return CaseMapImpl.toUpper(caseLocale, internalOptions, src);
}
/**
* Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
* Casing is locale-dependent and context-sensitive.
@ -288,6 +324,38 @@ public abstract class CaseMap {
internalOptions, CaseMapImpl.TITLECASE_ADJUST_TO_CASED));
}
/**
* Titlecases a string.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* <p>Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
* (See {@link ULocale#toLocale}.)
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
* If null, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @return the result string.
*
* @see UCharacter#toTitleCase(Locale, String, BreakIterator)
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
*/
public String apply(Locale locale, BreakIterator iter, CharSequence src) {
// The default locale is substituted here only when the caller supplied
// neither a locale nor a break iterator.
if (iter == null && locale == null) {
locale = Locale.getDefault();
}
// Obtain the iterator to use (may be the caller's own) and point it at src
// before titlecasing.
iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter);
iter.setText(src);
return CaseMapImpl.toTitle(getCaseLocale(locale), internalOptions, iter, src);
}
/**
* Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
* Casing is locale-dependent and context-sensitive.
@ -321,7 +389,7 @@ public abstract class CaseMap {
locale = Locale.getDefault();
}
iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter);
iter.setText(src.toString());
iter.setText(src);
return CaseMapImpl.toTitle(
getCaseLocale(locale), internalOptions, iter, src, dest, edits);
}
@ -372,13 +440,31 @@ public abstract class CaseMap {
}
/**
* Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
* Case-folds a string.
* The result may be longer or shorter than the original.
*
* <p>Case-folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* <p>The result may be longer or shorter than the original.
* @param src The original string.
* @return the result string.
*
* @see UCharacter#foldCase(String, int)
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
*/
public String apply(CharSequence src) {
// Delegates to the String-returning CaseMapImpl.fold() with this object's option bits.
return CaseMapImpl.fold(internalOptions, src);
}
/**
* Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
* The result may be longer or shorter than the original.
*
* <p>Case-folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* @param src The original string.
* @param dest A buffer for the result string. Must not be null.

View file

@ -420,14 +420,13 @@ public final class UCharacterCaseTest extends TestFmwk
UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));
// Also check the behavior using Java Locale
Locale JAVALOC_DUTCH = new Locale("nl");
assertEquals("Dutch titlecase check in English (Java Locale)",
"Ijssel Igloo Ijmuiden",
UCharacter.toTitleCase(Locale.ENGLISH, "ijssel igloo IJMUIDEN", null));
assertEquals("Dutch titlecase check in Dutch (Java Locale)",
"IJssel Igloo IJmuiden",
UCharacter.toTitleCase(JAVALOC_DUTCH, "ijssel igloo IJMUIDEN", null));
UCharacter.toTitleCase(DUTCH_LOCALE_, "ijssel igloo IJMUIDEN", null));
iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
assertEquals("Dutch titlecase check in Dutch with nolowercase option",
@ -1238,7 +1237,7 @@ public final class UCharacterCaseTest extends TestFmwk
sb.delete(0, sb.length());
edits.reset();
sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
new Locale("nl"), null, "IjssEL IglOo", sb, edits);
DUTCH_LOCALE_, null, "IjssEL IglOo", sb, edits);
assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString());
EditChange[] titleExpectedChanges = new EditChange[] {
new EditChange(false, 1, 1),
@ -1265,6 +1264,32 @@ public final class UCharacterCaseTest extends TestFmwk
foldExpectedChanges, true);
}
@Test
public void TestCaseMapToString() {
    // Exercises the String-returning apply(..., CharSequence) overloads.
    final String turkishSrc = "IstanBul";
    final String greekSrc = "Πατάτα";
    final String dutchSrc = "IjssEL IglOo";
    final String foldSrc = "IßtanBul";

    // With omitUnchangedText(): only the changed text is expected.
    assertEquals("toLower(Istanbul)", "ıb",
            CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, turkishSrc));
    assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ",
            CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, greekSrc));
    assertEquals("toTitle(IjssEL IglOo)", "J",
            CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
                    DUTCH_LOCALE_, null, dutchSrc));
    assertEquals("fold(IßtanBul)", "ıssb",
            CaseMap.fold().omitUnchangedText().turkic().apply(foldSrc));

    // Without omitUnchangedText(): the complete result string is expected.
    assertEquals("toLower(Istanbul)", "ıstanbul",
            CaseMap.toLower().apply(TURKISH_LOCALE_, turkishSrc));
    assertEquals("toUpper(Πατάτα)", "ΠΑΤΑΤΑ",
            CaseMap.toUpper().apply(GREEK_LOCALE_, greekSrc));
    assertEquals("toTitle(IjssEL IglOo)", "IJssEL IglOo",
            CaseMap.toTitle().noBreakAdjustment().noLowercase().apply(
                    DUTCH_LOCALE_, null, dutchSrc));
    assertEquals("fold(IßtanBul)", "ısstanbul",
            CaseMap.fold().turkic().apply(foldSrc));
}
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
@ -1272,6 +1297,7 @@ public final class UCharacterCaseTest extends TestFmwk
private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
private static final Locale DUTCH_LOCALE_ = new Locale("nl");
private static final int CHARACTER_UPPER_[] =
{0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,