mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
parent
398489b915
commit
039ecd6fd0
5 changed files with 465 additions and 57 deletions
|
@ -420,6 +420,96 @@ void toUpper(int32_t caseLocale, uint32_t options,
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr uint8_t ACUTE_BYTE0 = u8"\u0301"[0];
|
||||
|
||||
constexpr uint8_t ACUTE_BYTE1 = u8"\u0301"[1];
|
||||
|
||||
/**
|
||||
* Input: c is a letter I with or without acute accent.
|
||||
* start is the index in src after c, and is less than segmentLimit.
|
||||
* If a plain i/I is followed by a plain j/J,
|
||||
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
|
||||
* then we output accordingly.
|
||||
*
|
||||
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
|
||||
*/
|
||||
int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
|
||||
ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
|
||||
|
||||
int32_t index = start;
|
||||
bool withAcute = false;
|
||||
|
||||
// If the conditions are met, then the following variables tell us what to output.
|
||||
int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
|
||||
bool doTitleJ = false; // true if the j needs to be titlecased
|
||||
int32_t unchanged2 = 0; // after the j (0 or 1)
|
||||
|
||||
// next character after the first letter
|
||||
UChar32 c2;
|
||||
c2 = src[index++];
|
||||
|
||||
// Is the first letter an i/I with accent?
|
||||
if (c == u'I') {
|
||||
if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) {
|
||||
withAcute = true;
|
||||
unchanged1 = 2; // ACUTE is 2 code units in UTF-8
|
||||
if (index == segmentLimit) { return start; }
|
||||
c2 = src[index++];
|
||||
}
|
||||
} else { // Í
|
||||
withAcute = true;
|
||||
}
|
||||
|
||||
// Is the next character a j/J?
|
||||
if (c2 == u'j') {
|
||||
doTitleJ = true;
|
||||
} else if (c2 == u'J') {
|
||||
++unchanged1;
|
||||
} else {
|
||||
return start;
|
||||
}
|
||||
|
||||
// A plain i/I must be followed by a plain j/J.
|
||||
// An i/I with acute must be followed by a j/J with acute.
|
||||
if (withAcute) {
|
||||
if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) {
|
||||
return start;
|
||||
}
|
||||
if (doTitleJ) {
|
||||
unchanged2 = 2; // ACUTE is 2 code units in UTF-8
|
||||
} else {
|
||||
unchanged1 = unchanged1 + 2; // ACUTE is 2 code units in UTF-8
|
||||
}
|
||||
}
|
||||
|
||||
// There must not be another combining mark.
|
||||
if (index < segmentLimit) {
|
||||
int32_t cp;
|
||||
int32_t i = index;
|
||||
U8_NEXT(src, i, segmentLimit, cp);
|
||||
uint32_t typeMask = U_GET_GC_MASK(cp);
|
||||
if ((typeMask & U_GC_M_MASK) != 0) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
// Output the rest of the Dutch IJ.
|
||||
ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode);
|
||||
start += unchanged1;
|
||||
if (doTitleJ) {
|
||||
ByteSinkUtil::appendCodePoint(1, u'J', sink, edits);
|
||||
++start;
|
||||
}
|
||||
ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode);
|
||||
|
||||
U_ASSERT(start + unchanged2 == index);
|
||||
return index;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_CFUNC void U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(
|
||||
int32_t caseLocale, uint32_t options, BreakIterator *iter,
|
||||
|
@ -505,18 +595,13 @@ ucasemap_internalUTF8ToTitle(
|
|||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < index &&
|
||||
caseLocale == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
|
||||
if (src[titleStart+1] == 0x006A) {
|
||||
ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
|
||||
titleLimit++;
|
||||
} else if (src[titleStart+1] == 0x004A) {
|
||||
// Keep the capital J from getting lowercased.
|
||||
if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
|
||||
sink, options, edits, errorCode)) {
|
||||
return;
|
||||
}
|
||||
titleLimit++;
|
||||
caseLocale == UCASE_LOC_DUTCH) {
|
||||
if (c < 0) {
|
||||
c = ~c;
|
||||
}
|
||||
|
||||
if (c == u'I' || c == u'Í') {
|
||||
titleLimit = maybeTitleDutchIJ(src, c, titleLimit, index, sink, options, edits, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,12 @@
|
|||
#include "ustr_imp.h"
|
||||
#include "uassert.h"
|
||||
|
||||
/**
|
||||
* Code point for COMBINING ACUTE ACCENT
|
||||
* @internal
|
||||
*/
|
||||
#define ACUTE u'\u0301'
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
@ -396,6 +402,93 @@ U_NAMESPACE_USE
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* Input: c is a letter I with or without acute accent.
|
||||
* start is the index in src after c, and is less than segmentLimit.
|
||||
* If a plain i/I is followed by a plain j/J,
|
||||
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
|
||||
* then we output accordingly.
|
||||
*
|
||||
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
|
||||
*/
|
||||
int32_t maybeTitleDutchIJ(const UChar *src, UChar32 c, int32_t start, int32_t segmentLimit,
|
||||
UChar *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options,
|
||||
icu::Edits *edits) {
|
||||
|
||||
int32_t index = start;
|
||||
bool withAcute = false;
|
||||
|
||||
// If the conditions are met, then the following variables tell us what to output.
|
||||
int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
|
||||
bool doTitleJ = false; // true if the j needs to be titlecased
|
||||
int32_t unchanged2 = 0; // after the j (0 or 1)
|
||||
|
||||
// next character after the first letter
|
||||
UChar c2 = src[index++];
|
||||
|
||||
// Is the first letter an i/I with accent?
|
||||
if (c == u'I') {
|
||||
if (c2 == ACUTE) {
|
||||
withAcute = true;
|
||||
unchanged1 = 1;
|
||||
if (index == segmentLimit) { return start; }
|
||||
c2 = src[index++];
|
||||
}
|
||||
} else { // Í
|
||||
withAcute = true;
|
||||
}
|
||||
|
||||
// Is the next character a j/J?
|
||||
if (c2 == u'j') {
|
||||
doTitleJ = true;
|
||||
} else if (c2 == u'J') {
|
||||
++unchanged1;
|
||||
} else {
|
||||
return start;
|
||||
}
|
||||
|
||||
// A plain i/I must be followed by a plain j/J.
|
||||
// An i/I with acute must be followed by a j/J with acute.
|
||||
if (withAcute) {
|
||||
if (index == segmentLimit || src[index++] != ACUTE) { return start; }
|
||||
if (doTitleJ) {
|
||||
unchanged2 = 1;
|
||||
} else {
|
||||
++unchanged1;
|
||||
}
|
||||
}
|
||||
|
||||
// There must not be another combining mark.
|
||||
if (index < segmentLimit) {
|
||||
int32_t cp;
|
||||
int32_t i = index;
|
||||
U16_NEXT(src, i, segmentLimit, cp);
|
||||
uint32_t typeMask = U_GET_GC_MASK(cp);
|
||||
if ((typeMask & U_GC_M_MASK) != 0) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
// Output the rest of the Dutch IJ.
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits);
|
||||
start += unchanged1;
|
||||
if (doTitleJ) {
|
||||
destIndex = appendUChar(dest, destIndex, destCapacity, u'J');
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
++start;
|
||||
}
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits);
|
||||
|
||||
U_ASSERT(start + unchanged2 == index);
|
||||
return index;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
|
@ -412,14 +505,14 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
csc.limit=srcLength;
|
||||
int32_t destIndex=0;
|
||||
int32_t prev=0;
|
||||
UBool isFirstIndex=TRUE;
|
||||
bool isFirstIndex=true;
|
||||
|
||||
/* titlecasing loop */
|
||||
while(prev<srcLength) {
|
||||
/* find next index where to titlecase */
|
||||
int32_t index;
|
||||
if(isFirstIndex) {
|
||||
isFirstIndex=FALSE;
|
||||
isFirstIndex=false;
|
||||
index=iter->first();
|
||||
} else {
|
||||
index=iter->next();
|
||||
|
@ -446,7 +539,7 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
// Stop with titleStart<titleLimit<=index
|
||||
// if there is a character to be titlecased,
|
||||
// or else stop with titleStart==titleLimit==index.
|
||||
UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
|
||||
bool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
|
||||
while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
|
||||
titleStart=titleLimit;
|
||||
if(titleLimit==index) {
|
||||
|
@ -479,27 +572,15 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < index &&
|
||||
caseLocale == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
|
||||
if (src[titleStart+1] == 0x006A) {
|
||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if(edits!=NULL) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
titleLimit++;
|
||||
} else if (src[titleStart+1] == 0x004A) {
|
||||
// Keep the capital J from getting lowercased.
|
||||
destIndex=appendUnchanged(dest, destIndex, destCapacity,
|
||||
src+titleStart+1, 1, options, edits);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
titleLimit++;
|
||||
caseLocale == UCASE_LOC_DUTCH) {
|
||||
if (c < 0) {
|
||||
c = ~c;
|
||||
}
|
||||
|
||||
if (c == u'I' || c == u'Í') {
|
||||
titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index,
|
||||
dest, destIndex, destCapacity, options,
|
||||
edits);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ public:
|
|||
void *iter, const char *localeID, uint32_t options);
|
||||
void TestCasing();
|
||||
void TestTitleOptions();
|
||||
void TestDutchTitle();
|
||||
void TestFullCaseFoldingIterator();
|
||||
void TestGreekUpper();
|
||||
void TestArmenian();
|
||||
|
@ -95,6 +96,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
TESTCASE_AUTO(TestCasing);
|
||||
TESTCASE_AUTO(TestTitleOptions);
|
||||
TESTCASE_AUTO(TestDutchTitle);
|
||||
#endif
|
||||
TESTCASE_AUTO(TestFullCaseFoldingIterator);
|
||||
TESTCASE_AUTO(TestGreekUpper);
|
||||
|
@ -451,6 +453,7 @@ StringCaseTest::TestCasingImpl(const UnicodeString &input,
|
|||
}
|
||||
if(result!=output) {
|
||||
dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
|
||||
dataerrln(UnicodeString("input = [") + input + "], expected = [" + output + "], actual = [" + result + "]");
|
||||
}
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(whichCase==TEST_TITLE && options==0) {
|
||||
|
@ -667,6 +670,104 @@ StringCaseTest::TestTitleOptions() {
|
|||
#endif
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
void StringCaseTest::TestDutchTitle() {
|
||||
IcuTestErrorCode errorCode(*this, "TestDutchTitle");
|
||||
|
||||
Locale nl("nl"); // Dutch
|
||||
LocalPointer<BreakIterator> iter(
|
||||
BreakIterator::createWordInstance(nl, errorCode));
|
||||
|
||||
// Dutch titlecase check in English
|
||||
TestCasingImpl(
|
||||
u"ijssel igloo IJMUIDEN",
|
||||
u"Ijssel Igloo Ijmuiden",
|
||||
TEST_TITLE,
|
||||
nullptr,
|
||||
"en",
|
||||
0);
|
||||
|
||||
// Dutch titlecase check in Dutch
|
||||
TestCasingImpl(
|
||||
u"ijssel igloo IJMUIDEN",
|
||||
u"IJssel Igloo IJmuiden",
|
||||
TEST_TITLE,
|
||||
nullptr,
|
||||
"nl",
|
||||
0);
|
||||
|
||||
// Dutch titlecase check in Dutch with nolowercase option
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
iter->setText(u"ijssel igloo IjMUIdEN iPoD ijenough");
|
||||
TestCasingImpl(
|
||||
u"ijssel igloo IjMUIdEN iPoD ijenough",
|
||||
u"IJssel Igloo IJMUIdEN IPoD IJenough",
|
||||
TEST_TITLE,
|
||||
nullptr,
|
||||
"nl",
|
||||
U_TITLECASE_NO_LOWERCASE);
|
||||
}
|
||||
|
||||
errorCode.reset();
|
||||
|
||||
// Accented IJ testing
|
||||
|
||||
struct dutchTitleTestCase {
|
||||
const UnicodeString input;
|
||||
const UnicodeString expectedFull;
|
||||
const UnicodeString expectedOnlyChanged;
|
||||
} dutchTitleTestCases[] = {
|
||||
// input, expectedFull, expectedOnlyChanged
|
||||
{u"ij", u"IJ", u"IJ"},
|
||||
{u"IJ", u"IJ", u""},
|
||||
{u"íj́", u"ÍJ́", u"ÍJ"},
|
||||
{u"ÍJ́", u"ÍJ́", u""},
|
||||
{u"íJ́", u"ÍJ́", u"Í"},
|
||||
{u"Ij́", u"Ij́", u""},
|
||||
{u"ij́", u"Ij́", u"I"},
|
||||
{u"ïj́", u"Ïj́", u"Ï"},
|
||||
{u"íj\u0308", u"Íj\u0308", u"Í"},
|
||||
{u"íj́\U0001D16E", u"Íj́\U0001D16E", u"Í"},
|
||||
{u"íj\u1ABE", u"Íj\u1ABE", u"Í"},
|
||||
|
||||
{u"ijabc", u"IJabc", u"IJ"},
|
||||
{u"IJabc", u"IJabc", u""},
|
||||
{u"íj́abc", u"ÍJ́abc", u"ÍJ"},
|
||||
{u"ÍJ́abc", u"ÍJ́abc", u""},
|
||||
{u"íJ́abc", u"ÍJ́abc", u"Í"},
|
||||
{u"Ij́abc", u"Ij́abc", u""},
|
||||
{u"ij́abc", u"Ij́abc", u"I"},
|
||||
{u"ïj́abc", u"Ïj́abc", u"Ï"},
|
||||
{u"íjabc\u0308", u"Íjabc\u0308", u"Í"},
|
||||
{u"íj́abc\U0001D16E", u"ÍJ́abc\U0001D16E", u"ÍJ"},
|
||||
{u"íjabc\u1ABE", u"Íjabc\u1ABE", u"Í"},
|
||||
};
|
||||
|
||||
for (const auto& cas : dutchTitleTestCases) {
|
||||
const UnicodeString &input = cas.input;
|
||||
const UnicodeString &expectedFull = cas.expectedFull;
|
||||
const UnicodeString &expectedOnlyChanged = cas.expectedOnlyChanged;
|
||||
|
||||
for (const auto& isOnlyChanged : {true, false}) {
|
||||
uint32_t testOptions = U_TITLECASE_NO_LOWERCASE
|
||||
| (isOnlyChanged ? U_OMIT_UNCHANGED_TEXT : 0);
|
||||
|
||||
const UnicodeString &expected = isOnlyChanged ? expectedOnlyChanged : expectedFull;
|
||||
|
||||
TestCasingImpl(
|
||||
input,
|
||||
expected,
|
||||
TEST_TITLE,
|
||||
nullptr,
|
||||
"nl",
|
||||
testOptions
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
StringCaseTest::TestFullCaseFoldingIterator() {
|
||||
UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
|
||||
|
|
|
@ -70,6 +70,10 @@ public final class CaseMapImpl {
|
|||
cpStart=cpLimit=limit;
|
||||
}
|
||||
|
||||
public void moveTo(int i) {
|
||||
cpStart=cpLimit=i;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate forward through the string to fetch the next code point
|
||||
* to be case-mapped, and set the context indexes for it.
|
||||
|
@ -189,6 +193,13 @@ public final class CaseMapImpl {
|
|||
return options | newOption;
|
||||
}
|
||||
|
||||
private static final char ACUTE = '\u0301';
|
||||
|
||||
private static final int U_GC_M_MASK =
|
||||
(1 << UCharacterCategory.NON_SPACING_MARK) |
|
||||
(1 << UCharacterCategory.COMBINING_SPACING_MARK) |
|
||||
(1 << UCharacterCategory.ENCLOSING_MARK);
|
||||
|
||||
private static final int LNS =
|
||||
(1 << UCharacterCategory.UPPERCASE_LETTER) |
|
||||
(1 << UCharacterCategory.LOWERCASE_LETTER) |
|
||||
|
@ -726,34 +737,25 @@ public final class CaseMapImpl {
|
|||
}
|
||||
|
||||
if(titleStart<index) {
|
||||
int titleLimit=iter.getCPLimit();
|
||||
// titlecase c which is from [titleStart..titleLimit[
|
||||
c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
|
||||
appendResult(c, dest, iter.getCPLength(), options, edits);
|
||||
|
||||
// Special case Dutch IJ titlecasing
|
||||
int titleLimit;
|
||||
if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
|
||||
char c1 = src.charAt(titleStart);
|
||||
if ((c1 == 'i' || c1 == 'I')) {
|
||||
char c2 = src.charAt(titleStart+1);
|
||||
if (c2 == 'j') {
|
||||
dest.append('J');
|
||||
if (edits != null) {
|
||||
edits.addReplace(1, 1);
|
||||
}
|
||||
c = iter.nextCaseMapCP();
|
||||
titleLimit++;
|
||||
assert c == c2;
|
||||
assert titleLimit == iter.getCPLimit();
|
||||
} else if (c2 == 'J') {
|
||||
// Keep the capital J from getting lowercased.
|
||||
appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
|
||||
c = iter.nextCaseMapCP();
|
||||
titleLimit++;
|
||||
assert c == c2;
|
||||
assert titleLimit == iter.getCPLimit();
|
||||
}
|
||||
if (c < 0) {
|
||||
c = ~c;
|
||||
}
|
||||
if (c == 'I' || c == 'Í') {
|
||||
titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index, dest, options, edits);
|
||||
iter.moveTo(titleLimit);
|
||||
}
|
||||
else {
|
||||
titleLimit = iter.getCPLimit();
|
||||
}
|
||||
} else {
|
||||
titleLimit = iter.getCPLimit();
|
||||
}
|
||||
|
||||
// lowercase [titleLimit..index[
|
||||
|
@ -779,6 +781,82 @@ public final class CaseMapImpl {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Input: c is a letter I with or without acute accent.
|
||||
* start is the index in src after c, and is less than segmentLimit.
|
||||
* If a plain i/I is followed by a plain j/J,
|
||||
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
|
||||
* then we output accordingly.
|
||||
*
|
||||
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
|
||||
* @throws IOException
|
||||
*/
|
||||
private static <A extends Appendable> int maybeTitleDutchIJ(
|
||||
CharSequence src, int c, int start, int segmentLimit,
|
||||
A dest, int options, Edits edits) throws IOException {
|
||||
int index = start;
|
||||
boolean withAcute = false;
|
||||
|
||||
// If the conditions are met, then the following variables tell us what to output.
|
||||
int unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
|
||||
boolean doTitleJ = false; // true if the j needs to be titlecased
|
||||
int unchanged2 = 0; // after the j (0 or 1)
|
||||
|
||||
// next character after the first letter
|
||||
char c2 = src.charAt(index++);
|
||||
|
||||
// Is the first letter an i/I with accent?
|
||||
if (c == 'I') {
|
||||
if (c2 == ACUTE) {
|
||||
withAcute = true;
|
||||
unchanged1 = 1;
|
||||
if (index == segmentLimit) { return start; }
|
||||
c2 = src.charAt(index++);
|
||||
}
|
||||
} else { // Í
|
||||
withAcute = true;
|
||||
}
|
||||
// Is the next character a j/J?
|
||||
if (c2 == 'j') {
|
||||
doTitleJ = true;
|
||||
} else if (c2 == 'J') {
|
||||
++unchanged1;
|
||||
} else {
|
||||
return start;
|
||||
}
|
||||
// A plain i/I must be followed by a plain j/J.
|
||||
// An i/I with acute must be followed by a j/J with acute.
|
||||
if (withAcute) {
|
||||
if (index == segmentLimit || src.charAt(index++) != ACUTE) { return start; }
|
||||
if (doTitleJ) {
|
||||
unchanged2 = 1;
|
||||
} else {
|
||||
++unchanged1;
|
||||
}
|
||||
}
|
||||
// There must not be another combining mark.
|
||||
if (index < segmentLimit) {
|
||||
int cp = Character.codePointAt(src, index);
|
||||
int bit = 1 << UCharacter.getType(cp);
|
||||
if ((bit & U_GC_M_MASK) != 0) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
// Output the rest of the Dutch IJ.
|
||||
appendUnchanged(src, start, unchanged1, dest, options, edits);
|
||||
start += unchanged1;
|
||||
if (doTitleJ) {
|
||||
dest.append('J');
|
||||
if (edits != null) {
|
||||
edits.addReplace(1, 1);
|
||||
}
|
||||
++start;
|
||||
}
|
||||
appendUnchanged(src, start, unchanged2, dest, options, edits);
|
||||
assert start + unchanged2 == index;
|
||||
return index;
|
||||
}
|
||||
|
||||
public static String fold(int options, CharSequence src) {
|
||||
if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
|
||||
if (src.length() == 0) {
|
||||
|
|
|
@ -13,6 +13,7 @@ package com.ibm.icu.dev.test.lang;
|
|||
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
@ -23,6 +24,7 @@ import org.junit.runners.JUnit4;
|
|||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.test.TestUtil;
|
||||
import com.ibm.icu.impl.CaseMapImpl;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
|
@ -467,6 +469,67 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
assertEquals("Dutch titlecase check in Dutch with nolowercase option",
|
||||
"IJssel Igloo IJMUIdEN IPoD IJenough",
|
||||
UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
|
||||
|
||||
// Accented IJ testing
|
||||
|
||||
String[][] dutchIJCasesData = {
|
||||
// input, expectedFull, expOnlyChanged
|
||||
{"ij", "IJ", "IJ"},
|
||||
{"IJ", "IJ", ""},
|
||||
{"íj́", "ÍJ́", "ÍJ"},
|
||||
{"ÍJ́", "ÍJ́", ""},
|
||||
{"íJ́", "ÍJ́", "Í"},
|
||||
{"Ij́", "Ij́", ""},
|
||||
{"ij́", "Ij́", "I"},
|
||||
{"ïj́", "Ïj́", "Ï"},
|
||||
{"íj\u0308", "Íj\u0308", "Í"},
|
||||
{"íj́\uD834\uDD6E", "Íj́\uD834\uDD6E", "Í"}, // \uD834\uDD6E == \U0001D16E
|
||||
{"íj\u1ABE", "Íj\u1ABE", "Í"},
|
||||
|
||||
{"ijabc", "IJabc", "IJ"},
|
||||
{"IJabc", "IJabc", ""},
|
||||
{"íj́abc", "ÍJ́abc", "ÍJ"},
|
||||
{"ÍJ́abc", "ÍJ́abc", ""},
|
||||
{"íJ́abc", "ÍJ́abc", "Í"},
|
||||
{"Ij́abc", "Ij́abc", ""},
|
||||
{"ij́abc", "Ij́abc", "I"},
|
||||
{"ïj́abc", "Ïj́abc", "Ï"},
|
||||
{"íjabc\u0308", "Íjabc\u0308", "Í"},
|
||||
{"íj́abc\uD834\uDD6E", "ÍJ́abc\uD834\uDD6E", "ÍJ"},
|
||||
{"íjabc\u1ABE", "Íjabc\u1ABE", "Í"},
|
||||
};
|
||||
|
||||
for (String[] caseDatum : dutchIJCasesData) {
|
||||
String input = caseDatum[0];
|
||||
String expectedFull = caseDatum[1];
|
||||
String expectedOnlyChanged = caseDatum[2];
|
||||
|
||||
for (boolean isOnlyChanged : Arrays.asList(true, false)) {
|
||||
String testMsg = "Dutch accented ij"
|
||||
+ (isOnlyChanged ? ", only changes" : "");
|
||||
|
||||
int testOptions = UCharacter.TITLECASE_NO_LOWERCASE
|
||||
| (isOnlyChanged ? CaseMapImpl.OMIT_UNCHANGED_TEXT : 0);
|
||||
|
||||
CaseMap.Title titleCaseMapBase = CaseMap.toTitle().noLowercase();
|
||||
CaseMap.Title titleCaseMap = isOnlyChanged ? titleCaseMapBase.omitUnchangedText() : titleCaseMapBase;
|
||||
|
||||
String expected = isOnlyChanged ? expectedOnlyChanged : expectedFull;
|
||||
|
||||
// Newer API for title casing
|
||||
StringBuilder resultBuilder = new StringBuilder();
|
||||
Edits edits = new Edits();
|
||||
titleCaseMap.apply(DUTCH_LOCALE_, null, input, resultBuilder, edits);
|
||||
String result = resultBuilder.toString();
|
||||
assertEquals(testMsg + ", [" + input + "]",
|
||||
expected, result);
|
||||
|
||||
// Older API for title casing (vs. Newer API)
|
||||
String oldApiResult = UCharacter.toTitleCase(LOC_DUTCH, input, null, testOptions);
|
||||
assertEquals(testMsg + ", Title.apply() vs UCharacter.toTitleCase()" + ", [" + input + "]",
|
||||
result, oldApiResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Add table
Reference in a new issue