ICU-1501 Updated Upper/Lower/TitlecaseTransliterator and checked in new casing tests for surrogates from Java.

X-SVN-Rev: 7235
This commit is contained in:
Helena Chapman 2001-11-30 23:53:55 +00:00
parent 1022f98094
commit 6f5df69456
8 changed files with 231 additions and 21 deletions

View file

@ -1066,11 +1066,11 @@ getCaseLocale(const char *locale) {
return LOC_ROOT;
}
if( (locale[0]=='t' && locale[1]=='r') ||
(locale[0]=='a' && locale[1]=='z')
if( (lang[0]=='t' && lang[1]=='r') ||
(lang[0]=='a' && lang[1]=='z')
) {
return LOC_TURKISH;
} else if(locale[0]=='l' && locale[1]=='t') {
} else if(lang[0]=='l' && lang[1]=='t') {
return LOC_LITHUANIAN;
} else {
return LOC_ROOT;

View file

@ -234,6 +234,7 @@ CLEAN :
-@erase "*.obj"
-@erase "*.brk"
-@erase "*.dat"
-@erase "*.dll"
@cd "$(TESTDATAOUT)"
-@erase "*.dat"
-@erase "*.cnv"

View file

@ -234,6 +234,7 @@ CLEAN :
-@erase "*.obj"
-@erase "*.brk"
-@erase "*.dat"
-@erase "*.dll"
@cd "$(TESTDATAOUT)"
-@erase "*.dat"
-@erase "*.cnv"

View file

@ -136,14 +136,13 @@ void TitlecaseTransliterator::handleTransliterate(
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
UChar32 cp, bufferCH;
int32_t oldLen;
int32_t newLen;
for (; i < limit; ) {
UErrorCode status = U_ZERO_ERROR;
int32_t s = i;
buffer[0] = original.charAt(s);
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
@ -153,7 +152,8 @@ void TitlecaseTransliterator::handleTransliterate(
newLen = u_internalTitleCase(cp, buffer, u_getMaxCaseExpansion(), loc.getName());
} else {
u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
newLen = buffer[0] == original.charAt(s) ? -1 : u_strlen(buffer);
UTF_GET_CHAR(buffer, 0, s, u_strlen(buffer), bufferCH);
newLen = bufferCH == original.char32At(s) ? -1 : u_strlen(buffer);
}
doTitle = !CASED->contains(cp);
if (newLen >= 0) {

View file

@ -86,20 +86,21 @@ void LowercaseTransliterator::handleTransliterate(Replaceable& text,
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
UChar32 cp, bufferCH;
int32_t oldLen;
for (; i < limit; ) {
UErrorCode status = U_ZERO_ERROR;
int32_t s = i;
buffer[0] = original.charAt(s);
bufferCH = original.char32At(s);
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
i += oldLen;
u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
/* Skip checking of status code here because the buffer should not have overflowed. */
if ( buffer[0] != original.charAt(s) ) {
UTF_GET_CHAR(buffer, 0, s, u_strlen(buffer), cp);
if ( bufferCH != cp ) {
int len = u_strlen(buffer);
UnicodeString temp(buffer);
text.handleReplaceBetween(textPos, textPos + oldLen, temp);

View file

@ -89,20 +89,21 @@ void UppercaseTransliterator::handleTransliterate(Replaceable& text,
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
UChar32 cp, bufferCH;
int32_t oldLen;
for (; i < limit; ) {
UErrorCode status = U_ZERO_ERROR;
int32_t s = i;
buffer[0] = original.charAt(s);
bufferCH = original.char32At(s);
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
i += oldLen;
u_strToUpper(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
/* Skip checking of status code here because the buffer should not have overflowed. */
if (buffer[0] != original.charAt(s)) {
UTF_GET_CHAR(buffer, 0, s, u_strlen(buffer), cp);
if (bufferCH != cp) {
int len = u_strlen(buffer);
UnicodeString temp(buffer);
text.handleReplaceBetween(textPos, textPos + oldLen, temp);

View file

@ -21,6 +21,7 @@
#include "unicode/uniset.h"
#include "unicode/unitohex.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
/***********************************************************************
@ -143,6 +144,10 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(61,TestEscape);
TESTCASE(62,TestAnchorMasking);
TESTCASE(63,TestDisplayName);
TESTCASE(64,TestSpecialCases);
TESTCASE(65,TestIncrementalProgress);
TESTCASE(66,TestSurrogateCasing);
default: name = ""; break;
}
}
@ -2964,6 +2969,189 @@ void TransliteratorTest::TestDisplayName() {
}
}
// Single-code-point strings holding supplementary (non-BMP) characters, so
// each one occupies a surrogate pair in UTF-16. They are shared by the casing
// tests below as an upper/lower pair: U+10414 is the capital form and
// U+1043C the small form of the same Deseret letter.
const UnicodeString DESERET_DEE(static_cast<UChar32>(0x10414));
const UnicodeString DESERET_dee(static_cast<UChar32>(0x1043C));
void TransliteratorTest::TestSpecialCases(void) {
const UnicodeString registerRules[] = {
"Any-Dev1", "x > X; y > Y;",
"Any-Dev2", "XY > Z",
"Greek-Latin/FAKE",
CharsToUnicodeString
("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;")
};
const UnicodeString testCases[] = {
// NORMALIZATION, not in C
"NFC", CharsToUnicodeString("a\\u0300"), CharsToUnicodeString("\\u00E0"),
"NFD", CharsToUnicodeString("\\u00E0"), CharsToUnicodeString("a\\u0300"),
// mp -> b BUG
"Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
"Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
// check for devanagari bug
"nfd;Dev1;Dev2;nfc", "xy", "Z",
// ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
"Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
CharsToUnicodeString("Ab'cd Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
//TODO: enable this test once Titlecase works right
/*
"Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
*/
"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
// FORMS OF S
"Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
"Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
"Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
"Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3")
};
UParseError pos;
for (int32_t i = 0; i < 6 /*registerRules.length*/; i+=2) {
UErrorCode status = U_ZERO_ERROR;
Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
registerRules[i+1], UTRANS_FORWARD, pos, status);
if (U_FAILURE(status)) {
errln("Fails: Unable to create the transliterator from rules.");
} else {
Transliterator::registerInstance(t);
}
}
for (i = 0; i < 36 /*testCases.length*/; i+=3) {
UErrorCode status = U_ZERO_ERROR;
Transliterator *t = Transliterator::createInstance(testCases[i+0], UTRANS_FORWARD, pos, status);
expect(*t, testCases[i+1], testCases[i+2], 0);
delete t;
}
}
/**
 * Formats a code point as a backslash escape for use in test messages.
 *
 * Values up to 0xFFFF are written as a lowercase 'u' escape with (at least)
 * four hex digits; larger values are written as an uppercase 'U' escape with
 * eight hex digits, which is the form ICU's unescaping functions expect for
 * an eight-digit escape (a lowercase 'u' escape only consumes four digits).
 *
 * @param ch     the code point to format
 * @param buffer caller-supplied output buffer; must have room for the
 *               escape plus the terminating NUL (11 chars for the long form)
 * @return buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    // %x takes an unsigned int, so convert explicitly instead of passing the
    // signed code point type through the varargs list.
    if (ch <= 0xFFFF) {
        sprintf(buffer, "\\u%04x", (unsigned int)ch);
    } else {
        sprintf(buffer, "\\U%08x", (unsigned int)ch);
    }
    return buffer;
}
void TransliteratorTest::TestSurrogateCasing (void) {
// check that casing handles surrogates
// titlecase is currently defective
char buffer[20];
UChar buffer2[20];
UChar32 dee;
UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
UnicodeString DEE(u_totitle(dee));
if (DEE != DESERET_DEE) {
err("Fails titlecase of surrogates");
err(Char32ToEscapedChars(dee, buffer));
err(", ");
errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
}
UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
UErrorCode status= U_ZERO_ERROR;
u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
errln("Fails: Can't uppercase surrogates.");
}
status= U_ZERO_ERROR;
u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
errln("Fails: Can't lowercase surrogates.");
}
}
// Check to see that incremental gets at least part way through a reasonable string.
void TransliteratorTest::TestIncrementalProgress(void) {
UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
int32_t i = 0, j=0, k=0;
int32_t sources = Transliterator::countAvailableSources();
for (i = 0; i < sources; i++) {
UnicodeString source;
Transliterator::getAvailableSource(i, source);
if (source != UnicodeString("Latin")) continue;
int32_t targets = Transliterator::countAvailableTargets(source);
for (j = 0; j < targets; j++) {
UnicodeString target;
Transliterator::getAvailableTarget(j, source, target);
int32_t variants = Transliterator::countAvailableVariants(source, target);
for (k =0; k< variants; k++) {
UnicodeString variant;
UParseError err;
UErrorCode status = U_ZERO_ERROR;
Transliterator::getAvailableVariant(k, source, target, variant);
UnicodeString id = source + "-" + target + "/" + variant;
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
if (U_FAILURE(status)) {
errln("FAIL: Not able to create transliterator from the composed ID.");
}
status = U_ZERO_ERROR;
UnicodeString result = CheckIncrementalAux(t, test);
Transliterator *inv = t->createInverse(status);
CheckIncrementalAux(inv, result);
delete t;
delete inv;
}
}
}
}
/**
 * Runs one incremental transliteration pass over a copy of input and
 * reports whether any progress was made, then finishes the transliteration
 * and reports whether the whole range was consumed.
 *
 * @param t     the transliterator to exercise; a NULL pointer is reported
 *              as an error
 * @param input the text to transliterate (not modified)
 * @return the working copy of input after transliteration
 */
UnicodeString TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
        const UnicodeString& input) {
    UnicodeString test = input;
    if (t == NULL) {
        errln("FAIL: CheckIncrementalAux called with a NULL transliterator.");
        return test;
    }

    UTransPosition pos;
    pos.contextStart = 0;
    pos.contextLimit = input.length();
    pos.start = 0;
    pos.limit = input.length();

    // Use the incremental overload that takes the UTransPosition by
    // reference. The (text, start, limit) overload receives the two ints by
    // value, so it can never advance pos.start and the progress check below
    // would be meaningless.
    UErrorCode status = U_ZERO_ERROR;
    t->transliterate(test, pos, status);
    if (U_FAILURE(status)) {
        errln(UnicodeString("FAIL: transliterate() error ") + u_errorName(status));
        return test;
    }

    // NOTE(review): formatInput's first parameter may be an append-to output;
    // if so, the calls below also modify `test` -- confirm against its
    // declaration.
    if (pos.start == 0) {
        log("No Progress, ");
        log(t->getID());
        log(": ");
        errln(formatInput(test, input, pos));
    } else {
        log("PASS Progress, ");
        log(t->getID());
        log(": ");
        logln(formatInput(test, input, pos));
    }

    t->finishTransliteration(test, pos);
    if (pos.start != pos.limit) {
        log("Incomplete, ");
        log(t->getID());
        log(": ");
        errln(formatInput(test, input, pos));
    }
    return test;
}
//======================================================================
// Support methods
//======================================================================
@ -3047,6 +3235,7 @@ void TransliteratorTest::expect(const Transliterator& t,
expectedResult);
}
/**
* @param appendTo result is appended to this param.
* @param input the string being transliterated

View file

@ -259,45 +259,59 @@ class TransliteratorTest : public IntlTest {
/**
* Make sure parse errors reference the right line.
*/
void TestParseError();
void TestParseError(void);
/**
* Make sure sets on output are disallowed.
*/
void TestOutputSet();
void TestOutputSet(void);
/**
* Test the use variable range pragma, making sure that use of
* variable range characters is detected and flagged as an error.
*/
void TestVariableRange();
void TestVariableRange(void);
/**
* Test invalid post context error handling
*/
void TestInvalidPostContext();
void TestInvalidPostContext(void);
/**
* Test ID form variants
*/
void TestIDForms();
void TestIDForms(void);
/**
* Mark's toRules test.
*/
void TestToRulesMark();
void TestToRulesMark(void);
/**
* Test Escape and Unescape transliterators.
*/
void TestEscape();
void TestEscape(void);
void TestAnchorMasking();
void TestAnchorMasking(void);
/**
* Make sure display names of variants look reasonable.
*/
void TestDisplayName();
void TestDisplayName(void);
/**
* Check to see if case mapping works correctly.
*/
void TestSpecialCases(void);
/**
* Check to see that incremental gets at least part way through a reasonable string.
*/
void TestIncrementalProgress(void);
/**
* Check that casing handles surrogates.
*/
void TestSurrogateCasing (void);
//======================================================================
// Support methods
@ -333,6 +347,9 @@ class TransliteratorTest : public IntlTest {
void checkRules(const UnicodeString& label, Transliterator& t2,
const UnicodeString& testRulesForward);
UnicodeString CheckIncrementalAux(const Transliterator* t,
const UnicodeString& input);
};
#endif