mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-17 02:37:25 +00:00
ICU-965 in Any-Title make can't -> Can't, not Can'T
X-SVN-Rev: 5144
This commit is contained in:
parent
3225d6b8fe
commit
be55a412be
2 changed files with 20 additions and 5 deletions
|
@ -18,7 +18,8 @@ const char* TitlecaseTransliterator::_ID = "Any-Title";
|
|||
|
||||
TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter) {
|
||||
setMaximumContextLength(1);
|
||||
// Need to look back 2 characters in the case of "can't"
|
||||
setMaximumContextLength(2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -63,12 +64,23 @@ void TitlecaseTransliterator::handleTransliterate(
|
|||
// don't filter characters in the range contextStart..start-1
|
||||
// (the left context).
|
||||
|
||||
// NOTE: This method contains some special case code to handle
|
||||
// apostrophes between alpha characters. We want to have
|
||||
// "can't" => "Can't" (not "Can'T"). This may be incorrect
|
||||
// for some locales, e.g., French might instead expect "l'arbre" => "L'Arbre" (?).
|
||||
// TODO: Revisit this.
|
||||
|
||||
// Determine if there is a preceding letter character in the
|
||||
// left context (if there is any left context).
|
||||
UBool wasLastCharALetter = FALSE;
|
||||
if (offsets.start > offsets.contextStart) {
|
||||
wasLastCharALetter =
|
||||
u_isalpha(text.charAt(offsets.start - 1));
|
||||
UChar c = text.charAt(offsets.start - 1);
|
||||
// Handle the case "Can'|t", where the | marks the context
|
||||
// boundary. We only handle a single apostrophe.
|
||||
if (c == 0x0027 /*'*/ && (offsets.start-2) >= offsets.contextStart) {
|
||||
c = text.charAt(offsets.start - 2);
|
||||
}
|
||||
wasLastCharALetter = u_isalpha(c);
|
||||
}
|
||||
|
||||
// The buffer used to batch up changes to be made
|
||||
|
@ -116,6 +128,9 @@ void TitlecaseTransliterator::handleTransliterate(
|
|||
buffer.append(newChar);
|
||||
}
|
||||
wasLastCharALetter = TRUE;
|
||||
} else if (c == 0x0027 /*'*/ && wasLastCharALetter) {
|
||||
// Ignore a single embedded apostrophe, so that "can't" =>
|
||||
// "Can't", not "Can'T".
|
||||
} else {
|
||||
wasLastCharALetter = FALSE;
|
||||
}
|
||||
|
|
|
@ -977,8 +977,8 @@ void TransliteratorTest::TestCaseMap(void) {
|
|||
"THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
|
||||
expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
|
||||
"the quick brown foX jumped over the lazY dogs.");
|
||||
expect(*toTitle, "the quick brown foX jumped over the laZy dogs.",
|
||||
"The Quick Brown FoX Jumped Over The LaZy Dogs.");
|
||||
expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
|
||||
"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
|
||||
|
||||
delete toUpper;
|
||||
delete toLower;
|
||||
|
|
Loading…
Add table
Reference in a new issue