From cd5de5761f7f9d71bf80a693889db7b08cfd5db7 Mon Sep 17 00:00:00 2001 From: Helena Chapman Date: Fri, 30 Nov 2001 00:57:29 +0000 Subject: [PATCH] ICU-1501 Ported back the Upper/Lower/TitlecaseTransliterator changes from Java. X-SVN-Rev: 7192 --- icu4c/source/i18n/titletrn.cpp | 89 ++++++++++++++++++++++++++-------- icu4c/source/i18n/titletrn.h | 6 ++- icu4c/source/i18n/tolowtrn.cpp | 83 ++++++++++++++++++++++++------- icu4c/source/i18n/tolowtrn.h | 18 +++---- icu4c/source/i18n/toupptrn.cpp | 85 +++++++++++++++++++++++++------- icu4c/source/i18n/toupptrn.h | 17 +++---- 6 files changed, 222 insertions(+), 76 deletions(-) diff --git a/icu4c/source/i18n/titletrn.cpp b/icu4c/source/i18n/titletrn.cpp index c7bf7ce3c90..fd236f8cae2 100644 --- a/icu4c/source/i18n/titletrn.cpp +++ b/icu4c/source/i18n/titletrn.cpp @@ -13,6 +13,9 @@ #include "unicode/uniset.h" #include "mutex.h" #include "ucln_in.h" +#include "unicode/ustring.h" +#include "ustr_imp.h" +#include "cpputils.h" U_NAMESPACE_BEGIN @@ -33,8 +36,11 @@ static UnicodeSet* SKIP = NULL; */ static UnicodeSet* CASED = NULL; -TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) : - Transliterator(_ID, adoptedFilter) { +TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) : + Transliterator(_ID, 0), + loc(theLoc), + buffer(0) { + buffer = new UChar[u_getMaxCaseExpansion()]; // Need to look back 2 characters in the case of "can't" setMaximumContextLength(2); } @@ -42,13 +48,20 @@ TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) : /** * Destructor. */ -TitlecaseTransliterator::~TitlecaseTransliterator() {} +TitlecaseTransliterator::~TitlecaseTransliterator() { + delete [] buffer; +} /** * Copy constructor. */ TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) : - Transliterator(o) {} + Transliterator(o), + loc(o.loc), + buffer(0) { + buffer = new UChar[u_getMaxCaseExpansion()]; + uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); +} /** * Assignment operator. @@ -56,6 +69,8 @@ TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& TitlecaseTransliterator& TitlecaseTransliterator::operator=( const TitlecaseTransliterator& o) { Transliterator::operator=(o); + loc = o.loc; + uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); return *this; } @@ -103,22 +118,58 @@ void TitlecaseTransliterator::handleTransliterate( // Convert things after a CASED character toLower; things // after a non-CASED, non-SKIP character toTitle. SKIP // characters are copied directly and do not change the mode. - UnicodeString str("A", ""); - for (start=offsets.start; startcontains(c)) { - continue; - } - UChar d = (UChar) (doTitle ? u_totitle(c) - : u_tolower(c)); - if (c != d) { - str.setCharAt(0, d); - text.handleReplaceBetween(start, start+1, str); - } - doTitle = !CASED->contains(c); + int32_t textPos = offsets.start; + if (textPos >= offsets.limit) return; + + // get string for context + // TODO: add convenience method to do this, since we do it all over + + int32_t loop = 0; + UnicodeString original; + /* UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1]; */// get whole context + /* Extract the characters from Replaceable */ + for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) { + original.append(text.charAt(loop)); } - - offsets.start = start; + // Walk through original string + // If there is a case change, modify corresponding position in replaceable + + int32_t i = textPos - offsets.contextStart; + int32_t limit = offsets.limit - offsets.contextStart; + UChar32 cp; + int32_t oldLen; + int32_t newLen; + + for (; i < limit; ) { + UErrorCode status = U_ZERO_ERROR; + int32_t s = i; + buffer[0] = original.charAt(s); + + UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp); + oldLen = UTF_CHAR_LENGTH(cp); + i += oldLen; + if (!SKIP->contains(cp)) { + if (doTitle) { + newLen = u_internalTitleCase(cp, buffer, u_getMaxCaseExpansion(), loc.getName()); + } else { + u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status); + newLen = buffer[0] == original.charAt(s) ? -1 : u_strlen(buffer); + } + doTitle = !CASED->contains(cp); + if (newLen >= 0) { + UnicodeString temp(buffer, newLen); + text.handleReplaceBetween(textPos, textPos + oldLen, temp); + if (newLen != oldLen) { + textPos += newLen; + offsets.limit += newLen - oldLen; + offsets.contextLimit += newLen - oldLen; + continue; + } + } + } + textPos += oldLen; + } + offsets.start = offsets.limit; } /** diff --git a/icu4c/source/i18n/titletrn.h b/icu4c/source/i18n/titletrn.h index 7897c6149ec..b587fec61ba 100644 --- a/icu4c/source/i18n/titletrn.h +++ b/icu4c/source/i18n/titletrn.h @@ -11,6 +11,7 @@ #define TITLETRN_H #include "unicode/translit.h" +#include "unicode/locid.h" U_NAMESPACE_BEGIN @@ -33,7 +34,7 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator { /** * Constructs a transliterator. */ - TitlecaseTransliterator(UnicodeFilter* adoptedFilter = 0); + TitlecaseTransliterator(const Locale& loc = Locale::getDefault()); /** * Destructor. @@ -70,6 +71,9 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator { * CALL. */ static void cleanup(); + private: + Locale loc; + UChar* buffer; }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/tolowtrn.cpp b/icu4c/source/i18n/tolowtrn.cpp index 3315f5b0604..fbd969cb615 100644 --- a/icu4c/source/i18n/tolowtrn.cpp +++ b/icu4c/source/i18n/tolowtrn.cpp @@ -9,6 +9,9 @@ */ #include "tolowtrn.h" +#include "unicode/ustring.h" +#include "ustr_imp.h" +#include "cpputils.h" #include "unicode/uchar.h" U_NAMESPACE_BEGIN @@ -18,31 +21,35 @@ const char LowercaseTransliterator::_ID[] = "Any-Lower"; /** * Constructs a transliterator. */ -LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc, - UnicodeFilter* adoptedFilter) : - TransformTransliterator(_ID, adoptedFilter), - loc(theLoc) { +LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc) : Transliterator(_ID, 0), + loc(theLoc) , buffer(0) { + buffer = new UChar[u_getMaxCaseExpansion()]; } /** * Destructor. */ -LowercaseTransliterator::~LowercaseTransliterator() {} +LowercaseTransliterator::~LowercaseTransliterator() { + delete [] buffer; +} /** * Copy constructor. */ LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator& o) : - TransformTransliterator(o), - loc(o.loc) {} + Transliterator(o), + loc(o.loc), buffer(0) { + buffer = new UChar[u_getMaxCaseExpansion()]; +} /** * Assignment operator. */ LowercaseTransliterator& LowercaseTransliterator::operator=( const LowercaseTransliterator& o) { - TransformTransliterator::operator=(o); + Transliterator::operator=(o); loc = o.loc; + uprv_arrayCopy((const UChar*)o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); return *this; } @@ -54,18 +61,58 @@ Transliterator* LowercaseTransliterator::clone(void) const { } /** - * TransformTransliterator framework method. + * Implements {@link Transliterator#handleTransliterate}. */ -UBool LowercaseTransliterator::hasTransform(UChar32 c) const { - return c != u_tolower(c); -} +void LowercaseTransliterator::handleTransliterate(Replaceable& text, + UTransPosition& offsets, + UBool isIncremental) const +{ + int32_t textPos = offsets.start; + int32_t loop; + if (textPos >= offsets.limit) return; -/** - * TransformTransliterator framework method. - */ -void LowercaseTransliterator::transform(UnicodeString& s) const { - s.toLower(loc); -} + // get string for context + // TODO: add convenience method to do this, since we do it all over + + UnicodeString original; + /*UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1];*/ // get whole context + /* Extract the characters from Replaceable */ + for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) { + original.append(text.charAt(loop)); + } + + // Walk through original string + // If there is a case change, modify corresponding position in replaceable + + int32_t i = textPos - offsets.contextStart; + int32_t limit = offsets.limit - offsets.contextStart; + UChar32 cp; + int32_t oldLen; + + for (; i < limit; ) { + UErrorCode status = U_ZERO_ERROR; + int32_t s = i; + buffer[0] = original.charAt(s); + UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp); + oldLen = UTF_CHAR_LENGTH(cp); + i += oldLen; + u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status); + /* Skip checking of status code here because the buffer should not have overflowed. */ + if ( buffer[0] != original.charAt(s) ) { + int len = u_strlen(buffer); + UnicodeString temp(buffer); + text.handleReplaceBetween(textPos, textPos + oldLen, temp); + if (len != oldLen) { + textPos += len; + offsets.limit += len - oldLen; + offsets.contextLimit += len - oldLen; + continue; + } + } + textPos += oldLen; + } + offsets.start = offsets.limit; +} U_NAMESPACE_END diff --git a/icu4c/source/i18n/tolowtrn.h b/icu4c/source/i18n/tolowtrn.h index 3e4d17dd590..c036b2f01b2 100644 --- a/icu4c/source/i18n/tolowtrn.h +++ b/icu4c/source/i18n/tolowtrn.h @@ -20,15 +20,14 @@ U_NAMESPACE_BEGIN * case mapping. * @author Alan Liu */ -class U_I18N_API LowercaseTransliterator : public TransformTransliterator { +class U_I18N_API LowercaseTransliterator : public Transliterator { public: /** * Constructs a transliterator. */ - LowercaseTransliterator(const Locale& loc = Locale::getDefault(), - UnicodeFilter* adoptedFilter = 0); + LowercaseTransliterator(const Locale& loc = Locale::getDefault()); /** * Destructor. @@ -53,19 +52,16 @@ class U_I18N_API LowercaseTransliterator : public TransformTransliterator { protected: /** - * TransformTransliterator framework method. + * Implements {@link Transliterator#handleTransliterate}. */ - virtual UBool hasTransform(UChar32 c) const; - - /** - * TransformTransliterator framework method. - */ - virtual void transform(UnicodeString& s) const; + virtual void handleTransliterate(Replaceable& text, + UTransPosition& offsets, + UBool isIncremental) const; private: Locale loc; - + UChar* buffer; static const char _ID[]; }; diff --git a/icu4c/source/i18n/toupptrn.cpp b/icu4c/source/i18n/toupptrn.cpp index be4b2dce087..f23ca8da5c5 100644 --- a/icu4c/source/i18n/toupptrn.cpp +++ b/icu4c/source/i18n/toupptrn.cpp @@ -9,6 +9,9 @@ */ #include "toupptrn.h" +#include "unicode/ustring.h" +#include "ustr_imp.h" +#include "cpputils.h" #include "unicode/uchar.h" U_NAMESPACE_BEGIN @@ -18,31 +21,39 @@ const char UppercaseTransliterator::_ID[] = "Any-Upper"; /** * Constructs a transliterator. */ -UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc, - UnicodeFilter* adoptedFilter) : - TransformTransliterator(_ID, adoptedFilter), - loc(theLoc) { +UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc) : + Transliterator(_ID, 0), + loc(theLoc), + buffer(0) { + buffer = new UChar[u_getMaxCaseExpansion()]; } /** * Destructor. */ -UppercaseTransliterator::~UppercaseTransliterator() {} +UppercaseTransliterator::~UppercaseTransliterator() { + delete [] buffer; +} /** * Copy constructor. */ UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator& o) : - TransformTransliterator(o), - loc(o.loc) {} + Transliterator(o), + loc(o.loc), + buffer(0){ + buffer = new UChar[u_getMaxCaseExpansion()]; + uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); +} /** * Assignment operator. */ UppercaseTransliterator& UppercaseTransliterator::operator=( const UppercaseTransliterator& o) { - TransformTransliterator::operator=(o); + Transliterator::operator=(o); loc = o.loc; + uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion()); return *this; } @@ -54,17 +65,57 @@ Transliterator* UppercaseTransliterator::clone(void) const { } /** - * TransformTransliterator framework method. + * Implements {@link Transliterator#handleTransliterate}. */ -UBool UppercaseTransliterator::hasTransform(UChar32 c) const { - return c != u_toupper(c); -} +void UppercaseTransliterator::handleTransliterate(Replaceable& text, + UTransPosition& offsets, + UBool isIncremental) const { + int32_t textPos = offsets.start; + int32_t loop = 0; + if (textPos >= offsets.limit) return; -/** - * TransformTransliterator framework method. - */ -void UppercaseTransliterator::transform(UnicodeString& s) const { - s.toUpper(loc); + // get string for context + // TODO: add convenience method to do this, since we do it all over + + UnicodeString original; + /* UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1]; */// get whole context + /* Extract the characters from Replaceable */ + for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) { + original.append(text.charAt(loop)); + } + + // Walk through original string + // If there is a case change, modify corresponding position in replaceable + + int32_t i = textPos - offsets.contextStart; + int32_t limit = offsets.limit - offsets.contextStart; + UChar32 cp; + int32_t oldLen; + + for (; i < limit; ) { + UErrorCode status = U_ZERO_ERROR; + int32_t s = i; + buffer[0] = original.charAt(s); + + UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp); + oldLen = UTF_CHAR_LENGTH(cp); + i += oldLen; + u_strToUpper(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status); + /* Skip checking of status code here because the buffer should not have overflowed. */ + if (buffer[0] != original.charAt(s)) { + int len = u_strlen(buffer); + UnicodeString temp(buffer); + text.handleReplaceBetween(textPos, textPos + oldLen, temp); + if (len != oldLen) { + textPos += len; + offsets.limit += len - oldLen; + offsets.contextLimit += len - oldLen; + continue; + } + } + textPos += oldLen; + } + offsets.start = offsets.limit; } U_NAMESPACE_END diff --git a/icu4c/source/i18n/toupptrn.h b/icu4c/source/i18n/toupptrn.h index 1bdbd5fb6ef..a20a6b774cb 100644 --- a/icu4c/source/i18n/toupptrn.h +++ b/icu4c/source/i18n/toupptrn.h @@ -20,15 +20,14 @@ U_NAMESPACE_BEGIN * case mapping. * @author Alan Liu */ -class U_I18N_API UppercaseTransliterator : public TransformTransliterator { +class U_I18N_API UppercaseTransliterator : public Transliterator { public: /** * Constructs a transliterator. */ - UppercaseTransliterator(const Locale& loc = Locale::getDefault(), - UnicodeFilter* adoptedFilter = 0); + UppercaseTransliterator(const Locale& loc = Locale::getDefault()); /** * Destructor. @@ -52,20 +51,18 @@ class U_I18N_API UppercaseTransliterator : public TransformTransliterator { protected: - /** - * TransformTransliterator framework method. - */ - virtual UBool hasTransform(UChar32 c) const; /** - * TransformTransliterator framework method. + * Implements {@link Transliterator#handleTransliterate}. */ - virtual void transform(UnicodeString& s) const; + virtual void handleTransliterate(Replaceable& text, + UTransPosition& offsets, + UBool isIncremental) const; private: Locale loc; - + UChar* buffer; static const char _ID[]; };