ICU-1501 Ported back the Upper/Lower/TitlecaseTransliterator changes from Java.

X-SVN-Rev: 7192
This commit is contained in:
Helena Chapman 2001-11-30 00:57:29 +00:00
parent 61f09aba1d
commit cd5de5761f
6 changed files with 222 additions and 76 deletions

View file

@ -13,6 +13,9 @@
#include "unicode/uniset.h"
#include "mutex.h"
#include "ucln_in.h"
#include "unicode/ustring.h"
#include "ustr_imp.h"
#include "cpputils.h"
U_NAMESPACE_BEGIN
@ -33,8 +36,11 @@ static UnicodeSet* SKIP = NULL;
*/
static UnicodeSet* CASED = NULL;
TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :
Transliterator(_ID, adoptedFilter) {
TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
    Transliterator(_ID, 0),
    loc(theLoc),
    // Scratch space for one case-mapped code point, sized by
    // u_getMaxCaseExpansion().
    buffer(new UChar[u_getMaxCaseExpansion()])
{
    // Two characters of preceding context are required, e.g. for "can't".
    setMaximumContextLength(2);
}
@ -42,13 +48,20 @@ TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :
/**
* Destructor.
*/
TitlecaseTransliterator::~TitlecaseTransliterator() {}
TitlecaseTransliterator::~TitlecaseTransliterator() {
// Release the scratch buffer that the constructors allocate with new[].
delete [] buffer;
}
/**
* Copy constructor.
*/
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
    Transliterator(o),
    loc(o.loc),
    // Each instance owns its own scratch buffer; the pointer is never shared.
    buffer(new UChar[u_getMaxCaseExpansion()])
{
    // Duplicate the source object's scratch contents into the new buffer.
    const int32_t capacity = u_getMaxCaseExpansion();
    uprv_arrayCopy(o.buffer, 0, this->buffer, 0, capacity);
}
/**
* Assignment operator.
@ -56,6 +69,8 @@ TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator&
TitlecaseTransliterator&
TitlecaseTransliterator::operator=(const TitlecaseTransliterator& o) {
    // Assign the base-class state first, then this class's own members.
    Transliterator::operator=(o);
    this->loc = o.loc;

    // The existing buffer is reused; only its contents are overwritten.
    const int32_t capacity = u_getMaxCaseExpansion();
    uprv_arrayCopy(o.buffer, 0, this->buffer, 0, capacity);
    return *this;
}
@ -103,22 +118,58 @@ void TitlecaseTransliterator::handleTransliterate(
// Convert things after a CASED character toLower; things
// after a non-CASED, non-SKIP character toTitle. SKIP
// characters are copied directly and do not change the mode.
UnicodeString str("A", "");
for (start=offsets.start; start<offsets.limit; ++start) {
UChar c = text.charAt(start);
if (SKIP->contains(c)) {
continue;
}
UChar d = (UChar) (doTitle ? u_totitle(c)
: u_tolower(c));
if (c != d) {
str.setCharAt(0, d);
text.handleReplaceBetween(start, start+1, str);
}
doTitle = !CASED->contains(c);
int32_t textPos = offsets.start;
if (textPos >= offsets.limit) return;
// get string for context
// TODO: add convenience method to do this, since we do it all over
int32_t loop = 0;
UnicodeString original;
/* UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1]; */// get whole context
/* Extract the characters from Replaceable */
for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
original.append(text.charAt(loop));
}
offsets.start = start;
// Walk through original string
// If there is a case change, modify corresponding position in replaceable
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
int32_t oldLen;
int32_t newLen;
for (; i < limit; ) {
UErrorCode status = U_ZERO_ERROR;
int32_t s = i;
buffer[0] = original.charAt(s);
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
i += oldLen;
if (!SKIP->contains(cp)) {
if (doTitle) {
newLen = u_internalTitleCase(cp, buffer, u_getMaxCaseExpansion(), loc.getName());
} else {
u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
newLen = buffer[0] == original.charAt(s) ? -1 : u_strlen(buffer);
}
doTitle = !CASED->contains(cp);
if (newLen >= 0) {
UnicodeString temp(buffer, newLen);
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
if (newLen != oldLen) {
textPos += newLen;
offsets.limit += newLen - oldLen;
offsets.contextLimit += newLen - oldLen;
continue;
}
}
}
textPos += oldLen;
}
offsets.start = offsets.limit;
}
/**

View file

@ -11,6 +11,7 @@
#define TITLETRN_H
#include "unicode/translit.h"
#include "unicode/locid.h"
U_NAMESPACE_BEGIN
@ -33,7 +34,7 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator {
/**
* Constructs a transliterator.
*/
TitlecaseTransliterator(UnicodeFilter* adoptedFilter = 0);
TitlecaseTransliterator(const Locale& loc = Locale::getDefault());
/**
* Destructor.
@ -70,6 +71,9 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator {
* CALL.
*/
static void cleanup();
private:
Locale loc;
UChar* buffer;
};
U_NAMESPACE_END

View file

@ -9,6 +9,9 @@
*/
#include "tolowtrn.h"
#include "unicode/ustring.h"
#include "ustr_imp.h"
#include "cpputils.h"
#include "unicode/uchar.h"
U_NAMESPACE_BEGIN
@ -18,31 +21,35 @@ const char LowercaseTransliterator::_ID[] = "Any-Lower";
/**
* Constructs a transliterator.
*/
LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc,
UnicodeFilter* adoptedFilter) :
TransformTransliterator(_ID, adoptedFilter),
loc(theLoc) {
LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc) :
    Transliterator(_ID, 0),
    loc(theLoc),
    // Scratch space for one case-mapped code point, sized by
    // u_getMaxCaseExpansion().
    buffer(new UChar[u_getMaxCaseExpansion()])
{
}
/**
* Destructor.
*/
LowercaseTransliterator::~LowercaseTransliterator() {}
LowercaseTransliterator::~LowercaseTransliterator() {
// Release the scratch buffer that the constructors allocate with new[].
delete [] buffer;
}
/**
* Copy constructor.
*/
LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator& o) :
    Transliterator(o),
    loc(o.loc),
    // A fresh scratch buffer is allocated for the copy; the contents of
    // o.buffer are not copied here.
    buffer(new UChar[u_getMaxCaseExpansion()])
{
}
/**
* Assignment operator.
*/
LowercaseTransliterator&
LowercaseTransliterator::operator=(const LowercaseTransliterator& o) {
    // Assign the base-class state first, then this class's own members.
    Transliterator::operator=(o);
    this->loc = o.loc;

    // The existing buffer is reused; only its contents are overwritten.
    const int32_t capacity = u_getMaxCaseExpansion();
    uprv_arrayCopy((const UChar*)o.buffer, 0, this->buffer, 0, capacity);
    return *this;
}
@ -54,18 +61,58 @@ Transliterator* LowercaseTransliterator::clone(void) const {
}
/**
* TransformTransliterator framework method.
* Implements {@link Transliterator#handleTransliterate}.
*/
UBool LowercaseTransliterator::hasTransform(UChar32 c) const {
return c != u_tolower(c);
}
void LowercaseTransliterator::handleTransliterate(Replaceable& text,
                                                  UTransPosition& offsets,
                                                  UBool isIncremental) const
{
    /*
     * Lowercases the range [offsets.start, offsets.limit) of text in place,
     * one code point at a time, using the locale stored in loc.
     *
     * text          - the text to modify.
     * offsets       - on entry, the range to process and its context; on
     *                 return, limit and contextLimit are adjusted by any
     *                 change in length and start is set to the new limit.
     * isIncremental - not consulted by this implementation.
     */
    int32_t textPos = offsets.start;
    int32_t loop;
    if (textPos >= offsets.limit) return;

    // Snapshot the whole context range so that the case-mapping input is
    // not disturbed by the replacements made to text below.
    // TODO: add convenience method to do this, since we do it all over
    UnicodeString original;
    for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
        original.append(text.charAt(loop));
    }

    const UChar *src = original.getBuffer();
    const int32_t srcLength = original.length();
    const int32_t capacity = u_getMaxCaseExpansion();

    // i and limit index into the snapshot; textPos indexes into text and
    // drifts away from i whenever a mapping changes the length.
    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    UChar32 cp;
    int32_t oldLen;
    int32_t newLen;
    int32_t k;
    UBool changed;

    while (i < limit) {
        UErrorCode status = U_ZERO_ERROR;
        int32_t s = i;
        UTF_GET_CHAR(src, 0, i, srcLength, cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;

        // Take the result length from the return value rather than from
        // u_strlen(): the output is not guaranteed to be NUL-terminated
        // when it fills the buffer exactly.
        newLen = u_strToLower(buffer, capacity, src + s, oldLen,
                              loc.getName(), &status);
        if (U_FAILURE(status) || newLen > capacity) {
            // The mapping could not be produced; leave this code point as is.
            textPos += oldLen;
            continue;
        }

        // Compare every code unit. Checking only the first one misses
        // mappings that alter a later unit, e.g. a supplementary character
        // whose lead surrogate is the same in both cases.
        changed = (UBool)(newLen != oldLen);
        for (k = 0; !changed && k < newLen; ++k) {
            changed = (UBool)(buffer[k] != src[s + k]);
        }

        if (changed) {
            UnicodeString temp(buffer, newLen);
            text.handleReplaceBetween(textPos, textPos + oldLen, temp);
            offsets.limit += newLen - oldLen;
            offsets.contextLimit += newLen - oldLen;
        }
        // When nothing changed newLen == oldLen, so this advances past the
        // original code point either way.
        textPos += newLen;
    }
    offsets.start = offsets.limit;
}
U_NAMESPACE_END

View file

@ -20,15 +20,14 @@ U_NAMESPACE_BEGIN
* case mapping.
* @author Alan Liu
*/
class U_I18N_API LowercaseTransliterator : public TransformTransliterator {
class U_I18N_API LowercaseTransliterator : public Transliterator {
public:
/**
* Constructs a transliterator.
*/
LowercaseTransliterator(const Locale& loc = Locale::getDefault(),
UnicodeFilter* adoptedFilter = 0);
LowercaseTransliterator(const Locale& loc = Locale::getDefault());
/**
* Destructor.
@ -53,19 +52,16 @@ class U_I18N_API LowercaseTransliterator : public TransformTransliterator {
protected:
/**
* TransformTransliterator framework method.
* Implements {@link Transliterator#handleTransliterate}.
*/
virtual UBool hasTransform(UChar32 c) const;
/**
* TransformTransliterator framework method.
*/
virtual void transform(UnicodeString& s) const;
virtual void handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool isIncremental) const;
private:
Locale loc;
UChar* buffer;
static const char _ID[];
};

View file

@ -9,6 +9,9 @@
*/
#include "toupptrn.h"
#include "unicode/ustring.h"
#include "ustr_imp.h"
#include "cpputils.h"
#include "unicode/uchar.h"
U_NAMESPACE_BEGIN
@ -18,31 +21,39 @@ const char UppercaseTransliterator::_ID[] = "Any-Upper";
/**
* Constructs a transliterator.
*/
UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc,
UnicodeFilter* adoptedFilter) :
TransformTransliterator(_ID, adoptedFilter),
loc(theLoc) {
UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc) :
    Transliterator(_ID, 0),
    loc(theLoc),
    // Scratch space for one case-mapped code point, sized by
    // u_getMaxCaseExpansion().
    buffer(new UChar[u_getMaxCaseExpansion()])
{
}
/**
* Destructor.
*/
UppercaseTransliterator::~UppercaseTransliterator() {}
UppercaseTransliterator::~UppercaseTransliterator() {
// Release the scratch buffer that the constructors allocate with new[].
delete [] buffer;
}
/**
* Copy constructor.
*/
UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator& o) :
    Transliterator(o),
    loc(o.loc),
    // Each instance owns its own scratch buffer; the pointer is never shared.
    buffer(new UChar[u_getMaxCaseExpansion()])
{
    // Duplicate the source object's scratch contents into the new buffer.
    const int32_t capacity = u_getMaxCaseExpansion();
    uprv_arrayCopy(o.buffer, 0, this->buffer, 0, capacity);
}
/**
* Assignment operator.
*/
UppercaseTransliterator&
UppercaseTransliterator::operator=(const UppercaseTransliterator& o) {
    // Assign the base-class state first, then this class's own members.
    Transliterator::operator=(o);
    this->loc = o.loc;

    // The existing buffer is reused; only its contents are overwritten.
    const int32_t capacity = u_getMaxCaseExpansion();
    uprv_arrayCopy(o.buffer, 0, this->buffer, 0, capacity);
    return *this;
}
@ -54,17 +65,57 @@ Transliterator* UppercaseTransliterator::clone(void) const {
}
/**
* TransformTransliterator framework method.
* Implements {@link Transliterator#handleTransliterate}.
*/
UBool UppercaseTransliterator::hasTransform(UChar32 c) const {
return c != u_toupper(c);
}
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
                                                  UTransPosition& offsets,
                                                  UBool isIncremental) const {
    /*
     * Uppercases the range [offsets.start, offsets.limit) of text in place,
     * one code point at a time, using the locale stored in loc.
     *
     * text          - the text to modify.
     * offsets       - on entry, the range to process and its context; on
     *                 return, limit and contextLimit are adjusted by any
     *                 change in length and start is set to the new limit.
     * isIncremental - not consulted by this implementation.
     */
    int32_t textPos = offsets.start;
    int32_t loop = 0;
    if (textPos >= offsets.limit) return;

    // Snapshot the whole context range so that the case-mapping input is
    // not disturbed by the replacements made to text below.
    // TODO: add convenience method to do this, since we do it all over
    UnicodeString original;
    for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
        original.append(text.charAt(loop));
    }

    const UChar *src = original.getBuffer();
    const int32_t srcLength = original.length();
    const int32_t capacity = u_getMaxCaseExpansion();

    // i and limit index into the snapshot; textPos indexes into text and
    // drifts away from i whenever a mapping changes the length.
    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    UChar32 cp;
    int32_t oldLen;
    int32_t newLen;
    int32_t k;
    UBool changed;

    while (i < limit) {
        UErrorCode status = U_ZERO_ERROR;
        int32_t s = i;
        UTF_GET_CHAR(src, 0, i, srcLength, cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;

        // Take the result length from the return value rather than from
        // u_strlen(): the output is not guaranteed to be NUL-terminated
        // when it fills the buffer exactly.
        newLen = u_strToUpper(buffer, capacity, src + s, oldLen,
                              loc.getName(), &status);
        if (U_FAILURE(status) || newLen > capacity) {
            // The mapping could not be produced; leave this code point as is.
            textPos += oldLen;
            continue;
        }

        // Compare every code unit. Checking only the first one misses
        // mappings that alter a later unit, e.g. a supplementary character
        // whose lead surrogate is the same in both cases.
        changed = (UBool)(newLen != oldLen);
        for (k = 0; !changed && k < newLen; ++k) {
            changed = (UBool)(buffer[k] != src[s + k]);
        }

        if (changed) {
            UnicodeString temp(buffer, newLen);
            text.handleReplaceBetween(textPos, textPos + oldLen, temp);
            offsets.limit += newLen - oldLen;
            offsets.contextLimit += newLen - oldLen;
        }
        // When nothing changed newLen == oldLen, so this advances past the
        // original code point either way.
        textPos += newLen;
    }
    offsets.start = offsets.limit;
}
U_NAMESPACE_END

View file

@ -20,15 +20,14 @@ U_NAMESPACE_BEGIN
* case mapping.
* @author Alan Liu
*/
class U_I18N_API UppercaseTransliterator : public TransformTransliterator {
class U_I18N_API UppercaseTransliterator : public Transliterator {
public:
/**
* Constructs a transliterator.
*/
UppercaseTransliterator(const Locale& loc = Locale::getDefault(),
UnicodeFilter* adoptedFilter = 0);
UppercaseTransliterator(const Locale& loc = Locale::getDefault());
/**
* Destructor.
@ -52,20 +51,18 @@ class U_I18N_API UppercaseTransliterator : public TransformTransliterator {
protected:
/**
* TransformTransliterator framework method.
*/
virtual UBool hasTransform(UChar32 c) const;
/**
* TransformTransliterator framework method.
* Implements {@link Transliterator#handleTransliterate}.
*/
virtual void transform(UnicodeString& s) const;
virtual void handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool isIncremental) const;
private:
Locale loc;
UChar* buffer;
static const char _ID[];
};