mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-1501 Ported back the Upper/Lower/TitlecaseTransliterator changes from Java.
X-SVN-Rev: 7192
This commit is contained in:
parent
61f09aba1d
commit
cd5de5761f
6 changed files with 222 additions and 76 deletions
|
@ -13,6 +13,9 @@
|
|||
#include "unicode/uniset.h"
|
||||
#include "mutex.h"
|
||||
#include "ucln_in.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cpputils.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -33,8 +36,11 @@ static UnicodeSet* SKIP = NULL;
|
|||
*/
|
||||
static UnicodeSet* CASED = NULL;
|
||||
|
||||
// NOTE(review): this page shows a diff without +/- markers. Two constructor
// signatures follow back to back; they look like the removed (UnicodeFilter*)
// and added (const Locale&) versions of the same constructor -- confirm
// against the repository before relying on either.
TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter) {
|
||||
// Locale-taking form: passes _ID and a 0 filter to the base class, stores the
// locale in `loc`, and starts `buffer` at 0 before allocating it in the body.
TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
|
||||
Transliterator(_ID, 0),
|
||||
loc(theLoc),
|
||||
buffer(0) {
|
||||
// Scratch array sized by u_getMaxCaseExpansion(); released with delete [] in
// the destructor shown further down this page.
buffer = new UChar[u_getMaxCaseExpansion()];
|
||||
// Need to look back 2 characters in the case of "can't"
|
||||
setMaximumContextLength(2);
|
||||
}
|
||||
|
@ -42,13 +48,20 @@ TitlecaseTransliterator::TitlecaseTransliterator(UnicodeFilter* adoptedFilter) :
|
|||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
// NOTE(review): the empty-bodied destructor on the next line and the one after
// it appear to be the before/after sides of the diff -- confirm.
TitlecaseTransliterator::~TitlecaseTransliterator() {}
|
||||
// Destructor: frees the UChar array held in `buffer`.
TitlecaseTransliterator::~TitlecaseTransliterator() {
|
||||
// Array delete, matching the new UChar[...] used by the constructors on this page.
delete [] buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
// Copy constructor. Copies the base part and the locale, and gives the new
// object its own `buffer` allocation rather than sharing o.buffer.
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
|
||||
// NOTE(review): "Transliterator(o) {}" below looks like the removed one-line
// initializer/body from the old side of the diff -- confirm.
Transliterator(o) {}
|
||||
Transliterator(o),
|
||||
loc(o.loc),
|
||||
buffer(0) {
|
||||
// Fresh array of u_getMaxCaseExpansion() UChars owned by this object.
buffer = new UChar[u_getMaxCaseExpansion()];
|
||||
// Presumably copies u_getMaxCaseExpansion() elements from o.buffer into
// this->buffer (argument order inferred from the call; uprv_arrayCopy is
// declared elsewhere -- TODO confirm).
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
|
@ -56,6 +69,8 @@ TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator&
|
|||
// Assignment operator: assigns the base-class part, the locale, and the
// contents of the scratch buffer from `o`; returns *this.
// NOTE(review): there is no `this != &o` guard, so self-assignment passes the
// same array as both source and destination of uprv_arrayCopy -- verify that
// helper tolerates identical ranges.
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
|
||||
const TitlecaseTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
loc = o.loc;
|
||||
// Both buffers are allocated with u_getMaxCaseExpansion() elements by the
// constructors on this page, so the existing array is reused, not reallocated.
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -103,22 +118,58 @@ void TitlecaseTransliterator::handleTransliterate(
|
|||
// Convert things after a CASED character toLower; things
|
||||
// after a non-CASED, non-SKIP character toTitle. SKIP
|
||||
// characters are copied directly and do not change the mode.
|
||||
UnicodeString str("A", "");
|
||||
for (start=offsets.start; start<offsets.limit; ++start) {
|
||||
UChar c = text.charAt(start);
|
||||
if (SKIP->contains(c)) {
|
||||
continue;
|
||||
}
|
||||
UChar d = (UChar) (doTitle ? u_totitle(c)
|
||||
: u_tolower(c));
|
||||
if (c != d) {
|
||||
str.setCharAt(0, d);
|
||||
text.handleReplaceBetween(start, start+1, str);
|
||||
}
|
||||
doTitle = !CASED->contains(c);
|
||||
int32_t textPos = offsets.start;
|
||||
if (textPos >= offsets.limit) return;
|
||||
|
||||
// get string for context
|
||||
// TODO: add convenience method to do this, since we do it all over
|
||||
|
||||
int32_t loop = 0;
|
||||
UnicodeString original;
|
||||
/* UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1]; */// get whole context
|
||||
/* Extract the characters from Replaceable */
|
||||
for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
|
||||
original.append(text.charAt(loop));
|
||||
}
|
||||
|
||||
offsets.start = start;
|
||||
// Walk through original string
|
||||
// If there is a case change, modify corresponding position in replaceable
|
||||
|
||||
int32_t i = textPos - offsets.contextStart;
|
||||
int32_t limit = offsets.limit - offsets.contextStart;
|
||||
UChar32 cp;
|
||||
int32_t oldLen;
|
||||
int32_t newLen;
|
||||
|
||||
for (; i < limit; ) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t s = i;
|
||||
buffer[0] = original.charAt(s);
|
||||
|
||||
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
|
||||
oldLen = UTF_CHAR_LENGTH(cp);
|
||||
i += oldLen;
|
||||
if (!SKIP->contains(cp)) {
|
||||
if (doTitle) {
|
||||
newLen = u_internalTitleCase(cp, buffer, u_getMaxCaseExpansion(), loc.getName());
|
||||
} else {
|
||||
u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
|
||||
newLen = buffer[0] == original.charAt(s) ? -1 : u_strlen(buffer);
|
||||
}
|
||||
doTitle = !CASED->contains(cp);
|
||||
if (newLen >= 0) {
|
||||
UnicodeString temp(buffer, newLen);
|
||||
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
|
||||
if (newLen != oldLen) {
|
||||
textPos += newLen;
|
||||
offsets.limit += newLen - oldLen;
|
||||
offsets.contextLimit += newLen - oldLen;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
textPos += oldLen;
|
||||
}
|
||||
offsets.start = offsets.limit;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define TITLETRN_H
|
||||
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/locid.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -33,7 +34,7 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator {
|
|||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
TitlecaseTransliterator(UnicodeFilter* adoptedFilter = 0);
|
||||
TitlecaseTransliterator(const Locale& loc = Locale::getDefault());
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -70,6 +71,9 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator {
|
|||
* CALL.
|
||||
*/
|
||||
static void cleanup();
|
||||
private:
|
||||
Locale loc;
|
||||
UChar* buffer;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
*/
|
||||
|
||||
#include "tolowtrn.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cpputils.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -18,31 +21,35 @@ const char LowercaseTransliterator::_ID[] = "Any-Lower";
|
|||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
// NOTE(review): two constructor headers are interleaved here. The first
// (Locale + UnicodeFilter*, TransformTransliterator base) looks like the
// removed side of the diff; the second (Locale only, Transliterator base)
// looks like the added side -- confirm.
LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
TransformTransliterator(_ID, adoptedFilter),
|
||||
loc(theLoc) {
|
||||
// Locale-only form: base gets _ID and a 0 filter, `loc` stores the locale,
// `buffer` starts at 0 and is allocated in the body.
LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc) : Transliterator(_ID, 0),
|
||||
loc(theLoc) , buffer(0) {
|
||||
// Scratch array of u_getMaxCaseExpansion() UChars; freed with delete [] in the destructor.
buffer = new UChar[u_getMaxCaseExpansion()];
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
// NOTE(review): the empty-bodied destructor on the next line and the one after
// it appear to be the before/after sides of the diff -- confirm.
LowercaseTransliterator::~LowercaseTransliterator() {}
|
||||
// Destructor: frees the UChar array held in `buffer`.
LowercaseTransliterator::~LowercaseTransliterator() {
|
||||
// Array delete, matching the new UChar[...] used by the constructors on this page.
delete [] buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
// Copy constructor. Copies the base part and locale and allocates a separate
// `buffer` for the new object.
LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator& o) :
|
||||
// NOTE(review): the TransformTransliterator(o) / "loc(o.loc) {}" pair below
// looks like the removed initializer list from the old side of the diff -- confirm.
TransformTransliterator(o),
|
||||
loc(o.loc) {}
|
||||
Transliterator(o),
|
||||
loc(o.loc), buffer(0) {
|
||||
// The new array is left uninitialized here: unlike the Titlecase and Uppercase
// copy constructors on this page, no uprv_arrayCopy from o.buffer follows.
buffer = new UChar[u_getMaxCaseExpansion()];
|
||||
}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
// Assignment operator: assigns the base-class part, the locale, and the
// contents of the scratch buffer from `o`; returns *this.
// NOTE(review): no `this != &o` guard -- on self-assignment uprv_arrayCopy is
// given the same array as source and destination; verify that is tolerated.
LowercaseTransliterator& LowercaseTransliterator::operator=(
|
||||
const LowercaseTransliterator& o) {
|
||||
// NOTE(review): two base-class assignment calls appear because old and new diff
// lines are interleaved; presumably only the Transliterator:: one survives -- confirm.
TransformTransliterator::operator=(o);
|
||||
Transliterator::operator=(o);
|
||||
loc = o.loc;
|
||||
// Reuses the existing array; the source pointer is explicitly cast to const UChar*.
uprv_arrayCopy((const UChar*)o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -54,18 +61,58 @@ Transliterator* LowercaseTransliterator::clone(void) const {
|
|||
}
|
||||
|
||||
/**
|
||||
* TransformTransliterator framework method.
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
// Returns true when u_tolower(c) differs from c.
// NOTE(review): this definition looks like it is on the removed side of the
// diff (the class is being moved off TransformTransliterator) -- confirm.
UBool LowercaseTransliterator::hasTransform(UChar32 c) const {
|
||||
return c != u_tolower(c);
|
||||
}
|
||||
// Lowercases the range [offsets.start, offsets.limit) of `text` in place,
// one code point at a time, using the locale stored in `loc`.
//   text          - replaceable text that is read with charAt() and edited with
//                   handleReplaceBetween()
//   offsets       - start/limit/contextStart/contextLimit are read; limit and
//                   contextLimit are adjusted when a mapping changes length, and
//                   start is set to limit on exit
//   isIncremental - not referenced anywhere in the body shown here
// Although the method is const, it writes through the `buffer` member pointer.
// NOTE(review): that makes `buffer` shared scratch state per object -- confirm
// whether concurrent calls on one instance are expected.
void LowercaseTransliterator::handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const
|
||||
{
|
||||
int32_t textPos = offsets.start;
|
||||
int32_t loop;
|
||||
// Nothing to do for an empty range.
if (textPos >= offsets.limit) return;
|
||||
|
||||
// NOTE(review): the doc comment and transform() definition that follow sit in
// the middle of this function body; they look like removed-side residue of the
// diff rendering, not part of handleTransliterate -- confirm.
/**
|
||||
* TransformTransliterator framework method.
|
||||
*/
|
||||
void LowercaseTransliterator::transform(UnicodeString& s) const {
|
||||
s.toLower(loc);
|
||||
}
|
||||
// get string for context
|
||||
// TODO: add convenience method to do this, since we do it all over
|
||||
|
||||
UnicodeString original;
|
||||
/*UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1];*/ // get whole context
|
||||
/* Extract the characters from Replaceable */
|
||||
// Snapshot [contextStart, contextLimit) so later edits to `text` do not
// disturb what is being read.
for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
|
||||
original.append(text.charAt(loop));
|
||||
}
|
||||
|
||||
// Walk through original string
|
||||
// If there is a case change, modify corresponding position in replaceable
|
||||
|
||||
// i and limit index into `original` (relative to contextStart); textPos
// indexes into `text` and is advanced separately because lengths may change.
int32_t i = textPos - offsets.contextStart;
|
||||
int32_t limit = offsets.limit - offsets.contextStart;
|
||||
UChar32 cp;
|
||||
int32_t oldLen;
|
||||
|
||||
for (; i < limit; ) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// s remembers where this code point starts in `original`.
int32_t s = i;
|
||||
buffer[0] = original.charAt(s);
|
||||
|
||||
// Presumably reads the code point at index i into cp (macro defined
// elsewhere -- TODO confirm exact semantics for unpaired surrogates).
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
|
||||
oldLen = UTF_CHAR_LENGTH(cp);
|
||||
|
||||
i += oldLen;
|
||||
// Lowercase just the slice [s, i) of `original` into `buffer`.
u_strToLower(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
|
||||
/* Skip checking of status code here because the buffer should not have overflowed. */
|
||||
// NOTE(review): only the first code unit is compared. A mapping that leaves the
// first unit unchanged but alters a later one (e.g. a surrogate pair whose lead
// surrogate is the same) would be treated as "no change" -- confirm.
if ( buffer[0] != original.charAt(s) ) {
|
||||
// NOTE(review): u_strlen and UnicodeString(buffer) both rely on `buffer`
// being NUL-terminated by u_strToLower -- verify for a result that fills the
// whole array.
int len = u_strlen(buffer);
|
||||
UnicodeString temp(buffer);
|
||||
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
|
||||
// Length changed: step past the replacement, shift the limits by the delta,
// and skip the common textPos += oldLen below.
if (len != oldLen) {
|
||||
textPos += len;
|
||||
offsets.limit += len - oldLen;
|
||||
offsets.contextLimit += len - oldLen;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Unchanged, or replaced by text of the same length.
textPos += oldLen;
|
||||
}
|
||||
// The whole range has been consumed.
offsets.start = offsets.limit;
|
||||
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
|
|
@ -20,15 +20,14 @@ U_NAMESPACE_BEGIN
|
|||
* case mapping.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API LowercaseTransliterator : public TransformTransliterator {
|
||||
class U_I18N_API LowercaseTransliterator : public Transliterator {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
LowercaseTransliterator(const Locale& loc = Locale::getDefault(),
|
||||
UnicodeFilter* adoptedFilter = 0);
|
||||
LowercaseTransliterator(const Locale& loc = Locale::getDefault());
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -53,19 +52,16 @@ class U_I18N_API LowercaseTransliterator : public TransformTransliterator {
|
|||
protected:
|
||||
|
||||
/**
|
||||
* TransformTransliterator framework method.
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
virtual UBool hasTransform(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* TransformTransliterator framework method.
|
||||
*/
|
||||
virtual void transform(UnicodeString& s) const;
|
||||
virtual void handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const;
|
||||
|
||||
private:
|
||||
|
||||
Locale loc;
|
||||
|
||||
UChar* buffer;
|
||||
static const char _ID[];
|
||||
};
|
||||
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
*/
|
||||
|
||||
#include "toupptrn.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cpputils.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -18,31 +21,39 @@ const char UppercaseTransliterator::_ID[] = "Any-Upper";
|
|||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
// NOTE(review): two constructor headers are interleaved here. The first
// (Locale + UnicodeFilter*, TransformTransliterator base) looks like the
// removed side of the diff; the second (Locale only, Transliterator base)
// looks like the added side -- confirm.
UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
TransformTransliterator(_ID, adoptedFilter),
|
||||
loc(theLoc) {
|
||||
// Locale-only form: base gets _ID and a 0 filter, `loc` stores the locale,
// `buffer` starts at 0 and is allocated in the body.
UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc) :
|
||||
Transliterator(_ID, 0),
|
||||
loc(theLoc),
|
||||
buffer(0) {
|
||||
// Scratch array of u_getMaxCaseExpansion() UChars; freed with delete [] in the destructor.
buffer = new UChar[u_getMaxCaseExpansion()];
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
// NOTE(review): the empty-bodied destructor on the next line and the one after
// it appear to be the before/after sides of the diff -- confirm.
UppercaseTransliterator::~UppercaseTransliterator() {}
|
||||
// Destructor: frees the UChar array held in `buffer`.
UppercaseTransliterator::~UppercaseTransliterator() {
|
||||
// Array delete, matching the new UChar[...] used by the constructors on this page.
delete [] buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
// Copy constructor. Copies the base part and locale, allocates a separate
// `buffer` for the new object, then copies the source buffer's contents.
UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator& o) :
|
||||
// NOTE(review): the TransformTransliterator(o) / "loc(o.loc) {}" pair below
// looks like the removed initializer list from the old side of the diff -- confirm.
TransformTransliterator(o),
|
||||
loc(o.loc) {}
|
||||
Transliterator(o),
|
||||
loc(o.loc),
|
||||
buffer(0){
|
||||
// Fresh array of u_getMaxCaseExpansion() UChars owned by this object.
buffer = new UChar[u_getMaxCaseExpansion()];
|
||||
// Presumably copies u_getMaxCaseExpansion() elements from o.buffer into
// this->buffer (uprv_arrayCopy is declared elsewhere -- TODO confirm).
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
// Assignment operator: assigns the base-class part, the locale, and the
// contents of the scratch buffer from `o`; returns *this.
// NOTE(review): no `this != &o` guard -- on self-assignment uprv_arrayCopy is
// given the same array as source and destination; verify that is tolerated.
UppercaseTransliterator& UppercaseTransliterator::operator=(
|
||||
const UppercaseTransliterator& o) {
|
||||
// NOTE(review): two base-class assignment calls appear because old and new diff
// lines are interleaved; presumably only the Transliterator:: one survives -- confirm.
TransformTransliterator::operator=(o);
|
||||
Transliterator::operator=(o);
|
||||
loc = o.loc;
|
||||
// Reuses the existing array rather than reallocating it.
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -54,17 +65,57 @@ Transliterator* UppercaseTransliterator::clone(void) const {
|
|||
}
|
||||
|
||||
/**
|
||||
* TransformTransliterator framework method.
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
// Returns true when u_toupper(c) differs from c.
// NOTE(review): this definition looks like it is on the removed side of the
// diff (the class is being moved off TransformTransliterator) -- confirm.
UBool UppercaseTransliterator::hasTransform(UChar32 c) const {
|
||||
return c != u_toupper(c);
|
||||
}
|
||||
// Uppercases the range [offsets.start, offsets.limit) of `text` in place,
// one code point at a time, using the locale stored in `loc`.
//   text          - replaceable text that is read with charAt() and edited with
//                   handleReplaceBetween()
//   offsets       - start/limit/contextStart/contextLimit are read; limit and
//                   contextLimit are adjusted when a mapping changes length, and
//                   start is set to limit on exit
//   isIncremental - not referenced anywhere in the body shown here
// Although the method is const, it writes through the `buffer` member pointer.
// NOTE(review): that makes `buffer` shared scratch state per object -- confirm
// whether concurrent calls on one instance are expected.
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const {
|
||||
int32_t textPos = offsets.start;
|
||||
int32_t loop = 0;
|
||||
// Nothing to do for an empty range.
if (textPos >= offsets.limit) return;
|
||||
|
||||
// NOTE(review): the doc comment, the transform() header and the s.toUpper(loc)
// line that follow sit in the middle of this function body; they look like
// removed-side residue of the diff rendering, not part of handleTransliterate
// -- confirm.
/**
|
||||
* TransformTransliterator framework method.
|
||||
*/
|
||||
void UppercaseTransliterator::transform(UnicodeString& s) const {
|
||||
s.toUpper(loc);
|
||||
// get string for context
|
||||
// TODO: add convenience method to do this, since we do it all over
|
||||
|
||||
UnicodeString original;
|
||||
/* UChar *original = new UChar[offsets.contextLimit - offsets.contextStart+1]; */// get whole context
|
||||
/* Extract the characters from Replaceable */
|
||||
// Snapshot [contextStart, contextLimit) so later edits to `text` do not
// disturb what is being read.
for (loop = offsets.contextStart; loop < offsets.contextLimit; loop++) {
|
||||
original.append(text.charAt(loop));
|
||||
}
|
||||
|
||||
// Walk through original string
|
||||
// If there is a case change, modify corresponding position in replaceable
|
||||
|
||||
// i and limit index into `original` (relative to contextStart); textPos
// indexes into `text` and is advanced separately because lengths may change.
int32_t i = textPos - offsets.contextStart;
|
||||
int32_t limit = offsets.limit - offsets.contextStart;
|
||||
UChar32 cp;
|
||||
int32_t oldLen;
|
||||
|
||||
for (; i < limit; ) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// s remembers where this code point starts in `original`.
int32_t s = i;
|
||||
buffer[0] = original.charAt(s);
|
||||
|
||||
// Presumably reads the code point at index i into cp (macro defined
// elsewhere -- TODO confirm exact semantics for unpaired surrogates).
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
|
||||
oldLen = UTF_CHAR_LENGTH(cp);
|
||||
|
||||
i += oldLen;
|
||||
// Uppercase just the slice [s, i) of `original` into `buffer`.
u_strToUpper(buffer, u_getMaxCaseExpansion(), original.getBuffer()+s, i-s, loc.getName(), &status);
|
||||
/* Skip checking of status code here because the buffer should not have overflowed. */
|
||||
// NOTE(review): only the first code unit is compared. A mapping that leaves the
// first unit unchanged but alters a later one (e.g. a surrogate pair whose lead
// surrogate is the same) would be treated as "no change" -- confirm.
if (buffer[0] != original.charAt(s)) {
|
||||
// NOTE(review): u_strlen and UnicodeString(buffer) both rely on `buffer`
// being NUL-terminated by u_strToUpper -- verify for a result that fills the
// whole array.
int len = u_strlen(buffer);
|
||||
UnicodeString temp(buffer);
|
||||
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
|
||||
// Length changed: step past the replacement, shift the limits by the delta,
// and skip the common textPos += oldLen below.
if (len != oldLen) {
|
||||
textPos += len;
|
||||
offsets.limit += len - oldLen;
|
||||
offsets.contextLimit += len - oldLen;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Unchanged, or replaced by text of the same length.
textPos += oldLen;
|
||||
}
|
||||
// The whole range has been consumed.
offsets.start = offsets.limit;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -20,15 +20,14 @@ U_NAMESPACE_BEGIN
|
|||
* case mapping.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API UppercaseTransliterator : public TransformTransliterator {
|
||||
class U_I18N_API UppercaseTransliterator : public Transliterator {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
UppercaseTransliterator(const Locale& loc = Locale::getDefault(),
|
||||
UnicodeFilter* adoptedFilter = 0);
|
||||
UppercaseTransliterator(const Locale& loc = Locale::getDefault());
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -52,20 +51,18 @@ class U_I18N_API UppercaseTransliterator : public TransformTransliterator {
|
|||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* TransformTransliterator framework method.
|
||||
*/
|
||||
virtual UBool hasTransform(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* TransformTransliterator framework method.
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
virtual void transform(UnicodeString& s) const;
|
||||
virtual void handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const;
|
||||
|
||||
private:
|
||||
|
||||
Locale loc;
|
||||
|
||||
UChar* buffer;
|
||||
static const char _ID[];
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue