mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 02:07:15 +00:00
ICU-965 create TransformTransliterator
X-SVN-Rev: 4940
This commit is contained in:
parent
44bc4891f6
commit
18db07d235
2 changed files with 174 additions and 0 deletions
55
icu4c/source/i18n/unicode/xformtrn.h
Normal file
55
icu4c/source/i18n/unicode/xformtrn.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 05/24/01 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef XFORMTRN_H
|
||||
#define XFORMTRN_H
|
||||
|
||||
#include "unicode/translit.h"
|
||||
|
||||
/**
|
||||
* An abstract class for transliterators based on a transform
|
||||
* operation. To create a transliterator that implements a
|
||||
* transformation, create a subclass of this class and implement the
|
||||
* abstract <code>transform()</code> and <code>hasTransform()</code>
|
||||
* methods.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API TransformTransliterator : public Transliterator {
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Constructs a transliterator. For use by subclasses.
|
||||
*/
|
||||
TransformTransliterator(const UnicodeString& id,
|
||||
UnicodeFilter* adoptedFilter);
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
void handleTransliterate(Replaceable& text, UTransPosition& offset,
|
||||
UBool isIncremental) const;
|
||||
/**
|
||||
* Subclasses must implement this method to determine whether a
|
||||
* given character has a transform that is not equal to itself.
|
||||
* This is approximately equivalent to <code>c !=
|
||||
* transform(String.valueOf(c))</code>, where
|
||||
* <code>String.valueOf(c)</code> returns a String containing the
|
||||
* single character (not integer) <code>c</code>. Subclasses that
|
||||
* transform all their input can simply return <code>true</code>.
|
||||
*/
|
||||
virtual UBool hasTransform(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* Subclasses must implement this method to transform a string.
|
||||
*/
|
||||
virtual void transform(UnicodeString& s) const = 0;
|
||||
};
|
||||
|
||||
#endif
|
119
icu4c/source/i18n/xformtrn.cpp
Normal file
119
icu4c/source/i18n/xformtrn.cpp
Normal file
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 05/24/01 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/xformtrn.h"
|
||||
#include "unicode/unifilt.h"
|
||||
|
||||
/**
|
||||
* Constructs a transliterator. For use by subclasses.
|
||||
*/
|
||||
TransformTransliterator::TransformTransliterator(const UnicodeString& id,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(id, adoptedFilter) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
void TransformTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
|
||||
UBool isIncremental) const {
|
||||
|
||||
int32_t start;
|
||||
for (start = offsets.start; start < offsets.limit; ++start) {
|
||||
// Scan for the first character that is != its transform.
|
||||
// If there are none, we fall out without doing anything.
|
||||
UChar32 c = filteredCharAt(text, start);
|
||||
if (hasTransform(c)) {
|
||||
// There is a transforming character at start. Break
|
||||
// up the remaining string, from start to
|
||||
// offsets.limit, into segments of unfiltered and
|
||||
// filtered characters. Only transform the unfiltered
|
||||
// characters. As always, minimize the number of
|
||||
// calls to Replaceable.replace().
|
||||
|
||||
int32_t len = offsets.limit - start;
|
||||
// assert(len >= 1);
|
||||
|
||||
int32_t base = start;
|
||||
|
||||
int32_t segStart = 0;
|
||||
int32_t segLimit;
|
||||
const UnicodeFilter* filt = getFilter();
|
||||
|
||||
// lenDelta is the accumulated length difference for
|
||||
// all transformed segments. It is new length - old
|
||||
// length.
|
||||
int32_t lenDelta = 0;
|
||||
|
||||
// Temporary string used to do transformations
|
||||
UnicodeString str;
|
||||
|
||||
// Set segStart, segLimit to the unfiltered segment
|
||||
// starting with start. If the filter is null, then
|
||||
// segStart/Limit will be set to the whole string,
|
||||
// that is, 0/len.
|
||||
do {
|
||||
// Set segLimit to the first filtered char at or
|
||||
// after segStart.
|
||||
if (filt != 0) {
|
||||
segLimit = segStart;
|
||||
UChar c;
|
||||
while (segLimit < len &&
|
||||
filt->contains(c=text.charAt(base + segLimit))) {
|
||||
++segLimit;
|
||||
str.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
// If there is no filter then we'll do everthing at
|
||||
// once, and we'll only make one iteration of this do
|
||||
// loop. Copy the entire range to the string.
|
||||
else {
|
||||
segLimit = len;
|
||||
int32_t i;
|
||||
for (i=start; i<offsets.limit; ++i) {
|
||||
str.append(text.charAt(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Transform the unfiltered chars between segStart
|
||||
// and segLimit.
|
||||
int32_t segLen = segLimit - segStart;
|
||||
if (segLen != 0) {
|
||||
transform(str);
|
||||
text.handleReplaceBetween(start, start + segLen, str);
|
||||
start += str.length();
|
||||
lenDelta += str.length() - segLen;
|
||||
str.truncate(0);
|
||||
}
|
||||
|
||||
// Set segStart to the first unfiltered char at or
|
||||
// after segLimit.
|
||||
segStart = segLimit;
|
||||
if (filt != 0) {
|
||||
while (segStart < len &&
|
||||
!filt->contains(text.charAt(base + segStart))) {
|
||||
++segStart;
|
||||
}
|
||||
}
|
||||
start += segStart - segLimit;
|
||||
|
||||
} while (segStart < len);
|
||||
|
||||
offsets.limit += lenDelta;
|
||||
offsets.contextLimit += lenDelta;
|
||||
offsets.start = offsets.limit;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// assert(start == offsets.limit);
|
||||
offsets.start = start;
|
||||
}
|
Loading…
Add table
Reference in a new issue