ICU-965 create TransformTransliterator

X-SVN-Rev: 4940
This commit is contained in:
Alan Liu 2001-06-11 19:23:05 +00:00
parent 44bc4891f6
commit 18db07d235
2 changed files with 174 additions and 0 deletions

View file

@ -0,0 +1,55 @@
/*
**********************************************************************
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 05/24/01 aliu Creation.
**********************************************************************
*/
#ifndef XFORMTRN_H
#define XFORMTRN_H
#include "unicode/translit.h"
/**
 * Abstract base class for transliterators whose behavior is expressed
 * as a string transform.  A concrete subclass derives from this class
 * and supplies the two pure virtual methods declared below,
 * <code>hasTransform()</code> and <code>transform()</code>.
 * @author Alan Liu
 */
class U_I18N_API TransformTransliterator : public Transliterator {

protected:

    /**
     * Constructor, intended for subclasses only.
     * @param id            the ID handed on to the Transliterator base class
     * @param adoptedFilter the filter handed on to the Transliterator base
     *                      class; may be consulted later via getFilter()
     */
    TransformTransliterator(const UnicodeString& id,
                            UnicodeFilter* adoptedFilter);

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     * @param text          the text to be modified in place
     * @param offsets       the positions delimiting the range to process;
     *                      updated on return
     * @param isIncremental incremental-mode flag
     */
    void handleTransliterate(Replaceable& text, UTransPosition& offsets,
                             UBool isIncremental) const;

    /**
     * Reports whether the character <code>c</code> has a transform that
     * differs from <code>c</code> itself.  This is roughly the question
     * "does <code>transform()</code>, applied to a string holding only
     * <code>c</code>, change that string?".  A subclass that transforms
     * all of its input may simply return <code>true</code>.
     * @param c the character to test
     * @return true if <code>c</code> is changed by the transform
     */
    virtual UBool hasTransform(UChar32 c) const = 0;

    /**
     * Transforms a string in place.  Subclasses must implement this.
     * @param s on entry the text to transform, on exit the result
     */
    virtual void transform(UnicodeString& s) const = 0;
};
#endif

View file

@ -0,0 +1,119 @@
/*
**********************************************************************
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 05/24/01 aliu Creation.
**********************************************************************
*/
#include "unicode/xformtrn.h"
#include "unicode/unifilt.h"
/**
 * Constructor for use by subclasses.  Both arguments are forwarded
 * unchanged to the Transliterator base-class constructor; this class
 * adds no state of its own.
 */
TransformTransliterator::TransformTransliterator(const UnicodeString& id,
                                                 UnicodeFilter* adoptedFilter)
    : Transliterator(id, adoptedFilter)
{
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Scans [offsets.start, offsets.limit) for the first character for
 * which hasTransform() returns true.  If there is none, the text is
 * left untouched and offsets.start is advanced to offsets.limit.
 * Otherwise the rest of the range is split into alternating runs of
 * characters the filter contains and characters it does not.  Each
 * contained run is passed through transform() and written back with
 * one replace call; the other runs are skipped.  When no filter is
 * set, the whole remainder is transformed in one step.  On return
 * offsets.limit and offsets.contextLimit have been adjusted by the net
 * change in length and offsets.start equals the new offsets.limit.
 *
 * @param text          the text to modify in place
 * @param offsets       the range to process; updated as described above
 * @param isIncremental not consulted by this implementation
 */
void TransformTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                  UBool isIncremental) const {
    int32_t start;
    for (start = offsets.start; start < offsets.limit; ++start) {
        // Scan for the first character that is != its transform.
        // If there are none, we fall out without doing anything.
        UChar32 c = filteredCharAt(text, start);
        if (hasTransform(c)) {
            // There is a transforming character at start.  Break
            // up the remaining string, from start to
            // offsets.limit, into segments of unfiltered and
            // filtered characters.  Only transform the unfiltered
            // characters.  As always, minimize the number of
            // calls to Replaceable.replace().
            int32_t len = offsets.limit - start;
            // assert(len >= 1);

            // base is the index in text of the first character to
            // process.  segStart and segLimit are offsets from base
            // measured in the ORIGINAL text, i.e. as it was before
            // any replacement made below.  start, in contrast, is
            // always an index into the CURRENT text.
            int32_t base = start;
            int32_t segStart = 0;
            int32_t segLimit;
            const UnicodeFilter* filt = getFilter();

            // lenDelta is the accumulated length difference for
            // all transformed segments.  It is new length - old
            // length.  Text that has not been processed yet has
            // moved by exactly this amount, so the character that
            // was originally at base + k now lives at
            // base + lenDelta + k.
            int32_t lenDelta = 0;

            // Temporary string used to do transformations
            UnicodeString str;

            // Set segStart, segLimit to the unfiltered segment
            // starting with start.  If the filter is null, then
            // segStart/Limit will be set to the whole string,
            // that is, 0/len.
            do {
                // Set segLimit to the first filtered char at or
                // after segStart.
                if (filt != 0) {
                    segLimit = segStart;
                    UChar ch;
                    // Read through lenDelta so that earlier
                    // length-changing replacements do not make us
                    // look at the wrong characters.
                    while (segLimit < len &&
                           filt->contains(ch = text.charAt(base + lenDelta + segLimit))) {
                        ++segLimit;
                        str.append(ch);
                    }
                }
                // If there is no filter then we'll do everything at
                // once, and we'll only make one iteration of this do
                // loop.  Copy the entire range to the string.
                else {
                    segLimit = len;
                    int32_t i;
                    for (i = start; i < offsets.limit; ++i) {
                        str.append(text.charAt(i));
                    }
                }

                // Transform the unfiltered chars between segStart
                // and segLimit.
                int32_t segLen = segLimit - segStart;
                if (segLen != 0) {
                    transform(str);
                    text.handleReplaceBetween(start, start + segLen, str);
                    start += str.length();
                    lenDelta += str.length() - segLen;
                    str.truncate(0);
                }

                // Set segStart to the first unfiltered char at or
                // after segLimit.  lenDelta now includes the segment
                // just replaced, so the same index mapping applies.
                segStart = segLimit;
                if (filt != 0) {
                    while (segStart < len &&
                           !filt->contains(text.charAt(base + lenDelta + segStart))) {
                        ++segStart;
                    }
                }
                // Skip start over the filtered run as well.
                start += segStart - segLimit;
            } while (segStart < len);

            offsets.limit += lenDelta;
            offsets.contextLimit += lenDelta;
            offsets.start = offsets.limit;
            return;
        }
    }
    // assert(start == offsets.limit);
    offsets.start = start;
}