ICU-1434 initial public implementation of getSourceSet and getTargetSet

X-SVN-Rev: 8950
This commit is contained in:
Alan Liu 2002-06-26 18:12:40 +00:00
parent ebfd41bfe6
commit 31e8a2b206
9 changed files with 182 additions and 27 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
* $Date: 2002/02/25 22:43:58 $
* $Revision: 1.29 $
* $Date: 2002/06/26 18:12:39 $
* $Revision: 1.30 $
*
*****************************************************************************************
*/
@ -30,7 +30,7 @@ import java.util.Vector;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.29 $ $Date: 2002/02/25 22:43:58 $
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.30 $ $Date: 2002/06/26 18:12:39 $
*/
public class CompoundTransliterator extends Transliterator {
@ -307,9 +307,9 @@ public class CompoundTransliterator extends Transliterator {
/**
* Return the set of all characters that may be modified by this
* Transliterator, ignoring the effect of filters.
* Transliterator, ignoring the effect of our filter.
*/
UnicodeSet getSourceSet() {
protected UnicodeSet handleGetSourceSet() {
UnicodeSet set = new UnicodeSet();
for (int i=0; i<trans.length; ++i) {
set.addAll(trans[i].getSourceSet());
@ -328,6 +328,19 @@ public class CompoundTransliterator extends Transliterator {
return set;
}
/**
 * Computes the characters this compound transliterator may emit as
 * replacement text by merging the target sets of its component
 * transliterators.
 *
 * @return a newly created set holding the union of the target sets
 * of every element of <code>trans</code>
 */
public UnicodeSet getTargetSet() {
    UnicodeSet union = new UnicodeSet();
    int index = 0;
    while (index < trans.length) {
        // This is a heuristic, and not 100% reliable.
        union.addAll(trans[index].getTargetSet());
        ++index;
    }
    return union;
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/

View file

@ -71,6 +71,16 @@ class FunctionReplacer implements UnicodeReplacer {
rule.append(" )");
return rule.toString();
}
/**
 * Adds every character that may be output by this object to the
 * given set. The characters added are the target set of the
 * underlying transliterator.
 *
 * @param toUnionTo the set into which to union the output characters
 * @return the result of <code>toUnionTo.addAll(...)</code>
 */
public UnicodeSet getReplacementSet(UnicodeSet toUnionTo) {
    UnicodeSet produced = translit.getTargetSet();
    return toUnionTo.addAll(produced);
}
}
//eof

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
* $Date: 2002/02/25 22:43:58 $
* $Revision: 1.54 $
* $Date: 2002/06/26 18:12:39 $
* $Revision: 1.55 $
*
*****************************************************************************************
*/
@ -279,7 +279,7 @@ import com.ibm.icu.impl.data.ResourceReader;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.54 $ $Date: 2002/02/25 22:43:58 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.55 $ $Date: 2002/06/26 18:12:39 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -451,15 +451,26 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* Return the set of all characters that may be modified by this
* Transliterator, ignoring the effect of filters.
* Transliterator, ignoring the effect of our filter.
*/
UnicodeSet getSourceSet() {
protected UnicodeSet handleGetSourceSet() {
    // The source set is whatever the rule set held in this
    // transliterator's data reports.
    UnicodeSet fromRules = data.ruleSet.getSourceSet();
    return fromRules;
}
/**
 * Reports the characters that may be generated as replacement text
 * by this transliterator, as computed by its rule set.
 *
 * @return the target set returned by <code>data.ruleSet</code>
 */
public UnicodeSet getTargetSet() {
    UnicodeSet fromRules = data.ruleSet.getTargetSet();
    return fromRules;
}
}
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.55 2002/06/26 18:12:39 alan
* jitterbug 1434: initial public implementation of getSourceSet and getTargetSet
*
* Revision 1.54 2002/02/25 22:43:58 ram
* Move Utility class to icu.impl
*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringMatcher.java,v $
* $Date: 2002/02/25 22:43:58 $
* $Revision: 1.9 $
* $Date: 2002/06/26 18:12:39 $
* $Revision: 1.10 $
*
*****************************************************************************************
*/
@ -280,6 +280,21 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer {
public void resetMatch() {
matchStart = matchLimit = -1;
}
/**
* Union the set of all characters that may be output by this object
* into the given set. This implementation adds nothing and returns
* the given set unchanged.
* @param toUnionTo the set into which to union the output characters
* @return a reference to toUnionTo
*/
public UnicodeSet getReplacementSet(UnicodeSet toUnionTo) {
// The output of this replacer varies; it is the source text between
// matchStart and matchLimit. Since this varies depending on the
// input text, we can't compute it here. We can either do nothing
// or we can add ALL characters to the set. It's probably more useful
// to do nothing, so toUnionTo is deliberately left untouched.
return toUnionTo;
}
}
//eof

View file

@ -273,6 +273,26 @@ class StringReplacer implements UnicodeReplacer {
return rule.toString();
}
/**
 * Adds every character that may be output by this object to the
 * given set. The output text is walked one code point at a time; a
 * code point for which <code>data.lookupReplacer</code> returns a
 * replacer contributes that replacer's own replacement set, and any
 * other code point is added directly.
 *
 * @param toUnionTo the set into which to union the output characters
 * @return a reference to toUnionTo
 */
public UnicodeSet getReplacementSet(UnicodeSet toUnionTo) {
    int pos = 0;
    while (pos < output.length()) {
        int codePoint = UTF16.charAt(output, pos);
        UnicodeReplacer nested = data.lookupReplacer(codePoint);
        if (nested != null) {
            // Stand-in for another replacer: let it add its own output.
            nested.getReplacementSet(toUnionTo);
        } else {
            // Not a stand-in: the code point itself is added.
            toUnionTo.add(codePoint);
        }
        // Step over the one or two code units used by this code point.
        pos += UTF16.getCharCount(codePoint);
    }
    return toUnionTo;
}
}
//eof

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
* $Date: 2002/02/25 22:43:57 $
* $Revision: 1.45 $
* $Date: 2002/06/26 18:12:39 $
* $Revision: 1.46 $
*
*****************************************************************************************
*/
@ -46,7 +46,7 @@ import com.ibm.icu.impl.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.45 $ $Date: 2002/02/25 22:43:57 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.46 $ $Date: 2002/06/26 18:12:39 $
*/
class TransliterationRule {
@ -579,10 +579,21 @@ class TransliterationRule {
}
return toUnionTo;
}
/**
 * Adds every character that may be emitted by this rule to the given
 * set, by asking the rule's output replacer for its replacement set.
 *
 * @param toUnionTo the set into which to union the emitted characters
 * @return whatever <code>output.getReplacementSet(toUnionTo)</code>
 * returns
 */
UnicodeSet getTargetSet(UnicodeSet toUnionTo) {
    UnicodeSet merged = output.getReplacementSet(toUnionTo);
    return merged;
}
}
/**
* $Log: TransliterationRule.java,v $
* Revision 1.46 2002/06/26 18:12:39 alan
* jitterbug 1434: initial public implementation of getSourceSet and getTargetSet
*
* Revision 1.45 2002/02/25 22:43:57 ram
* Move Utility class to icu.impl
*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java,v $
* $Date: 2002/02/25 22:43:58 $
* $Revision: 1.23 $
* $Date: 2002/06/26 18:12:40 $
* $Revision: 1.24 $
*
*****************************************************************************************
*/
@ -28,7 +28,7 @@ import com.ibm.icu.impl.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.23 $ $Date: 2002/02/25 22:43:58 $
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.24 $ $Date: 2002/06/26 18:12:40 $
*/
class TransliterationRuleSet {
/**
@ -256,12 +256,29 @@ class TransliterationRuleSet {
}
return set;
}
/**
 * Collects the characters that may be emitted by any rule in this
 * rule set.
 *
 * @return a newly created set into which every rule in
 * <code>ruleVector</code> has unioned its target characters
 */
UnicodeSet getTargetSet() {
    UnicodeSet result = new UnicodeSet();
    final int ruleCount = ruleVector.size();
    int index = 0;
    while (index < ruleCount) {
        // Each rule unions its own output characters into result.
        ((TransliterationRule) ruleVector.elementAt(index)).getTargetSet(result);
        ++index;
    }
    return result;
}
}
/* $Log: TransliterationRuleSet.java,v $
* Revision 1.23 2002/02/25 22:43:58 ram
* Move Utility class to icu.impl
* Revision 1.24 2002/06/26 18:12:40 alan
* jitterbug 1434: initial public implementation of getSourceSet and getTargetSet
*
/* Revision 1.23 2002/02/25 22:43:58 ram
/* Move Utility class to icu.impl
/*
/* Revision 1.22 2002/02/16 03:06:17 Mohan
/* ICU4J reorganization
/*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
* $Date: 2002/06/12 17:35:24 $
* $Revision: 1.77 $
* $Date: 2002/06/26 18:12:40 $
* $Revision: 1.78 $
*
*****************************************************************************************
*/
@ -250,7 +250,7 @@ import java.util.Vector;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.77 $ $Date: 2002/06/12 17:35:24 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.78 $ $Date: 2002/06/26 18:12:40 $
*/
public abstract class Transliterator {
/**
@ -1311,11 +1311,61 @@ public abstract class Transliterator {
}
/**
* Return the set of all characters that may be modified by this
* Transliterator, ignoring the effect of filters. The default
* implementation returns an empty set.
* Returns the set of all characters that may be modified in the
* input text by this Transliterator. This incorporates this
* object's current filter; if the filter is changed, the return
* value of this function will change. The default implementation
* returns an empty set. Some subclasses may override {@link
* #handleGetSourceSet} to return a more precise result. The
* return result is approximate in any case and is intended for
* use by tests, tools, or utilities.
* @see #getTargetSet
* @see #handleGetSourceSet
*/
UnicodeSet getSourceSet() {
public final UnicodeSet getSourceSet() {
    // Start from the subclass-supplied source set, which ignores the
    // filter, then narrow it by the filter if one is installed.
    UnicodeSet set = handleGetSourceSet();
    if (filter != null) {
        UnicodeSet filterSet;
        // Most, but not all filters will be UnicodeSets. Test the type
        // explicitly instead of attempting the cast and catching
        // ClassCastException, so that exceptions are not used for
        // ordinary control flow.
        if (filter instanceof UnicodeSet) {
            filterSet = (UnicodeSet) filter;
        } else {
            // Any other filter: have it describe itself through
            // getMatchSet (presumably it fills filterSet with the
            // characters it matches -- confirm against that method).
            filterSet = new UnicodeSet();
            filter.getMatchSet(filterSet);
        }
        // Intersect in place; this modifies the set object returned by
        // handleGetSourceSet().
        set.retainAll(filterSet);
    }
    return set;
}
/**
* Framework method that returns the set of all characters that
* may be modified in the input text by this Transliterator,
* ignoring the effect of this object's filter. The base class
* implementation returns the empty set. Subclasses that wish to
* implement this should override this method.
* @return the set of characters that this transliterator may
* modify. The set may be modified by the caller ({@link
* #getSourceSet} applies <code>retainAll</code> to it when a filter
* is set), so subclasses should return a newly-created object.
* @see #getSourceSet
* @see #getTargetSet
*/
protected UnicodeSet handleGetSourceSet() {
// Default: a fresh, empty set.
return new UnicodeSet();
}
/**
* Returns the set of all characters that may be generated as
* replacement text by this transliterator. The default
* implementation returns the empty set. Some subclasses may
* override this method to return a more precise result. The
* return result is approximate in any case and is intended for
* use by tests, tools, or utilities requiring such
* meta-information.
* @return the set of characters that may be generated; a new, empty
* set in this default implementation
* @see #getSourceSet
*/
public UnicodeSet getTargetSet() {
// Default: a fresh, empty set.
return new UnicodeSet();
}

View file

@ -52,6 +52,14 @@ interface UnicodeReplacer {
* Utility.isUnprintable().
*/
public abstract String toReplacerPattern(boolean escapeUnprintable);
/**
* Union the set of all characters that may be output by this object
* into the given set.
* @param toUnionTo the set into which to union the output characters
* @return a reference to toUnionTo
*/
public abstract UnicodeSet getReplacementSet(UnicodeSet toUnionTo);
}
//eof