mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 04:29:31 +00:00
ICU-1434 initial public implementation of getSourceSet and getTargetSet
X-SVN-Rev: 8950
This commit is contained in:
parent
ebfd41bfe6
commit
31e8a2b206
9 changed files with 182 additions and 27 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
|
||||
* $Date: 2002/02/25 22:43:58 $
|
||||
* $Revision: 1.29 $
|
||||
* $Date: 2002/06/26 18:12:39 $
|
||||
* $Revision: 1.30 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -30,7 +30,7 @@ import java.util.Vector;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.29 $ $Date: 2002/02/25 22:43:58 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.30 $ $Date: 2002/06/26 18:12:39 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
|
@ -307,9 +307,9 @@ public class CompoundTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* Return the set of all characters that may be modified by this
|
||||
* Transliterator, ignoring the effect of filters.
|
||||
* Transliterator, ignoring the effect of our filter.
|
||||
*/
|
||||
UnicodeSet getSourceSet() {
|
||||
protected UnicodeSet handleGetSourceSet() {
|
||||
UnicodeSet set = new UnicodeSet();
|
||||
for (int i=0; i<trans.length; ++i) {
|
||||
set.addAll(trans[i].getSourceSet());
|
||||
|
@ -328,6 +328,19 @@ public class CompoundTransliterator extends Transliterator {
|
|||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of all characters that may be generated as
|
||||
* replacement text by this transliterator.
|
||||
*/
|
||||
public UnicodeSet getTargetSet() {
|
||||
UnicodeSet set = new UnicodeSet();
|
||||
for (int i=0; i<trans.length; ++i) {
|
||||
// This is a heuristic, and not 100% reliable.
|
||||
set.addAll(trans[i].getTargetSet());
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
|
|
@ -71,6 +71,16 @@ class FunctionReplacer implements UnicodeReplacer {
|
|||
rule.append(" )");
|
||||
return rule.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
* @return a reference to toUnionTo
|
||||
*/
|
||||
public UnicodeSet getReplacementSet(UnicodeSet toUnionTo) {
|
||||
return toUnionTo.addAll(translit.getTargetSet());
|
||||
}
|
||||
}
|
||||
|
||||
//eof
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2002/02/25 22:43:58 $
|
||||
* $Revision: 1.54 $
|
||||
* $Date: 2002/06/26 18:12:39 $
|
||||
* $Revision: 1.55 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -279,7 +279,7 @@ import com.ibm.icu.impl.data.ResourceReader;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.54 $ $Date: 2002/02/25 22:43:58 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.55 $ $Date: 2002/06/26 18:12:39 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -451,15 +451,26 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* Return the set of all characters that may be modified by this
|
||||
* Transliterator, ignoring the effect of filters.
|
||||
* Transliterator, ignoring the effect of our filter.
|
||||
*/
|
||||
UnicodeSet getSourceSet() {
|
||||
protected UnicodeSet handleGetSourceSet() {
|
||||
return data.ruleSet.getSourceSet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of all characters that may be generated as
|
||||
* replacement text by this transliterator.
|
||||
*/
|
||||
public UnicodeSet getTargetSet() {
|
||||
return data.ruleSet.getTargetSet();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.55 2002/06/26 18:12:39 alan
|
||||
* jitterbug 1434: initial public implementation of getSourceSet and getTargetSet
|
||||
*
|
||||
* Revision 1.54 2002/02/25 22:43:58 ram
|
||||
* Move Utility class to icu.impl
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringMatcher.java,v $
|
||||
* $Date: 2002/02/25 22:43:58 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2002/06/26 18:12:39 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -280,6 +280,21 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer {
|
|||
public void resetMatch() {
|
||||
matchStart = matchLimit = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
* @return a reference to toUnionTo
|
||||
*/
|
||||
public UnicodeSet getReplacementSet(UnicodeSet toUnionTo) {
|
||||
// The output of this replacer varies; it is the source text between
|
||||
// matchStart and matchLimit. Since this varies depending on the
|
||||
// input text, we can't compute it here. We can either do nothing
|
||||
// or we can add ALL characters to the set. It's probably more useful
|
||||
// to do nothing.
|
||||
return toUnionTo;
|
||||
}
|
||||
}
|
||||
|
||||
//eof
|
||||
|
|
|
@ -273,6 +273,26 @@ class StringReplacer implements UnicodeReplacer {
|
|||
|
||||
return rule.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
* @return a reference to toUnionTo
|
||||
*/
|
||||
public UnicodeSet getReplacementSet(UnicodeSet toUnionTo) {
|
||||
int ch;
|
||||
for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
|
||||
ch = UTF16.charAt(output, i);
|
||||
UnicodeReplacer r = data.lookupReplacer(ch);
|
||||
if (r == null) {
|
||||
toUnionTo.add(ch);
|
||||
} else {
|
||||
r.getReplacementSet(toUnionTo);
|
||||
}
|
||||
}
|
||||
return toUnionTo;
|
||||
}
|
||||
}
|
||||
|
||||
//eof
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
|
||||
* $Date: 2002/02/25 22:43:57 $
|
||||
* $Revision: 1.45 $
|
||||
* $Date: 2002/06/26 18:12:39 $
|
||||
* $Revision: 1.46 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -46,7 +46,7 @@ import com.ibm.icu.impl.Utility;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.45 $ $Date: 2002/02/25 22:43:57 $
|
||||
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.46 $ $Date: 2002/06/26 18:12:39 $
|
||||
*/
|
||||
class TransliterationRule {
|
||||
|
||||
|
@ -579,10 +579,21 @@ class TransliterationRule {
|
|||
}
|
||||
return toUnionTo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be emitted by this rule
|
||||
* into the given set.
|
||||
*/
|
||||
UnicodeSet getTargetSet(UnicodeSet toUnionTo) {
|
||||
return output.getReplacementSet(toUnionTo);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* $Log: TransliterationRule.java,v $
|
||||
* Revision 1.46 2002/06/26 18:12:39 alan
|
||||
* jitterbug 1434: initial public implementation of getSourceSet and getTargetSet
|
||||
*
|
||||
* Revision 1.45 2002/02/25 22:43:57 ram
|
||||
* Move Utility class to icu.impl
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java,v $
|
||||
* $Date: 2002/02/25 22:43:58 $
|
||||
* $Revision: 1.23 $
|
||||
* $Date: 2002/06/26 18:12:40 $
|
||||
* $Revision: 1.24 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -28,7 +28,7 @@ import com.ibm.icu.impl.Utility;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.23 $ $Date: 2002/02/25 22:43:58 $
|
||||
* @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.24 $ $Date: 2002/06/26 18:12:40 $
|
||||
*/
|
||||
class TransliterationRuleSet {
|
||||
/**
|
||||
|
@ -256,12 +256,29 @@ class TransliterationRuleSet {
|
|||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the set of all characters that may be emitted by this set.
|
||||
*/
|
||||
UnicodeSet getTargetSet() {
|
||||
UnicodeSet set = new UnicodeSet();
|
||||
int count = ruleVector.size();
|
||||
for (int i=0; i<count; ++i) {
|
||||
TransliterationRule r =
|
||||
(TransliterationRule) ruleVector.elementAt(i);
|
||||
r.getTargetSet(set);
|
||||
}
|
||||
return set;
|
||||
}
|
||||
}
|
||||
|
||||
/* $Log: TransliterationRuleSet.java,v $
|
||||
* Revision 1.23 2002/02/25 22:43:58 ram
|
||||
* Move Utility class to icu.impl
|
||||
* Revision 1.24 2002/06/26 18:12:40 alan
|
||||
* jitterbug 1434: initial public implementation of getSourceSet and getTargetSet
|
||||
*
|
||||
/* Revision 1.23 2002/02/25 22:43:58 ram
|
||||
/* Move Utility class to icu.impl
|
||||
/*
|
||||
/* Revision 1.22 2002/02/16 03:06:17 Mohan
|
||||
/* ICU4J reorganization
|
||||
/*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||
* $Date: 2002/06/12 17:35:24 $
|
||||
* $Revision: 1.77 $
|
||||
* $Date: 2002/06/26 18:12:40 $
|
||||
* $Revision: 1.78 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -250,7 +250,7 @@ import java.util.Vector;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.77 $ $Date: 2002/06/12 17:35:24 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.78 $ $Date: 2002/06/26 18:12:40 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -1311,11 +1311,61 @@ public abstract class Transliterator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Return the set of all characters that may be modified by this
|
||||
* Transliterator, ignoring the effect of filters. The default
|
||||
* implementation returns an empty set.
|
||||
* Returns the set of all characters that may be modified in the
|
||||
* input text by this Transliterator. This incorporates this
|
||||
* object's current filter; if the filter is changed, the return
|
||||
* value of this function will change. The default implementation
|
||||
* returns an empty set. Some subclasses may override {@link
|
||||
* #handleGetSourceSet} to return a more precise result. The
|
||||
* return result is approximate in any case and is intended for
|
||||
* use by tests, tools, or utilities.
|
||||
* @see #getTargetSet
|
||||
* @see #handleGetSourceSet
|
||||
*/
|
||||
UnicodeSet getSourceSet() {
|
||||
public final UnicodeSet getSourceSet() {
|
||||
UnicodeSet set = handleGetSourceSet();
|
||||
if (filter != null) {
|
||||
UnicodeSet filterSet;
|
||||
// Most, but not all filters will be UnicodeSets. Optimize for
|
||||
// the high-runner case.
|
||||
try {
|
||||
filterSet = (UnicodeSet) filter;
|
||||
} catch (ClassCastException e) {
|
||||
filterSet = new UnicodeSet();
|
||||
filter.getMatchSet(filterSet);
|
||||
}
|
||||
set.retainAll(filterSet);
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Framework method that returns the set of all characters that
|
||||
* may be modified in the input text by this Transliterator,
|
||||
* ignoring the effect of this object's filter. The base class
|
||||
* implementation returns the empty set. Subclasses that wish to
|
||||
* implement this should override this method.
|
||||
* @return the set of characters that this transliterator may
|
||||
* modify. The set may be modified, so subclasses should return a
|
||||
* newly-created object.
|
||||
* @see #getSourceSet
|
||||
* @see #getTargetSet
|
||||
*/
|
||||
protected UnicodeSet handleGetSourceSet() {
|
||||
return new UnicodeSet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of all characters that may be generated as
|
||||
* replacement text by this transliterator. The default
|
||||
* implementation returns the empty set. Some subclasses may
|
||||
* override this method to return a more precise result. The
|
||||
* return result is approximate in any case and is intended for
|
||||
* use by tests, tools, or utilities requiring such
|
||||
* meta-information.
|
||||
* @see #getSourceSet
|
||||
*/
|
||||
public UnicodeSet getTargetSet() {
|
||||
return new UnicodeSet();
|
||||
}
|
||||
|
||||
|
|
|
@ -52,6 +52,14 @@ interface UnicodeReplacer {
|
|||
* Utility.isUnprintable().
|
||||
*/
|
||||
public abstract String toReplacerPattern(boolean escapeUnprintable);
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be output by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the output characters
|
||||
* @return a reference to toUnionTo
|
||||
*/
|
||||
public abstract UnicodeSet getReplacementSet(UnicodeSet toUnionTo);
|
||||
}
|
||||
|
||||
//eof
|
||||
|
|
Loading…
Add table
Reference in a new issue