mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 04:29:31 +00:00
Implement anchors.
X-SVN-Rev: 2408
This commit is contained in:
parent
6b707c92f0
commit
58c0f1bf5b
4 changed files with 74 additions and 16 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2000/08/30 20:40:30 $
|
||||
* $Revision: 1.38 $
|
||||
* $Date: 2000/08/31 17:11:42 $
|
||||
* $Revision: 1.39 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -149,6 +149,32 @@ import com.ibm.util.Utility;
|
|||
* represent the input string segments, in left-to-right order of
|
||||
* definition.</p>
|
||||
*
|
||||
* <p><b>Anchors</b></p>
|
||||
*
|
||||
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
|
||||
* special characters '<code>^</code>' and '<code>$</code>'. For example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>^ a > 'BEG_A'; # match 'a' at start of text<br>
|
||||
* a > 'A'; # match other instances
|
||||
* of 'a'<br>
|
||||
* z $ > 'END_Z'; # match 'z' at end of text<br>
|
||||
* z > 'Z'; # match other instances
|
||||
* of 'z'</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
|
||||
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
|
||||
* set pattern. Although this is usually the match character for the end anchor, the set will
|
||||
* match either the beginning or the end of the text, depending on its placement. For
|
||||
* example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>$x = [a-z$]; # match 'a' through 'z' OR anchor<br>
|
||||
* $x 1 > 2; # match '1' after a-z or at the start<br>
|
||||
* 3 $x > 4; # match '3' before a-z or at the end</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p><b>Example</b> </p>
|
||||
*
|
||||
* <p>The following example rules illustrate many of the features of
|
||||
|
@ -252,7 +278,7 @@ import com.ibm.util.Utility;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.38 $ $Date: 2000/08/30 20:40:30 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.39 $ $Date: 2000/08/31 17:11:42 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -1323,6 +1349,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.39 2000/08/31 17:11:42 alan4j
|
||||
* Implement anchors.
|
||||
*
|
||||
* Revision 1.38 2000/08/30 20:40:30 alan4j
|
||||
* Implement anchors.
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
|
||||
* $Date: 2000/08/30 20:40:30 $
|
||||
* $Revision: 1.29 $
|
||||
* $Date: 2000/08/31 17:11:42 $
|
||||
* $Revision: 1.30 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -254,7 +254,7 @@ import java.text.*;
|
|||
* *Unsupported by Java (and hence unsupported by UnicodeSet).
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.29 $ $Date: 2000/08/30 20:40:30 $ */
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.30 $ $Date: 2000/08/31 17:11:42 $ */
|
||||
public class UnicodeSet implements UnicodeFilter {
|
||||
|
||||
/* Implementation Notes.
|
||||
|
@ -1341,11 +1341,11 @@ public class UnicodeSet implements UnicodeFilter {
|
|||
//----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the character after the given position, or '\uFFFF' if
|
||||
* Returns the character after the given position, or '\uFFFE' if
|
||||
* there is none.
|
||||
*/
|
||||
private static final char charAfter(String str, int i) {
|
||||
return ((++i) < str.length()) ? str.charAt(i) : '\uFFFF';
|
||||
return ((++i) < str.length()) ? str.charAt(i) : '\uFFFE';
|
||||
}
|
||||
|
||||
private void ensureCapacity(int newLen) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2000/08/30 20:40:30 $
|
||||
* $Revision: 1.38 $
|
||||
* $Date: 2000/08/31 17:11:42 $
|
||||
* $Revision: 1.39 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -149,6 +149,32 @@ import com.ibm.util.Utility;
|
|||
* represent the input string segments, in left-to-right order of
|
||||
* definition.</p>
|
||||
*
|
||||
* <p><b>Anchors</b></p>
|
||||
*
|
||||
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
|
||||
* special characters '<code>^</code>' and '<code>$</code>'. For example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>^ a > 'BEG_A'; # match 'a' at start of text<br>
|
||||
* a > 'A'; # match other instances
|
||||
* of 'a'<br>
|
||||
* z $ > 'END_Z'; # match 'z' at end of text<br>
|
||||
* z > 'Z'; # match other instances
|
||||
* of 'z'</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
|
||||
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
|
||||
* set pattern. Although this is usually the match character for the end anchor, the set will
|
||||
* match either the beginning or the end of the text, depending on its placement. For
|
||||
* example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>$x = [a-z$]; # match 'a' through 'z' OR anchor<br>
|
||||
* $x 1 > 2; # match '1' after a-z or at the start<br>
|
||||
* 3 $x > 4; # match '3' before a-z or at the end</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p><b>Example</b> </p>
|
||||
*
|
||||
* <p>The following example rules illustrate many of the features of
|
||||
|
@ -252,7 +278,7 @@ import com.ibm.util.Utility;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.38 $ $Date: 2000/08/30 20:40:30 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.39 $ $Date: 2000/08/31 17:11:42 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -1323,6 +1349,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.39 2000/08/31 17:11:42 alan4j
|
||||
* Implement anchors.
|
||||
*
|
||||
* Revision 1.38 2000/08/30 20:40:30 alan4j
|
||||
* Implement anchors.
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
|
||||
* $Date: 2000/08/30 20:40:30 $
|
||||
* $Revision: 1.29 $
|
||||
* $Date: 2000/08/31 17:11:42 $
|
||||
* $Revision: 1.30 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -254,7 +254,7 @@ import java.text.*;
|
|||
* *Unsupported by Java (and hence unsupported by UnicodeSet).
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.29 $ $Date: 2000/08/30 20:40:30 $ */
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.30 $ $Date: 2000/08/31 17:11:42 $ */
|
||||
public class UnicodeSet implements UnicodeFilter {
|
||||
|
||||
/* Implementation Notes.
|
||||
|
@ -1341,11 +1341,11 @@ public class UnicodeSet implements UnicodeFilter {
|
|||
//----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the character after the given position, or '\uFFFF' if
|
||||
* Returns the character after the given position, or '\uFFFE' if
|
||||
* there is none.
|
||||
*/
|
||||
private static final char charAfter(String str, int i) {
|
||||
return ((++i) < str.length()) ? str.charAt(i) : '\uFFFF';
|
||||
return ((++i) < str.length()) ? str.charAt(i) : '\uFFFE';
|
||||
}
|
||||
|
||||
private void ensureCapacity(int newLen) {
|
||||
|
|
Loading…
Add table
Reference in a new issue