ICU-1373 more fixes to support supplementals

X-SVN-Rev: 7285
This commit is contained in:
Alan Liu 2001-12-03 21:33:59 +00:00
parent da0fef51a8
commit c7903f1367
12 changed files with 334 additions and 302 deletions

View file

@ -3,8 +3,8 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java,v $
* $Date: 2001/11/21 20:56:50 $
* $Revision: 1.5 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.6 $
*/
package com.ibm.text;
import java.util.*;
@ -73,9 +73,10 @@ class NameUnicodeTransliterator extends Transliterator {
int mode = 0;
int ibuf = 0;
int openPos = offsets.start; // position of openDelimiter
for (; cursor < limit; ++cursor) {
char c = text.charAt(cursor);
int c;
for (; cursor < limit; cursor+=UTF16.getCharCount(c)) {
c = UTF16.charAt(text, cursor);
switch (mode) {
case 0: // looking for open delimiter

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringMatcher.java,v $
* $Date: 2001/11/29 22:31:18 $
* $Revision: 1.4 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.5 $
*
*****************************************************************************************
*/
@ -48,16 +48,20 @@ class StringMatcher implements UnicodeMatcher {
int[] offset,
int limit,
boolean incremental) {
// Note (1): We process text in 16-bit code units, rather than
// 32-bit code points. This works because stand-ins are
// always in the BMP and because we are doing a literal match
// operation, which can be done 16-bits at a time.
int i;
int[] cursor = new int[] { offset[0] };
if (limit < cursor[0]) {
// Match in the reverse direction
for (i=pattern.length()-1; i>=0; --i) {
char keyChar = pattern.charAt(i);
char keyChar = pattern.charAt(i); // OK; see note (1) above
UnicodeMatcher subm = data.lookup(keyChar);
if (subm == null) {
if (cursor[0] >= limit &&
keyChar == text.charAt(cursor[0])) {
keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
--cursor[0];
} else {
return U_MISMATCH;
@ -84,14 +88,14 @@ class StringMatcher implements UnicodeMatcher {
// without completing our match.
return U_PARTIAL_MATCH;
}
char keyChar = pattern.charAt(i);
char keyChar = pattern.charAt(i); // OK; see note (1) above
UnicodeMatcher subm = data.lookup(keyChar);
if (subm == null) {
// Don't need the cursor < limit check if
// incremental is true (because it's done above); do need
// it otherwise.
if (cursor[0] < limit &&
keyChar == text.charAt(cursor[0])) {
keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
++cursor[0];
} else {
return U_MISMATCH;
@ -123,7 +127,7 @@ class StringMatcher implements UnicodeMatcher {
result.append('(');
}
for (int i=0; i<pattern.length(); ++i) {
char keyChar = pattern.charAt(i);
char keyChar = pattern.charAt(i); // OK; see note (1) above
UnicodeMatcher m = data.lookup(keyChar);
if (m == null) {
TransliterationRule.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf);

View file

@ -3,124 +3,128 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransformTransliterator.java,v $
* $Date: 2001/11/17 20:45:35 $
* $Revision: 1.3 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.4 $
*/
package com.ibm.text;
import java.util.*;
/**
* An abstract class for transliterators based on a transform
* operation. To create a transliterator that implements a
* transformation, create a subclass of this class and implement the
* abstract <code>transform()</code> and <code>hasTransform()</code>
* methods.
* @author Alan Liu
*/
abstract class TransformTransliterator extends Transliterator {
/**
* Constructs a transliterator. For use by subclasses.
*/
protected TransformTransliterator(String id, UnicodeFilter f) {
super(id, f);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
protected void handleTransliterate(Replaceable text,
Position offsets, boolean incremental) {
int start;
for (start = offsets.start; start < offsets.limit; ++start) {
// Scan for the first character that is != its transform.
// If there are none, we fall out without doing anything.
char c = text.charAt(start);
if (hasTransform(c)) {
// There is a transforming character at start. Break
// up the remaining string, from start to
// offsets.limit, into segments of unfiltered and
// filtered characters. Only transform the unfiltered
// characters. As always, minimize the number of
// calls to Replaceable.replace().
int len = offsets.limit - start;
// assert(len >= 1);
char[] buf = new char[len];
text.getChars(start, offsets.limit, buf, 0);
int segStart = 0;
int segLimit;
UnicodeFilter filt = getFilter();
// lenDelta is the accumulated length difference for
// all transformed segments. It is new length - old
// length.
int lenDelta = 0;
// Set segStart, segLimit to the unfiltered segment
// starting with start. If the filter is null, then
// segStart/Limit will be set to the whole string,
// that is, 0/len.
do {
// Set segLimit to the first filtered char at or
// after segStart.
segLimit = len;
if (filt != null) {
segLimit = segStart;
while (segLimit < len && filt.contains(buf[segLimit])) {
++segLimit;
}
}
// Transform the unfiltered chars between segStart
// and segLimit.
int segLen = segLimit - segStart;
if (segLen != 0) {
String newStr = transform(
new String(buf, segStart, segLen));
text.replace(start, start + segLen, newStr);
start += newStr.length();
lenDelta += newStr.length() - segLen;
}
// Set segStart to the first unfiltered char at or
// after segLimit.
segStart = segLimit;
if (filt != null) {
while (segStart < len && !filt.contains(buf[segStart])) {
++segStart;
}
}
start += segStart - segLimit;
} while (segStart < len);
offsets.limit += lenDelta;
offsets.contextLimit += lenDelta;
offsets.start = offsets.limit;
return;
}
}
// assert(start == offsets.limit);
offsets.start = start;
}
/**
* Subclasses must implement this method to determine whether a
* given character has a transform that is not equal to itself.
* This is approximately equivalent to <code>c !=
* transform(String.valueOf(c))</code>, where
* <code>String.valueOf(c)</code> returns a String containing the
* single character (not integer) <code>c</code>. Subclasses that
* transform all their input can simply return <code>true</code>.
*/
protected abstract boolean hasTransform(int c);
/**
* Subclasses must implement this method to transform a string.
*/
protected abstract String transform(String s);
/**
 * Placeholder declaration for the transform-based transliterator base
 * class. The class currently has no members and no superclass; the
 * earlier implementation is retained only as commented-out code
 * immediately after this declaration.
 */
abstract class TransformTransliterator {
// Currently unused
}
///**
// * An abstract class for transliterators based on a transform
// * operation. To create a transliterator that implements a
// * transformation, create a subclass of this class and implement the
// * abstract <code>transform()</code> and <code>hasTransform()</code>
// * methods.
// * @author Alan Liu
// */
//abstract class TransformTransliterator extends Transliterator {
//
// /**
// * Constructs a transliterator. For use by subclasses.
// */
// protected TransformTransliterator(String id, UnicodeFilter f) {
// super(id, f);
// }
//
// /**
// * Implements {@link Transliterator#handleTransliterate}.
// */
// protected void handleTransliterate(Replaceable text,
// Position offsets, boolean incremental) {
//
// int start;
// for (start = offsets.start; start < offsets.limit; ++start) {
// // Scan for the first character that is != its transform.
// // If there are none, we fall out without doing anything.
// char c = text.charAt(start);
// if (hasTransform(c)) {
// // There is a transforming character at start. Break
// // up the remaining string, from start to
// // offsets.limit, into segments of unfiltered and
// // filtered characters. Only transform the unfiltered
// // characters. As always, minimize the number of
// // calls to Replaceable.replace().
//
// int len = offsets.limit - start;
// // assert(len >= 1);
//
// char[] buf = new char[len];
// text.getChars(start, offsets.limit, buf, 0);
//
// int segStart = 0;
// int segLimit;
// UnicodeFilter filt = getFilter();
//
// // lenDelta is the accumulated length difference for
// // all transformed segments. It is new length - old
// // length.
// int lenDelta = 0;
//
// // Set segStart, segLimit to the unfiltered segment
// // starting with start. If the filter is null, then
// // segStart/Limit will be set to the whole string,
// // that is, 0/len.
// do {
// // Set segLimit to the first filtered char at or
// // after segStart.
// segLimit = len;
// if (filt != null) {
// segLimit = segStart;
// while (segLimit < len && filt.contains(buf[segLimit])) {
// ++segLimit;
// }
// }
//
// // Transform the unfiltered chars between segStart
// // and segLimit.
// int segLen = segLimit - segStart;
// if (segLen != 0) {
// String newStr = transform(
// new String(buf, segStart, segLen));
// text.replace(start, start + segLen, newStr);
// start += newStr.length();
// lenDelta += newStr.length() - segLen;
// }
//
// // Set segStart to the first unfiltered char at or
// // after segLimit.
// segStart = segLimit;
// if (filt != null) {
// while (segStart < len && !filt.contains(buf[segStart])) {
// ++segStart;
// }
// }
// start += segStart - segLimit;
//
// } while (segStart < len);
//
// offsets.limit += lenDelta;
// offsets.contextLimit += lenDelta;
// offsets.start = offsets.limit;
// return;
// }
// }
// // assert(start == offsets.limit);
// offsets.start = start;
// }
//
// /**
// * Subclasses must implement this method to determine whether a
// * given character has a transform that is not equal to itself.
// * This is approximately equivalent to <code>c !=
// * transform(String.valueOf(c))</code>, where
// * <code>String.valueOf(c)</code> returns a String containing the
// * single character (not integer) <code>c</code>. Subclasses that
// * transform all their input can simply return <code>true</code>.
// */
// protected abstract boolean hasTransform(int c);
//
// /**
// * Subclasses must implement this method to transform a string.
// */
// protected abstract String transform(String s);
//}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
* $Date: 2001/11/30 22:27:29 $
* $Revision: 1.38 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.39 $
*
*****************************************************************************************
*/
@ -46,7 +46,7 @@ import com.ibm.util.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.38 $ $Date: 2001/11/30 22:27:29 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.39 $ $Date: 2001/12/03 21:33:58 $
*/
class TransliterationRule {
@ -396,12 +396,17 @@ class TransliterationRule {
// Backup oText by one
oText = posBefore(text, pos.start);
// Note (1): We process text in 16-bit code units, rather than
// 32-bit code points. This works because stand-ins are
// always in the BMP and because we are doing a literal match
// operation, which can be done 16-bits at a time.
for (oPattern=anteContextLength-1; oPattern>=0; --oPattern) {
char keyChar = pattern.charAt(oPattern);
char keyChar = pattern.charAt(oPattern); // See note (1)
UnicodeMatcher matcher = data.lookup(keyChar);
if (matcher == null) {
if (oText >= pos.contextStart &&
keyChar == text.charAt(oText)) {
keyChar == text.charAt(oText)) { // See note (1)
--oText;
} else {
return UnicodeMatcher.U_MISMATCH;
@ -457,14 +462,14 @@ class TransliterationRule {
// can match up to pos.contextLimit.
int matchLimit = (oPattern < keyLength) ? pos.limit : pos.contextLimit;
char keyChar = pattern.charAt(anteContextLength + oPattern++);
char keyChar = pattern.charAt(anteContextLength + oPattern++); // See note (1)
UnicodeMatcher matcher = data.lookup(keyChar);
if (matcher == null) {
// Don't need the oText < pos.contextLimit check if
// incremental is TRUE (because it's done above); do need
// it otherwise.
if (oText < matchLimit &&
keyChar == text.charAt(oText)) {
keyChar == text.charAt(oText)) { // See note (1)
++oText;
} else {
return UnicodeMatcher.U_MISMATCH;
@ -716,6 +721,7 @@ class TransliterationRule {
boolean escapeUnprintable,
StringBuffer quoteBuf) {
for (int i=0; i<text.length(); ++i) {
// Okay to process in 16-bit code units here
appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
}
}
@ -757,7 +763,7 @@ class TransliterationRule {
appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
}
char c = pattern.charAt(i);
char c = pattern.charAt(i); // Ok to use 16-bits here
UnicodeMatcher matcher = data.lookup(c);
if (matcher == null) {
appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -793,7 +799,7 @@ class TransliterationRule {
if (i == cursor) {
appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
}
char c = output.charAt(i);
char c = output.charAt(i); // Ok to use 16-bits here
int seg = data.lookupSegmentReference(c);
if (seg < 0) {
appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -872,6 +878,9 @@ class TransliterationRule {
/**
* $Log: TransliterationRule.java,v $
* Revision 1.39 2001/12/03 21:33:58 alan
* jitterbug 1373: more fixes to support supplementals
*
* Revision 1.38 2001/11/30 22:27:29 alan
* jitterbug 1560: fix double increment bug in getSourceSet
*

View file

@ -3,8 +3,8 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java,v $
* $Date: 2001/11/17 20:45:35 $
* $Revision: 1.4 $
* $Date: 2001/12/03 21:33:59 $
* $Revision: 1.5 $
*/
package com.ibm.text;
import java.util.*;
@ -63,16 +63,17 @@ class UnicodeNameTransliterator extends Transliterator {
String name;
while (cursor < limit) {
char c = text.charAt(cursor);
int c = UTF16.charAt(text, cursor);
if ((name=UCharacter.getName(c)) != null) {
str.setLength(1);
str.append(name).append(closeDelimiter);
text.replace(cursor, cursor+1, str.toString());
int clen = UTF16.getCharCount(c);
text.replace(cursor, cursor+clen, str.toString());
len = str.length();
cursor += len; // advance cursor past the replacement text just inserted
limit += len-1; // change in length is (len - 1)
limit += len-clen; // change in length
} else {
++cursor;
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2001/12/03 20:26:24 $
* $Revision: 1.52 $
* $Date: 2001/12/03 21:33:59 $
* $Revision: 1.53 $
*
*****************************************************************************************
*/
@ -204,7 +204,7 @@ import com.ibm.util.Utility;
* Unicode property
* </table>
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.52 $ $Date: 2001/12/03 20:26:24 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.53 $ $Date: 2001/12/03 21:33:59 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -396,16 +396,13 @@ public class UnicodeSet extends UnicodeFilter {
applyPattern(pattern, pos, null, ignoreWhitespace);
int i = pos.getIndex();
int n = pattern.length();
// Skip over trailing whitespace
if (ignoreWhitespace) {
while (i < n && Character.isWhitespace(pattern.charAt(i))) {
++i;
}
i = Utility.skipWhitespace(pattern, i);
}
if (i != n) {
if (i != pattern.length()) {
throw new IllegalArgumentException("Parse of \"" + pattern +
"\" failed at " + i);
}

View file

@ -3,8 +3,8 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/NameUnicodeTransliterator.java,v $
* $Date: 2001/11/21 20:56:50 $
* $Revision: 1.5 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.6 $
*/
package com.ibm.text;
import java.util.*;
@ -73,9 +73,10 @@ class NameUnicodeTransliterator extends Transliterator {
int mode = 0;
int ibuf = 0;
int openPos = offsets.start; // position of openDelimiter
for (; cursor < limit; ++cursor) {
char c = text.charAt(cursor);
int c;
for (; cursor < limit; cursor+=UTF16.getCharCount(c)) {
c = UTF16.charAt(text, cursor);
switch (mode) {
case 0: // looking for open delimiter

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/StringMatcher.java,v $
* $Date: 2001/11/29 22:31:18 $
* $Revision: 1.4 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.5 $
*
*****************************************************************************************
*/
@ -48,16 +48,20 @@ class StringMatcher implements UnicodeMatcher {
int[] offset,
int limit,
boolean incremental) {
// Note (1): We process text in 16-bit code units, rather than
// 32-bit code points. This works because stand-ins are
// always in the BMP and because we are doing a literal match
// operation, which can be done 16-bits at a time.
int i;
int[] cursor = new int[] { offset[0] };
if (limit < cursor[0]) {
// Match in the reverse direction
for (i=pattern.length()-1; i>=0; --i) {
char keyChar = pattern.charAt(i);
char keyChar = pattern.charAt(i); // OK; see note (1) above
UnicodeMatcher subm = data.lookup(keyChar);
if (subm == null) {
if (cursor[0] >= limit &&
keyChar == text.charAt(cursor[0])) {
keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
--cursor[0];
} else {
return U_MISMATCH;
@ -84,14 +88,14 @@ class StringMatcher implements UnicodeMatcher {
// without completing our match.
return U_PARTIAL_MATCH;
}
char keyChar = pattern.charAt(i);
char keyChar = pattern.charAt(i); // OK; see note (1) above
UnicodeMatcher subm = data.lookup(keyChar);
if (subm == null) {
// Don't need the cursor < limit check if
// incremental is true (because it's done above); do need
// it otherwise.
if (cursor[0] < limit &&
keyChar == text.charAt(cursor[0])) {
keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
++cursor[0];
} else {
return U_MISMATCH;
@ -123,7 +127,7 @@ class StringMatcher implements UnicodeMatcher {
result.append('(');
}
for (int i=0; i<pattern.length(); ++i) {
char keyChar = pattern.charAt(i);
char keyChar = pattern.charAt(i); // OK; see note (1) above
UnicodeMatcher m = data.lookup(keyChar);
if (m == null) {
TransliterationRule.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf);

View file

@ -3,124 +3,128 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransformTransliterator.java,v $
* $Date: 2001/11/17 20:45:35 $
* $Revision: 1.3 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.4 $
*/
package com.ibm.text;
import java.util.*;
/**
* An abstract class for transliterators based on a transform
* operation. To create a transliterator that implements a
* transformation, create a subclass of this class and implement the
* abstract <code>transform()</code> and <code>hasTransform()</code>
* methods.
* @author Alan Liu
*/
abstract class TransformTransliterator extends Transliterator {
/**
* Constructs a transliterator. For use by subclasses.
*/
protected TransformTransliterator(String id, UnicodeFilter f) {
super(id, f);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
protected void handleTransliterate(Replaceable text,
Position offsets, boolean incremental) {
int start;
for (start = offsets.start; start < offsets.limit; ++start) {
// Scan for the first character that is != its transform.
// If there are none, we fall out without doing anything.
char c = text.charAt(start);
if (hasTransform(c)) {
// There is a transforming character at start. Break
// up the remaining string, from start to
// offsets.limit, into segments of unfiltered and
// filtered characters. Only transform the unfiltered
// characters. As always, minimize the number of
// calls to Replaceable.replace().
int len = offsets.limit - start;
// assert(len >= 1);
char[] buf = new char[len];
text.getChars(start, offsets.limit, buf, 0);
int segStart = 0;
int segLimit;
UnicodeFilter filt = getFilter();
// lenDelta is the accumulated length difference for
// all transformed segments. It is new length - old
// length.
int lenDelta = 0;
// Set segStart, segLimit to the unfiltered segment
// starting with start. If the filter is null, then
// segStart/Limit will be set to the whole string,
// that is, 0/len.
do {
// Set segLimit to the first filtered char at or
// after segStart.
segLimit = len;
if (filt != null) {
segLimit = segStart;
while (segLimit < len && filt.contains(buf[segLimit])) {
++segLimit;
}
}
// Transform the unfiltered chars between segStart
// and segLimit.
int segLen = segLimit - segStart;
if (segLen != 0) {
String newStr = transform(
new String(buf, segStart, segLen));
text.replace(start, start + segLen, newStr);
start += newStr.length();
lenDelta += newStr.length() - segLen;
}
// Set segStart to the first unfiltered char at or
// after segLimit.
segStart = segLimit;
if (filt != null) {
while (segStart < len && !filt.contains(buf[segStart])) {
++segStart;
}
}
start += segStart - segLimit;
} while (segStart < len);
offsets.limit += lenDelta;
offsets.contextLimit += lenDelta;
offsets.start = offsets.limit;
return;
}
}
// assert(start == offsets.limit);
offsets.start = start;
}
/**
* Subclasses must implement this method to determine whether a
* given character has a transform that is not equal to itself.
* This is approximately equivalent to <code>c !=
* transform(String.valueOf(c))</code>, where
* <code>String.valueOf(c)</code> returns a String containing the
* single character (not integer) <code>c</code>. Subclasses that
* transform all their input can simply return <code>true</code>.
*/
protected abstract boolean hasTransform(int c);
/**
* Subclasses must implement this method to transform a string.
*/
protected abstract String transform(String s);
/**
 * Placeholder declaration for the transform-based transliterator base
 * class. The class currently has no members and no superclass; the
 * earlier implementation is retained only as commented-out code
 * immediately after this declaration.
 */
abstract class TransformTransliterator {
// Currently unused
}
///**
// * An abstract class for transliterators based on a transform
// * operation. To create a transliterator that implements a
// * transformation, create a subclass of this class and implement the
// * abstract <code>transform()</code> and <code>hasTransform()</code>
// * methods.
// * @author Alan Liu
// */
//abstract class TransformTransliterator extends Transliterator {
//
// /**
// * Constructs a transliterator. For use by subclasses.
// */
// protected TransformTransliterator(String id, UnicodeFilter f) {
// super(id, f);
// }
//
// /**
// * Implements {@link Transliterator#handleTransliterate}.
// */
// protected void handleTransliterate(Replaceable text,
// Position offsets, boolean incremental) {
//
// int start;
// for (start = offsets.start; start < offsets.limit; ++start) {
// // Scan for the first character that is != its transform.
// // If there are none, we fall out without doing anything.
// char c = text.charAt(start);
// if (hasTransform(c)) {
// // There is a transforming character at start. Break
// // up the remaining string, from start to
// // offsets.limit, into segments of unfiltered and
// // filtered characters. Only transform the unfiltered
// // characters. As always, minimize the number of
// // calls to Replaceable.replace().
//
// int len = offsets.limit - start;
// // assert(len >= 1);
//
// char[] buf = new char[len];
// text.getChars(start, offsets.limit, buf, 0);
//
// int segStart = 0;
// int segLimit;
// UnicodeFilter filt = getFilter();
//
// // lenDelta is the accumulated length difference for
// // all transformed segments. It is new length - old
// // length.
// int lenDelta = 0;
//
// // Set segStart, segLimit to the unfiltered segment
// // starting with start. If the filter is null, then
// // segStart/Limit will be set to the whole string,
// // that is, 0/len.
// do {
// // Set segLimit to the first filtered char at or
// // after segStart.
// segLimit = len;
// if (filt != null) {
// segLimit = segStart;
// while (segLimit < len && filt.contains(buf[segLimit])) {
// ++segLimit;
// }
// }
//
// // Transform the unfiltered chars between segStart
// // and segLimit.
// int segLen = segLimit - segStart;
// if (segLen != 0) {
// String newStr = transform(
// new String(buf, segStart, segLen));
// text.replace(start, start + segLen, newStr);
// start += newStr.length();
// lenDelta += newStr.length() - segLen;
// }
//
// // Set segStart to the first unfiltered char at or
// // after segLimit.
// segStart = segLimit;
// if (filt != null) {
// while (segStart < len && !filt.contains(buf[segStart])) {
// ++segStart;
// }
// }
// start += segStart - segLimit;
//
// } while (segStart < len);
//
// offsets.limit += lenDelta;
// offsets.contextLimit += lenDelta;
// offsets.start = offsets.limit;
// return;
// }
// }
// // assert(start == offsets.limit);
// offsets.start = start;
// }
//
// /**
// * Subclasses must implement this method to determine whether a
// * given character has a transform that is not equal to itself.
// * This is approximately equivalent to <code>c !=
// * transform(String.valueOf(c))</code>, where
// * <code>String.valueOf(c)</code> returns a String containing the
// * single character (not integer) <code>c</code>. Subclasses that
// * transform all their input can simply return <code>true</code>.
// */
// protected abstract boolean hasTransform(int c);
//
// /**
// * Subclasses must implement this method to transform a string.
// */
// protected abstract String transform(String s);
//}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
* $Date: 2001/11/30 22:27:29 $
* $Revision: 1.38 $
* $Date: 2001/12/03 21:33:58 $
* $Revision: 1.39 $
*
*****************************************************************************************
*/
@ -46,7 +46,7 @@ import com.ibm.util.Utility;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.38 $ $Date: 2001/11/30 22:27:29 $
* @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.39 $ $Date: 2001/12/03 21:33:58 $
*/
class TransliterationRule {
@ -396,12 +396,17 @@ class TransliterationRule {
// Backup oText by one
oText = posBefore(text, pos.start);
// Note (1): We process text in 16-bit code units, rather than
// 32-bit code points. This works because stand-ins are
// always in the BMP and because we are doing a literal match
// operation, which can be done 16-bits at a time.
for (oPattern=anteContextLength-1; oPattern>=0; --oPattern) {
char keyChar = pattern.charAt(oPattern);
char keyChar = pattern.charAt(oPattern); // See note (1)
UnicodeMatcher matcher = data.lookup(keyChar);
if (matcher == null) {
if (oText >= pos.contextStart &&
keyChar == text.charAt(oText)) {
keyChar == text.charAt(oText)) { // See note (1)
--oText;
} else {
return UnicodeMatcher.U_MISMATCH;
@ -457,14 +462,14 @@ class TransliterationRule {
// can match up to pos.contextLimit.
int matchLimit = (oPattern < keyLength) ? pos.limit : pos.contextLimit;
char keyChar = pattern.charAt(anteContextLength + oPattern++);
char keyChar = pattern.charAt(anteContextLength + oPattern++); // See note (1)
UnicodeMatcher matcher = data.lookup(keyChar);
if (matcher == null) {
// Don't need the oText < pos.contextLimit check if
// incremental is TRUE (because it's done above); do need
// it otherwise.
if (oText < matchLimit &&
keyChar == text.charAt(oText)) {
keyChar == text.charAt(oText)) { // See note (1)
++oText;
} else {
return UnicodeMatcher.U_MISMATCH;
@ -716,6 +721,7 @@ class TransliterationRule {
boolean escapeUnprintable,
StringBuffer quoteBuf) {
for (int i=0; i<text.length(); ++i) {
// Okay to process in 16-bit code units here
appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
}
}
@ -757,7 +763,7 @@ class TransliterationRule {
appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
}
char c = pattern.charAt(i);
char c = pattern.charAt(i); // Ok to use 16-bits here
UnicodeMatcher matcher = data.lookup(c);
if (matcher == null) {
appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -793,7 +799,7 @@ class TransliterationRule {
if (i == cursor) {
appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
}
char c = output.charAt(i);
char c = output.charAt(i); // Ok to use 16-bits here
int seg = data.lookupSegmentReference(c);
if (seg < 0) {
appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -872,6 +878,9 @@ class TransliterationRule {
/**
* $Log: TransliterationRule.java,v $
* Revision 1.39 2001/12/03 21:33:58 alan
* jitterbug 1373: more fixes to support supplementals
*
* Revision 1.38 2001/11/30 22:27:29 alan
* jitterbug 1560: fix double increment bug in getSourceSet
*

View file

@ -3,8 +3,8 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeNameTransliterator.java,v $
* $Date: 2001/11/17 20:45:35 $
* $Revision: 1.4 $
* $Date: 2001/12/03 21:33:59 $
* $Revision: 1.5 $
*/
package com.ibm.text;
import java.util.*;
@ -63,16 +63,17 @@ class UnicodeNameTransliterator extends Transliterator {
String name;
while (cursor < limit) {
char c = text.charAt(cursor);
int c = UTF16.charAt(text, cursor);
if ((name=UCharacter.getName(c)) != null) {
str.setLength(1);
str.append(name).append(closeDelimiter);
text.replace(cursor, cursor+1, str.toString());
int clen = UTF16.getCharCount(c);
text.replace(cursor, cursor+clen, str.toString());
len = str.length();
cursor += len; // advance cursor past the replacement text just inserted
limit += len-1; // change in length is (len - 1)
limit += len-clen; // change in length
} else {
++cursor;
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
* $Date: 2001/12/03 20:26:24 $
* $Revision: 1.52 $
* $Date: 2001/12/03 21:33:59 $
* $Revision: 1.53 $
*
*****************************************************************************************
*/
@ -204,7 +204,7 @@ import com.ibm.util.Utility;
* Unicode property
* </table>
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.52 $ $Date: 2001/12/03 20:26:24 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.53 $ $Date: 2001/12/03 21:33:59 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -396,16 +396,13 @@ public class UnicodeSet extends UnicodeFilter {
applyPattern(pattern, pos, null, ignoreWhitespace);
int i = pos.getIndex();
int n = pattern.length();
// Skip over trailing whitespace
if (ignoreWhitespace) {
while (i < n && Character.isWhitespace(pattern.charAt(i))) {
++i;
}
i = Utility.skipWhitespace(pattern, i);
}
if (i != n) {
if (i != pattern.length()) {
throw new IllegalArgumentException("Parse of \"" + pattern +
"\" failed at " + i);
}