diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java
index 5e981655015..49c74f4d95b 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java
@@ -52,7 +52,7 @@ public class RuleCharacterIterator {
/**
* Current variable expansion, or null if none.
*/
- private char[] buf;
+ private String buf;
/**
* Position within buf[]. Meaningless if buf == null.
@@ -79,7 +79,7 @@ public class RuleCharacterIterator {
/**
* Bitmask option to enable parsing of escape sequences. If (options &
* PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
- * to its value. Escapes are parsed using Utility.unescapeAt().
+ * to its value. Escapes are parsed using Utility.unescapeAndLengthAt().
*/
public static final int PARSE_ESCAPES = 2;
@@ -90,6 +90,13 @@ public class RuleCharacterIterator {
*/
public static final int SKIP_WHITESPACE = 4;
+ /** Saved iterator state, for use with {@link #getPos(Position)} and {@link #setPos(Position)}. */
+ public static final class Position {
+ private String buf; // saved variable-expansion buffer; null when not inside an expansion
+ private int bufPos; // saved offset within buf
+ private int posIndex; // saved pos.getIndex() of the iterator
+ }
+
/**
* Constructs an iterator over the given text, starting at the given
* position.
@@ -144,15 +151,17 @@ public class RuleCharacterIterator {
break;
}
bufPos = 0;
- buf = sym.lookup(name);
- if (buf == null) {
+ char[] chars = sym.lookup(name);
+ if (chars == null) {
+ buf = null;
throw new IllegalArgumentException(
"Undefined variable: " + name);
}
+ buf = new String(chars); // must precede the empty check, which resets buf to null
// Handle empty variable value
- if (buf.length == 0) {
+ if (chars.length == 0) {
buf = null;
}
continue;
}
@@ -162,13 +171,14 @@ public class RuleCharacterIterator {
}
if (c == '\\' && (options & PARSE_ESCAPES) != 0) {
- int offset[] = new int[] { 0 };
- c = Utility.unescapeAt(lookahead(), offset);
- jumpahead(offset[0]);
- isEscaped = true;
- if (c < 0) {
+ int cpAndLength = Utility.unescapeAndLengthAt(
+ getCurrentBuffer(), getCurrentBufferPos());
+ if (cpAndLength < 0) {
throw new IllegalArgumentException("Invalid escape");
}
+ c = Utility.cpFromCodePointAndLength(cpAndLength);
+ jumpahead(Utility.lengthFromCodePointAndLength(cpAndLength));
+ isEscaped = true;
}
break;
@@ -199,7 +209,7 @@ public class RuleCharacterIterator {
* restore this iterator's position. Usage idiom:
*
* RuleCharacterIterator iterator = ...;
- * Object pos = iterator.getPos(null); // allocate position object
+ * Position pos = iterator.getPos(null); // allocate position object
* for (;;) {
* pos = iterator.getPos(pos); // reuse position object
* int c = iterator.next(...);
@@ -213,15 +223,13 @@ public class RuleCharacterIterator {
* @return a position object which may be passed to setPos(),
* either `p,' or if `p' == null, a newly-allocated object
*/
- public Object getPos(Object p) {
+ public Position getPos(Position p) {
if (p == null) {
- return new Object[] {buf, new int[] {pos.getIndex(), bufPos}};
+ p = new Position(); // caller passed no object to reuse; filled in below
}
- Object[] a = (Object[]) p;
- a[0] = buf;
- int[] v = (int[]) a[1];
- v[0] = pos.getIndex();
- v[1] = bufPos;
+ p.buf = buf; // current variable expansion, or null if none
+ p.bufPos = bufPos; // offset within buf
+ p.posIndex = pos.getIndex(); // current index reported by pos
return p;
}
@@ -230,12 +238,10 @@ public class RuleCharacterIterator {
* returned the given object.
* @param p a position object previously returned by getPos()
*/
- public void setPos(Object p) {
- Object[] a = (Object[]) p;
- buf = (char[]) a[0];
- int[] v = (int[]) a[1];
- pos.setIndex(v[0]);
- bufPos = v[1];
+ public void setPos(Position p) {
+ buf = p.buf; // restore the variable expansion (null if none was active)
+ pos.setIndex(p.posIndex); // restore the index saved from pos.getIndex()
+ bufPos = p.bufPos; // restore the offset within buf
}
/**
@@ -260,25 +266,35 @@ public class RuleCharacterIterator {
- * Returns a string containing the remainder of the characters to be
- * returned by this iterator, without any option processing. If the
- * iterator is currently within a variable expansion, this will only
- * extend to the end of the variable expansion. This method is provided
- * so that iterators may interoperate with string-based APIs. The typical
- * sequence of calls is to call skipIgnored(), then call lookahead(), then
- * parse the string returned by lookahead(), then call jumpahead() to
+ * Returns the string that this iterator is currently reading from, without
+ * any option processing: the current variable expansion if the iterator is
+ * within one, otherwise the text being iterated. The characters still to be
+ * returned by this iterator start at index getCurrentBufferPos().
+ * This method, together with getCurrentBufferPos() (which replace the former lookahead()),
+ * is provided so that iterators may interoperate with string-based APIs. The typical
+ * sequence of calls is to call skipIgnored(), then call these methods, then
+ * parse that substring, then call jumpahead() to
* resynchronize the iterator.
- * @return a string containing the characters to be returned by future
- * calls to next()
+ * @return the current buffer; the characters to be returned by future
+ * calls to next() start at index getCurrentBufferPos()
*/
- public String lookahead() {
+ public String getCurrentBuffer() {
if (buf != null) {
- return new String(buf, bufPos, buf.length - bufPos);
+ return buf; // whole expansion, not just the unread tail; pair with getCurrentBufferPos()
} else {
- return text.substring(pos.getIndex());
+ return text; // whole text; the unread part starts at getCurrentBufferPos()
+ }
+ }
+
+ public int getCurrentBufferPos() { // current index into the string returned by getCurrentBuffer()
+ if (buf != null) {
+ return bufPos;
+ } else {
+ return pos.getIndex();
}
}
/**
* Advances the position by the given number of 16-bit code units.
- * This is useful in conjunction with the lookahead() method.
+ * This is useful in conjunction with getCurrentBuffer()+getCurrentBufferPos()
+ * (formerly lookahead()).
* @param count the number of 16-bit code units to jump over
*/
public void jumpahead(int count) {
@@ -287,10 +303,10 @@ public class RuleCharacterIterator {
}
if (buf != null) {
bufPos += count;
- if (bufPos > buf.length) {
+ if (bufPos > buf.length()) {
throw new IllegalArgumentException();
}
- if (bufPos == buf.length) {
+ if (bufPos == buf.length()) {
buf = null;
}
} else {
@@ -321,7 +337,7 @@ public class RuleCharacterIterator {
*/
private int _current() {
if (buf != null) {
- return UTF16.charAt(buf, 0, buf.length, bufPos);
+ return UTF16.charAt(buf, bufPos);
} else {
int i = pos.getIndex();
return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
@@ -335,7 +351,7 @@ public class RuleCharacterIterator {
private void _advance(int count) {
if (buf != null) {
bufPos += count;
- if (bufPos == buf.length) {
+ if (bufPos == buf.length()) {
buf = null;
}
} else {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
index cd906011552..25b02e50694 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
@@ -778,14 +778,16 @@ public final class Utility {
};
/**
- * Convert an escape to a 32-bit code point value. We attempt
+ * Converts an escape to a code point value. We attempt
* to parallel the icu4c unescapeAt() function.
- * @param offset16 an array containing offset to the character
- * after the backslash. Upon return offset16[0] will
- * be updated to point after the escape sequence.
- * @return character value from 0 to 10FFFF, or -1 on error.
+ * This function returns an integer with
+ * both the code point (bits 28..8) and the length of the escape sequence (bits 7..0).
+ * offset+length is the index after the escape sequence.
+ *
+ * @param offset the offset to the character after the backslash.
+ * @return the code point and length, or -1 on error.
*/
- public static int unescapeAt(String s, int[] offset16) {
+ public static int unescapeAndLengthAt(CharSequence s, int offset) {
int c;
int result = 0;
int n = 0;
@@ -797,11 +799,11 @@ public final class Utility {
boolean braces = false;
/* Check that offset is in range */
- int offset = offset16[0];
int length = s.length();
if (offset < 0 || offset >= length) {
return -1;
}
+ int start = offset;
/* Fetch first UChar after '\\' */
c = Character.codePointAt(s, offset);
@@ -867,24 +869,24 @@ public final class Utility {
int ahead = offset+1;
c = s.charAt(offset); // [sic] get 16-bit code unit
if (c == '\\' && ahead < length) {
- int o[] = new int[] { ahead };
- c = unescapeAt(s, o);
- ahead = o[0];
+ int cpAndLength = unescapeAndLengthAt(s, ahead);
+ if (cpAndLength >= 0) {
+ c = cpAndLength >> 8;
+ ahead += cpAndLength & 0xff;
+ }
}
if (c <= 0xffff && UTF16.isTrailSurrogate((char) c)) {
offset = ahead;
result = Character.toCodePoint((char) result, (char) c);
}
}
- offset16[0] = offset;
- return result;
+ return codePointAndLength(result, start, offset);
}
/* Convert C-style escapes in table */
for (i=0; i= 0;
+ return cpAndLength >> 8;
+ }
+
+ public static int lengthFromCodePointAndLength(int cpAndLength) {
+ assert cpAndLength >= 0;
+ return cpAndLength & 0xff;
}
/**
- * Convert all escapes in a given string using unescapeAt().
+ * Convert all escapes in a given string using unescapeAndLengthAt().
* @exception IllegalArgumentException if an invalid escape is
* seen.
*/
- public static String unescape(String s) {
- StringBuilder buf = new StringBuilder();
- int[] pos = new int[1];
+ public static String unescape(CharSequence s) {
+ StringBuilder buf = null;
for (int i=0; i> 8);
+ i += cpAndLength & 0xff;
+ } else if (buf != null) {
+ // We could optimize this further by appending whole substrings between escapes.
buf.append(c);
}
}
+ if (buf == null) {
+ // No escapes in s.
+ return s.toString();
+ }
return buf.toString();
}
/**
- * Convert all escapes in a given string using unescapeAt().
+ * Convert all escapes in a given string using unescapeAndLengthAt().
* Leave invalid escape sequences unchanged.
*/
- public static String unescapeLeniently(String s) {
- StringBuilder buf = new StringBuilder();
- int[] pos = new int[1];
+ public static String unescapeLeniently(CharSequence s) {
+ StringBuilder buf = null;
for (int i=0; i> 8);
+ i += cpAndLength & 0xff;
}
- } else {
+ } else if (buf != null) {
+ // We could optimize this further by appending whole substrings between escapes.
buf.append(c);
}
}
+ if (buf == null) {
+ // No escapes in s.
+ return s.toString();
+ }
return buf.toString();
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java
index d37ea468ff5..166948b417d 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java
@@ -87,7 +87,7 @@ public class TokenIterator {
public int getLineNumber() {
return reader.getLineNumber();
}
-
+
/**
* Return a string description of the position of the last line
* returned by readLine() or readLineSkippingComments().
@@ -95,7 +95,7 @@ public class TokenIterator {
public String describePosition() {
return reader.describePosition() + ':' + (lastpos+1);
}
-
+
/**
* Read the next token from 'this.line' and append it to
* 'this.buf'. Tokens are separated by Pattern_White_Space. Tokens
@@ -127,22 +127,17 @@ public class TokenIterator {
buf.append(c);
break;
}
- int[] posref = null;
while (position < line.length()) {
c = line.charAt(position); // 16-bit ok
if (c == '\\') {
- if (posref == null) {
- posref = new int[1];
- }
- posref[0] = position+1;
- int c32 = Utility.unescapeAt(line, posref);
- if (c32 < 0) {
+ int cpAndLength = Utility.unescapeAndLengthAt(line, position + 1);
+ if (cpAndLength < 0) {
throw new RuntimeException("Invalid escape at " +
reader.describePosition() + ':' +
position);
}
- UTF16.append(buf, c32);
- position = posref[0];
+ UTF16.append(buf, Utility.cpFromCodePointAndLength(cpAndLength));
+ position += 1 + Utility.lengthFromCodePointAndLength(cpAndLength);
} else if ((quote != 0 && c == quote) ||
(quote == 0 && PatternProps.isWhiteSpace(c))) {
return ++position;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
index c9a8aff5a6d..9249ba86edc 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
@@ -823,19 +823,18 @@ class RBBIRuleScanner {
//
// check for backslash escaped characters.
- // Use String.unescapeAt() to handle them.
//
if (c.fChar == '\\') {
c.fEscaped = true;
- int[] unescapeIndex = new int[1];
- unescapeIndex[0] = fNextIndex;
- c.fChar = Utility.unescapeAt(fRB.fRules, unescapeIndex);
- if (unescapeIndex[0] == fNextIndex) {
+ int cpAndLength = Utility.unescapeAndLengthAt(fRB.fRules, fNextIndex);
+ if (cpAndLength < 0) {
error(RBBIRuleBuilder.U_BRK_HEX_DIGITS_EXPECTED);
}
+ c.fChar = Utility.cpFromCodePointAndLength(cpAndLength);
+ int length = Utility.lengthFromCodePointAndLength(cpAndLength);
- fCharNum += unescapeIndex[0] - fNextIndex;
- fNextIndex = unescapeIndex[0];
+ fCharNum += length;
+ fNextIndex += length;
}
}
// putc(c.fChar, stdout);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
index 04154054ff8..d41ff99ea80 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@@ -2555,7 +2555,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa
StringBuilder patBuf = new StringBuilder(), buf = null;
boolean usePat = false;
UnicodeSet scratch = null;
- Object backup = null;
+ RuleCharacterIterator.Position backup = null;
// mode: 0=before [, 1=between [...], 2=after ]
// lastItem: 0=none, 1=char, 2=set
@@ -3673,7 +3673,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa
int iterOpts) {
boolean result = false;
iterOpts &= ~RuleCharacterIterator.PARSE_ESCAPES;
- Object pos = chars.getPos(null);
+ RuleCharacterIterator.Position pos = chars.getPos(null);
int c = chars.next(iterOpts);
if (c == '[' || c == '\\') {
int d = chars.next(iterOpts & ~RuleCharacterIterator.SKIP_WHITESPACE);
@@ -3784,14 +3784,16 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa
*/
private void applyPropertyPattern(RuleCharacterIterator chars,
Appendable rebuiltPat, SymbolTable symbols) {
- String patStr = chars.lookahead();
- ParsePosition pos = new ParsePosition(0);
+ String patStr = chars.getCurrentBuffer();
+ int start = chars.getCurrentBufferPos();
+ ParsePosition pos = new ParsePosition(start); // parse in place from the iterator's current offset
applyPropertyPattern(patStr, pos, symbols);
- if (pos.getIndex() == 0) {
+ int length = pos.getIndex() - start; // how far the parse advanced pos from start
+ if (length == 0) {
syntaxError(chars, "Invalid property pattern");
}
- chars.jumpahead(pos.getIndex());
- append(rebuiltPat, patStr.substring(0, pos.getIndex()));
+ chars.jumpahead(length);
+ append(rebuiltPat, patStr.substring(start, pos.getIndex()));
}
//----------------------------------------------------------------
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java
index 9e300a729b2..f16a121e71b 100644
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java
@@ -493,12 +493,12 @@ class TransliteratorParser {
if (pos == limit) {
syntaxError("Trailing backslash", rule, start);
}
- iref[0] = pos;
- int escaped = Utility.unescapeAt(rule, iref);
- pos = iref[0];
- if (escaped == -1) {
+ int cpAndLength = Utility.unescapeAndLengthAt(rule, pos);
+ if (cpAndLength < 0) {
syntaxError("Malformed escape", rule, start);
}
+ int escaped = Utility.cpFromCodePointAndLength(cpAndLength);
+ pos += Utility.lengthFromCodePointAndLength(cpAndLength);
parser.checkVariableRange(escaped, rule, start);
UTF16.append(buf, escaped);
continue;
@@ -902,16 +902,16 @@ class TransliteratorParser {
boolean parsingIDs = true;
int ruleCount = 0;
- dataVector = new ArrayList();
- idBlockVector = new ArrayList();
+ dataVector = new ArrayList<>();
+ idBlockVector = new ArrayList<>();
curData = null;
direction = dir;
compoundFilter = null;
- variablesVector = new ArrayList