ICU-21645 test & fix unescapeAt(2 supp escapes)

This commit is contained in:
Markus Scherer 2021-06-19 02:01:58 +00:00
parent fc28b3521d
commit d9aa1ecab0
2 changed files with 18 additions and 3 deletions

View file

@@ -863,8 +863,7 @@ public final class Utility {
// if there is a trail surrogate after it, either as an
// escape or as a literal. If so, join them up into a
// supplementary.
-if (offset < length &&
-UTF16.isLeadSurrogate((char) result)) {
+if (offset < length && result <= 0xffff && UTF16.isLeadSurrogate((char) result)) {
int ahead = offset+1;
c = s.charAt(offset); // [sic] get 16-bit code unit
if (c == '\\' && ahead < length) {
@@ -872,7 +871,7 @@ public final class Utility {
c = unescapeAt(s, o);
ahead = o[0];
}
-if (UTF16.isTrailSurrogate((char) c)) {
+if (c <= 0xffff && UTF16.isTrailSurrogate((char) c)) {
offset = ahead;
result = Character.toCodePoint((char) result, (char) c);
}

View file

@@ -49,6 +49,22 @@ public class UtilityTest extends TestFmwk {
if (!result.equals(expect)) {
errln("FAIL: Utility.unescape() returned " + result + ", exp. " + expect);
}
// Regression test for ICU-21645
String s = "\\U0001DA8B\\U0001DF00-\\U0001DF1E";
int[] offset16 = new int[] { 1 }; // after the backslash
// This returned U+B2F00 for the first _two_ escapes.
int c = Utility.unescapeAt(s, offset16);
assertEquals(s + " unescape at 1, code point", 0x1DA8B, c);
assertEquals(s + " unescape at 1, offset", 10, offset16[0]);
String pattern = "[" + s + "]";
// This threw an IllegalArgumentException because the parser called Utility.unescapeAt()
// and saw an invalid range of B2F00..1DF1E (start >= end).
UnicodeSet set = new UnicodeSet(pattern);
assertEquals(pattern + " size", 32, set.size());
assertTrue(pattern + " contains U+1DA8B", set.contains(0x1DA8B));
assertTrue(pattern + " contains U+1DF00..U+1DF1E", set.contains(0x1DF00, 0x1DF1E));
assertFalse(pattern + " contains U+1DF1F", set.contains(0x1DF1F));
}
@Test