mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
parent
b1a685a676
commit
ce640dc850
6 changed files with 88 additions and 0 deletions
|
@ -856,6 +856,10 @@ UChar32 RBBIRuleScanner::nextCharLL() {
|
|||
return (UChar32)-1;
|
||||
}
|
||||
ch = fRB->fRules.char32At(fNextIndex);
|
||||
if (U_IS_SURROGATE(ch)) {
|
||||
error(U_ILLEGAL_CHAR_FOUND);
|
||||
return U_SENTINEL;
|
||||
}
|
||||
fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
|
||||
|
||||
if (ch == chCR ||
|
||||
|
|
|
@ -134,6 +134,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
|||
TESTCASE_AUTO(Test16BitsTrieWith16BitStateTable);
|
||||
TESTCASE_AUTO(TestTable_8_16_Bits);
|
||||
TESTCASE_AUTO(TestBug13590);
|
||||
TESTCASE_AUTO(TestUnpairedSurrogate);
|
||||
|
||||
#if U_ENABLE_TRACING
|
||||
TESTCASE_AUTO(TestTraceCreateCharacter);
|
||||
|
@ -5323,4 +5324,43 @@ void RBBITest::TestTraceCreateBreakEngine(void) {
|
|||
}
|
||||
#endif
|
||||
|
||||
void RBBITest::TestUnpairedSurrogate() {
|
||||
UnicodeString rules(u"ab;");
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError pe;
|
||||
RuleBasedBreakIterator bi1(rules, pe, status);
|
||||
assertSuccess(WHERE, status);
|
||||
UnicodeString rtRules = bi1.getRules();
|
||||
// make sure the simple one work first.
|
||||
assertEquals(WHERE, rules, rtRules);
|
||||
|
||||
|
||||
rules = UnicodeString(u"a\\ud800b;").unescape();
|
||||
pe.line = 0;
|
||||
pe.offset = 0;
|
||||
RuleBasedBreakIterator bi2(rules, pe, status);
|
||||
assertEquals(WHERE "unpaired lead surrogate", U_ILLEGAL_CHAR_FOUND , status);
|
||||
if (pe.line != 1 || pe.offset != 1) {
|
||||
errln("pe (line, offset) expected (1, 1), got (%d, %d)", pe.line, pe.offset);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
rules = UnicodeString(u"a\\ude00b;").unescape();
|
||||
pe.line = 0;
|
||||
pe.offset = 0;
|
||||
RuleBasedBreakIterator bi3(rules, pe, status);
|
||||
assertEquals(WHERE "unpaired tail surrogate", U_ILLEGAL_CHAR_FOUND , status);
|
||||
if (pe.line != 1 || pe.offset != 1) {
|
||||
errln("pe (line, offset) expected (1, 1), got (%d, %d)", pe.line, pe.offset);
|
||||
}
|
||||
|
||||
// make sure the surrogate one work too.
|
||||
status = U_ZERO_ERROR;
|
||||
rules = UnicodeString(u"a😀b;");
|
||||
RuleBasedBreakIterator bi4(rules, pe, status);
|
||||
rtRules = bi4.getRules();
|
||||
assertEquals(WHERE, rules, rtRules);
|
||||
}
|
||||
|
||||
#endif // #if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
|
|
@ -83,6 +83,7 @@ public:
|
|||
void TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi);
|
||||
void TestBug13692();
|
||||
void TestDebugRules();
|
||||
void TestUnpairedSurrogate();
|
||||
|
||||
void TestDebug();
|
||||
void TestProperties();
|
||||
|
|
|
@ -85,6 +85,9 @@ class RBBIRuleBuilder {
|
|||
// using these simplified the porting, and consolidated the
|
||||
// creation of Java exceptions
|
||||
//
|
||||
static final int U_ILLEGAL_CHAR_FOUND = 12;
|
||||
/**< Character conversion: Illegal input sequence/combination of input units. */
|
||||
|
||||
static final int U_BRK_ERROR_START = 0x10200;
|
||||
/**< Start of codes indicating Break Iterator failures */
|
||||
|
||||
|
|
|
@ -723,6 +723,9 @@ class RBBIRuleScanner {
|
|||
return -1;
|
||||
}
|
||||
ch = UTF16.charAt(fRB.fRules, fNextIndex);
|
||||
if (Character.isBmpCodePoint(ch) && Character.isSurrogate((char)ch)) {
|
||||
error(RBBIRuleBuilder.U_ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
fNextIndex = UTF16.moveCodePointOffset(fRB.fRules, fNextIndex, 1);
|
||||
|
||||
if (ch == '\r' ||
|
||||
|
|
|
@ -905,4 +905,41 @@ public class RBBITest extends TestFmwk {
|
|||
assertEquals("Wrong number of breaks found", 2, breaksFound);
|
||||
}
|
||||
|
||||
/* Test handling of unpair surrogate.
|
||||
*/
|
||||
@Test
|
||||
public void TestUnpairedSurrogate() {
|
||||
// make sure the simple one work first.
|
||||
String rules = "ab;";
|
||||
RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);
|
||||
assertEquals("Rules does not match", rules, bi.toString());
|
||||
|
||||
try {
|
||||
new RuleBasedBreakIterator("a\ud800b;");
|
||||
fail("TestUnpairedSurrogate: RuleBasedBreakIterator() failed to throw an exception with unpair low surrogate.");
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// expected exception with unpair surrogate.
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("TestUnpairedSurrogate: Unexpected exception while new RuleBasedBreakIterator() with unpair low surrogate: " + e);
|
||||
}
|
||||
|
||||
try {
|
||||
new RuleBasedBreakIterator("a\ude00b;");
|
||||
fail("TestUnpairedSurrogate: RuleBasedBreakIterator() failed to throw an exception with unpair high surrogate.");
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// expected exception with unpair surrogate.
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("TestUnpairedSurrogate: Unexpected exception while new RuleBasedBreakIterator() with unpair high surrogate: " + e);
|
||||
}
|
||||
|
||||
|
||||
// make sure the surrogate one work too.
|
||||
rules = "a😀b;";
|
||||
bi = new RuleBasedBreakIterator(rules);
|
||||
assertEquals("Rules does not match", rules, bi.toString());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue