mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-04 13:05:31 +00:00
ICU-22898 MF2 parser bug fixes
ICU4C: Escape curly braces when serializing and normalizing ICU4C: Escape '|' in patterns ICU4C: When normalizing input, escape optionally-escaped characters in patterns ICU4C/ICU4J: Allow trailing whitespace after a match ICU4C: Fix parser to iterate over code points, not code units Add tests with old reserved syntax as syntax-error tests
This commit is contained in:
parent
9adcf8bd09
commit
8f82facca8
9 changed files with 407 additions and 321 deletions
File diff suppressed because it is too large
Load diff
|
@ -140,10 +140,21 @@ namespace message2 {
|
|||
SelectorKeys parseNonEmptyKeys(UErrorCode&);
|
||||
void errorPattern(UErrorCode& status);
|
||||
Pattern parseQuotedPattern(UErrorCode&);
|
||||
bool isDeclarationStart();
|
||||
|
||||
UChar32 peek() const { return source.char32At(index) ; }
|
||||
UChar32 peek(uint32_t i) const {
|
||||
return source.char32At(source.moveIndex32(index, i));
|
||||
}
|
||||
void next() { index = source.moveIndex32(index, 1); }
|
||||
|
||||
bool inBounds() const { return (int32_t) index < source.length(); }
|
||||
bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); }
|
||||
bool allConsumed() const { return (int32_t) index == source.length(); }
|
||||
|
||||
// The input string
|
||||
const UnicodeString &source;
|
||||
// The current position within the input string
|
||||
// The current position within the input string -- counting in UChar32
|
||||
uint32_t index;
|
||||
// Represents the current line (and when an error is indicated),
|
||||
// character offset within the line of the parse error
|
||||
|
|
|
@ -42,24 +42,26 @@ void Serializer::emit(const std::u16string_view& token) {
|
|||
void Serializer::emit(const Literal& l) {
|
||||
if (l.isQuoted()) {
|
||||
emit(PIPE);
|
||||
const UnicodeString& contents = l.unquoted();
|
||||
for (int32_t i = 0; i < contents.length(); i++) {
|
||||
// Re-escape any PIPE or BACKSLASH characters
|
||||
}
|
||||
const UnicodeString& contents = l.unquoted();
|
||||
for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
|
||||
// Re-escape any escaped-char characters
|
||||
switch(contents[i]) {
|
||||
case BACKSLASH:
|
||||
case PIPE: {
|
||||
emit(BACKSLASH);
|
||||
break;
|
||||
case PIPE:
|
||||
case LEFT_CURLY_BRACE:
|
||||
case RIGHT_CURLY_BRACE: {
|
||||
emit(BACKSLASH);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
break;
|
||||
}
|
||||
}
|
||||
emit(contents[i]);
|
||||
}
|
||||
emit(PIPE);
|
||||
} else {
|
||||
emit(l.unquoted());
|
||||
}
|
||||
if (l.isQuoted()) {
|
||||
emit(PIPE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -194,9 +196,10 @@ void Serializer::emit(const PatternPart& part) {
|
|||
if (part.isText()) {
|
||||
// Raw text
|
||||
const UnicodeString& text = part.asText();
|
||||
// Re-escape '{'/'}'/'\'
|
||||
for (int32_t i = 0; i < text.length(); i++) {
|
||||
// Re-escape '{'/'}'/'\''|'
|
||||
for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
|
||||
switch(text[i]) {
|
||||
case PIPE:
|
||||
case BACKSLASH:
|
||||
case LEFT_CURLY_BRACE:
|
||||
case RIGHT_CURLY_BRACE: {
|
||||
|
|
|
@ -303,6 +303,8 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
// Do tests for reserved statements/expressions
|
||||
runTestsFromJsonFile(*this, "spec/unsupported-expressions.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "spec/unsupported-statements.json", errorCode);
|
||||
// Uncomment when reserved syntax is removed
|
||||
// runTestsFromJsonFile(*this, "syntax-errors-reserved.json", errorCode);
|
||||
|
||||
// Do valid spec tests
|
||||
runTestsFromJsonFile(*this, "spec/syntax.json", errorCode);
|
||||
|
|
|
@ -613,10 +613,8 @@ public class MFParser {
|
|||
}
|
||||
keys.add(key);
|
||||
}
|
||||
// Only want to skip whitespace if we parsed at least one key
|
||||
if (!keys.isEmpty()) {
|
||||
skipOptionalWhitespaces();
|
||||
}
|
||||
// Trailing whitespace is allowed after the message
|
||||
skipOptionalWhitespaces();
|
||||
if (input.atEnd()) {
|
||||
checkCondition(
|
||||
keys.isEmpty(), "After selector keys it is mandatory to have a pattern.");
|
||||
|
|
|
@ -40,6 +40,7 @@ public class CoreTest extends CoreTestFmwk {
|
|||
"syntax-errors-diagnostics.json",
|
||||
"syntax-errors-diagnostics-multiline.json",
|
||||
"syntax-errors-end-of-input.json",
|
||||
"syntax-errors-reserved.json",
|
||||
"tricky-declarations.json",
|
||||
"valid-tests.json"};
|
||||
|
||||
|
|
|
@ -136,11 +136,6 @@
|
|||
"char": 28,
|
||||
"comment": "Trailing characters that are not whitespace"
|
||||
},
|
||||
{
|
||||
"src": ".match {$foo :string} {$bar :string} one * {{one}} * * {{other}} ",
|
||||
"char": 66,
|
||||
"comment": "Trailing whitespace at end of message should not be accepted either"
|
||||
},
|
||||
{
|
||||
"src": "empty { }",
|
||||
"char": 8,
|
||||
|
@ -343,6 +338,10 @@
|
|||
"src": ".match {1} {{_}}",
|
||||
"char": 12,
|
||||
"comment": "Disambiguating a wrong .match from an unsupported statement"
|
||||
},
|
||||
{
|
||||
"src": "{{{/p o4. = 1}}}",
|
||||
"char": 9
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
22
testdata/message2/syntax-errors-reserved.json
vendored
Normal file
22
testdata/message2/syntax-errors-reserved.json
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"scenario": "Reserved and private annotations",
|
||||
"description": "Tests for old unsupported expression (reserved/private) syntax -- these are now syntax errors",
|
||||
"defaultTestProperties": {
|
||||
"locale": "en-US",
|
||||
"expErrors": [
|
||||
{
|
||||
"type": "syntax-error"
|
||||
}
|
||||
]
|
||||
},
|
||||
"tests": [
|
||||
{ "src" : "\\\\{ ? 𫓜|@} |}" },
|
||||
{ "src" : "\\\\{ ? 𫓜|@} | \\} @𠟅𥪙=|| @㥞=$ @𰺱:}" },
|
||||
{ "src" : "{ $iFN ^ @USh =$u @l}" },
|
||||
{ "src" : ".local $dS ={ $p4 ^ |.| \\\\ @g:FV = $kd}{{@}}" },
|
||||
{ "src" : ".D. \\\\ ||{1}{{}} " },
|
||||
{ "src" : ".cIT ||{|@| % \\} } { * }{{}}" },
|
||||
{ "src" : ".󠆿 . |@| {$ & \\{ . @= $}{{\\\\}}" }
|
||||
]
|
||||
}
|
||||
|
28
testdata/message2/valid-tests.json
vendored
28
testdata/message2/valid-tests.json
vendored
|
@ -123,6 +123,34 @@
|
|||
"comment": "message -> complex-message -> *(declaration [s]) complex-body -> declaration complex-body -> input-declaration complex-body -> input variable-expression complex-body",
|
||||
"src": ".input{$x}{{}}",
|
||||
"exp": ""
|
||||
},
|
||||
{
|
||||
"comment": "Test that escaped characters are re-escaped properly when serializing literals",
|
||||
"src": "{|\\}|}",
|
||||
"exp": "}"
|
||||
},
|
||||
{
|
||||
"comment": "Test that escaped characters are re-escaped properly when serializing patterns",
|
||||
"src": "\\|",
|
||||
"exp": "|"
|
||||
},
|
||||
{
|
||||
"comment": "Test that parser and serializer treat optionally-escaped characters consistently",
|
||||
"src" : "{{{1}|}}",
|
||||
"exp": "1|"
|
||||
},
|
||||
{
|
||||
"comment": "Trailing whitespace after match is valid",
|
||||
"src": ".match {1 :string} * {{}} ",
|
||||
"exp": ""
|
||||
},
|
||||
{
|
||||
"src" : "",
|
||||
"exp" : ""
|
||||
},
|
||||
{
|
||||
"src" : "{{}}",
|
||||
"exp" : ""
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue