mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-04 13:05:31 +00:00
ICU-22940 MF2 ICU4C: Update for bidi support
Per https://github.com/unicode-org/message-format-wg/pull/884
This commit is contained in:
parent
fae4512d33
commit
1b8118049f
11 changed files with 610 additions and 120 deletions
|
@ -469,8 +469,8 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res,
|
|||
if (!key.isWildcard()) {
|
||||
// 2ii(b)(a) Assert that key is a literal.
|
||||
// (Not needed)
|
||||
// 2ii(b)(b) Let `ks` be the resolved value of `key`.
|
||||
ks = key.asLiteral().unquoted();
|
||||
// 2ii(b)(b) Let `ks` be the resolved value of `key` in Unicode Normalization Form C.
|
||||
ks = normalizeNFC(key.asLiteral().unquoted());
|
||||
// 2ii(b)(c) Append `ks` as the last element of the list `keys`.
|
||||
ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
|
||||
CHECK_ERROR(status);
|
||||
|
@ -531,7 +531,7 @@ void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UError
|
|||
// 2i(c). Assert that `key` is a literal.
|
||||
// (Not needed)
|
||||
// 2i(d). Let `ks` be the resolved value of `key`.
|
||||
UnicodeString ks = key.asLiteral().unquoted();
|
||||
UnicodeString ks = normalizeNFC(key.asLiteral().unquoted());
|
||||
// 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
|
||||
const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
|
||||
// 2i(f). If `matches` includes `ks`
|
||||
|
@ -593,7 +593,7 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo
|
|||
// 5iii(c)(a). Assert that `key` is a literal.
|
||||
// (Not needed)
|
||||
// 5iii(c)(b). Let `ks` be the resolved value of `key`.
|
||||
UnicodeString ks = key.asLiteral().unquoted();
|
||||
UnicodeString ks = normalizeNFC(key.asLiteral().unquoted());
|
||||
// 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
|
||||
matchpref = vectorFind(matches, ks);
|
||||
U_ASSERT(matchpref >= 0);
|
||||
|
|
|
@ -45,7 +45,8 @@ namespace message2 {
|
|||
|
||||
// Parse the pattern
|
||||
MFDataModel::Builder tree(errorCode);
|
||||
Parser(pat, tree, *errors, normalizedInput).parse(parseError, errorCode);
|
||||
Parser(pat, tree, *errors, normalizedInput, errorCode)
|
||||
.parse(parseError, errorCode);
|
||||
|
||||
// Fail on syntax errors
|
||||
if (errors->hasSyntaxError()) {
|
||||
|
|
|
@ -9,9 +9,12 @@
|
|||
|
||||
#if !UCONFIG_NO_MF2
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "messageformat2_errors.h"
|
||||
#include "messageformat2_macros.h"
|
||||
#include "messageformat2_parser.h"
|
||||
#include "ucln_in.h"
|
||||
#include "umutex.h"
|
||||
#include "uvector.h" // U_ASSERT
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -93,14 +96,235 @@ static void copyContext(const UChar in[U_PARSE_CONTEXT_LEN], UChar out[U_PARSE_C
|
|||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Predicates
|
||||
// Initialization of UnicodeSets
|
||||
|
||||
// Returns true if `c` is in the interval [`first`, `last`]
|
||||
static bool inRange(UChar32 c, UChar32 first, UChar32 last) {
|
||||
U_ASSERT(first < last);
|
||||
return c >= first && c <= last;
|
||||
namespace unisets {
|
||||
|
||||
UnicodeSet* gUnicodeSets[unisets::UNISETS_KEY_COUNT] = {};
|
||||
|
||||
inline UnicodeSet* getImpl(Key key) {
|
||||
return gUnicodeSets[key];
|
||||
}
|
||||
|
||||
icu::UInitOnce gMF2ParseUniSetsInitOnce {};
|
||||
}
|
||||
|
||||
UnicodeSet* initContentChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet(0x0001, 0x0008); // Omit NULL, HTAB and LF
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
result->add(0x000B, 0x000C); // Omit CR
|
||||
result->add(0x000E, 0x001F); // Omit SP
|
||||
result->add(0x0021, 0x002D); // Omit '.'
|
||||
result->add(0x002F, 0x003F); // Omit '@'
|
||||
result->add(0x0041, 0x005B); // Omit '\'
|
||||
result->add(0x005D, 0x007A); // Omit { | }
|
||||
result->add(0x007E, 0x2FFF); // Omit IDEOGRAPHIC_SPACE
|
||||
result->add(0x3001, 0x10FFFF); // Allowing surrogates is intentional
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initWhitespace(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
result->add(SPACE);
|
||||
result->add(HTAB);
|
||||
result->add(CR);
|
||||
result->add(LF);
|
||||
result->add(IDEOGRAPHIC_SPACE);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initBidiControls(UErrorCode& status) {
|
||||
UnicodeSet* result = new UnicodeSet(UnicodeString("[\\u061C]"), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
result->add(0x200E, 0x200F);
|
||||
result->add(0x2066, 0x2069);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initAlpha(UErrorCode& status) {
|
||||
UnicodeSet* result = new UnicodeSet(UnicodeString("[:letter:]"), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initDigits(UErrorCode& status) {
|
||||
UnicodeSet* result = new UnicodeSet(UnicodeString("[:number:]"), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initNameStartChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet(*unisets::getImpl(unisets::ALPHA));
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->add(UNDERSCORE);
|
||||
result->add(0x00C0, 0x00D6);
|
||||
result->add(0x00D8, 0x00F6);
|
||||
result->add(0x00F8, 0x02FF);
|
||||
result->add(0x0370, 0x037D);
|
||||
result->add(0x037F, 0x061B);
|
||||
result->add(0x061D, 0x1FFF);
|
||||
result->add(0x200C, 0x200D);
|
||||
result->add(0x2070, 0x218F);
|
||||
result->add(0x2C00, 0x2FEF);
|
||||
result->add(0x3001, 0xD7FF);
|
||||
result->add(0xF900, 0xFDCF);
|
||||
result->add(0xFDF0, 0xFFFD);
|
||||
result->add(0x100000, 0xEFFFF);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initNameChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->addAll(*unisets::getImpl(unisets::NAME_START));
|
||||
result->addAll(*unisets::getImpl(unisets::DIGIT));
|
||||
result->add(HYPHEN);
|
||||
result->add(PERIOD);
|
||||
result->add(0x00B7);
|
||||
result->add(0x0300, 0x036F);
|
||||
result->add(0x203F, 0x2040);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initTextChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->addAll(*unisets::getImpl(unisets::CONTENT));
|
||||
result->addAll(*unisets::getImpl(unisets::WHITESPACE));
|
||||
result->add(PERIOD);
|
||||
result->add(AT);
|
||||
result->add(PIPE);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initQuotedChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->addAll(*unisets::getImpl(unisets::CONTENT));
|
||||
result->addAll(*unisets::getImpl(unisets::WHITESPACE));
|
||||
result->add(PERIOD);
|
||||
result->add(AT);
|
||||
result->add(LEFT_CURLY_BRACE);
|
||||
result->add(RIGHT_CURLY_BRACE);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeSet* initEscapableChars(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
result->add(PIPE);
|
||||
result->add(BACKSLASH);
|
||||
result->add(LEFT_CURLY_BRACE);
|
||||
result->add(RIGHT_CURLY_BRACE);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace unisets {
|
||||
|
||||
UBool U_CALLCONV cleanupMF2ParseUniSets() {
|
||||
for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) {
|
||||
delete gUnicodeSets[i];
|
||||
gUnicodeSets[i] = nullptr;
|
||||
}
|
||||
gMF2ParseUniSetsInitOnce.reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
void U_CALLCONV initMF2ParseUniSets(UErrorCode& status) {
|
||||
ucln_i18n_registerCleanup(UCLN_I18N_MF2_UNISETS, cleanupMF2ParseUniSets);
|
||||
|
||||
gUnicodeSets[unisets::CONTENT] = initContentChars(status);
|
||||
gUnicodeSets[unisets::WHITESPACE] = initWhitespace(status);
|
||||
gUnicodeSets[unisets::BIDI] = initBidiControls(status);
|
||||
gUnicodeSets[unisets::ALPHA] = initAlpha(status);
|
||||
gUnicodeSets[unisets::DIGIT] = initDigits(status);
|
||||
gUnicodeSets[unisets::NAME_START] = initNameStartChars(status);
|
||||
gUnicodeSets[unisets::NAME_CHAR] = initNameChars(status);
|
||||
gUnicodeSets[unisets::TEXT] = initTextChars(status);
|
||||
gUnicodeSets[unisets::QUOTED] = initQuotedChars(status);
|
||||
gUnicodeSets[unisets::ESCAPABLE] = initEscapableChars(status);
|
||||
}
|
||||
|
||||
const UnicodeSet* get(Key key) {
|
||||
UErrorCode localStatus = U_ZERO_ERROR;
|
||||
umtx_initOnce(gMF2ParseUniSetsInitOnce, &initMF2ParseUniSets, localStatus);
|
||||
if (U_FAILURE(localStatus)) {
|
||||
return nullptr;
|
||||
}
|
||||
return getImpl(key);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Predicates
|
||||
|
||||
/*
|
||||
The following helper predicates should exactly match nonterminals in the MessageFormat 2 grammar:
|
||||
|
||||
|
@ -115,76 +339,50 @@ static bool inRange(UChar32 c, UChar32 first, UChar32 last) {
|
|||
`isWhitespace()` : `s`
|
||||
*/
|
||||
|
||||
static bool isContentChar(UChar32 c) {
|
||||
return inRange(c, 0x0001, 0x0008) // Omit NULL, HTAB and LF
|
||||
|| inRange(c, 0x000B, 0x000C) // Omit CR
|
||||
|| inRange(c, 0x000E, 0x001F) // Omit SP
|
||||
|| inRange(c, 0x0021, 0x002D) // Omit '.'
|
||||
|| inRange(c, 0x002F, 0x003F) // Omit '@'
|
||||
|| inRange(c, 0x0041, 0x005B) // Omit '\'
|
||||
|| inRange(c, 0x005D, 0x007A) // Omit { | }
|
||||
|| inRange(c, 0x007E, 0x2FFF) // Omit IDEOGRAPHIC_SPACE
|
||||
|| inRange(c, 0x3001, 0x10FFFF); // Allowing surrogates is intentional
|
||||
bool Parser::isContentChar(UChar32 c) const {
|
||||
return contentChars->contains(c);
|
||||
}
|
||||
|
||||
// See `s` in the MessageFormat 2 grammar
|
||||
inline bool isWhitespace(UChar32 c) {
|
||||
switch (c) {
|
||||
case SPACE:
|
||||
case HTAB:
|
||||
case CR:
|
||||
case LF:
|
||||
case IDEOGRAPHIC_SPACE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
// See `bidi` in the MF2 grammar
|
||||
bool Parser::isBidiControl(UChar32 c) const {
|
||||
return bidiControlChars->contains(c);
|
||||
}
|
||||
|
||||
static bool isTextChar(UChar32 c) {
|
||||
return isContentChar(c)
|
||||
|| isWhitespace(c)
|
||||
|| c == PERIOD
|
||||
|| c == AT
|
||||
|| c == PIPE;
|
||||
// See `ws` in the MessageFormat 2 grammar
|
||||
bool Parser::isWhitespace(UChar32 c) const {
|
||||
return whitespaceChars->contains(c);
|
||||
}
|
||||
|
||||
static bool isAlpha(UChar32 c) { return inRange(c, 0x0041, 0x005A) || inRange(c, 0x0061, 0x007A); }
|
||||
|
||||
static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); }
|
||||
|
||||
static bool isNameStart(UChar32 c) {
|
||||
return isAlpha(c) || c == UNDERSCORE || inRange(c, 0x00C0, 0x00D6) || inRange(c, 0x00D8, 0x00F6) ||
|
||||
inRange(c, 0x00F8, 0x02FF) || inRange(c, 0x0370, 0x037D) || inRange(c, 0x037F, 0x1FFF) ||
|
||||
inRange(c, 0x200C, 0x200D) || inRange(c, 0x2070, 0x218F) || inRange(c, 0x2C00, 0x2FEF) ||
|
||||
inRange(c, 0x3001, 0xD7FF) || inRange(c, 0xF900, 0xFDCF) || inRange(c, 0xFDF0, 0xFFFD) ||
|
||||
inRange(c, 0x10000, 0xEFFFF);
|
||||
bool Parser::isTextChar(UChar32 c) const {
|
||||
return textChars->contains(c);
|
||||
}
|
||||
|
||||
static bool isNameChar(UChar32 c) {
|
||||
return isNameStart(c) || isDigit(c) || c == HYPHEN || c == PERIOD || c == 0x00B7 ||
|
||||
inRange(c, 0x0300, 0x036F) || inRange(c, 0x203F, 0x2040);
|
||||
bool Parser::isAlpha(UChar32 c) const {
|
||||
return alphaChars->contains(c);
|
||||
}
|
||||
|
||||
static bool isUnquotedStart(UChar32 c) {
|
||||
return isNameStart(c) || isDigit(c) || c == HYPHEN || c == PERIOD || c == 0x00B7 ||
|
||||
inRange(c, 0x0300, 0x036F) || inRange(c, 0x203F, 0x2040);
|
||||
bool Parser::isDigit(UChar32 c) const {
|
||||
return digitChars->contains(c);
|
||||
}
|
||||
|
||||
static bool isQuotedChar(UChar32 c) {
|
||||
return isContentChar(c)
|
||||
|| isWhitespace(c)
|
||||
|| c == PERIOD
|
||||
|| c == AT
|
||||
|| c == LEFT_CURLY_BRACE
|
||||
|| c == RIGHT_CURLY_BRACE;
|
||||
bool Parser::isNameStart(UChar32 c) const {
|
||||
return nameStartChars->contains(c);
|
||||
}
|
||||
|
||||
static bool isEscapableChar(UChar32 c) {
|
||||
return c == PIPE
|
||||
|| c == BACKSLASH
|
||||
|| c == LEFT_CURLY_BRACE
|
||||
|| c == RIGHT_CURLY_BRACE;
|
||||
bool Parser::isNameChar(UChar32 c) const {
|
||||
return nameChars->contains(c);
|
||||
}
|
||||
|
||||
bool Parser::isUnquotedStart(UChar32 c) const {
|
||||
return isNameChar(c);
|
||||
}
|
||||
|
||||
bool Parser::isQuotedChar(UChar32 c) const {
|
||||
return quotedChars->contains(c);
|
||||
}
|
||||
|
||||
bool Parser::isEscapableChar(UChar32 c) const {
|
||||
return escapableChars->contains(c);
|
||||
}
|
||||
|
||||
// Returns true iff `c` can begin a `function` nonterminal
|
||||
|
@ -205,12 +403,12 @@ static bool isAnnotationStart(UChar32 c) {
|
|||
}
|
||||
|
||||
// Returns true iff `c` can begin a `literal` nonterminal
|
||||
static bool isLiteralStart(UChar32 c) {
|
||||
bool Parser::isLiteralStart(UChar32 c) const {
|
||||
return (c == PIPE || isNameStart(c) || c == HYPHEN || isDigit(c));
|
||||
}
|
||||
|
||||
// Returns true iff `c` can begin a `key` nonterminal
|
||||
static bool isKeyStart(UChar32 c) {
|
||||
bool Parser::isKeyStart(UChar32 c) const {
|
||||
return (c == ASTERISK || isLiteralStart(c));
|
||||
}
|
||||
|
||||
|
@ -349,7 +547,7 @@ option, or the optional space before an attribute.
|
|||
No pre, no post.
|
||||
A message may end with whitespace, so `index` may equal `len()` on exit.
|
||||
*/
|
||||
void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode) {
|
||||
void Parser::parseRequiredWS(UErrorCode& errorCode) {
|
||||
bool sawWhitespace = false;
|
||||
|
||||
// The loop exits either when we consume all the input,
|
||||
|
@ -360,7 +558,7 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
|
|||
// If whitespace isn't required -- or if we saw it already --
|
||||
// then the caller is responsible for checking this case and
|
||||
// setting an error if necessary.
|
||||
if (!required || sawWhitespace) {
|
||||
if (sawWhitespace) {
|
||||
// Not an error.
|
||||
return;
|
||||
}
|
||||
|
@ -382,24 +580,51 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
|
|||
}
|
||||
}
|
||||
|
||||
if (!sawWhitespace && required) {
|
||||
if (!sawWhitespace) {
|
||||
ERROR(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
void Parser::parseOptionalBidi() {
|
||||
while (true) {
|
||||
if (!inBounds()) {
|
||||
return;
|
||||
}
|
||||
if (isBidiControl(peek())) {
|
||||
next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
|
||||
No pre, no post, because a message may end with whitespace
|
||||
Matches `s` in the MF2 grammar
|
||||
*/
|
||||
void Parser::parseRequiredWhitespace(UErrorCode& errorCode) {
|
||||
parseWhitespaceMaybeRequired(true, errorCode);
|
||||
parseOptionalBidi();
|
||||
parseRequiredWS(errorCode);
|
||||
parseOptionalWhitespace();
|
||||
normalizedInput += SPACE;
|
||||
}
|
||||
|
||||
/*
|
||||
No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
|
||||
*/
|
||||
void Parser::parseOptionalWhitespace(UErrorCode& errorCode) {
|
||||
parseWhitespaceMaybeRequired(false, errorCode);
|
||||
void Parser::parseOptionalWhitespace() {
|
||||
while (true) {
|
||||
if (!inBounds()) {
|
||||
return;
|
||||
}
|
||||
auto cp = peek();
|
||||
if (isWhitespace(cp) || isBidiControl(cp)) {
|
||||
maybeAdvanceLine();
|
||||
next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Consumes a single character, signaling an error if `peek()` != `c`
|
||||
|
@ -444,11 +669,11 @@ void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode)
|
|||
*/
|
||||
void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCode& errorCode) {
|
||||
// No need for error check or bounds check before parseOptionalWhitespace
|
||||
parseOptionalWhitespace(errorCode);
|
||||
parseOptionalWhitespace();
|
||||
// Establish precondition
|
||||
CHECK_BOUNDS(errorCode);
|
||||
parseToken(token, errorCode);
|
||||
parseOptionalWhitespace(errorCode);
|
||||
parseOptionalWhitespace();
|
||||
// Guarantee postcondition
|
||||
CHECK_BOUNDS(errorCode);
|
||||
}
|
||||
|
@ -460,12 +685,12 @@ void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCo
|
|||
then consumes optional whitespace again
|
||||
*/
|
||||
void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
|
||||
// No need for error check or bounds check before parseOptionalWhitespace(errorCode)
|
||||
parseOptionalWhitespace(errorCode);
|
||||
// No need for error check or bounds check before parseOptionalWhitespace()
|
||||
parseOptionalWhitespace();
|
||||
// Establish precondition
|
||||
CHECK_BOUNDS(errorCode);
|
||||
parseToken(c, errorCode);
|
||||
parseOptionalWhitespace(errorCode);
|
||||
parseOptionalWhitespace();
|
||||
// Guarantee postcondition
|
||||
CHECK_BOUNDS(errorCode);
|
||||
}
|
||||
|
@ -484,11 +709,17 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
|
|||
|
||||
U_ASSERT(inBounds());
|
||||
|
||||
if (!isNameStart(peek())) {
|
||||
if (!(isNameStart(peek()) || isBidiControl(peek()))) {
|
||||
ERROR(errorCode);
|
||||
return name;
|
||||
}
|
||||
|
||||
// name = [bidi] name-start *name-char [bidi]
|
||||
|
||||
// [bidi]
|
||||
parseOptionalBidi();
|
||||
|
||||
// name-start *name-char
|
||||
while (isNameChar(peek())) {
|
||||
UChar32 c = peek();
|
||||
name += c;
|
||||
|
@ -499,6 +730,10 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// [bidi]
|
||||
parseOptionalBidi();
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
|
@ -847,7 +1082,7 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
|
|||
// about whether whitespace precedes another
|
||||
// attribute, or the '=' sign
|
||||
int32_t savedIndex = index;
|
||||
parseOptionalWhitespace(errorCode);
|
||||
parseOptionalWhitespace();
|
||||
|
||||
Operand rand;
|
||||
if (peek() == EQUALS) {
|
||||
|
@ -1133,7 +1368,7 @@ the comment in `parseOptions()` for details.
|
|||
// (the character is either the required space before an annotation, or optional
|
||||
// trailing space after the literal or variable). It's still ambiguous which
|
||||
// one does apply.
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
// Restore precondition
|
||||
CHECK_BOUNDS(status);
|
||||
|
||||
|
@ -1204,7 +1439,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
|
|||
// Parse opening brace
|
||||
parseToken(LEFT_CURLY_BRACE, status);
|
||||
// Optional whitespace after opening brace
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
|
||||
Expression::Builder exprBuilder(status);
|
||||
// Restore precondition
|
||||
|
@ -1247,7 +1482,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
|
|||
|
||||
// Parse optional space
|
||||
// (the last [s] in e.g. "{" [s] literal [s annotation] *(s attribute) [s] "}")
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
|
||||
// Either an operand or operator (or both) must have been set already,
|
||||
// so there can't be an error
|
||||
|
@ -1323,7 +1558,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
|
|||
CHECK_BOUNDS(status);
|
||||
|
||||
parseToken(ID_INPUT, status);
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
|
||||
// Restore precondition before calling parseExpression()
|
||||
CHECK_BOUNDS(status);
|
||||
|
@ -1384,7 +1619,7 @@ void Parser::parseDeclarations(UErrorCode& status) {
|
|||
// Avoid looping infinitely
|
||||
CHECK_ERROR(status);
|
||||
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
// Restore precondition
|
||||
CHECK_BOUNDS(status);
|
||||
}
|
||||
|
@ -1494,8 +1729,8 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
|
|||
|
||||
// We've seen at least one whitespace-key pair, so now we can parse
|
||||
// *(s key) [s]
|
||||
while (peek() != LEFT_CURLY_BRACE || isWhitespace(peek())) { // Try to recover from errors
|
||||
bool wasWhitespace = isWhitespace(peek());
|
||||
while (peek() != LEFT_CURLY_BRACE || isWhitespace(peek()) || isBidiControl(peek())) {
|
||||
bool wasWhitespace = isWhitespace(peek()) || isBidiControl(peek());
|
||||
parseRequiredWhitespace(status);
|
||||
if (!wasWhitespace) {
|
||||
// Avoid infinite loop when parsing something like:
|
||||
|
@ -1553,7 +1788,7 @@ Markup Parser::parseMarkup(UErrorCode& status) {
|
|||
// Consume the '{'
|
||||
next();
|
||||
normalizedInput += LEFT_CURLY_BRACE;
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
bool closing = false;
|
||||
switch (peek()) {
|
||||
case NUMBER_SIGN: {
|
||||
|
@ -1580,19 +1815,19 @@ Markup Parser::parseMarkup(UErrorCode& status) {
|
|||
|
||||
// Parse the options, which must begin with a ' '
|
||||
// if present
|
||||
if (inBounds() && isWhitespace(peek())) {
|
||||
if (inBounds() && (isWhitespace(peek()) || isBidiControl(peek()))) {
|
||||
OptionAdder<Markup::Builder> optionAdder(builder);
|
||||
parseOptions(optionAdder, status);
|
||||
}
|
||||
|
||||
// Parse the attributes, which also must begin
|
||||
// with a ' '
|
||||
if (inBounds() && isWhitespace(peek())) {
|
||||
if (inBounds() && (isWhitespace(peek()) || isBidiControl(peek()))) {
|
||||
AttributeAdder<Markup::Builder> attrAdder(builder);
|
||||
parseAttributes(attrAdder, status);
|
||||
}
|
||||
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
|
||||
bool standalone = false;
|
||||
// Check if this is a standalone or not
|
||||
|
@ -1640,7 +1875,7 @@ std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
|
|||
isMarkup = true;
|
||||
break;
|
||||
}
|
||||
if (!isWhitespace(c)){
|
||||
if (!(isWhitespace(c) || isBidiControl(c))) {
|
||||
break;
|
||||
}
|
||||
tempIndex++;
|
||||
|
@ -1696,7 +1931,7 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
|
|||
break;
|
||||
}
|
||||
// Don't loop infinitely
|
||||
if (errors.hasSyntaxError()) {
|
||||
if (errors.hasSyntaxError() || U_FAILURE(status)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1787,8 +2022,8 @@ void Parser::parseSelectors(UErrorCode& status) {
|
|||
return;
|
||||
}
|
||||
|
||||
while (isWhitespace(peek()) || isKeyStart(peek())) {
|
||||
parseOptionalWhitespace(status);
|
||||
while (isWhitespace(peek()) || isBidiControl(peek()) || isKeyStart(peek())) {
|
||||
parseOptionalWhitespace();
|
||||
// Restore the precondition.
|
||||
// Trailing whitespace is allowed.
|
||||
if (!inBounds()) {
|
||||
|
@ -1804,6 +2039,10 @@ void Parser::parseSelectors(UErrorCode& status) {
|
|||
// Because if we don't check it here, the `isWhitespace()` call in
|
||||
// the loop head will read off the end of the input string.
|
||||
CHECK_END_OF_INPUT
|
||||
|
||||
if (errors.hasSyntaxError() || U_FAILURE(status)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1876,7 +2115,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
|
|||
bool complex = false;
|
||||
// First, "look ahead" to determine if this is a simple or complex
|
||||
// message. To do that, check the first non-whitespace character.
|
||||
while (inBounds(index) && isWhitespace(peek())) {
|
||||
while (inBounds(index) && (isWhitespace(peek()) || isBidiControl(peek()))) {
|
||||
next();
|
||||
}
|
||||
|
||||
|
@ -1896,10 +2135,10 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
|
|||
// Message can be empty, so we need to only look ahead
|
||||
// if we know it's non-empty
|
||||
if (complex) {
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
parseDeclarations(status);
|
||||
parseBody(status);
|
||||
parseOptionalWhitespace(status);
|
||||
parseOptionalWhitespace();
|
||||
} else {
|
||||
// Simple message
|
||||
// For normalization, quote the pattern
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include "unicode/messageformat2_data_model.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
#include "messageformat2_allocation.h"
|
||||
#include "messageformat2_errors.h"
|
||||
|
@ -56,6 +57,26 @@ namespace message2 {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
// Initialization of UnicodeSets
|
||||
namespace unisets {
|
||||
enum Key {
|
||||
CONTENT,
|
||||
WHITESPACE,
|
||||
BIDI,
|
||||
ALPHA,
|
||||
DIGIT,
|
||||
NAME_START,
|
||||
NAME_CHAR,
|
||||
TEXT,
|
||||
QUOTED,
|
||||
ESCAPABLE,
|
||||
UNISETS_KEY_COUNT
|
||||
};
|
||||
|
||||
U_I18N_API const UnicodeSet* get(Key key);
|
||||
}
|
||||
|
||||
// Parser class (private)
|
||||
class Parser : public UMemory {
|
||||
public:
|
||||
|
@ -84,8 +105,23 @@ namespace message2 {
|
|||
UChar postContext[U_PARSE_CONTEXT_LEN];
|
||||
} MessageParseError;
|
||||
|
||||
Parser(const UnicodeString &input, MFDataModel::Builder& dataModelBuilder, StaticErrors& e, UnicodeString& normalizedInputRef)
|
||||
: source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
|
||||
Parser(const UnicodeString &input,
|
||||
MFDataModel::Builder& dataModelBuilder,
|
||||
StaticErrors& e,
|
||||
UnicodeString& normalizedInputRef,
|
||||
UErrorCode& status)
|
||||
: contentChars(unisets::get(unisets::CONTENT)),
|
||||
whitespaceChars(unisets::get(unisets::WHITESPACE)),
|
||||
bidiControlChars(unisets::get(unisets::BIDI)),
|
||||
alphaChars(unisets::get(unisets::ALPHA)),
|
||||
digitChars(unisets::get(unisets::DIGIT)),
|
||||
nameStartChars(unisets::get(unisets::NAME_START)),
|
||||
nameChars(unisets::get(unisets::NAME_CHAR)),
|
||||
textChars(unisets::get(unisets::TEXT)),
|
||||
quotedChars(unisets::get(unisets::QUOTED)),
|
||||
escapableChars(unisets::get(unisets::ESCAPABLE)),
|
||||
source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
|
||||
(void) status;
|
||||
parseError.line = 0;
|
||||
parseError.offset = 0;
|
||||
parseError.lengthBeforeCurrentLine = 0;
|
||||
|
@ -93,6 +129,31 @@ namespace message2 {
|
|||
parseError.postContext[0] = '\0';
|
||||
}
|
||||
|
||||
UnicodeSet initContentChars(UErrorCode& status);
|
||||
UnicodeSet initWhitespace(UErrorCode& status);
|
||||
UnicodeSet initBidiControls(UErrorCode& status);
|
||||
UnicodeSet initAlpha(UErrorCode& status);
|
||||
UnicodeSet initDigits(UErrorCode& status);
|
||||
UnicodeSet initNameStartChars(UErrorCode& status);
|
||||
UnicodeSet initNameChars(UErrorCode& status);
|
||||
UnicodeSet initTextChars(UErrorCode& status);
|
||||
UnicodeSet initQuotedChars(UErrorCode& status);
|
||||
UnicodeSet initEscapableChars(UErrorCode& status);
|
||||
|
||||
bool isContentChar(UChar32) const;
|
||||
bool isBidiControl(UChar32) const;
|
||||
bool isWhitespace(UChar32) const;
|
||||
bool isTextChar(UChar32) const;
|
||||
bool isQuotedChar(UChar32) const;
|
||||
bool isEscapableChar(UChar32) const;
|
||||
bool isAlpha(UChar32) const;
|
||||
bool isDigit(UChar32) const;
|
||||
bool isNameStart(UChar32) const;
|
||||
bool isNameChar(UChar32) const;
|
||||
bool isUnquotedStart(UChar32) const;
|
||||
bool isLiteralStart(UChar32) const;
|
||||
bool isKeyStart(UChar32) const;
|
||||
|
||||
static void translateParseError(const MessageParseError&, UParseError&);
|
||||
static void setParseError(MessageParseError&, uint32_t);
|
||||
void maybeAdvanceLine();
|
||||
|
@ -105,9 +166,10 @@ namespace message2 {
|
|||
void parseSelectors(UErrorCode&);
|
||||
void parseVariant(UErrorCode&);
|
||||
|
||||
void parseWhitespaceMaybeRequired(bool, UErrorCode&);
|
||||
void parseRequiredWS(UErrorCode&);
|
||||
void parseRequiredWhitespace(UErrorCode&);
|
||||
void parseOptionalWhitespace(UErrorCode&);
|
||||
void parseOptionalBidi();
|
||||
void parseOptionalWhitespace();
|
||||
void parseToken(UChar32, UErrorCode&);
|
||||
void parseTokenWithWhitespace(UChar32, UErrorCode&);
|
||||
void parseToken(const std::u16string_view&, UErrorCode&);
|
||||
|
@ -152,6 +214,18 @@ namespace message2 {
|
|||
bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); }
|
||||
bool allConsumed() const { return (int32_t) index == source.length(); }
|
||||
|
||||
// UnicodeSets for checking character ranges
|
||||
const UnicodeSet* contentChars;
|
||||
const UnicodeSet* whitespaceChars;
|
||||
const UnicodeSet* bidiControlChars;
|
||||
const UnicodeSet* alphaChars;
|
||||
const UnicodeSet* digitChars;
|
||||
const UnicodeSet* nameStartChars;
|
||||
const UnicodeSet* nameChars;
|
||||
const UnicodeSet* textChars;
|
||||
const UnicodeSet* quotedChars;
|
||||
const UnicodeSet* escapableChars;
|
||||
|
||||
// The input string
|
||||
const UnicodeString &source;
|
||||
// The current position within the input string -- counting in UChar32
|
||||
|
@ -168,8 +242,8 @@ namespace message2 {
|
|||
|
||||
// The parent builder
|
||||
MFDataModel::Builder &dataModel;
|
||||
}; // class Parser
|
||||
|
||||
}; // class Parser
|
||||
} // namespace message2
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -63,6 +63,7 @@ typedef enum ECleanupI18NType {
|
|||
UCLN_I18N_REGION,
|
||||
UCLN_I18N_LIST_FORMATTER,
|
||||
UCLN_I18N_NUMSYS,
|
||||
UCLN_I18N_MF2_UNISETS,
|
||||
UCLN_I18N_COUNT /* This must be last */
|
||||
} ECleanupI18NType;
|
||||
|
||||
|
|
|
@ -13,16 +13,6 @@
|
|||
|
||||
using namespace icu::message2;
|
||||
|
||||
/*
|
||||
TODO: Tests need to be unified in a single format that
|
||||
both ICU4C and ICU4J can use, rather than being embedded in code.
|
||||
|
||||
Tests are included in their current state to give a sense of
|
||||
how much test coverage has been achieved. Most of the testing is
|
||||
of the parser/serializer; the formatter needs to be tested more
|
||||
thoroughly.
|
||||
*/
|
||||
|
||||
void
|
||||
TestMessageFormat2::runIndexedTest(int32_t index, UBool exec,
|
||||
const char* &name, char* /*par*/) {
|
||||
|
|
|
@ -318,12 +318,16 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
// TODO: Delete this file after https://github.com/unicode-org/message-format-wg/pull/904
|
||||
// lands and the tests here are updated from the spec repo
|
||||
runTestsFromJsonFile(*this, "normalization.json", errorCode);
|
||||
// TODO: https://github.com/unicode-org/message-format-wg/pull/902 will
|
||||
// move the bidi tests into the spec
|
||||
runTestsFromJsonFile(*this, "bidi.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "more-functions.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "valid-tests.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "resolution-errors.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "matches-whitespace.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "alias-selector-annotations.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "runtime-errors.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "more-syntax-errors.json", errorCode);
|
||||
|
||||
// Re: the expected output for the first test in this file:
|
||||
// Note: the more "correct" fallback output seems like it should be "1.000 3" (ignoring the
|
||||
|
|
|
@ -294,10 +294,10 @@ class TestUtils {
|
|||
}
|
||||
// Re-run the formatter
|
||||
result = mf.formatToString(MessageArguments(testCase.getArguments(), errorCode), errorCode);
|
||||
if (!testCase.outputMatches(result)) {
|
||||
failWrongOutput(tmsg, testCase, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!testCase.outputMatches(result)) {
|
||||
failWrongOutput(tmsg, testCase, result);
|
||||
return;
|
||||
}
|
||||
errorCode.reset();
|
||||
}
|
||||
|
|
160
testdata/message2/bidi.json
vendored
Normal file
160
testdata/message2/bidi.json
vendored
Normal file
|
@ -0,0 +1,160 @@
|
|||
{
|
||||
"scenario": "Bidi support",
|
||||
"description": "Tests for correct parsing of messages with bidirectional marks and isolates",
|
||||
"defaultTestProperties": {
|
||||
"locale": "en-US"
|
||||
},
|
||||
"tests": [
|
||||
{
|
||||
"comment": "simple-message = o [simple-start pattern]",
|
||||
"src": " \u061C Hello world!",
|
||||
"exp": " \u061C Hello world!"
|
||||
},
|
||||
{
|
||||
"comment": "complex-message = o *(declaration o) complex-body o",
|
||||
"src": "\u200E .local $x = {1} {{ {$x}}}",
|
||||
"exp": " 1"
|
||||
},
|
||||
{
|
||||
"comment": "complex-message = o *(declaration o) complex-body o",
|
||||
"src": ".local $x = {1} \u200F {{ {$x}}}",
|
||||
"exp": " 1"
|
||||
},
|
||||
{
|
||||
"comment": "complex-message = o *(declaration o) complex-body o",
|
||||
"src": ".local $x = {1} {{ {$x}}} \u2066",
|
||||
"exp": " 1"
|
||||
},
|
||||
{
|
||||
"comment": "input-declaration = input o variable-expression",
|
||||
"src": ".input \u2067 {$x :number} {{hello}}",
|
||||
"params": [{"name": "x", "value": "1"}],
|
||||
"exp": "hello"
|
||||
},
|
||||
{
|
||||
"comment": "local s variable o \"=\" o expression",
|
||||
"src": ".local $x \u2068 = \u2069 {1} {{hello}}",
|
||||
"exp": "hello"
|
||||
},
|
||||
{
|
||||
"comment": "local s variable o \"=\" o expression",
|
||||
"src": ".local \u2067 $x = {1} {{hello}}",
|
||||
"exp": "hello"
|
||||
},
|
||||
{
|
||||
"comment": "local s variable o \"=\" o expression",
|
||||
"src": ".local\u2067 $x = {1} {{hello}}",
|
||||
"exp": "hello"
|
||||
},
|
||||
{
|
||||
"comment": "o \"{{\" pattern \"}}\"",
|
||||
"src": "\u2067 {{hello}}",
|
||||
"exp": "hello"
|
||||
},
|
||||
{
|
||||
"comment": "match-statement s variant *(o variant)",
|
||||
"src": [".local $x = {1 :number}\n",
|
||||
".match $x\n",
|
||||
"1 {{one}}\n",
|
||||
"\u061C * {{other}}"],
|
||||
"exp": "one"
|
||||
},
|
||||
{
|
||||
"comment": "match-statement s variant *(o variant)",
|
||||
"src": [".local $x = {1 :number}",
|
||||
".match $x \u061c",
|
||||
"1 {{one}}",
|
||||
"* {{other}}"],
|
||||
"exp": "one"
|
||||
},
|
||||
{
|
||||
"comment": "match-statement s variant *(o variant)",
|
||||
"src": [".local $x = {1 :number}",
|
||||
".match $x \u061c",
|
||||
"1 {{one}}",
|
||||
"* {{other}}"],
|
||||
"exp": "one"
|
||||
},
|
||||
{
|
||||
"comment": "variant = key *(s key) quoted-pattern",
|
||||
"src": [".local $x = {1 :number} .local $y = {$x :number}",
|
||||
".match $x $y\n",
|
||||
"1 \u200E 1 {{one}}",
|
||||
"* * {{other}}"],
|
||||
"exp": "one"
|
||||
},
|
||||
{
|
||||
"comment": "variant = key *(s key) quoted-pattern",
|
||||
"src": [".local $x = {1 :number} .local $y = {$x :number}",
|
||||
".match $x $y\n",
|
||||
"1\u200E 1 {{one}}",
|
||||
"* * {{other}}"],
|
||||
"exp": "one"
|
||||
},
|
||||
{
|
||||
"comment": "literal-expression = \"{\" o literal [s function] *(s attribute) o \"}\"",
|
||||
"src": "{\u200E hello \u200F}",
|
||||
"exp": "hello"
|
||||
},
|
||||
{
|
||||
"comment": "variable-expression = \"{\" o variable [s function] *(s attribute) o \"}\"",
|
||||
"src": ".local $x = {1} {{ {\u200E $x \u200F} }}",
|
||||
"exp": " 1 "
|
||||
},
|
||||
{
|
||||
"comment": "function-expression = \"{\" o function *(s attribute) o \"}\"",
|
||||
"src": "{1 \u200E :number \u200F}",
|
||||
"exp": "1"
|
||||
},
|
||||
{
|
||||
"comment": "markup = \"{\" o \"#\" identifier *(s option) *(s attribute) o [\"/\"] \"}\"",
|
||||
"src": "{\u200F #b \u200E }",
|
||||
"exp": ""
|
||||
},
|
||||
{
|
||||
"comment": "markup = \"{\" o \"/\" identifier *(s option) *(s attribute) o \"}\"",
|
||||
"src": "{\u200F /b \u200E }",
|
||||
"exp": ""
|
||||
},
|
||||
{
|
||||
"comment": "option = identifier o \"=\" o (literal / variable)",
|
||||
"src": "{1 :number minimumFractionDigits\u200F=\u200E1 }",
|
||||
"exp": "1.0"
|
||||
},
|
||||
{
|
||||
"comment": "attribute = \"@\" identifier [o \"=\" o (literal / variable)]",
|
||||
"src": "{1 :number @locale\u200F=\u200Een }",
|
||||
"exp": "1"
|
||||
},
|
||||
{
|
||||
"comment": " name... excludes U+FFFD and U+061C -- this pases as name -> [bidi] name-start *name-char",
|
||||
"src": ".local $\u061Cfoo = {1} {{ {$\u061Cfoo} }}",
|
||||
"exp": " 1 "
|
||||
},
|
||||
{
|
||||
"comment": " name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C",
|
||||
"src": ".local $foo\u061Cbar = {2} {{ }}",
|
||||
"expErrors": [{"type": "syntax-error"}]
|
||||
},
|
||||
{
|
||||
"comment": "name = [bidi] name-start *name-char [bidi]",
|
||||
"src": ".local $\u200Efoo\u200F = {3} {{{$\u200Efoo\u200F}}}",
|
||||
"exp": "3"
|
||||
},
|
||||
{
|
||||
"comment": "name = [bidi] name-start *name-char [bidi]",
|
||||
"src": ".local $foo = {4} {{{$\u200Efoo\u200F}}}",
|
||||
"exp": "4"
|
||||
},
|
||||
{
|
||||
"comment": "name = [bidi] name-start *name-char [bidi]",
|
||||
"src": ".local $\u200Efoo\u200F = {5} {{{$foo}}}",
|
||||
"exp": "5"
|
||||
},
|
||||
{
|
||||
"comment": "name = [bidi] name-start *name-char [bidi]",
|
||||
"src": ".local $foo\u200Ebar = {6} {{{$foo\u200Ebar}}}",
|
||||
"expErrors": [{"type": "syntax-error"}]
|
||||
}
|
||||
]
|
||||
}
|
2
testdata/message2/matches-whitespace.json
vendored
2
testdata/message2/matches-whitespace.json
vendored
|
@ -6,7 +6,7 @@
|
|||
},
|
||||
"tests": [
|
||||
{ "src": ".local $one = {1 :string} .local $bar = {bar :string} .match $one $bar one * {{one}} * * {{other}}",
|
||||
"exp": "one" },
|
||||
"exp": "other" },
|
||||
{ "src": ".local $foo = {foo :string} .local $bar = {bar :string} .match $foo $bar one * {{one}}* * {{other}}",
|
||||
"exp": "other" },
|
||||
{ "src": ".local $foo = {foo :string} .local $bar = {bar :string} .match $foo $bar one *{{one}} * * {{foo}}",
|
||||
|
|
21
testdata/message2/more-syntax-errors.json
vendored
Normal file
21
testdata/message2/more-syntax-errors.json
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json",
|
||||
"scenario": "Syntax errors",
|
||||
"description": "Strings that produce syntax errors when parsed.",
|
||||
"defaultTestProperties": {
|
||||
"locale": "en-US",
|
||||
"expErrors": [
|
||||
{
|
||||
"type": "syntax-error"
|
||||
}
|
||||
]
|
||||
},
|
||||
"tests": [
|
||||
{
|
||||
"src": "{\u0000/\u0000g }\u0000\u0000\u0000\u0000\u000c"
|
||||
},
|
||||
{
|
||||
"src": ".\u0000$\u0000.\u0000\u00ca$\u0000\u0000\u00f9\u00f9{\u0000{\u0000{\u0000/\u0000\u00ff$\u00ff\u00ff\u00ff\u00ff"
|
||||
}
|
||||
]
|
||||
}
|
Loading…
Add table
Reference in a new issue