mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-04 13:05:31 +00:00
Some checks failed
GHA ICU4J / icu4j-mvn-build-and-test (17) (push) Blocked by required conditions
GHA ICU4J / icu4j-mvn-build-and-test (21) (push) Blocked by required conditions
GHA ICU4J / icu4j-mvn-build-and-test (8) (push) Blocked by required conditions
GHA ICU4J / adaboost-icu4j-build-and-test (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm zzzz" "13:13 Pacific Standard Time" 3, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm zzzz" "13:13 Pacific Standard Time" 3, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm zzzz" "13:13 Pacific Standard Time" 3, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm" "13:13" 2, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm" "13:13" 2, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (sw_KE, "dddd MMM yyyy" "15 Jan 2007" 1, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (sw_KE, "dddd MMM yyyy" "15 Jan 2007" 1, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-little-endian-data-test (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests (charperf, TestIsAlpha TestIsUpper TestIsLower TestIsDigit TestIsSpace TestIsAlphaNumeric TestIsPrint TestIsControl TestToLower TestToUpper TestIsWhiteSpace) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Chinese, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Chinese, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Chinese, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-decimalformatperf (de_DE, TestICUConstruction) (push) Blocked by required conditions
GHA ICU4J / lstm-icu4j-build-and-test (push) Blocked by required conditions
GHA ICU4J / icu4j-mvn-init-cache (push) Waiting to run
GHA ICU4J / icu4j-mvn-build-and-test (11) (push) Blocked by required conditions
ICU Common / copyright-scan (push) Waiting to run
ICU Common / valid-UTF-8-and-no-BOM-check (push) Waiting to run
ICU Common / icu4j-mvn-init-cache (push) Waiting to run
ICU Common / icu4c-release-tools (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm z" "13:13 PST" 4, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm z" "13:13 PST" 4, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm z" "13:13 PST" 4, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm" "13:13" 2, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "dddd MMM yyyy" "15 Jan 2007" 1, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "dddd MMM yyyy" "15 Jan 2007" 1, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "dddd MMM yyyy" "15 Jan 2007" 1, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-mvn-init-cache (push) Waiting to run
GHA ICU Merge CI / icu4c-store-perf-libs (push) Waiting to run
GHA ICU Merge CI / icu4c-performance-tests (-f ../../icu4j/perf-tests/data/conversion/xuzhimo.txt, -e gb18030, utfperf, Roundtrip FromUnicode FromUTF8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (sw_KE, "dddd MMM yyyy" "15 Jan 2007" 1, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests (usetperf, titlecase_letter_add titlecase_letter_contains titlecase_letter_iterator unassigned_add unassigned_contains unassigned_iterator pattern1 pattern2 pattern3) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Asian, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Asian, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Asian, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese_k, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_cmn_hans, zh) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_deu_1996, de) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-unicodesetperf (UnicodeSetContains) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-unicodesetperf (UnicodeSetIterate) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFC_NFC_Text, TestNames_SerbianSH) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFC_Orig_Text, TestNames_Asian) (push) Blocked by required conditions
GHA ICU Merge CI / Copy perf data to remote repo for visualization (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFC_Orig_Text, TestNames_Chinese) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFC_Orig_Text, TestNames_SerbianSH) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_NFC_Text, TestNames_Asian) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_NFC_Text, TestNames_Chinese) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, arabic, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, english, US-ASCII) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, english, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, french, UTF-16BE) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, french, UTF-16LE) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, french, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, french, csisolatin1) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese_h, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese_h, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese_h, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese_k, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Japanese_k, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Korean, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Korean, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Korean, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Latin, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Latin, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Latin, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Russian, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Russian, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Russian, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_SerbianSH, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_SerbianSH, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_SerbianSH, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_SerbianSR, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_SerbianSR, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_SerbianSR, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Simplified_Chinese, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Simplified_Chinese, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Thai, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Thai, collperf, TestIcu_KeyGen_null TestIcu_qsort_strcoll_null TestIcu_qsort_usekey TestIcu_BinarySearch_strcoll_null TestIcu_BinarySearch_usekey) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (TestNames_Thai, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (th18057, -l -u, ustrperf, TestCtor TestCtor1 TestCtor2 TestCtor3 TestAssign TestAssign1 TestAssign2 TestGetch TestCatenate TestScan TestScan1 TestScan2) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (th18057, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (thesis, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-performance-tests-with-files (vfear11a, normperf, TestICU_NFC_NFD_Text TestICU_NFC_NFC_Text TestICU_NFC_Orig_Text TestICU_NFD_NFD_Text TestICU_NFD_NFC_Text TestICU_NFD_Orig_Text) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_eng, en) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_fra, fr) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_jpn, ja) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_rus, ru) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-strsrchperf (udhr_tha, th) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-unicodesetperf (UnicodeSetAdd) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-ucharacterperf (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-decimalformatperf (de_DE, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-decimalformatperf (de_DE, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-decimalformatperf (en_US, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-decimalformatperf (en_US, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-decimalformatperf (en_US, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFC_NFC_Text, TestNames_Asian) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFC_NFC_Text, TestNames_Chinese) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_NFC_Text, TestNames_SerbianSH) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_NFD_Text, TestNames_Asian) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_NFD_Text, TestNames_Chinese) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_NFD_Text, TestNames_SerbianSH) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_Orig_Text, TestNames_Asian) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_Orig_Text, TestNames_Chinese) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-normperf (-l, TestICU_NFD_Orig_Text, TestNames_SerbianSH) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, arabic, csisolatinarabic) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, korean, csiso2022kr) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, s-chinese, EUC-CN) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, s-chinese, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, hebrew, csisolatinhebrew) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, hindi, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, japanese, EUC-JP) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, korean, csiso2022kr) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, s-chinese, EUC-CN) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-windows-msvc-postmerge (x64, Debug, x64) (push) Waiting to run
GHA ICU Merge CI / icu4c-windows-msvc-postmerge (x64, Release, x64) (push) Waiting to run
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, greek, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, greek, csisolatingreek) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, hebrew, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, hebrew, csisolatinhebrew) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, hindi, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, japanese, EUC-JP) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, japanese, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, japanese, csiso2022jp) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetDecoderICU, korean, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, arabic, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, arabic, csisolatinarabic) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, english, US-ASCII) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, english, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, french, UTF-16BE) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, french, UTF-16LE) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, french, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, french, csisolatin1) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, greek, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, greek, csisolatingreek) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, hebrew, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, japanese, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, japanese, csiso2022jp) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, korean, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-converterperf (TestCharsetEncoderICU, s-chinese, UTF-8) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm Z" "13:13 -0800" 5, TestICUConstruction) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm Z" "13:13 -0800" 5, TestICUFormat) (push) Blocked by required conditions
GHA ICU Merge CI / icu4j-dateformatperf (en_US, "HH:mm Z" "13:13 -0800" 5, TestICUParse) (push) Blocked by required conditions
GHA ICU Merge CI / icu4c-windows-msvc-postmerge (x86, Debug, Win32) (push) Waiting to run
GHA ICU Merge CI / icu4c-windows-msvc-postmerge (x86, Release, Win32) (push) Waiting to run
GHA ICU Merge CI / icu4c-windows-cygwin-gcc (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
GHA CI Valgrind / clang-valgrind-intltest (spoof) (push) Has been cancelled
GHA ICU4C / windows-msvc (/p:Configuration=Debug /p:Platform=Win32, x86 Debug) (push) Has been cancelled
GHA ICU4C / windows-msvc (/p:Configuration=Debug /p:Platform=x64, x64 Debug) (push) Has been cancelled
GHA ICU4C / windows-msvc (/p:Configuration=Release /p:Platform=ARM, arm Release) (push) Has been cancelled
GHA ICU4C / windows-msvc (/p:LanguageStandard=stdcpplatest /p:Configuration=Release /p:Platform=x64, x64 Release) (push) Has been cancelled
GHA ICU4C / windows-msvc (/p:_HAS_EXCEPTIONS=0 /p:Configuration=Release /p:Platform=x64, x64 Release) (push) Has been cancelled
CIFuzz / Fuzzing (address) (push) Has been cancelled
CIFuzz / Fuzzing (undefined) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (translit) (push) Has been cancelled
GHA ICU4C / icu4c-docs-build (push) Has been cancelled
GHA ICU4C / clang18-cpp20-warning-as-errors (-std=c++20) (push) Has been cancelled
GHA ICU4C / windows-msys2-gcc-x86_64 (push) Has been cancelled
GHA ICU4C / run-with-stubdata (push) Has been cancelled
GHA ICU4C / u-charset-is-utf8-test (push) Has been cancelled
GHA ICU4C / u-override-cxx-allocation-is-0-test (push) Has been cancelled
GHA ICU4C / lstm-test (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_BREAK_ITERATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_IDNA=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_LEGACY_CONVERSION=1 -DUCONFIG_NO_NORMALIZATION=1 -DUCONFIG_NO_BREAK_ITERATION=1 -DUCONFIG_NO_IDNA=1 -DUCONFIG_NO_COLLATION=1 -DUCONFIG_NO_FORMATTING=1 -DUCONFIG_NO_MF2=1 -DUCONFIG_NO_TRANSLITERATION=1 -DUCONFIG_NO_REG… (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_LEGACY_CONVERSION=1) (push) Has been cancelled
GHA ICU4C / icu4c-without-collation-rule-strings (push) Has been cancelled
GHA ICU4C / icu4c-icuexportdata (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (rbbi) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-test (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (bidi) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (collator) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (convert) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (csdet) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (format) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (rbnfrt) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (regex) (push) Has been cancelled
GHA ICU4C / gcc-debug-build-and-test (push) Has been cancelled
GHA ICU4C / clang-asan (push) Has been cancelled
GHA ICU4C / gcc11-cpp20 (push) Has been cancelled
GHA ICU4C / clang-release-build-and-test (push) Has been cancelled
GHA ICU4C / clang-options-build-and-test (--enable-static --disable-shared) (push) Has been cancelled
GHA ICU4C / clang-options-build-and-test (--enable-static) (push) Has been cancelled
GHA ICU4C / gcc-10-stdlib17 (push) Has been cancelled
GHA ICU4C / clang-lsan (push) Has been cancelled
GHA ICU4C / clang-ubsan (push) Has been cancelled
GHA ICU4C / clang-cfi (push) Has been cancelled
GHA ICU4C / clang-tsan (push) Has been cancelled
GHA ICU4C / clang-datafilter (push) Has been cancelled
GHA ICU4C / clang-cpp17 (push) Has been cancelled
GHA ICU4C / clang-lang-with-extn-tags (push) Has been cancelled
GHA ICU4C / clang18-cpp20-warning-as-errors (-std=c++20 -stdlib=libc++) (push) Has been cancelled
GHA ICU4C / macos-clang (push) Has been cancelled
GHA ICU4C / windows-msvc-datafilter (push) Has been cancelled
GHA ICU4C / windows-msvc-dist-release (arm64, ARM64, WinARM64) (push) Has been cancelled
GHA ICU4C / windows-msvc-dist-release (x64, x64, Win64) (push) Has been cancelled
GHA ICU4C / windows-msvc-dist-release (x86, Win32, Win32) (push) Has been cancelled
GHA ICU4C / adaboost-test (push) Has been cancelled
GHA ICU4C / testmap (push) Has been cancelled
GHA ICU4C / copyright-scan (push) Has been cancelled
GHA ICU4C / internal-header-compilation (push) Has been cancelled
GHA ICU4C / valid-UTF-8-and-no-BOM-check (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_COLLATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_FILTERED_BREAK_ITERATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_FORMATTING=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_IDNA=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_LEGACY_CONVERSION=1 -DUCONFIG_NO_NORMALIZATION=1 -DUCONFIG_NO_BREAK_ITERATION=1 -DUCONFIG_NO_IDNA=1 -DUCONFIG_NO_COLLATION=1 -DUCONFIG_NO_FORMATTING=1 -DUCONFIG_NO_MF2=1 -DUCONFIG_NO_TRANSLITERATION=1 -DUCONFIG_NO_REGUL… (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_LEGACY_CONVERSION=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_MF2=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_NORMALIZATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_REGULAR_EXPRESSIONS=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_SERVICE=1) (push) Has been cancelled
GHA ICU4C / uconfig-unit-tests (-DUCONFIG_NO_TRANSLITERATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_BREAK_ITERATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_COLLATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_FILTERED_BREAK_ITERATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_FORMATTING=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_MF2=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_NORMALIZATION=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_REGULAR_EXPRESSIONS=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_SERVICE=1) (push) Has been cancelled
GHA ICU4C / uconfig-header-tests (-DUCONFIG_NO_TRANSLITERATION=1) (push) Has been cancelled
GHA ICU4C / unicode-update-tools (push) Has been cancelled
GHA ICU4C / icu4c-test-samples (push) Has been cancelled
GHA ICU4C / icu4c-uconfig-no-conversion (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (icuserv) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (idna) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (normalize) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (rbnf) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (rbnfp) (push) Has been cancelled
GHA CI Valgrind / clang-valgrind-intltest (utility) (push) Has been cancelled
Update spec tests to current version from message-format-wg - Update parser for changed name-start grammar rule - Validate number literals in :number implementation (since parser no longer does this) - Disallow `:number`/`:integer` select option set from variable See https://github.com/unicode-org/message-format-wg/pull/1016 As part of this, un-skip tests where the `bad-option` error is expected, and implement validating digit size options (pending PR https://github.com/unicode-org/icu/pull/2973 is intended to do this more fully)
2172 lines
65 KiB
C++
2172 lines
65 KiB
C++
// © 2024 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_NORMALIZATION
|
|
|
|
#if !UCONFIG_NO_FORMATTING
|
|
|
|
#if !UCONFIG_NO_MF2
|
|
|
|
#include "unicode/uniset.h"
|
|
#include "messageformat2_errors.h"
|
|
#include "messageformat2_macros.h"
|
|
#include "messageformat2_parser.h"
|
|
#include "ucln_in.h"
|
|
#include "umutex.h"
|
|
#include "uvector.h" // U_ASSERT
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
namespace message2 {
|
|
|
|
using namespace pluralimpl;
|
|
|
|
using namespace data_model;
|
|
|
|
/*
  `ERROR(errorCode)` reports a syntax error at the current parse position.

  Unless `errors` already holds a syntax error, it calls
  `setParseError(parseError, index)` and then
  `errors.addSyntaxError(errorCode)`. Only the first syntax error is
  recorded; it does not alter control flow.

  Implemented as `ERROR_AT` with `index` as the position, so it expands to a
  bare `if` statement that picks up `errors`, `parseError` and `index` from
  the expansion site.
*/
#define ERROR(errorCode) ERROR_AT(errorCode, index)
|
|
|
|
// `ERROR_AT(errorCode, i)` reports a syntax error at position `i`.
// Unless `errors` already holds a syntax error, it calls
// `setParseError(parseError, i)` and then `errors.addSyntaxError(errorCode)`.
// It does not alter control flow. The expansion is a bare `if` statement
// (not wrapped in `do { } while (0)`), and it relies on `errors` and
// `parseError` being visible where the macro is used.
#define ERROR_AT(errorCode, i)            \
    if (!errors.hasSyntaxError()) {       \
        setParseError(parseError, i);     \
        errors.addSyntaxError(errorCode); \
    }
|
|
|
|
// Increments the line number and updates the "characters seen before
|
|
// current line" count in `parseError`, iff `peek()` is a newline
|
|
void Parser::maybeAdvanceLine() {
|
|
if (peek() == LF) {
|
|
parseError.line++;
|
|
// add 1 to index to get the number of characters seen so far
|
|
// (including the newline)
|
|
parseError.lengthBeforeCurrentLine = index + 1;
|
|
}
|
|
}
|
|
|
|
/*
  `CHECK_BOUNDS(errorCode)`: if `inBounds()` is false, reports a syntax error
  through `ERROR(errorCode)` and returns from the enclosing function.

  The expansion contains a bare `return;`, so the macro can only be used
  inside functions that return `void`.
*/
#define CHECK_BOUNDS(errorCode) \
    if (!inBounds()) {          \
        ERROR(errorCode);       \
        return;                 \
    }
|
|
// `CHECK_BOUNDS_1(errorCode)`: if `inBounds(1)` is false, reports a syntax
// error at position `index + 1` through `ERROR_AT` and returns from the
// enclosing function. The bare `return;` restricts use to functions that
// return `void`.
#define CHECK_BOUNDS_1(errorCode)       \
    if (!inBounds(1)) {                 \
        ERROR_AT(errorCode, index + 1); \
        return;                         \
    }
|
|
|
|
// -------------------------------------
|
|
// Helper functions
|
|
|
|
// Copies a parse-context buffer from `in` to `out`, one code unit at a time.
// Copying stops once a NUL has been copied, or after U_PARSE_CONTEXT_LEN
// code units, whichever comes first.
static void copyContext(const UChar in[U_PARSE_CONTEXT_LEN], UChar out[U_PARSE_CONTEXT_LEN]) {
    int32_t pos = 0;
    while (pos < U_PARSE_CONTEXT_LEN) {
        const UChar unit = in[pos];
        out[pos] = unit;
        if (unit == '\0') {
            return;
        }
        ++pos;
    }
}
|
|
|
|
// Fills in `parseError` (the public UParseError) from `messageParseError`:
// line, offset, and both context buffers are copied across.
/* static */ void Parser::translateParseError(const MessageParseError &messageParseError, UParseError &parseError) {
    // The four assignments are independent of one another
    copyContext(messageParseError.preContext, parseError.preContext);
    copyContext(messageParseError.postContext, parseError.postContext);
    parseError.offset = messageParseError.offset;
    parseError.line = messageParseError.line;
}
|
|
|
|
/* static */ void Parser::setParseError(MessageParseError &parseError, uint32_t index) {
|
|
// Translate absolute to relative offset
|
|
parseError.offset = index // Start with total number of characters seen
|
|
- parseError.lengthBeforeCurrentLine; // Subtract all characters before the current line
|
|
// TODO: Fill this in with actual pre and post-context
|
|
parseError.preContext[0] = 0;
|
|
parseError.postContext[0] = 0;
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Initialization of UnicodeSets
|
|
|
|
namespace unisets {
|
|
|
|
UnicodeSet* gUnicodeSets[unisets::UNISETS_KEY_COUNT] = {};
|
|
|
|
inline UnicodeSet* getImpl(Key key) {
|
|
return gUnicodeSets[key];
|
|
}
|
|
|
|
icu::UInitOnce gMF2ParseUniSetsInitOnce {};
|
|
}
|
|
|
|
// Allocates and returns the frozen set of content characters.
// Returns nullptr if `status` is already a failure, or (setting
// U_MEMORY_ALLOCATION_ERROR) if the allocation fails.
UnicodeSet* initContentChars(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Starts with U+0001..U+0008: NULL, HTAB and LF are omitted
    UnicodeSet* const set = new UnicodeSet(0x0001, 0x0008);
    if (set == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    // Remaining inclusive [first, last] ranges; the comment on each entry
    // names what is left out immediately after it
    static constexpr UChar32 kRanges[][2] = {
        {0x000B, 0x000C},   // Omit CR
        {0x000E, 0x001F},   // Omit SP
        {0x0021, 0x002D},   // Omit '.'
        {0x002F, 0x003F},   // Omit '@'
        {0x0041, 0x005B},   // Omit '\'
        {0x005D, 0x007A},   // Omit { | }
        {0x007E, 0x2FFF},   // Omit IDEOGRAPHIC_SPACE
        {0x3001, 0x10FFFF}, // Allowing surrogates is intentional
    };
    for (const auto& range : kRanges) {
        set->add(range[0], range[1]);
    }

    set->freeze();
    return set;
}
|
|
|
|
// Allocates and returns the frozen set of whitespace characters:
// SPACE, HTAB, CR, LF and IDEOGRAPHIC_SPACE.
// Returns nullptr if `status` is already a failure, or (setting
// U_MEMORY_ALLOCATION_ERROR) if the allocation fails.
UnicodeSet* initWhitespace(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const set = new UnicodeSet();
    if (set == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    const UChar32 members[] = {SPACE, HTAB, CR, LF, IDEOGRAPHIC_SPACE};
    for (const UChar32 member : members) {
        set->add(member);
    }

    set->freeze();
    return set;
}
|
|
|
|
// Allocates and returns the frozen set of bidi control characters:
// U+061C, U+200E..U+200F and U+2066..U+2069.
//
// Returns nullptr, with a failure code in `status`, if `status` already
// denotes a failure on entry, if the allocation fails, or if constructing
// the set from the pattern fails. On success the returned set was
// allocated with `new` and must eventually be deleted by the caller.
UnicodeSet* initBidiControls(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* result = new UnicodeSet(UnicodeString("[\\u061C]"), status);
    if (result == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(status)) {
        // The constructor reported a failure: free the set rather than leak it
        delete result;
        return nullptr;
    }
    result->add(0x200E, 0x200F);
    result->add(0x2066, 0x2069);
    result->freeze();
    return result;
}
|
|
|
|
// Allocates and returns the frozen set built from the pattern `[:letter:]`.
//
// Returns nullptr, with a failure code in `status`, if `status` already
// denotes a failure on entry, if the allocation fails, or if constructing
// the set from the pattern fails. On success the returned set was
// allocated with `new` and must eventually be deleted by the caller.
UnicodeSet* initAlpha(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* result = new UnicodeSet(UnicodeString("[:letter:]"), status);
    if (result == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(status)) {
        // The constructor reported a failure: free the set rather than leak it
        delete result;
        return nullptr;
    }
    result->freeze();
    return result;
}
|
|
|
|
// Allocates and returns the frozen set built from the pattern `[:number:]`.
//
// Returns nullptr, with a failure code in `status`, if `status` already
// denotes a failure on entry, if the allocation fails, or if constructing
// the set from the pattern fails. On success the returned set was
// allocated with `new` and must eventually be deleted by the caller.
UnicodeSet* initDigits(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* result = new UnicodeSet(UnicodeString("[:number:]"), status);
    if (result == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(status)) {
        // The constructor reported a failure: free the set rather than leak it
        delete result;
        return nullptr;
    }
    result->freeze();
    return result;
}
|
|
|
|
// Allocates and returns the frozen set of characters that may start a name.
// As a side effect, builds the ALPHA set and stores it in the shared table.
// Returns nullptr if `status` is (or becomes) a failure.
UnicodeSet* initNameStartChars(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const alpha = initAlpha(status);
    unisets::gUnicodeSets[unisets::ALPHA] = alpha;
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const set = new UnicodeSet();
    if (set == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    set->addAll(*alpha);
    set->add(0x002B);
    set->add(0x005F);

    // Inclusive [first, last] ranges within the BMP
    static constexpr UChar32 kBmpRanges[][2] = {
        {0x00A1, 0x061B},
        {0x061D, 0x167F},
        {0x1681, 0x1FFF},
        {0x200B, 0x200D},
        {0x2010, 0x2027},
        {0x2030, 0x205E},
        {0x2060, 0x2065},
        {0x206A, 0x2FFF},
        {0x3001, 0xD7FF},
        {0xE000, 0xFDCF},
        {0xFDF0, 0xFFFD},
    };
    for (const auto& range : kBmpRanges) {
        set->add(range[0], range[1]);
    }

    // Planes 1 through 16: 0x10000..0x1FFFD, 0x20000..0x2FFFD, ...,
    // 0x100000..0x10FFFD (the last two code points of each plane are left out)
    for (UChar32 planeStart = 0x10000; planeStart <= 0x100000; planeStart += 0x10000) {
        set->add(planeStart, planeStart + 0xFFFD);
    }

    set->freeze();
    return set;
}
|
|
|
|
// Allocates and returns the frozen set of name characters: the name-start
// set, the digit set, HYPHEN and PERIOD.
// As a side effect, builds the NAME_START and DIGIT sets and stores them in
// the shared table. Returns nullptr if `status` is (or becomes) a failure.
UnicodeSet* initNameChars(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const nameStartSet = initNameStartChars(status);
    unisets::gUnicodeSets[unisets::NAME_START] = nameStartSet;
    UnicodeSet* const digitSet = initDigits(status);
    unisets::gUnicodeSets[unisets::DIGIT] = digitSet;
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const set = new UnicodeSet();
    if (set == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    set->addAll(*nameStartSet);
    set->addAll(*digitSet);
    set->add(HYPHEN);
    set->add(PERIOD);
    set->freeze();
    return set;
}
|
|
|
|
// Allocates and returns the frozen set of text characters: the content set,
// the whitespace set, PERIOD, AT and PIPE.
// As a side effect, builds the CONTENT and WHITESPACE sets and stores them
// in the shared table. Returns nullptr if `status` is (or becomes) a failure.
UnicodeSet* initTextChars(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const contentSet = initContentChars(status);
    unisets::gUnicodeSets[unisets::CONTENT] = contentSet;
    UnicodeSet* const whitespaceSet = initWhitespace(status);
    unisets::gUnicodeSets[unisets::WHITESPACE] = whitespaceSet;
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const set = new UnicodeSet();
    if (set == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    set->addAll(*contentSet);
    set->addAll(*whitespaceSet);
    set->add(PERIOD);
    set->add(AT);
    set->add(PIPE);
    set->freeze();
    return set;
}
|
|
|
|
// Allocates and returns the frozen set of characters allowed inside a
// quoted literal: the content set, the whitespace set, PERIOD, AT,
// LEFT_CURLY_BRACE and RIGHT_CURLY_BRACE.
//
// As a side effect, builds the TEXT set (and, through `initTextChars()`,
// the CONTENT and WHITESPACE sets) and stores it in the shared table.
//
// Returns nullptr, with a failure code in `status`, if `status` is already
// a failure, if a dependency could not be built, or if allocation fails.
UnicodeSet* initQuotedChars(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    unisets::gUnicodeSets[unisets::TEXT] = initTextChars(status);
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // content and whitespace were initialized by `initTextChars()`.
    // Validate both before allocating the result, so that no error path
    // below can leak a freshly allocated set.
    UnicodeSet* content = unisets::getImpl(unisets::CONTENT);
    UnicodeSet* whitespace = unisets::getImpl(unisets::WHITESPACE);
    if (content == nullptr || whitespace == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    result->addAll(*content);
    result->addAll(*whitespace);
    result->add(PERIOD);
    result->add(AT);
    result->add(LEFT_CURLY_BRACE);
    result->add(RIGHT_CURLY_BRACE);
    result->freeze();
    return result;
}
|
|
|
|
// Allocates and returns the frozen set of characters that may follow a
// backslash: PIPE, BACKSLASH, LEFT_CURLY_BRACE and RIGHT_CURLY_BRACE.
// Returns nullptr if `status` is already a failure, or (setting
// U_MEMORY_ALLOCATION_ERROR) if the allocation fails.
UnicodeSet* initEscapableChars(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return nullptr;
    }

    UnicodeSet* const set = new UnicodeSet();
    if (set == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    const UChar32 members[] = {PIPE, BACKSLASH, LEFT_CURLY_BRACE, RIGHT_CURLY_BRACE};
    for (const UChar32 member : members) {
        set->add(member);
    }

    set->freeze();
    return set;
}
|
|
|
|
namespace unisets {
|
|
|
|
UBool U_CALLCONV cleanupMF2ParseUniSets() {
|
|
for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) {
|
|
delete gUnicodeSets[i];
|
|
gUnicodeSets[i] = nullptr;
|
|
}
|
|
gMF2ParseUniSetsInitOnce.reset();
|
|
return true;
|
|
}
|
|
|
|
void U_CALLCONV initMF2ParseUniSets(UErrorCode& status) {
|
|
ucln_i18n_registerCleanup(UCLN_I18N_MF2_UNISETS, cleanupMF2ParseUniSets);
|
|
/*
|
|
Each of the init functions initializes the UnicodeSets
|
|
that it depends on.
|
|
|
|
initBidiControls (no dependencies)
|
|
|
|
initEscapableChars (no dependencies)
|
|
|
|
initNameChars depends on
|
|
initDigits
|
|
initNameStartChars depends on
|
|
initAlpha
|
|
|
|
initQuotedChars depends on
|
|
initTextChars depends on
|
|
initContentChars
|
|
initWhitespace
|
|
*/
|
|
gUnicodeSets[unisets::BIDI] = initBidiControls(status);
|
|
gUnicodeSets[unisets::NAME_CHAR] = initNameChars(status);
|
|
gUnicodeSets[unisets::QUOTED] = initQuotedChars(status);
|
|
gUnicodeSets[unisets::ESCAPABLE] = initEscapableChars(status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
cleanupMF2ParseUniSets();
|
|
}
|
|
}
|
|
|
|
const UnicodeSet* get(Key key, UErrorCode& status) {
|
|
umtx_initOnce(gMF2ParseUniSetsInitOnce, &initMF2ParseUniSets, status);
|
|
if (U_FAILURE(status)) {
|
|
return nullptr;
|
|
}
|
|
UnicodeSet* result = getImpl(key);
|
|
if (result == nullptr) {
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Predicates
|
|
|
|
/*
|
|
The following helper predicates should exactly match nonterminals in the MessageFormat 2 grammar:
|
|
|
|
`isContentChar()` : `content-char`
|
|
`isTextChar()` : `text-char`
|
|
`isAlpha()` : `ALPHA`
|
|
`isDigit()` : `DIGIT`
|
|
`isNameStart()` : `name-start`
|
|
`isNameChar()` : `name-char`
|
|
`isUnquotedStart()` : `unquoted-start`
|
|
`isQuotedChar()` : `quoted-char`
|
|
`isWhitespace()` : `s`
|
|
*/
|
|
|
|
// `content-char`: true iff `c` is a member of `contentChars`
bool Parser::isContentChar(UChar32 c) const {
    const bool isMember = contentChars->contains(c);
    return isMember;
}
|
|
|
|
// See `bidi` in the MF2 grammar
|
|
bool Parser::isBidiControl(UChar32 c) const {
|
|
return bidiControlChars->contains(c);
|
|
}
|
|
|
|
// See `ws` in the MessageFormat 2 grammar
|
|
bool Parser::isWhitespace(UChar32 c) const {
|
|
return whitespaceChars->contains(c);
|
|
}
|
|
|
|
// `text-char`: true iff `c` is a member of `textChars`
bool Parser::isTextChar(UChar32 c) const {
    const bool isMember = textChars->contains(c);
    return isMember;
}
|
|
|
|
// `ALPHA`: true iff `c` is a member of `alphaChars`
bool Parser::isAlpha(UChar32 c) const {
    const bool isMember = alphaChars->contains(c);
    return isMember;
}
|
|
|
|
// `DIGIT`: true iff `c` is a member of `digitChars`
bool Parser::isDigit(UChar32 c) const {
    const bool isMember = digitChars->contains(c);
    return isMember;
}
|
|
|
|
// `name-start`: true iff `c` is a member of `nameStartChars`
bool Parser::isNameStart(UChar32 c) const {
    const bool isMember = nameStartChars->contains(c);
    return isMember;
}
|
|
|
|
// `name-char`: true iff `c` is a member of `nameChars`
bool Parser::isNameChar(UChar32 c) const {
    const bool isMember = nameChars->contains(c);
    return isMember;
}
|
|
|
|
// `unquoted-start`: any name character can start an unquoted literal
bool Parser::isUnquotedStart(UChar32 c) const {
    const bool startsUnquoted = isNameChar(c);
    return startsUnquoted;
}
|
|
|
|
// `quoted-char`: true iff `c` is a member of `quotedChars`
bool Parser::isQuotedChar(UChar32 c) const {
    const bool isMember = quotedChars->contains(c);
    return isMember;
}
|
|
|
|
// True iff `c` is a member of `escapableChars`
bool Parser::isEscapableChar(UChar32 c) const {
    const bool isMember = escapableChars->contains(c);
    return isMember;
}
|
|
|
|
// Returns true iff `c` can begin a `function` nonterminal
|
|
static bool isFunctionStart(UChar32 c) {
|
|
switch (c) {
|
|
case COLON: {
|
|
return true;
|
|
}
|
|
default: {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns true iff `c` can begin an `annotation` nonterminal
|
|
static bool isAnnotationStart(UChar32 c) {
|
|
return isFunctionStart(c);
|
|
}
|
|
|
|
// Returns true iff `c` can begin a `literal` nonterminal
|
|
bool Parser::isLiteralStart(UChar32 c) const {
|
|
return (c == PIPE || isNameStart(c) || c == HYPHEN || isDigit(c));
|
|
}
|
|
|
|
// Returns true iff `c` can begin a `key` nonterminal
|
|
bool Parser::isKeyStart(UChar32 c) const {
|
|
return (c == ASTERISK || isLiteralStart(c));
|
|
}
|
|
|
|
bool Parser::isDeclarationStart() {
|
|
return (peek() == ID_LOCAL[0]
|
|
&& inBounds(1)
|
|
&& peek(1) == ID_LOCAL[1])
|
|
|| (peek() == ID_INPUT[0]
|
|
&& inBounds(1)
|
|
&& peek(1) == ID_INPUT[1]);
|
|
}
|
|
|
|
// -------------------------------------
|
|
// Parsing functions
|
|
|
|
|
|
/*
|
|
TODO: Since handling the whitespace ambiguities needs to be repeated
|
|
in several different places and is hard to factor out,
|
|
it probably would be better to replace the parser with a lexer + parser
|
|
to separate tokenizing from parsing, which would simplify the code significantly.
|
|
This has the disadvantage that there is no token grammar for MessageFormat,
|
|
so one would have to be invented that isn't a component of the spec.
|
|
*/
|
|
|
|
/*
|
|
This is a recursive-descent scannerless parser that,
|
|
with a few exceptions, uses 1 character of lookahead.
|
|
|
|
This may not be an exhaustive list, as the additions of attributes and reserved
|
|
statements introduced several new ambiguities.
|
|
|
|
All but three of the exceptions involve ambiguities about the meaning of whitespace.
|
|
One ambiguity not involving whitespace is:
|
|
identifier -> namespace ":" name
|
|
vs.
|
|
identifier -> name
|
|
|
|
`namespace` and `name` can't be distinguished without arbitrary lookahead.
|
|
(For how this is handled, see parseIdentifier())
|
|
|
|
The second ambiguity not involving whitespace is:
|
|
complex-message -> *(declaration[s]) complex-body
|
|
-> declaration *(declaration[s]) complex-body
|
|
-> declaration complex-body
|
|
-> reserved-statement complex-body
|
|
-> .foo {$x} .match // ...
|
|
When processing the '.', arbitrary lookahead is required to distinguish the
|
|
arbitrary-length unsupported keyword from `.match`.
|
|
(For how this is handled, see parseDeclarations()).
|
|
|
|
The third ambiguity not involving whitespace is:
|
|
complex-message -> *(declaration [s]) complex-body
|
|
-> reserved-statement *(declaration [s]) complex-body
|
|
-> reserved-statement complex-body
|
|
-> reserved-statement quotedPattern
|
|
-> reserved-keyword [s reserved-body] 1*([s] expression) quoted-pattern
|
|
-> reserved-keyword expression quoted-pattern
|
|
Example: .foo {1} {{1}}
|
|
|
|
Without lookahead, the opening '{' of the quoted pattern can't be distinguished
|
|
from the opening '{' of another expression in the unsupported statement.
|
|
(Though this only requires 1 character of lookahead.)
|
|
|
|
Otherwise:
|
|
|
|
There are at least seven ambiguities in the grammar that can't be resolved with finite
|
|
lookahead (since whitespace sequences can be arbitrarily long). They are resolved
|
|
with a form of backtracking (early exit). No state needs to be saved/restored
|
|
since whitespace doesn't affect the shape of the resulting parse tree, so it's
|
|
not true backtracking.
|
|
|
|
In addition, the grammar has been refactored
|
|
in a semantics-preserving way in some cases to make the code easier to structure.
|
|
|
|
First: variant = when 1*(s key) [s] pattern
|
|
Example: when k {a}
|
|
When reading the first space after 'k', it's ambiguous whether it's the
|
|
required space before another key, or the optional space before `pattern`.
|
|
(See comments in parseNonEmptyKeys())
|
|
|
|
Second: expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
annotation = (function *(s option)) / reserved
|
|
Example: {:f }
|
|
When reading the first space after 'f', it's ambiguous whether it's the
|
|
required space before an option, or the optional trailing space after an options list
|
|
(in this case, the options list is empty).
|
|
(See comments in parseOptions() -- handling this case also meant it was easier to base
|
|
the code on a slightly refactored grammar, which should be semantically equivalent.)
|
|
|
|
Third: expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
annotation = (function *(s option)) / reserved
|
|
Example: {@a }
|
|
Similar to the previous case; see comments in parseReserved()
|
|
|
|
Fourth: expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
Example: {|foo| }
|
|
When reading the first space after the '|', it's ambiguous whether it's the required
|
|
space before an annotation, or the optional trailing space before the '}'.
|
|
(See comments in parseLiteralOrVariableWithAnnotation(); handling this case relies on
|
|
the same grammar refactoring as the second exception.)
|
|
|
|
Most functions match a non-terminal in the grammar, except as explained
|
|
in comments.
|
|
|
|
Fifth: matcher = match-statement 1*([s] variant)
|
|
-> match 1 *([s] selector) 1*([s] variant)
|
|
Example: match {42} * {{_}}
|
|
When reading the space after the first '}', it's unclear whether
|
|
it's the optional space before another selector, or the optional space
|
|
before a variant.
|
|
|
|
Sixth: annotation-expression = "{" [s] annotation *(s attribute) [s] "}"
|
|
-> "{" [s] function *(s attribute) [s] "}"
|
|
-> "{" [s] ":" identifier *(s option) *(s attribute) [s] "}"
|
|
-> "{" [s] ":" identifier s attribute *(s attribute) [s] "}"
|
|
|
|
Example: {:func @foo}
|
|
(Note: the same ambiguity is present with variable-expression and literal-expression)
|
|
|
|
Seventh:
|
|
|
|
|
|
When parsing the space, it's unclear whether it's the optional space before an
|
|
option, or the optional space before an attribute.
|
|
|
|
Unless otherwise noted in a comment, all helper functions that take
|
|
a `source` string, an `index` unsigned int, and an `errorCode` `UErrorCode`
|
|
have the precondition:
|
|
`index` < `len()`
|
|
and the postcondition:
|
|
`U_FAILURE(errorCode)` || `index` < `len()`
|
|
*/
|
|
|
|
/*
|
|
No pre, no post.
|
|
A message may end with whitespace, so `index` may equal `len()` on exit.
|
|
*/
|
|
void Parser::parseRequiredWS(UErrorCode& errorCode) {
|
|
bool sawWhitespace = false;
|
|
|
|
// The loop exits either when we consume all the input,
|
|
// or when we see a non-whitespace character.
|
|
while (true) {
|
|
// Check if all input has been consumed
|
|
if (!inBounds()) {
|
|
// If whitespace isn't required -- or if we saw it already --
|
|
// then the caller is responsible for checking this case and
|
|
// setting an error if necessary.
|
|
if (sawWhitespace) {
|
|
// Not an error.
|
|
return;
|
|
}
|
|
// Otherwise, whitespace is required; the end of the input has
|
|
// been reached without whitespace. This is an error.
|
|
ERROR(errorCode);
|
|
return;
|
|
}
|
|
|
|
// Input remains; process the next character if it's whitespace,
|
|
// exit the loop otherwise
|
|
if (isWhitespace(peek())) {
|
|
sawWhitespace = true;
|
|
// Increment line number in parse error if we consume a newline
|
|
maybeAdvanceLine();
|
|
next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!sawWhitespace) {
|
|
ERROR(errorCode);
|
|
}
|
|
}
|
|
|
|
void Parser::parseOptionalBidi() {
|
|
while (true) {
|
|
if (!inBounds()) {
|
|
return;
|
|
}
|
|
if (isBidiControl(peek())) {
|
|
next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
    No pre, no post, because a message may end with whitespace
    Matches `s` in the MF2 grammar

    Skips optional bidi controls, then requires at least one whitespace
    character (signaling a syntax error otherwise), then skips any further
    whitespace and bidi controls. A single SPACE is appended to
    `normalizedInput` regardless of how much was consumed or whether an
    error was signaled.
*/
void Parser::parseRequiredWhitespace(UErrorCode& errorCode) {
    parseOptionalBidi();
    parseRequiredWS(errorCode);
    parseOptionalWhitespace();
    normalizedInput += SPACE;
}
|
|
|
|
/*
|
|
No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
|
|
*/
|
|
void Parser::parseOptionalWhitespace() {
|
|
while (true) {
|
|
if (!inBounds()) {
|
|
return;
|
|
}
|
|
auto cp = peek();
|
|
if (isWhitespace(cp) || isBidiControl(cp)) {
|
|
maybeAdvanceLine();
|
|
next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Consumes a single character, signaling an error if `peek()` != `c`
|
|
// No postcondition -- a message can end with a '}' token
|
|
void Parser::parseToken(UChar32 c, UErrorCode& errorCode) {
|
|
CHECK_BOUNDS(errorCode);
|
|
|
|
if (peek() == c) {
|
|
next();
|
|
normalizedInput += c;
|
|
return;
|
|
}
|
|
// Next character didn't match -- error out
|
|
ERROR(errorCode);
|
|
}
|
|
|
|
/*
|
|
Consumes a fixed-length token, signaling an error if the token isn't a prefix of
|
|
the string beginning at `peek()`
|
|
No postcondition -- a message can end with a '}' token
|
|
*/
|
|
void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode) {
|
|
U_ASSERT(inBounds());
|
|
|
|
int32_t tokenPos = 0;
|
|
while (tokenPos < static_cast<int32_t>(token.length())) {
|
|
if (peek() != token[tokenPos]) {
|
|
ERROR(errorCode);
|
|
return;
|
|
}
|
|
normalizedInput += token[tokenPos];
|
|
next();
|
|
tokenPos++;
|
|
}
|
|
}
|
|
|
|
/*
    Consumes optional whitespace, possibly advancing `index` to `index'`,
    then consumes a fixed-length token (signaling an error if the token isn't a prefix of
    the string beginning at `source[index']`),
    then consumes optional whitespace again.

    Signals an error and returns early if the input ends before the token.
    Also signals an error if the input ends after the trailing whitespace,
    so that on a successful exit `index` is in bounds.
*/
void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCode& errorCode) {
    // No need for error check or bounds check before parseOptionalWhitespace
    parseOptionalWhitespace();
    // Establish precondition
    CHECK_BOUNDS(errorCode);
    parseToken(token, errorCode);
    parseOptionalWhitespace();
    // Guarantee postcondition
    CHECK_BOUNDS(errorCode);
}
|
|
|
|
/*
    Consumes optional whitespace, possibly advancing `index` to `index'`,
    then consumes a single character (signaling an error if it doesn't match
    `source[index']`),
    then consumes optional whitespace again.

    Signals an error and returns early if the input ends before the
    character. Also signals an error if the input ends after the trailing
    whitespace, so that on a successful exit `index` is in bounds.
*/
void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
    // No need for error check or bounds check before parseOptionalWhitespace()
    parseOptionalWhitespace();
    // Establish precondition
    CHECK_BOUNDS(errorCode);
    parseToken(c, errorCode);
    parseOptionalWhitespace();
    // Guarantee postcondition
    CHECK_BOUNDS(errorCode);
}
|
|
|
|
/*
|
|
Consumes a possibly-empty sequence of name-chars. Appends to `str`
|
|
and returns `str`.
|
|
*/
|
|
UnicodeString Parser::parseNameChars(UnicodeString& str, UErrorCode& errorCode) {
|
|
if (U_FAILURE(errorCode)) {
|
|
return {};
|
|
}
|
|
|
|
while (isNameChar(peek())) {
|
|
UChar32 c = peek();
|
|
str += c;
|
|
normalizedInput += c;
|
|
next();
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return str;
|
|
}
|
|
|
|
/*
|
|
Consumes a non-empty sequence of `name-char`s, the first of which is
|
|
also a `name-start`.
|
|
that begins with a character `start` such that `isNameStart(start)`.
|
|
|
|
Returns this sequence.
|
|
|
|
(Matches the `name` nonterminal in the grammar.)
|
|
*/
|
|
UnicodeString Parser::parseName(UErrorCode& errorCode) {
|
|
UnicodeString name;
|
|
|
|
U_ASSERT(inBounds());
|
|
|
|
if (!(isNameStart(peek()) || isBidiControl(peek()))) {
|
|
ERROR(errorCode);
|
|
return name;
|
|
}
|
|
|
|
// name = [bidi] name-start *name-char [bidi]
|
|
|
|
// [bidi]
|
|
parseOptionalBidi();
|
|
|
|
// name-start *name-char
|
|
parseNameChars(name, errorCode);
|
|
|
|
// [bidi]
|
|
parseOptionalBidi();
|
|
|
|
return name;
|
|
}
|
|
|
|
/*
|
|
Consumes a '$' followed by a `name`, returning a VariableName
|
|
with `name` as its name
|
|
|
|
(Matches the `variable` nonterminal in the grammar.)
|
|
*/
|
|
VariableName Parser::parseVariableName(UErrorCode& errorCode) {
|
|
VariableName result;
|
|
|
|
U_ASSERT(inBounds());
|
|
|
|
parseToken(DOLLAR, errorCode);
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
return result;
|
|
}
|
|
return VariableName(parseName(errorCode));
|
|
}
|
|
|
|
/*
|
|
Corresponds to the `identifier` nonterminal in the grammar
|
|
*/
|
|
UnicodeString Parser::parseIdentifier(UErrorCode& errorCode) {
|
|
U_ASSERT(inBounds());
|
|
|
|
UnicodeString result;
|
|
// The following is a hack to get around ambiguity in the grammar:
|
|
// identifier -> namespace ":" name
|
|
// vs.
|
|
// identifier -> name
|
|
// can't be distinguished without arbitrary lookahead.
|
|
// Instead, we treat the production as:
|
|
// identifier -> namespace *(":"name)
|
|
// and then check for multiple colons.
|
|
|
|
// Parse namespace
|
|
result += parseName(errorCode);
|
|
int32_t firstColon = -1;
|
|
while (inBounds() && peek() == COLON) {
|
|
// Parse ':' separator
|
|
if (firstColon == -1) {
|
|
firstColon = index;
|
|
}
|
|
parseToken(COLON, errorCode);
|
|
result += COLON;
|
|
// Check for message ending with something like "foo:"
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
} else {
|
|
// Parse name part
|
|
result += parseName(errorCode);
|
|
}
|
|
}
|
|
|
|
// If there's at least one ':', scan from the first ':'
|
|
// to the end of the name to check for multiple ':'s
|
|
if (firstColon != -1) {
|
|
for (int32_t i = firstColon + 1; i < result.length(); i++) {
|
|
if (result[i] == COLON) {
|
|
ERROR_AT(errorCode, i);
|
|
return {};
|
|
}
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
Consumes a reference to a function, matching the ": identifier"
|
|
in the `function` nonterminal in the grammar.
|
|
|
|
Returns the function name.
|
|
*/
|
|
FunctionName Parser::parseFunction(UErrorCode& errorCode) {
|
|
U_ASSERT(inBounds());
|
|
if (!isFunctionStart(peek())) {
|
|
ERROR(errorCode);
|
|
return FunctionName();
|
|
}
|
|
|
|
normalizedInput += peek();
|
|
next(); // Consume the function start character
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
return FunctionName();
|
|
}
|
|
return parseIdentifier(errorCode);
|
|
}
|
|
|
|
|
|
/*
|
|
Precondition: peek() == BACKSLASH
|
|
|
|
Consume an escaped character.
|
|
Corresponds to `escaped-char` in the grammar.
|
|
|
|
No postcondition (a message can end with an escaped char)
|
|
*/
|
|
UnicodeString Parser::parseEscapeSequence(UErrorCode& errorCode) {
|
|
U_ASSERT(inBounds());
|
|
U_ASSERT(peek() == BACKSLASH);
|
|
normalizedInput += BACKSLASH;
|
|
next(); // Skip the initial backslash
|
|
UnicodeString str;
|
|
if (inBounds()) {
|
|
// Expect a '{', '|' or '}'
|
|
switch (peek()) {
|
|
case LEFT_CURLY_BRACE:
|
|
case RIGHT_CURLY_BRACE:
|
|
case PIPE:
|
|
case BACKSLASH: {
|
|
/* Append to the output string */
|
|
str += peek();
|
|
/* Update normalizedInput */
|
|
normalizedInput += peek();
|
|
/* Consume the character */
|
|
next();
|
|
return str;
|
|
}
|
|
default: {
|
|
// No other characters are allowed here
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
// If control reaches here, there was an error
|
|
ERROR(errorCode);
|
|
return str;
|
|
}
|
|
|
|
|
|
/*
|
|
Consume and return a quoted literal, matching the `literal` nonterminal in the grammar.
|
|
*/
|
|
Literal Parser::parseQuotedLiteral(UErrorCode& errorCode) {
|
|
bool error = false;
|
|
|
|
UnicodeString contents;
|
|
if (U_SUCCESS(errorCode)) {
|
|
// Parse the opening '|'
|
|
parseToken(PIPE, errorCode);
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
error = true;
|
|
} else {
|
|
// Parse the contents
|
|
bool done = false;
|
|
while (!done) {
|
|
if (peek() == BACKSLASH) {
|
|
contents += parseEscapeSequence(errorCode);
|
|
} else if (isQuotedChar(peek())) {
|
|
contents += peek();
|
|
// Handle cases like:
|
|
// |}{| -- we want to escape everywhere that
|
|
// can be escaped, to make round-trip checking
|
|
// easier -- so this case normalizes to
|
|
// |\}\{|
|
|
if (isEscapableChar(peek())) {
|
|
normalizedInput += BACKSLASH;
|
|
}
|
|
normalizedInput += peek();
|
|
next(); // Consume this character
|
|
maybeAdvanceLine();
|
|
} else {
|
|
// Assume the sequence of literal characters ends here
|
|
done = true;
|
|
}
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
error = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (error) {
|
|
return {};
|
|
}
|
|
|
|
// Parse the closing '|'
|
|
parseToken(PIPE, errorCode);
|
|
|
|
return Literal(true, contents);
|
|
}
|
|
|
|
// Parse (1*DIGIT)
|
|
UnicodeString Parser::parseDigits(UErrorCode& errorCode) {
|
|
if (U_FAILURE(errorCode)) {
|
|
return {};
|
|
}
|
|
|
|
U_ASSERT(isDigit(peek()));
|
|
|
|
UnicodeString contents;
|
|
do {
|
|
contents += peek();
|
|
normalizedInput += peek();
|
|
next();
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
return {};
|
|
}
|
|
} while (isDigit(peek()));
|
|
|
|
return contents;
|
|
}
|
|
/*
|
|
Consume and return an unquoted literal, matching the `unquoted` nonterminal in the grammar.
|
|
*/
|
|
Literal Parser::parseUnquotedLiteral(UErrorCode& errorCode) {
|
|
if (U_FAILURE(errorCode)) {
|
|
return {};
|
|
}
|
|
// unquoted-literal = 1*name-char
|
|
|
|
if (!(isNameChar(peek()))) {
|
|
ERROR(errorCode);
|
|
return {};
|
|
}
|
|
|
|
UnicodeString contents;
|
|
parseNameChars(contents, errorCode);
|
|
return Literal(false, contents);
|
|
}
|
|
|
|
/*
|
|
Consume and return a literal, matching the `literal` nonterminal in the grammar.
|
|
*/
|
|
Literal Parser::parseLiteral(UErrorCode& errorCode) {
|
|
Literal result;
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
} else {
|
|
if (peek() == PIPE) {
|
|
result = parseQuotedLiteral(errorCode);
|
|
} else {
|
|
result = parseUnquotedLiteral(errorCode);
|
|
}
|
|
// Guarantee postcondition
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
Consume a @name-value pair, matching the `attribute` nonterminal in the grammar.
|
|
|
|
Adds the option to `options`
|
|
*/
|
|
template<class T>
|
|
void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode) {
|
|
U_ASSERT(inBounds());
|
|
|
|
U_ASSERT(peek() == AT);
|
|
// Consume the '@'
|
|
parseToken(AT, errorCode);
|
|
|
|
// Parse LHS
|
|
UnicodeString lhs = parseIdentifier(errorCode);
|
|
|
|
// Prepare to "backtrack" to resolve ambiguity
|
|
// about whether whitespace precedes another
|
|
// attribute, or the '=' sign
|
|
int32_t savedIndex = index;
|
|
parseOptionalWhitespace();
|
|
|
|
Operand rand;
|
|
if (peek() == EQUALS) {
|
|
// Parse '='
|
|
parseTokenWithWhitespace(EQUALS, errorCode);
|
|
|
|
UnicodeString rhsStr;
|
|
// Parse RHS, which must be a literal
|
|
// attribute = "@" identifier [o "=" o literal]
|
|
rand = Operand(parseLiteral(errorCode));
|
|
} else {
|
|
// attribute -> "@" identifier [[s] "=" [s]]
|
|
// Use null operand, which `rand` is already set to
|
|
// "Backtrack" by restoring the whitespace (if there was any)
|
|
index = savedIndex;
|
|
}
|
|
|
|
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
|
|
}
|
|
|
|
/*
|
|
Consume a name-value pair, matching the `option` nonterminal in the grammar.
|
|
|
|
Adds the option to `optionList`
|
|
*/
|
|
template<class T>
|
|
void Parser::parseOption(OptionAdder<T>& addOption, UErrorCode& errorCode) {
|
|
U_ASSERT(inBounds());
|
|
|
|
// Parse LHS
|
|
UnicodeString lhs = parseIdentifier(errorCode);
|
|
|
|
// Parse '='
|
|
parseTokenWithWhitespace(EQUALS, errorCode);
|
|
|
|
UnicodeString rhsStr;
|
|
Operand rand;
|
|
// Parse RHS, which is either a literal or variable
|
|
switch (peek()) {
|
|
case DOLLAR: {
|
|
rand = Operand(parseVariableName(errorCode));
|
|
break;
|
|
}
|
|
default: {
|
|
// Must be a literal
|
|
rand = Operand(parseLiteral(errorCode));
|
|
break;
|
|
}
|
|
}
|
|
U_ASSERT(!rand.isNull());
|
|
|
|
// Finally, add the key=value mapping
|
|
// Use a local error code, check for duplicate option error and
|
|
// record it as with other errors
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
addOption.addOption(lhs, std::move(rand), status);
|
|
if (U_FAILURE(status)) {
|
|
U_ASSERT(status == U_MF_DUPLICATE_OPTION_NAME_ERROR);
|
|
errors.setDuplicateOptionName(errorCode);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Note: there are multiple overloads of parseOptions() for parsing
|
|
options within markup, vs. within an expression, vs. parsing
|
|
attributes. This should be refactored. TODO
|
|
*/
|
|
|
|
/*
|
|
Consume optional whitespace followed by a sequence of options
|
|
(possibly empty), separated by whitespace
|
|
*/
|
|
template <class T>
|
|
void Parser::parseOptions(OptionAdder<T>& addOption, UErrorCode& errorCode) {
|
|
// Early exit if out of bounds -- no more work is possible
|
|
CHECK_BOUNDS(errorCode);
|
|
|
|
/*
|
|
Arbitrary lookahead is required to parse option lists. To see why, consider
|
|
these rules from the grammar:
|
|
|
|
expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
annotation = (function *(s option)) / reserved
|
|
|
|
And this example:
|
|
{:foo }
|
|
|
|
Derivation:
|
|
expression -> "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
-> "{" [s] annotation [s] "}"
|
|
-> "{" [s] ((function *(s option)) / reserved) [s] "}"
|
|
-> "{" [s] function *(s option) [s] "}"
|
|
|
|
In this example, knowing whether to expect a '}' or the start of another option
|
|
after the whitespace would require arbitrary lookahead -- in other words, which
|
|
rule should we apply?
|
|
*(s option) -> s option *(s option)
|
|
or
|
|
*(s option) ->
|
|
|
|
The same would apply to the example {:foo k=v } (note the trailing space after "v").
|
|
|
|
This is addressed using a form of backtracking and (to make the backtracking easier
|
|
to apply) a slight refactoring to the grammar.
|
|
|
|
This code is written as if the grammar is:
|
|
expression = "{" [s] (((literal / variable) ([s] / [s annotation])) / annotation) "}"
|
|
annotation = (function *(s option) [s]) / (reserved [s])
|
|
|
|
Parsing the `*(s option) [s]` sequence can be done within `parseOptions()`, meaning
|
|
that `parseExpression()` can safely require a '}' after `parseOptions()` finishes.
|
|
|
|
Note that when "backtracking" really just means early exit, since only whitespace
|
|
is involved and there's no state to save.
|
|
|
|
There is a separate but similar ambiguity as to whether the space precedes
|
|
an option or an attribute.
|
|
*/
|
|
|
|
while(true) {
|
|
// If the next character is not whitespace, that means we've already
|
|
// parsed the entire options list (which may have been empty) and there's
|
|
// no trailing whitespace. In that case, exit.
|
|
if (!isWhitespace(peek())) {
|
|
break;
|
|
}
|
|
int32_t firstWhitespace = index;
|
|
|
|
// In any case other than an empty options list, there must be at least
|
|
// one whitespace character.
|
|
parseRequiredWhitespace(errorCode);
|
|
// Restore precondition
|
|
CHECK_BOUNDS(errorCode);
|
|
|
|
// If a name character follows, then at least one more option remains
|
|
// in the list.
|
|
// Otherwise, we've consumed all the options and any trailing whitespace,
|
|
// and can exit.
|
|
// Note that exiting is sort of like backtracking: "(s option)" doesn't apply,
|
|
// so we back out to [s].
|
|
if (!isNameStart(peek())) {
|
|
// We've consumed all the options (meaning that either we consumed non-empty
|
|
// whitespace, or consumed at least one option.)
|
|
// Done.
|
|
// Remove the required whitespace from normalizedInput
|
|
normalizedInput.truncate(normalizedInput.length() - 1);
|
|
// "Backtrack" so as to leave the optional whitespace there
|
|
// when parsing attributes
|
|
index = firstWhitespace;
|
|
break;
|
|
}
|
|
parseOption(addOption, errorCode);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Consume optional whitespace followed by a sequence of attributes
|
|
(possibly empty), separated by whitespace
|
|
*/
|
|
template<class T>
|
|
void Parser::parseAttributes(AttributeAdder<T>& attrAdder, UErrorCode& errorCode) {
|
|
|
|
// Early exit if out of bounds -- no more work is possible
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
Arbitrary lookahead is required to parse attribute lists, similarly to option lists.
|
|
(See comment in parseOptions()).
|
|
*/
|
|
|
|
while(true) {
|
|
// If the next character is not whitespace, that means we've already
|
|
// parsed the entire attributes list (which may have been empty) and there's
|
|
// no trailing whitespace. In that case, exit.
|
|
if (!isWhitespace(peek())) {
|
|
break;
|
|
}
|
|
|
|
// In any case other than an empty attributes list, there must be at least
|
|
// one whitespace character.
|
|
parseRequiredWhitespace(errorCode);
|
|
// Restore precondition
|
|
if (!inBounds()) {
|
|
ERROR(errorCode);
|
|
break;
|
|
}
|
|
|
|
// If an '@' follows, then at least one more attribute remains
|
|
// in the list.
|
|
// Otherwise, we've consumed all the attributes and any trailing whitespace,
|
|
// and can exit.
|
|
// Note that exiting is sort of like backtracking: "(s attributes)" doesn't apply,
|
|
// so we back out to [s].
|
|
if (peek() != AT) {
|
|
// We've consumed all the attributes (meaning that either we consumed non-empty
|
|
// whitespace, or consumed at least one attribute.)
|
|
// Done.
|
|
// Remove the whitespace from normalizedInput
|
|
normalizedInput.truncate(normalizedInput.length() - 1);
|
|
break;
|
|
}
|
|
parseAttribute(attrAdder, errorCode);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Consume a function call, matching the `annotation`
|
|
nonterminal in the grammar
|
|
|
|
Returns an `Operator` representing this (a reserved is a parse error)
|
|
*/
|
|
Operator Parser::parseAnnotation(UErrorCode& status) {
|
|
U_ASSERT(inBounds());
|
|
Operator::Builder ratorBuilder(status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
if (isFunctionStart(peek())) {
|
|
// Consume the function name
|
|
FunctionName func = parseFunction(status);
|
|
ratorBuilder.setFunctionName(std::move(func));
|
|
|
|
OptionAdder<Operator::Builder> addOptions(ratorBuilder);
|
|
// Consume the options (which may be empty)
|
|
parseOptions(addOptions, status);
|
|
} else {
|
|
ERROR(status);
|
|
}
|
|
return ratorBuilder.build(status);
|
|
}
|
|
|
|
/*
|
|
Consume a literal or variable (depending on `isVariable`),
|
|
followed by either required whitespace followed by an annotation,
|
|
or optional whitespace.
|
|
*/
|
|
void Parser::parseLiteralOrVariableWithAnnotation(bool isVariable,
|
|
Expression::Builder& builder,
|
|
UErrorCode& status) {
|
|
CHECK_ERROR(status);
|
|
|
|
U_ASSERT(inBounds());
|
|
|
|
Operand rand;
|
|
if (isVariable) {
|
|
rand = Operand(parseVariableName(status));
|
|
} else {
|
|
rand = Operand(parseLiteral(status));
|
|
}
|
|
|
|
builder.setOperand(std::move(rand));
|
|
|
|
/*
|
|
Parsing a literal or variable with an optional annotation requires arbitrary lookahead.
|
|
To see why, consider this rule from the grammar:
|
|
|
|
expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
|
|
And this example:
|
|
|
|
{|foo| }
|
|
|
|
Derivation:
|
|
expression -> "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}"
|
|
-> "{" [s] ((literal / variable) [s annotation]) [s] "}"
|
|
-> "{" [s] (literal [s annotation]) [s] "}"
|
|
|
|
When reading the ' ' after the second '|', it's ambiguous whether that's the required
|
|
space before an annotation, or the optional space before the '}'.
|
|
|
|
To make this ambiguity easier to handle, this code is based on the same grammar
|
|
refactoring for the `expression` nonterminal that `parseOptions()` relies on. See
|
|
the comment in `parseOptions()` for details.
|
|
*/
|
|
|
|
if (isWhitespace(peek())) {
|
|
int32_t firstWhitespace = index;
|
|
|
|
// If the next character is whitespace, either [s annotation] or [s] applies
|
|
// (the character is either the required space before an annotation, or optional
|
|
// trailing space after the literal or variable). It's still ambiguous which
|
|
// one does apply.
|
|
parseOptionalWhitespace();
|
|
// Restore precondition
|
|
CHECK_BOUNDS(status);
|
|
|
|
// This next check resolves the ambiguity between [s annotation] and [s]
|
|
bool isSAnnotation = isAnnotationStart(peek());
|
|
|
|
if (isSAnnotation) {
|
|
normalizedInput += SPACE;
|
|
}
|
|
|
|
if (isSAnnotation) {
|
|
// The previously consumed whitespace precedes an annotation
|
|
builder.setOperator(parseAnnotation(status));
|
|
} else {
|
|
// Either there's a right curly brace (will be consumed by the caller),
|
|
// or there's an error and the trailing whitespace should be
|
|
// handled by the caller. However, this is not an error
|
|
// here because we're just parsing `literal [s annotation]`.
|
|
index = firstWhitespace;
|
|
}
|
|
} else {
|
|
// Either there was never whitespace, or
|
|
// the previously consumed whitespace is the optional trailing whitespace;
|
|
// either the next character is '}' or the error will be handled by parseExpression.
|
|
// Do nothing, since the operand was already set
|
|
}
|
|
|
|
// At the end of this code, the next character should either be '}',
|
|
// whitespace followed by a '}',
|
|
// or end-of-input
|
|
}
|
|
|
|
/*
|
|
Consume an expression, matching the `expression` nonterminal in the grammar
|
|
*/
|
|
|
|
// Sets the operand of `exprBuilder` to the fallback value: a literal
// consisting of just the U+FFFD REPLACEMENT CHARACTER, per
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
static void exprFallback(Expression::Builder& exprBuilder) {
    Literal replacementLiteral(false, UnicodeString(REPLACEMENT));
    exprBuilder.setOperand(Operand(std::move(replacementLiteral)));
}
|
|
|
|
// Builds and returns a fallback expression whose operand is a literal
// consisting of just the U+FFFD REPLACEMENT CHARACTER, per
// https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
//
// If `status` is already a failure, or constructing the builder fails,
// a default-constructed Expression is returned instead.
static Expression exprFallback(UErrorCode& status) {
    Expression result;
    if (U_FAILURE(status)) {
        return result;
    }

    Expression::Builder exprBuilder(status);
    if (U_FAILURE(status)) {
        return result;
    }

    // Reuse the builder-based overload to set the fallback operand
    exprFallback(exprBuilder);

    // Use a separate error code (named so that it does not shadow the
    // `status` parameter): an operand was set, so build() can't fail
    UErrorCode localStatus = U_ZERO_ERROR;
    result = exprBuilder.build(localStatus);
    U_ASSERT(U_SUCCESS(localStatus));
    return result;
}
|
|
|
|
/*
  Consume an expression, matching the `expression` nonterminal in the
  grammar: '{', optional whitespace, a literal/variable (optionally
  annotated) or a bare annotation, attributes, optional whitespace, '}'.

  If no operand or annotation can be parsed, the fallback operand
  (see exprFallback()) is used so that an Expression can still be built.

  Returns a default-constructed Expression if `status` is already a failure.

  Precondition: `inBounds()`
*/
Expression Parser::parseExpression(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return {};
    }

    // The caller must guarantee there is input left
    U_ASSERT(inBounds());

    // Opening brace, then optional whitespace
    parseToken(LEFT_CURLY_BRACE, status);
    parseOptionalWhitespace();

    Expression::Builder exprBuilder(status);

    // Dispatch on the first character: '|' starts a quoted literal,
    // '$' a variable; otherwise it is an annotation or an unquoted literal
    if (!inBounds()) {
        // Out of input: use the fallback operand
        exprFallback(exprBuilder);
    } else if (peek() == PIPE) {
        // Quoted literal
        parseLiteralOrVariableWithAnnotation(false, exprBuilder, status);
    } else if (peek() == DOLLAR) {
        // Variable
        parseLiteralOrVariableWithAnnotation(true, exprBuilder, status);
    } else if (isAnnotationStart(peek())) {
        Operator rator = parseAnnotation(status);
        exprBuilder.setOperator(std::move(rator));
    } else if (isUnquotedStart(peek())) {
        // Unquoted literal
        parseLiteralOrVariableWithAnnotation(false, exprBuilder, status);
    } else {
        // Not a literal, variable or annotation -- error out
        ERROR(status);
        exprFallback(exprBuilder);
    }

    // Parse attributes
    AttributeAdder<Expression::Builder> attrAdder(exprBuilder);
    parseAttributes(attrAdder, status);

    // Parse optional space
    // (the last [s] in e.g. "{" [s] literal [s annotation] *(s attribute) [s] "}")
    parseOptionalWhitespace();

    // Either an operand or operator (or both) must have been set already,
    // so building can't fail
    UErrorCode localStatus = U_ZERO_ERROR;
    Expression result = exprBuilder.build(localStatus);
    U_ASSERT(U_SUCCESS(localStatus));

    // Check for end-of-input and missing '}'
    if (inBounds()) {
        // It's safe to check for the '}'
        parseToken(RIGHT_CURLY_BRACE, status);
    } else {
        ERROR(status);
    }
    return result;
}
|
|
|
|
/*
|
|
Parse a .local declaration, matching the `local-declaration`
|
|
production in the grammar
|
|
*/
|
|
void Parser::parseLocalDeclaration(UErrorCode& status) {
|
|
// End-of-input here would be an error; even empty
|
|
// declarations must be followed by a body
|
|
CHECK_BOUNDS(status);
|
|
|
|
parseToken(ID_LOCAL, status);
|
|
parseRequiredWhitespace(status);
|
|
|
|
// Restore precondition
|
|
CHECK_BOUNDS(status);
|
|
VariableName lhs = parseVariableName(status);
|
|
parseTokenWithWhitespace(EQUALS, status);
|
|
// Restore precondition before calling parseExpression()
|
|
CHECK_BOUNDS(status);
|
|
|
|
Expression rhs = parseExpression(status);
|
|
|
|
// Add binding from lhs to rhs, unless there was an error
|
|
// (This ensures that if there was a correct lhs but a
|
|
// parse error in rhs, the fallback for uses of the
|
|
// lhs will be its own name rather than the rhs)
|
|
/* This affects the behavior of this test case, which the spec
|
|
is ambiguous about:
|
|
|
|
.local $bar {|foo|} {{{$bar}}}
|
|
|
|
Should `$bar` still be bound to a value although
|
|
its declaration is syntactically incorrect (missing the '=')?
|
|
This code says no, but it needs to change if
|
|
https://github.com/unicode-org/message-format-wg/issues/703
|
|
is resolved differently.
|
|
*/
|
|
CHECK_ERROR(status);
|
|
if (!errors.hasSyntaxError()) {
|
|
dataModel.addBinding(Binding(std::move(lhs), std::move(rhs)), status);
|
|
// Check if status is U_DUPLICATE_DECLARATION_ERROR
|
|
// and add that as an internal error if so
|
|
if (status == U_MF_DUPLICATE_DECLARATION_ERROR) {
|
|
status = U_ZERO_ERROR;
|
|
errors.addError(StaticErrorType::DuplicateDeclarationError, status);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Parse an .input declaration, matching the `local-declaration`
|
|
production in the grammar
|
|
*/
|
|
void Parser::parseInputDeclaration(UErrorCode& status) {
|
|
// End-of-input here would be an error; even empty
|
|
// declarations must be followed by a body
|
|
CHECK_BOUNDS(status);
|
|
|
|
parseToken(ID_INPUT, status);
|
|
parseOptionalWhitespace();
|
|
|
|
// Restore precondition before calling parseExpression()
|
|
CHECK_BOUNDS(status);
|
|
|
|
// Save the index for error diagnostics
|
|
int32_t exprIndex = index;
|
|
Expression rhs = parseExpression(status);
|
|
|
|
// Here we have to check that the rhs is a variable-expression
|
|
if (!rhs.getOperand().isVariable()) {
|
|
// This case is a syntax error; report it at the beginning
|
|
// of the expression
|
|
ERROR_AT(status, exprIndex);
|
|
return;
|
|
}
|
|
|
|
VariableName lhs = rhs.getOperand().asVariable();
|
|
|
|
// Add binding from lhs to rhs
|
|
// This just adds a new local variable that shadows the message
|
|
// argument referred to, which is harmless.
|
|
// When evaluating the RHS, the new local is not in scope
|
|
// and the message argument will be correctly referred to.
|
|
CHECK_ERROR(status);
|
|
if (!errors.hasSyntaxError()) {
|
|
dataModel.addBinding(Binding::input(std::move(lhs), std::move(rhs), status), status);
|
|
// Check if status is U_MF_DUPLICATE_DECLARATION_ERROR
|
|
// and add that as an internal error if so
|
|
if (status == U_MF_DUPLICATE_DECLARATION_ERROR) {
|
|
status = U_ZERO_ERROR;
|
|
errors.addError(StaticErrorType::DuplicateDeclarationError, status);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Consume a possibly-empty sequence of declarations separated by whitespace;
|
|
each declaration matches the `declaration` nonterminal in the grammar
|
|
|
|
Builds up an environment representing those declarations
|
|
*/
|
|
void Parser::parseDeclarations(UErrorCode& status) {
|
|
// End-of-input here would be an error; even empty
|
|
// declarations must be followed by a body
|
|
CHECK_BOUNDS(status);
|
|
|
|
while (peek() == PERIOD) {
|
|
CHECK_BOUNDS_1(status);
|
|
if (peek(1) == ID_LOCAL[1]) {
|
|
parseLocalDeclaration(status);
|
|
} else if (peek(1) == ID_INPUT[1]) {
|
|
parseInputDeclaration(status);
|
|
} else {
|
|
// Done parsing declarations
|
|
break;
|
|
}
|
|
|
|
// Avoid looping infinitely
|
|
CHECK_ERROR(status);
|
|
|
|
parseOptionalWhitespace();
|
|
// Restore precondition
|
|
CHECK_BOUNDS(status);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Consume a text character
|
|
matching the `text-char` nonterminal in the grammar
|
|
|
|
No postcondition (a message can end with a text-char)
|
|
*/
|
|
UnicodeString Parser::parseTextChar(UErrorCode& status) {
|
|
UnicodeString str;
|
|
if (!inBounds() || !(isTextChar(peek()))) {
|
|
// Error -- text-char is expected here
|
|
ERROR(status);
|
|
} else {
|
|
// See comment in parseQuotedLiteral()
|
|
if (isEscapableChar(peek())) {
|
|
normalizedInput += BACKSLASH;
|
|
}
|
|
normalizedInput += peek();
|
|
str += peek();
|
|
next();
|
|
maybeAdvanceLine();
|
|
}
|
|
return str;
|
|
}
|
|
|
|
/*
|
|
Consume an `nmtoken`, `literal`, or the string "*", matching
|
|
the `key` nonterminal in the grammar
|
|
*/
|
|
Key Parser::parseKey(UErrorCode& status) {
|
|
U_ASSERT(inBounds());
|
|
|
|
Key k; // wildcard by default
|
|
// Literal | '*'
|
|
switch (peek()) {
|
|
case ASTERISK: {
|
|
next();
|
|
normalizedInput += ASTERISK;
|
|
// Guarantee postcondition
|
|
if (!inBounds()) {
|
|
ERROR(status);
|
|
return k;
|
|
}
|
|
break;
|
|
}
|
|
default: {
|
|
// Literal
|
|
k = Key(parseLiteral(status));
|
|
break;
|
|
}
|
|
}
|
|
return k;
|
|
}
|
|
|
|
/*
|
|
Consume a non-empty sequence of `key`s separated by whitespace
|
|
|
|
Takes ownership of `keys`
|
|
*/
|
|
SelectorKeys Parser::parseNonEmptyKeys(UErrorCode& status) {
|
|
SelectorKeys result;
|
|
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
|
|
U_ASSERT(inBounds());
|
|
|
|
/*
|
|
Arbitrary lookahead is required to parse key lists. To see why, consider
|
|
this rule from the grammar:
|
|
|
|
variant = key *(s key) [s] quoted-pattern
|
|
|
|
And this example:
|
|
when k1 k2 {a}
|
|
|
|
Derivation:
|
|
variant -> key *(s key) [s] quoted-pattern
|
|
-> key s key *(s key) quoted-pattern
|
|
|
|
After matching ' ' to `s` and 'k2' to `key`, it would require arbitrary lookahead
|
|
to know whether to expect the start of a pattern or the start of another key.
|
|
In other words: is the second whitespace sequence the required space in *(s key),
|
|
or the optional space in [s] quoted-pattern?
|
|
|
|
This is addressed using "backtracking" (similarly to `parseOptions()`).
|
|
*/
|
|
|
|
SelectorKeys::Builder keysBuilder(status);
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
|
|
// Since the first key is required, it's simplest to parse it separately.
|
|
keysBuilder.add(parseKey(status), status);
|
|
|
|
// Restore precondition
|
|
if (!inBounds()) {
|
|
ERROR(status);
|
|
return result;
|
|
}
|
|
|
|
// We've seen at least one whitespace-key pair, so now we can parse
|
|
// *(s key) [s]
|
|
while (peek() != LEFT_CURLY_BRACE || isWhitespace(peek()) || isBidiControl(peek())) {
|
|
bool wasWhitespace = isWhitespace(peek()) || isBidiControl(peek());
|
|
parseRequiredWhitespace(status);
|
|
if (!wasWhitespace) {
|
|
// Avoid infinite loop when parsing something like:
|
|
// when * @{!...
|
|
next();
|
|
}
|
|
|
|
// Restore precondition
|
|
if (!inBounds()) {
|
|
ERROR(status);
|
|
return result;
|
|
}
|
|
|
|
// At this point, it's ambiguous whether we are inside (s key) or [s].
|
|
// This check resolves that ambiguity.
|
|
if (peek() == LEFT_CURLY_BRACE) {
|
|
// A pattern follows, so what we just parsed was the optional
|
|
// trailing whitespace. All the keys have been parsed.
|
|
|
|
// Unpush the whitespace from `normalizedInput`
|
|
normalizedInput.truncate(normalizedInput.length() - 1);
|
|
break;
|
|
}
|
|
keysBuilder.add(parseKey(status), status);
|
|
}
|
|
|
|
return keysBuilder.build(status);
|
|
}
|
|
|
|
/*
  Consume a quoted pattern: "{{", the pattern contents (parsed by
  parseSimpleMessage()), then "}}". Returns the parsed pattern.

  Precondition: `inBounds()`
*/
Pattern Parser::parseQuotedPattern(UErrorCode& status) {
    U_ASSERT(inBounds());

    // Opening "{{"
    parseToken(LEFT_CURLY_BRACE, status);
    parseToken(LEFT_CURLY_BRACE, status);

    Pattern contents = parseSimpleMessage(status);

    // Closing "}}"
    parseToken(RIGHT_CURLY_BRACE, status);
    parseToken(RIGHT_CURLY_BRACE, status);
    return contents;
}
|
|
|
|
/*
|
|
Consume a `placeholder`, matching the nonterminal in the grammar
|
|
No postcondition (a markup can end a message)
|
|
*/
|
|
Markup Parser::parseMarkup(UErrorCode& status) {
|
|
U_ASSERT(inBounds(1));
|
|
|
|
U_ASSERT(peek() == LEFT_CURLY_BRACE);
|
|
|
|
Markup::Builder builder(status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
|
|
// Consume the '{'
|
|
next();
|
|
normalizedInput += LEFT_CURLY_BRACE;
|
|
parseOptionalWhitespace();
|
|
bool closing = false;
|
|
switch (peek()) {
|
|
case NUMBER_SIGN: {
|
|
// Open or standalone; consume the '#'
|
|
normalizedInput += peek();
|
|
next();
|
|
break;
|
|
}
|
|
case SLASH: {
|
|
// Closing
|
|
normalizedInput += peek();
|
|
closing = true;
|
|
next();
|
|
break;
|
|
}
|
|
default: {
|
|
ERROR(status);
|
|
return {};
|
|
}
|
|
}
|
|
|
|
// Parse the markup identifier
|
|
builder.setName(parseIdentifier(status));
|
|
|
|
// Parse the options, which must begin with a ' '
|
|
// if present
|
|
if (inBounds() && (isWhitespace(peek()) || isBidiControl(peek()))) {
|
|
OptionAdder<Markup::Builder> optionAdder(builder);
|
|
parseOptions(optionAdder, status);
|
|
}
|
|
|
|
// Parse the attributes, which also must begin
|
|
// with a ' '
|
|
if (inBounds() && (isWhitespace(peek()) || isBidiControl(peek()))) {
|
|
AttributeAdder<Markup::Builder> attrAdder(builder);
|
|
parseAttributes(attrAdder, status);
|
|
}
|
|
|
|
parseOptionalWhitespace();
|
|
|
|
bool standalone = false;
|
|
// Check if this is a standalone or not
|
|
if (!closing) {
|
|
if (inBounds() && peek() == SLASH) {
|
|
standalone = true;
|
|
normalizedInput += SLASH;
|
|
next();
|
|
}
|
|
}
|
|
|
|
parseToken(RIGHT_CURLY_BRACE, status);
|
|
|
|
if (standalone) {
|
|
builder.setStandalone();
|
|
} else if (closing) {
|
|
builder.setClose();
|
|
} else {
|
|
builder.setOpen();
|
|
}
|
|
|
|
return builder.build(status);
|
|
}
|
|
|
|
/*
|
|
Consume a `placeholder`, matching the nonterminal in the grammar
|
|
No postcondition (a placeholder can end a message)
|
|
*/
|
|
std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
|
|
U_ASSERT(peek() == LEFT_CURLY_BRACE);
|
|
|
|
if (!inBounds()) {
|
|
ERROR(status);
|
|
return exprFallback(status);
|
|
}
|
|
|
|
// Need to look ahead arbitrarily since whitespace
|
|
// can appear before the '{' and '#'
|
|
// in markup
|
|
int32_t tempIndex = 1;
|
|
bool isMarkup = false;
|
|
while (inBounds(1)) {
|
|
UChar32 c = peek(tempIndex);
|
|
if (c == NUMBER_SIGN || c == SLASH) {
|
|
isMarkup = true;
|
|
break;
|
|
}
|
|
if (!(isWhitespace(c) || isBidiControl(c))) {
|
|
break;
|
|
}
|
|
tempIndex++;
|
|
}
|
|
|
|
if (isMarkup) {
|
|
return parseMarkup(status);
|
|
}
|
|
return parseExpression(status);
|
|
}
|
|
|
|
/*
|
|
Consume a `simple-message`, matching the nonterminal in the grammar
|
|
Postcondition: `index == len()` or U_FAILURE(status);
|
|
for a syntactically correct message, this will consume the entire input
|
|
*/
|
|
Pattern Parser::parseSimpleMessage(UErrorCode& status) {
|
|
Pattern::Builder result(status);
|
|
|
|
if (U_SUCCESS(status)) {
|
|
Expression expression;
|
|
while (inBounds()) {
|
|
switch (peek()) {
|
|
case LEFT_CURLY_BRACE: {
|
|
// Must be placeholder
|
|
std::variant<Expression, Markup> piece = parsePlaceholder(status);
|
|
if (std::holds_alternative<Expression>(piece)) {
|
|
Expression expr = *std::get_if<Expression>(&piece);
|
|
result.add(std::move(expr), status);
|
|
} else {
|
|
Markup markup = *std::get_if<Markup>(&piece);
|
|
result.add(std::move(markup), status);
|
|
}
|
|
break;
|
|
}
|
|
case BACKSLASH: {
|
|
// Must be escaped-char
|
|
result.add(parseEscapeSequence(status), status);
|
|
break;
|
|
}
|
|
case RIGHT_CURLY_BRACE: {
|
|
// Distinguish unescaped '}' from end of quoted pattern
|
|
break;
|
|
}
|
|
default: {
|
|
// Must be text-char
|
|
result.add(parseTextChar(status), status);
|
|
break;
|
|
}
|
|
}
|
|
if (peek() == RIGHT_CURLY_BRACE) {
|
|
// End of quoted pattern
|
|
break;
|
|
}
|
|
// Don't loop infinitely
|
|
if (errors.hasSyntaxError() || U_FAILURE(status)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return result.build(status);
|
|
}
|
|
|
|
/*
  Consume one variant (a non-empty list of keys followed by a quoted
  pattern) and add it to the data model.
*/
void Parser::parseVariant(UErrorCode& status) {
    CHECK_ERROR(status);

    // At least one key is required.
    // parseNonEmptyKeys() also consumes any whitespace before the
    // pattern, so the pattern can be parsed right after it.
    SelectorKeys keys(parseNonEmptyKeys(status));

    // Restore precondition before calling parseQuotedPattern()
    CHECK_BOUNDS(status);
    Pattern pattern = parseQuotedPattern(status);

    dataModel.addVariant(std::move(keys), std::move(pattern), status);
}
|
|
|
|
/*
|
|
Consume a `selectors` (matching the nonterminal in the grammar),
|
|
followed by a non-empty sequence of `variant`s (matching the nonterminal
|
|
in the grammar) preceded by whitespace
|
|
No postcondition (on return, `index` might equal `len()` with no syntax error
|
|
because a message can end with a variant)
|
|
*/
|
|
void Parser::parseSelectors(UErrorCode& status) {
|
|
CHECK_ERROR(status);
|
|
|
|
U_ASSERT(inBounds());
|
|
|
|
parseToken(ID_MATCH, status);
|
|
|
|
bool empty = true;
|
|
// Parse selectors
|
|
// "Backtracking" is required here. It's not clear if whitespace is
|
|
// (`[s]` selector) or (`[s]` variant)
|
|
while (isWhitespace(peek()) || peek() == DOLLAR) {
|
|
int32_t whitespaceStart = index;
|
|
parseRequiredWhitespace(status);
|
|
// Restore precondition
|
|
CHECK_BOUNDS(status);
|
|
if (peek() != DOLLAR) {
|
|
// This is not necessarily an error, but rather,
|
|
// means the whitespace we parsed was the optional
|
|
// whitespace preceding the first variant, not the
|
|
// required whitespace preceding a subsequent variable.
|
|
// In that case, "push back" the whitespace.
|
|
normalizedInput.truncate(normalizedInput.length() - 1);
|
|
index = whitespaceStart;
|
|
break;
|
|
}
|
|
VariableName var = parseVariableName(status);
|
|
empty = false;
|
|
|
|
dataModel.addSelector(std::move(var), status);
|
|
CHECK_ERROR(status);
|
|
}
|
|
|
|
// At least one selector is required
|
|
if (empty) {
|
|
ERROR(status);
|
|
return;
|
|
}
|
|
|
|
#define CHECK_END_OF_INPUT \
|
|
if (!inBounds()) { \
|
|
break; \
|
|
} \
|
|
|
|
// Parse variants
|
|
// matcher = match-statement s variant *(o variant)
|
|
|
|
// Parse first variant
|
|
parseRequiredWhitespace(status);
|
|
if (!inBounds()) {
|
|
ERROR(status);
|
|
return;
|
|
}
|
|
parseVariant(status);
|
|
if (!inBounds()) {
|
|
// Not an error; there might be only one variant
|
|
return;
|
|
}
|
|
|
|
while (isWhitespace(peek()) || isBidiControl(peek()) || isKeyStart(peek())) {
|
|
parseOptionalWhitespace();
|
|
// Restore the precondition.
|
|
// Trailing whitespace is allowed.
|
|
if (!inBounds()) {
|
|
return;
|
|
}
|
|
|
|
parseVariant(status);
|
|
|
|
// Restore the precondition, *without* erroring out if we've
|
|
// reached the end of input. That's because it's valid for the
|
|
// message to end with a variant that has no trailing whitespace.
|
|
// Why do we need to check this condition twice inside the loop?
|
|
// Because if we don't check it here, the `isWhitespace()` call in
|
|
// the loop head will read off the end of the input string.
|
|
CHECK_END_OF_INPUT
|
|
|
|
if (errors.hasSyntaxError() || U_FAILURE(status)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
  Consume a `body` (matching the nonterminal in the grammar).
  No postcondition (on return, `index` might equal `len()` with no syntax error,
  because a message can end with a body; trailing whitespace is optional)
*/
|
|
|
|
/*
  Record a syntax error and set the data model's pattern to a single
  text part: the remaining input, wrapped in curly braces. All
  remaining input is consumed.

  If `status` is a failure after the error is recorded and the pattern
  builder is constructed, returns without setting the pattern.
*/
void Parser::errorPattern(UErrorCode& status) {
    errors.addSyntaxError(status);
    // The pattern starts out empty
    Pattern::Builder builder(status);
    CHECK_ERROR(status);

    // If still in bounds, the remaining input becomes a single text part
    // of the pattern
    /*
      TODO: this behavior isn't documented in the spec, but it comes from
      https://github.com/messageformat/messageformat/blob/e0087bff312d759b67a9129eac135d318a1f0ce7/packages/mf2-messageformat/src/__fixtures/test-messages.json#L236
      and a pending pull request https://github.com/unicode-org/message-format-wg/pull/462 will clarify
      whether this is the intent behind the spec
    */
    UnicodeString remainder(LEFT_CURLY_BRACE);
    while (inBounds()) {
        remainder += peek();
        next();
    }
    // Curly braces go around the entire output (same comment as above)
    remainder += RIGHT_CURLY_BRACE;
    builder.add(std::move(remainder), status);
    dataModel.setPattern(builder.build(status));
}
|
|
|
|
/*
  Consume the body of a complex message: either a quoted pattern
  (starting with '{') or a selectors-and-variants section (starting
  with the first character of the match keyword).

  If the input is exhausted or starts with anything else, the error
  pattern is installed (see errorPattern()).
*/
void Parser::parseBody(UErrorCode& status) {
    CHECK_ERROR(status);

    // Running out of input is handled as a syntax error
    if (!inBounds()) {
        errorPattern(status);
        return;
    }

    // Body must be either a pattern or selectors
    if (peek() == LEFT_CURLY_BRACE) {
        // Pattern
        dataModel.setPattern(parseQuotedPattern(status));
    } else if (peek() == ID_MATCH[0]) {
        // Selectors
        parseSelectors(status);
    } else {
        ERROR(status);
        errorPattern(status);
    }
}
|
|
|
|
// -------------------------------------
|
|
// Parses the source pattern.
|
|
|
|
void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
|
|
CHECK_ERROR(status);
|
|
|
|
bool complex = false;
|
|
// First, "look ahead" to determine if this is a simple or complex
|
|
// message. To do that, check the first non-whitespace character.
|
|
while (inBounds(index) && (isWhitespace(peek()) || isBidiControl(peek()))) {
|
|
next();
|
|
}
|
|
|
|
// Message can be empty, so we need to only look ahead
|
|
// if we know it's non-empty
|
|
if (inBounds()) {
|
|
if (peek() == PERIOD
|
|
|| (inBounds(1)
|
|
&& peek() == LEFT_CURLY_BRACE
|
|
&& peek(1) == LEFT_CURLY_BRACE)) {
|
|
complex = true;
|
|
}
|
|
}
|
|
// Reset index
|
|
index = 0;
|
|
|
|
// Message can be empty, so we need to only look ahead
|
|
// if we know it's non-empty
|
|
if (complex) {
|
|
parseOptionalWhitespace();
|
|
parseDeclarations(status);
|
|
parseBody(status);
|
|
parseOptionalWhitespace();
|
|
} else {
|
|
// Simple message
|
|
// For normalization, quote the pattern
|
|
normalizedInput += LEFT_CURLY_BRACE;
|
|
normalizedInput += LEFT_CURLY_BRACE;
|
|
dataModel.setPattern(parseSimpleMessage(status));
|
|
normalizedInput += RIGHT_CURLY_BRACE;
|
|
normalizedInput += RIGHT_CURLY_BRACE;
|
|
}
|
|
|
|
CHECK_ERROR(status);
|
|
|
|
// There are no errors; finally, check that the entire input was consumed
|
|
if (!allConsumed()) {
|
|
ERROR(status);
|
|
}
|
|
|
|
// Finally, copy the relevant fields of the internal `MessageParseError`
|
|
// into the `UParseError` argument
|
|
translateParseError(parseError, parseErrorResult);
|
|
}
|
|
|
|
// Out-of-line destructor with no custom cleanup.
Parser::~Parser() = default;
|
|
|
|
} // namespace message2
|
|
U_NAMESPACE_END
|
|
|
|
#endif /* #if !UCONFIG_NO_MF2 */
|
|
|
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
|
|
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|