mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 09:21:03 +00:00
Compare commits
41 commits
release-77
...
main
Author | SHA1 | Date | |
---|---|---|---|
|
40fb3a9e4a | ||
|
32c96106d3 | ||
|
e13492c92b | ||
|
89fe16ef3b | ||
|
e1f13e1518 | ||
|
572d03f85a | ||
|
046392853d | ||
|
770c4b8042 | ||
|
e296ac93b5 | ||
|
1ea549be99 | ||
|
7eb090f910 | ||
|
ee90520429 | ||
|
1bf6bf774d | ||
|
d0e30acc68 | ||
|
0b98404454 | ||
|
d93b8bb344 | ||
|
f6abd2cc2e | ||
|
7e63370784 | ||
|
cfed9374b7 | ||
|
2628d4ed32 | ||
|
f6894e28d2 | ||
|
ad0df7e4c8 | ||
|
dcff47f86b | ||
|
434ef42093 | ||
|
57eaf4997a | ||
|
37e7693ab2 | ||
|
d78cf74bca | ||
|
b65650bc5f | ||
|
3edd9c828a | ||
|
255eb4ef3e | ||
|
cdf52396dc | ||
|
545bf260e1 | ||
|
ba0f39b592 | ||
|
72406ed78a | ||
|
f23af97cf4 | ||
|
c5b7dce96b | ||
|
30e23b0d28 | ||
|
faa80a2972 | ||
|
9cc28a6428 | ||
|
0748442ed6 | ||
|
60a0d9fcd8 |
191 changed files with 10946 additions and 8085 deletions
4
.github/pull_request_template.md
vendored
4
.github/pull_request_template.md
vendored
|
@ -8,8 +8,8 @@ Thank you for your pull request!
|
|||
* Associating PRs with Jira issues
|
||||
- We require each pull request to be associated with a [Jira issue](https://icu.unicode.org/bugs).
|
||||
- Reuse existing issues for minor changes:
|
||||
* ICU 77 docs minor fixes: ICU-22921 — User Guide & API docs typos etc., and version updates (e.g., dependabot for User Guide)
|
||||
* ICU 77 code warnings/version updates: ICU-22920 — Fix compiler warnings. Update versions of code-related dependencies (e.g., dependabot).
|
||||
* ICU 78 docs minor fixes: ICU-23055 — User Guide & API docs typos etc., and version updates (e.g., dependabot for User Guide)
|
||||
* ICU 78 code warnings/version updates: ICU-23054 — Fix compiler warnings. Update versions of code-related dependencies (e.g., dependabot).
|
||||
* Contributors license agreement (CLA):
|
||||
- You will be automatically asked to sign the CLA before the PR is accepted.
|
||||
- To sign the CLA: https://cla-assistant.io/unicode-org/icu
|
||||
|
|
64
.github/workflows/brs-commit-checker.yml
vendored
Normal file
64
.github/workflows/brs-commit-checker.yml
vendored
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Manually triggered workflow that runs tools/commit-checker/check.py for a
# given Jira fix version and git revision range, then publishes the generated
# Markdown report as the workflow job summary.
name: BRS Commit Checker Report

on:
  workflow_dispatch:
    inputs:
      fix_version:
        type: string
        required: true
        description: The ICU Jira "Fix Version" semver
      from_git_ref:
        type: string
        required: true
        description: The git ref start of comparison range. Prefix branches with `origin/`.
      end_git_ref:
        type: string
        required: true
        description: The git ref end of comparison range. Must be descendant of `from_git_ref`. Prefix branches with `origin/`.

# The Jira user name & API token, which are used for processing sensitive
# tickets, come from GitHub Secrets stored in the repository.

jobs:
  commit-report:
    runs-on: ubuntu-latest
    steps:
      # Full history and all tags are requested because the report compares an
      # arbitrary, user-supplied revision range.
      - uses: actions/checkout@v4
        with:
          fetch-tags: true
          fetch-depth: 0
      # Workaround for a bug in the checkout action. This step should be redundant.
      # https://github.com/actions/checkout/issues/1471
      # https://github.com/actions/checkout/issues/1781
      # https://github.com/actions/checkout/issues/701#issuecomment-1133937950
      - name: Fetch all tags
        run: |
          git fetch --tags origin
      - name: Fetch all branches
        run: |
          git fetch origin
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12.8'
          cache: 'pipenv'
          cache-dependency-path: |
            tools/commit-checker/Pipfile
            tools/commit-checker/Pipfile.lock
      - name: Install pipenv
        run: |
          sudo pip3 install pipenv
      - name: Generate report
        env:
          JIRA_USERNAME: ${{ secrets.COMMIT_CHECKER_JIRA_EMAIL }}
          JIRA_PASSWORD: ${{ secrets.COMMIT_CHECKER_JIRA_TOKEN }}
          # The workflow inputs are handed to the script through environment
          # variables instead of being expanded with ${{ }} inside `run`.
          # Inline expansion pastes the raw input text into the shell script,
          # so a value containing quotes, `$(...)` or `;` would be executed
          # as shell code.
          BRS_FIX_VERSION: ${{ inputs.fix_version }}
          BRS_FROM_GIT_REF: ${{ inputs.from_git_ref }}
          BRS_END_GIT_REF: ${{ inputs.end_git_ref }}
        run: |
          pushd ./tools/commit-checker
          pipenv install
          pipenv run python3 check.py \
            --jira-query "project=ICU AND fixVersion=${BRS_FIX_VERSION}" \
            --rev-range "${BRS_FROM_GIT_REF}..${BRS_END_GIT_REF}" > REPORT.md
          popd
      # https://github.blog/news-insights/product-news/supercharging-github-actions-with-job-summaries/
      - name: Reproduce report as workflow job summary
        run: |
          cat ./tools/commit-checker/REPORT.md >> "$GITHUB_STEP_SUMMARY"
          echo "View the Summary page of this GHA Workflow instance to view the rendered Markdown of this report."
|
2
.github/workflows/cifuzz.yml
vendored
2
.github/workflows/cifuzz.yml
vendored
|
@ -55,7 +55,7 @@ jobs:
|
|||
path: ./out/artifacts
|
||||
- name: Upload Sarif
|
||||
if: always() && steps.build.outcome == 'success'
|
||||
uses: github/codeql-action/upload-sarif@v3.28.9
|
||||
uses: github/codeql-action/upload-sarif@v3.28.10
|
||||
with:
|
||||
# Path to SARIF file relative to the root of the repository
|
||||
sarif_file: cifuzz-sarif/results.sarif
|
||||
|
|
2
.github/workflows/icu4c.yml
vendored
2
.github/workflows/icu4c.yml
vendored
|
@ -740,7 +740,7 @@ jobs:
|
|||
runs-on: ubuntu-22.04 # Updated in BRS
|
||||
steps:
|
||||
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
- uses: bazel-contrib/setup-bazel@0.13.0
|
||||
- uses: bazel-contrib/setup-bazel@0.14.0
|
||||
- name: Get CI Linux runner VM version
|
||||
id: linux-version
|
||||
run: |
|
||||
|
|
4
.github/workflows/icu4j.yml
vendored
4
.github/workflows/icu4j.yml
vendored
|
@ -60,7 +60,7 @@ jobs:
|
|||
- uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
# Download all of the artifacts needed for the code and build plugins, but
|
||||
# exclude any needed by profiles depending on system artifacts
|
||||
- name: Download all artifacts
|
||||
|
@ -75,7 +75,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
java-version: [ '8', '11', '17', '21' ]
|
||||
java-version: [ '11', '17', '21' ]
|
||||
runs-on: ubuntu-22.04 # Updated in BRS
|
||||
steps:
|
||||
- name: Checkout and setup
|
||||
|
|
2
.github/workflows/icu_common.yml
vendored
2
.github/workflows/icu_common.yml
vendored
|
@ -66,7 +66,7 @@ jobs:
|
|||
- uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
# Download all of the artifacts needed for the code and build plugins, but
|
||||
# exclude any needed by profiles depending on system artifacts
|
||||
- name: Download all artifacts
|
||||
|
|
44
.github/workflows/icu_merge_ci.yml
vendored
44
.github/workflows/icu_merge_ci.yml
vendored
|
@ -30,8 +30,8 @@ permissions:
|
|||
jobs:
|
||||
|
||||
# Initialize the Maven artifact cache
|
||||
# Uses Java 8 because Java version not deemed significant for downloading
|
||||
# artifacts
|
||||
# Using Java 11 because Java version is not deemed significant for downloading artifacts,
|
||||
# and is the lowest version we support, so the cached artifacts can be used by any other version.
|
||||
#
|
||||
# This job is created according to the cache strategy of reuse from a single job:
|
||||
# https://github.com/actions/cache/blob/main/caching-strategies.md#make-cache-read-only--reuse-cache-from-centralized-job
|
||||
|
@ -52,7 +52,7 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
# Download all of the artifacts needed for the code and build plugins
|
||||
- name: Download all artifacts
|
||||
run: |
|
||||
|
@ -144,7 +144,7 @@ jobs:
|
|||
- name: Create directory for lib files
|
||||
run: mkdir icu4c/source/perflib
|
||||
- name: Get ICU libs
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
|
||||
with:
|
||||
name: icu-perf-libs
|
||||
path: icu4c/source/lib
|
||||
|
@ -218,7 +218,7 @@ jobs:
|
|||
- name: Create directory for lib files
|
||||
run: mkdir icu4c/source/perflib
|
||||
- name: Get ICU libs
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
|
||||
with:
|
||||
name: icu-perf-libs
|
||||
path: icu4c/source/lib
|
||||
|
@ -283,7 +283,7 @@ jobs:
|
|||
- name: Create directory for lib files
|
||||
run: mkdir icu4c/source/perflib
|
||||
- name: Get ICU libs
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
|
||||
with:
|
||||
name: icu-perf-libs
|
||||
path: icu4c/source/lib
|
||||
|
@ -345,15 +345,16 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
|
||||
- name: Build and run unicodesetperf test
|
||||
run: |
|
||||
cd icu4j;
|
||||
mvn ${SHARED_MVN_ARGS} verify -DskipITs -DskipTests;
|
||||
mvn ${SHARED_MVN_ARGS} install -DskipITs -DskipTests;
|
||||
git status
|
||||
cd perf-tests;
|
||||
mkdir -p perf/results/j_unicodesetperf/${{ matrix.perf }};
|
||||
mvn dependency:copy-dependencies
|
||||
java -cp ./target/*:./target/dependency/* com.ibm.icu.dev.test.perf.UnicodeSetPerf ${{ matrix.perf }} -a -t 2 -p 4 [:Lt:] | tee perf/results/j_unicodesetperf/${{ matrix.perf }}/output.txt
|
||||
|
||||
- name: Store performance test results
|
||||
|
@ -402,14 +403,15 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
|
||||
- name: Build and run ucharacterperf test
|
||||
run: |
|
||||
cd icu4j;
|
||||
mvn ${SHARED_MVN_ARGS} verify -DskipITs -DskipTests;
|
||||
mvn ${SHARED_MVN_ARGS} install -DskipITs -DskipTests;
|
||||
cd perf-tests;
|
||||
mkdir -p perf/results/j_ucharacterperf;
|
||||
mvn dependency:copy-dependencies
|
||||
java -cp ./target/*:./target/dependency/* com.ibm.icu.dev.test.perf.UCharacterPerf -a -t 2 -p 4 0 ffff | tee perf/results/j_ucharacterperf/output.txt
|
||||
|
||||
- name: Store performance test results
|
||||
|
@ -461,18 +463,19 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
|
||||
- name: Build and run decimalformatperf
|
||||
run: |
|
||||
cd icu4j;
|
||||
mvn ${SHARED_MVN_ARGS} verify -DskipITs -DskipTests;
|
||||
mvn ${SHARED_MVN_ARGS} install -DskipITs -DskipTests;
|
||||
cd perf-tests;
|
||||
mkdir -p perf/results/j_decimalformatperf/${{ matrix.locale }}/${{ matrix.perf }};
|
||||
# Delay execution by random number of seconds. Spreading execution of multiple
|
||||
# tests over 180 secs. minimizes the possibility of push conflicts when storing
|
||||
# tests results in the data branch.
|
||||
sleep $(($RANDOM % 180));
|
||||
mvn dependency:copy-dependencies
|
||||
java -cp ./target/*:./target/dependency/* com.ibm.icu.dev.test.perf.DecimalFormatPerformanceTest ${{ matrix.perf }} -a -t 2 -p 4 -L ${{ matrix.locale }} "#,###.##" "1.234,56" -r 1 | tee perf/results/j_decimalformatperf/${{ matrix.locale }}/${{ matrix.perf }}/output.txt
|
||||
|
||||
- name: Store performance test results
|
||||
|
@ -525,20 +528,21 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
|
||||
- name: Build and run normperf
|
||||
env:
|
||||
DATA_FILE_PATH: data/collation
|
||||
run: |
|
||||
cd icu4j;
|
||||
mvn ${SHARED_MVN_ARGS} verify -DskipITs -DskipTests;
|
||||
mvn ${SHARED_MVN_ARGS} install -DskipITs -DskipTests;
|
||||
cd perf-tests;
|
||||
mkdir -p perf/results/j_normperf/${{ matrix.source_text }}/${{ matrix.perf }};
|
||||
# Delay execution by random number of seconds. Spreading execution of multiple
|
||||
# tests over 180 secs. minimizes the possibility of push conflicts when storing
|
||||
# tests results in the data branch.
|
||||
sleep $(($RANDOM % 180));
|
||||
mvn dependency:copy-dependencies
|
||||
java -cp ./target/*:./target/dependency/* com.ibm.icu.dev.test.perf.NormalizerPerformanceTest ${{ matrix.perf }} -a -t 2 -p 4 -f $DATA_FILE_PATH/${{ matrix.source_text }}.txt -e UTF-8 ${{ matrix.mode }} | tee perf/results/j_normperf/${{ matrix.source_text }}/${{ matrix.perf }}/output.txt
|
||||
cat perf/results/j_normperf/${{ matrix.source_text }}/${{ matrix.perf }}/output.txt
|
||||
|
||||
|
@ -659,21 +663,22 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
|
||||
- name: Build and run converterperf
|
||||
env:
|
||||
DATA_FILE_PATH: data/conversion
|
||||
run: |
|
||||
cd icu4j;
|
||||
mvn ${SHARED_MVN_ARGS} verify -DskipITs -DskipTests;
|
||||
mvn ${SHARED_MVN_ARGS} install -DskipITs -DskipTests;
|
||||
cd perf-tests;
|
||||
mkdir -p perf/results/j_converterperf/${{ matrix.source_text }}/${{ matrix.test_enc }}/${{ matrix.perf }};
|
||||
# Delay execution by random number of seconds. Spreading execution of multiple
|
||||
# tests over 180 secs. minimizes the possibility of push conflicts when storing
|
||||
# tests results in the data branch.
|
||||
sleep $(($RANDOM % 180));
|
||||
java -cp ./target/*:./target/dependency/*:../main/charset/target/* com.ibm.icu.dev.test.perf.ConverterPerformanceTest ${{ matrix.perf }} -a -t 2 -p 4 -f $DATA_FILE_PATH/${{ matrix.source_text }}.txt -e UTF-8 -T ${{ matrix.test_enc }} | tee perf/results/j_converterperf/${{ matrix.source_text }}/${{ matrix.test_enc }}/${{ matrix.perf }}/output.txt
|
||||
mvn dependency:copy-dependencies
|
||||
java -cp ./target/*:./target/dependency/* com.ibm.icu.dev.test.perf.ConverterPerformanceTest ${{ matrix.perf }} -a -t 2 -p 4 -f $DATA_FILE_PATH/${{ matrix.source_text }}.txt -e UTF-8 -T ${{ matrix.test_enc }} | tee perf/results/j_converterperf/${{ matrix.source_text }}/${{ matrix.test_enc }}/${{ matrix.perf }}/output.txt
|
||||
|
||||
- name: Store performance test results
|
||||
uses: gregtatum/github-action-benchmark@d3f06f738e9612988d575db23fae5ca0008d3d12
|
||||
|
@ -737,7 +742,7 @@ jobs:
|
|||
- uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '8'
|
||||
java-version: '11'
|
||||
|
||||
- name: Extract identifying digit and parameter.
|
||||
run: |
|
||||
|
@ -747,13 +752,14 @@ jobs:
|
|||
- name: Build and run dateformatperf
|
||||
run: |
|
||||
cd icu4j;
|
||||
mvn ${SHARED_MVN_ARGS} verify -DskipITs -DskipTests;
|
||||
mvn ${SHARED_MVN_ARGS} install -DskipITs -DskipTests;
|
||||
cd perf-tests;
|
||||
mkdir -p perf/results/j_dateformatperf/${{ matrix.locale }}/${{ matrix.perf }}/${{ env.DDIR }};
|
||||
# Delay execution by random number of seconds. Spreading execution of multiple
|
||||
# tests over 180 secs. minimizes the possibility of push conflicts when storing
|
||||
# tests results in the data branch.
|
||||
sleep $(($RANDOM % 180));
|
||||
mvn dependency:copy-dependencies
|
||||
java -cp ./target/*:./target/dependency/* com.ibm.icu.dev.test.perf.DateFormatPerformanceTest ${{ matrix.perf }} -a -t 2 -p 4 -L ${{ matrix.locale }} ${{ env.PARM }} -r 1 | tee perf/results/j_dateformatperf/${{ matrix.locale }}/${{ matrix.perf }}/${{ env.DDIR }}/output.txt
|
||||
|
||||
- name: Store performance test results
|
||||
|
|
2
.github/workflows/release-icu4j-maven.yml
vendored
2
.github/workflows/release-icu4j-maven.yml
vendored
|
@ -55,7 +55,7 @@ jobs:
|
|||
- name: Set up JDK
|
||||
uses: actions/setup-java@v4.2.2
|
||||
with:
|
||||
java-version: '8' # The custom Taglets for javadoc (tools/build) are still Java 8. They need updating to use a different JDK version.
|
||||
java-version: '11'
|
||||
distribution: 'temurin'
|
||||
server-id: icu4j-maven-repo # Value of the distributionManagement/repository/id field of the pom.xml
|
||||
server-username: MAVEN_USERNAME # env variable for username in deploy
|
||||
|
|
4
.github/workflows/scorecard.yml
vendored
4
.github/workflows/scorecard.yml
vendored
|
@ -34,7 +34,7 @@ jobs:
|
|||
persist-credentials: false
|
||||
|
||||
- name: "Run analysis"
|
||||
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0
|
||||
uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1
|
||||
with:
|
||||
results_file: results.sarif
|
||||
results_format: sarif
|
||||
|
@ -59,6 +59,6 @@ jobs:
|
|||
|
||||
# Upload the results to GitHub's code scanning dashboard.
|
||||
- name: "Upload to code-scanning"
|
||||
uses: github/codeql-action/upload-sarif@3d3d628990a5f99229dd9fa1821cc5a4f31b613b # v2.25.15
|
||||
uses: github/codeql-action/upload-sarif@83923549f688e42b34d0b90ee94725f7c30532fc # v2.25.15
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
|
|
|
@ -16,15 +16,14 @@ License & terms of use: http://www.unicode.org/copyright.html
|
|||
ICU is the [premier library for software internationalization](https://icu.unicode.org/#h.i33fakvpjb7o),
|
||||
used by a [wide array of companies and organizations](https://icu.unicode.org/#h.f9qwubthqabj).
|
||||
|
||||
## Release Candidate
|
||||
|
||||
**Please use this release candidate for testing, but do not use it in production!**
|
||||
|
||||
## Release Overview
|
||||
|
||||
* Download: [releases/tag/release-77-1](https://github.com/unicode-org/icu/releases/tag/release-77-1)
|
||||
* [Maven: com.ibm.icu / icu4j / version 77.1](https://mvnrepository.com/artifact/com.ibm.icu/icu4j/77.1)
|
||||
|
||||
ICU 77 updates to
|
||||
[CLDR 47](https://cldr.unicode.org/downloads/cldr-47)
|
||||
([alpha blog](https://blog.unicode.org/2025/02/unicode-cldr-47-alpha-now-available-for.html))
|
||||
([beta blog](https://blog.unicode.org/2025/02/unicode-cldr-47-beta-available-for.html))
|
||||
locale data with new locales, and various additions and corrections.
|
||||
|
||||
ICU 77 is mostly focused on bug fixes, segmentation conformance, and other refinements.
|
||||
|
@ -39,11 +38,31 @@ For more details, including migration issues, see below.
|
|||
|
||||
Please use the [icu-support mailing list](https://icu.unicode.org/contacts) and/or [find/submit error reports](https://icu.unicode.org/bugs).
|
||||
|
||||
### Attention: Future Changes
|
||||
|
||||
Beginning with ICU 78 (2025-oct):
|
||||
|
||||
1. We want to move the minimum required Java version from Java 8 to Java 11.
|
||||
This is a significant, useful update in terms of the Java language and
|
||||
standard library, and simplifies ICU tooling which currently uses
|
||||
a mix of Java 8 and Java 11.
|
||||
Note that [Android desugaring](https://developer.android.com/studio/write/java11-default-support-table)
|
||||
has supported at least Java 11 since late 2023.\
|
||||
See [ICU-23072](https://unicode-org.atlassian.net/browse/ICU-23072)
|
||||
where you can provide comments.
|
||||
2. We are planning to remove the
|
||||
[ICU4J Locale Service Provider](../userguide/icu4j/locale-service-provider.md)
|
||||
([Maven: com.ibm.icu / icu4j-localespi / version 76.1](https://mvnrepository.com/artifact/com.ibm.icu/icu4j-localespi/76.1)).
|
||||
It is much less useful than when we added it, has very low usage,
|
||||
needs work for newer Java versions, and complicates ICU4J development.\
|
||||
See [ICU-23071](https://unicode-org.atlassian.net/browse/ICU-23071)
|
||||
where you can provide comments.
|
||||
|
||||
### Version Number
|
||||
|
||||
The initial release has library version number 77.1.
|
||||
|
||||
* Release date: 2025-03-TODO
|
||||
* Release date: 2025-03-13
|
||||
* [List of tickets fixed in ICU 77](https://unicode-org.atlassian.net/issues/?jql=project%20%3D%20ICU%20AND%20status%20%3D%20Done%20AND%20resolution%20in%20%28Fixed%2C%20%22Fixed%20by%20Other%20Ticket%22%29%20AND%20fixVersion%20%3D%2077.1%20ORDER%20BY%20component%20ASC%2C%20created%20DESC)
|
||||
|
||||
If there are maintenance releases, they will be 77.2, 77.3, etc. (During ICU 77 development, the library version number was 77.0.x.)
|
||||
|
@ -53,7 +72,7 @@ Note: There may be additional commits on the [maint/maint-77](https://github.com
|
|||
## Common Changes
|
||||
|
||||
* [CLDR 47](https://cldr.unicode.org/downloads/cldr-47)
|
||||
([alpha blog](https://blog.unicode.org/2025/02/unicode-cldr-47-alpha-now-available-for.html)):
|
||||
([beta blog](https://blog.unicode.org/2025/02/unicode-cldr-47-beta-available-for.html)):
|
||||
* No major data collection for existing locales; focus on bug fixes and structural improvements
|
||||
* New regional variants: English in several European countries, and Cantonese in Macau (`yue_Hant_MO`)
|
||||
* Improved RBNF (number spellout) and transliteration data
|
||||
|
@ -91,7 +110,9 @@ Note: There may be additional commits on the [maint/maint-77](https://github.com
|
|||
|
||||
## Known Issues
|
||||
|
||||
* (none yet)
|
||||
* The new MeasureUnit for `portion-per-1e9` works for formatting,
|
||||
but when the new member function `getConstantDenominator()` is called on this unit,
|
||||
it incorrectly returns 0. ([ICU-23045](https://unicode-org.atlassian.net/browse/ICU-23045))
|
||||
|
||||
## Migration Issues
|
||||
|
||||
|
@ -118,20 +139,17 @@ ICU4J should work on Android API level 21 and later but may require “[library
|
|||
|
||||
## Download
|
||||
|
||||
Source and binary downloads are available on the git/GitHub tag page: <https://github.com/unicode-org/icu/releases/tag/release-77-rc>
|
||||
### GitHub
|
||||
Source and binary downloads are available on the git/GitHub tag page: <https://github.com/unicode-org/icu/releases/tag/release-77-1>
|
||||
|
||||
See the [Source Code Setup](../devsetup/source/) page for how to download the ICU file tree directly from GitHub.
|
||||
|
||||
ICU locale data was generated from CLDR data equivalent to:
|
||||
|
||||
* <https://github.com/unicode-org/cldr/releases/tag/release-47-alpha2>
|
||||
* <https://github.com/unicode-org/cldr-staging/releases/tag/release-47-alpha2>
|
||||
* <https://github.com/unicode-org/cldr/releases/tag/release-47>
|
||||
* <https://github.com/unicode-org/cldr-staging/releases/tag/release-47>
|
||||
|
||||
TODO: [Maven dependency](https://central.sonatype.com/artifact/com.ibm.icu/icu4j):
|
||||
```
|
||||
<dependency>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j</artifactId>
|
||||
<version>77.1</version>
|
||||
</dependency>
|
||||
```
|
||||
### Maven
|
||||
* https://mvnrepository.com/artifact/com.ibm.icu/icu4j/77.1
|
||||
* https://mvnrepository.com/artifact/com.ibm.icu/icu4j-charset/77.1
|
||||
* https://mvnrepository.com/artifact/com.ibm.icu/icu4j-localespi/77.1
|
||||
|
|
|
@ -14,26 +14,23 @@ License & terms of use: http://www.unicode.org/copyright.html
|
|||
|
||||
If you want to use ICU (as opposed to developing it), it is recommended that you download an official packaged version of the ICU source code. These versions are tested more thoroughly than day-to-day development builds of the system, and they are packaged in zip and tar files for convenient download. Here are several recent releases of ICU that are available:
|
||||
|
||||
## Release Candidate
|
||||
## Latest Release
|
||||
|
||||
***2025-02-21: ICU 77rc is now available.***
|
||||
It updates to [CLDR 47](https://cldr.unicode.org/downloads/cldr-47)
|
||||
***2025-03-13: ICU 77 is now available*** —
|
||||
[releases/tag/release-77-1](https://github.com/unicode-org/icu/releases/tag/release-77-1) —
|
||||
[Maven: com.ibm.icu / icu4j / version 77.1](https://mvnrepository.com/artifact/com.ibm.icu/icu4j/77.1)
|
||||
|
||||
[ICU 77](77.md) updates to [CLDR 47](https://cldr.unicode.org/downloads/cldr-47)
|
||||
locale data with new locales, and various additions and corrections.
|
||||
|
||||
ICU 77 is mostly focused on bug fixes, segmentation conformance, and other refinements.
|
||||
The technology preview implementations of the CLDR MessageFormat 2.0 specification have been updated to incorporate some, but not yet all, of the CLDR 47 changes. (Java more than C++)
|
||||
|
||||
See [ICU 77](77.md).
|
||||
|
||||
## Latest Release
|
||||
|
||||
***2024-10-24: ICU 76 is now available.***
|
||||
It updates to [Unicode 16](https://www.unicode.org/versions/Unicode16.0.0/) ([blog](https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html)), including new characters and scripts, emoji, collation & IDNA changes, and corresponding APIs and implementations. It also updates to [CLDR 46](https://cldr.unicode.org/downloads/cldr-46) ([beta blog](https://blog.unicode.org/2024/09/unicode-cldr-46-beta-available-for.html)) locale data with new locales, significant updates to existing locales, and various additions and corrections. For example, the CLDR and Unicode default sort orders are now very nearly the same.
|
||||
|
||||
Most of the java.time (Temporal) types can now be formatted directly. There are some new APIs to make ICU easier to use with modern C++ and Java patterns. The Java and C++ technology preview implementations of the CLDR MessageFormat 2.0 specification have been updated to match recent changes. See [ICU 76](76.md).
|
||||
|
||||
## Previous Releases
|
||||
|
||||
- 2024-10-24: **ICU 76** updates to [Unicode 16](https://www.unicode.org/versions/Unicode16.0.0/) ([blog](https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html)), including new characters and scripts, emoji, collation & IDNA changes, and corresponding APIs and implementations. It also updates to [CLDR 46](https://cldr.unicode.org/downloads/cldr-46) ([beta blog](https://blog.unicode.org/2024/09/unicode-cldr-46-beta-available-for.html)) locale data with new locales, significant updates to existing locales, and various additions and corrections. For example, the CLDR and Unicode default sort orders are now very nearly the same.
|
||||
- Most of the java.time (Temporal) types can now be formatted directly. There are some new APIs to make ICU easier to use with modern C++ and Java patterns. The Java and C++ technology preview implementations of the CLDR MessageFormat 2.0 specification have been updated to match recent changes. See [ICU 76](76.md).
|
||||
|
||||
- 2024-04-17: **ICU 75** updates to [CLDR 45](https://cldr.unicode.org/index/downloads/cldr-45) ([beta blog](https://blog.unicode.org/2024/04/unicode-cldr-v45-beta-available-for.html)) locale data with new locales and various additions and corrections. C++ code now requires C++17 and is being made more robust. The CLDR MessageFormat 2.0 specification is now in [technology preview](https://github.com/unicode-org/message-format-wg?tab=readme-ov-file#messageformat-2-technical-preview), together with a corresponding update of the ICU4J (Java) tech preview and a new ICU4C (C++) tech preview. See [Downloading ICU > ICU 75](https://icu.unicode.org/download/75).
|
||||
|
||||
- 2023-12-13: **ICU 74.2** released with date/time formatting bug fixes. See [Downloading ICU > ICU 74](https://icu.unicode.org/download/74).
|
||||
|
|
|
@ -29,10 +29,22 @@ It is the official landing page for the ICU project.
|
|||
Some of the pages from the ICU Site have been migrated here.
|
||||
The migrated sections and pages from the ICU Site are visible in the navigation bar of this site below the "ICU Site" section heading.
|
||||
|
||||
### Downloading ICU
|
||||
## Downloading ICU
|
||||
|
||||
The [Downloading ICU](download) page has been migrated here.
|
||||
|
||||
### Latest Release
|
||||
|
||||
***2025-03-13: ICU 77 is now available*** —
|
||||
[releases/tag/release-77-1](https://github.com/unicode-org/icu/releases/tag/release-77-1) —
|
||||
[Maven: com.ibm.icu / icu4j / version 77.1](https://mvnrepository.com/artifact/com.ibm.icu/icu4j/77.1)
|
||||
|
||||
[ICU 77](download/77.md) updates to [CLDR 47](https://cldr.unicode.org/downloads/cldr-47)
|
||||
locale data with new locales, and various additions and corrections.
|
||||
|
||||
ICU 77 is mostly focused on bug fixes, segmentation conformance, and other refinements.
|
||||
The technology preview implementations of the CLDR MessageFormat 2.0 specification have been updated to incorporate some, but not yet all, of the CLDR 47 changes. (Java more than C++)
|
||||
|
||||
## ICU team member pages
|
||||
|
||||
Other documentation pages here are written by and for team members.
|
||||
|
|
|
@ -57,13 +57,34 @@ merging post RC fixes from trunk and others.
|
|||
Every commit being shipped in the next ICU release should be labeled with a Jira
|
||||
ticket that is marked as fixed with the correct fix version. Further, there
|
||||
should be no Jira tickets marked as fixed with the current fixVersion that do
|
||||
not have commits. To check this, run the following tool:
|
||||
not have commits.
|
||||
|
||||
### Run locally
|
||||
|
||||
To check this, run the following tool:
|
||||
|
||||
<https://github.com/unicode-org/icu/tree/main/tools/commit-checker>
|
||||
|
||||
Follow the instructions in the README file to generate the report and send it
|
||||
Follow the instructions in the README file to generate the report locally and send it
|
||||
for review.
|
||||
|
||||
### Run via CI
|
||||
|
||||
Alternatively, you can run the "BRS Commit Checker Report" workflow directly from the project page CI
|
||||
by following these steps:
|
||||
|
||||
1. Go to the [Actions tab](https://github.com/unicode-org/icu/actions)
|
||||
1. Click on the "BRS Commit Checker Report" workflow name on the left hand list of workflows
|
||||
1. Click the "Run workflow" dropdown.
|
||||
|
||||
The dropdown should reveal an input form in which to provide inputs.
|
||||
|
||||
1. Provide the same inputs in the form as you would for a local run of the tool,
|
||||
as described in the tool's README linked above.
|
||||
|
||||
The only difference from the local run instructions is that git branch names in the
|
||||
Actions workflow input form should be prefixed with `origin/`.
|
||||
|
||||
---
|
||||
|
||||
## Fix Mis-ticketted commits
|
||||
|
|
|
@ -204,7 +204,7 @@ The command requires a version number string that follows the typical Java / Mav
|
|||
This can be done by running the following command:
|
||||
|
||||
```
|
||||
mvn versions:set-property -DnewVersion=74 -Dproperty=icu.major.version
|
||||
mvn versions:set-property -DnewVersion=74 -Dproperty=icu.major.version -DgenerateBackupPoms=false
|
||||
```
|
||||
|
||||
This should happen at the same time and along with the work in the previous step for the version number
|
||||
|
|
|
@ -20,6 +20,8 @@ License & terms of use: http://www.unicode.org/copyright.html
|
|||
|
||||
---
|
||||
|
||||
# WARNING: Please note that for ICU 78 (2025-oct) we are planning to remove the ICU4J Locale Service Provider. See the [ICU 77 page](https://unicode-org.github.io/icu/download/77.html) for details.
|
||||
|
||||
## Overview
|
||||
|
||||
Java SE 6 introduced a new feature which allows Java user code to extend locale
|
||||
|
|
|
@ -55,7 +55,7 @@ The ICU Data Build Tool enables you to write a configuration file that
|
|||
specifies what features and locales to include in a custom data bundle.
|
||||
|
||||
The configuration file may be written in either [JSON](http://json.org/) or
|
||||
[Hjson](https://hjson.org/). To build ICU4C with custom data, set the
|
||||
[Hjson](https://hjson.github.io/). To build ICU4C with custom data, set the
|
||||
`ICU_DATA_FILTER_FILE` environment variable when running `runConfigureICU` on
|
||||
Unix or when building the data package on Windows. For example:
|
||||
|
||||
|
|
|
@ -240,7 +240,7 @@ xcheck-local: $(top_builddir)/config/icu-config $(top_builddir)/config/Makefile.
|
|||
@echo verifying that icu-config --selfcheck can operate
|
||||
@test "passed" = "$(shell $(top_builddir)/config/icu-config --selfcheck 2>&1)" || (echo "FAIL: icu-config could not run properly." ; exit 1)
|
||||
@echo verifying that $(MAKE) -f Makefile.inc selfcheck can operate
|
||||
@test "passed" = "$(shell $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
|
||||
@test "passed" = "$(shell MAKEFLAGS= $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
|
||||
@echo "PASS: config selfcheck OK"
|
||||
|
||||
#$(srcdir)/configure : $(srcdir)/configure.ac $(top_srcdir)/aclocal.m4
|
||||
|
|
|
@ -3,6 +3,6 @@
|
|||
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- This file is used to set the ICU Major Version number, which is used as a suffix on various file names in other Visual Studio projects. -->
|
||||
<PropertyGroup>
|
||||
<IcuMajorVersion>77</IcuMajorVersion>
|
||||
<IcuMajorVersion>78</IcuMajorVersion>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
|
|
|
@ -1267,6 +1267,9 @@
|
|||
<CustomBuild Include="unicode\utf_old.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\utfiterator.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\bytestrie.h">
|
||||
<Filter>collections</Filter>
|
||||
</CustomBuild>
|
||||
|
|
|
@ -715,13 +715,29 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
|
|||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value > 0);
|
||||
U_ASSERT(value != 0);
|
||||
// For the case of und_Latn
|
||||
if (value < 0) {
|
||||
retainLanguage = !language.empty();
|
||||
retainScript = !script.empty();
|
||||
retainRegion = !region.empty();
|
||||
// Fallback to und_$region =>
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value == 0);
|
||||
int64_t trieUndEmptyState = iter.getState64();
|
||||
value = trieNext(iter, region, 0);
|
||||
// Fallback to und =>
|
||||
if (value < 0) {
|
||||
iter.resetToState64(trieUndEmptyState);
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
U_ASSERT(value < lsrsLength);
|
||||
const LSR &matched = lsrs[value];
|
||||
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
|
||||
return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching.
|
||||
|
@ -731,18 +747,23 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
|
|||
}
|
||||
|
||||
if (!(retainLanguage || retainScript || retainRegion)) {
|
||||
U_ASSERT(value >= 0);
|
||||
// Quickly return a copy of the lookup-result LSR
|
||||
// without new allocation of the subtags.
|
||||
const LSR &matched = lsrs[value];
|
||||
return LSR(matched.language, matched.script, matched.region, matched.flags);
|
||||
}
|
||||
if (!retainLanguage) {
|
||||
language = matched.language;
|
||||
U_ASSERT(value >= 0);
|
||||
language = lsrs[value].language;
|
||||
}
|
||||
if (!retainScript) {
|
||||
script = matched.script;
|
||||
U_ASSERT(value >= 0);
|
||||
script = lsrs[value].script;
|
||||
}
|
||||
if (!retainRegion) {
|
||||
region = matched.region;
|
||||
U_ASSERT(value >= 0);
|
||||
region = lsrs[value].region;
|
||||
}
|
||||
int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
|
|
|
@ -479,6 +479,8 @@
|
|||
/* Otherwise use the predefined value. */
|
||||
#elif !defined(__cplusplus)
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
#elif __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
|
||||
# define U_CPLUSPLUS_VERSION 20
|
||||
#elif __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
|
||||
# define U_CPLUSPLUS_VERSION 17
|
||||
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
|
||||
|
|
|
@ -119,6 +119,28 @@
|
|||
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
|
||||
#define U_INTERNAL U_CAPI
|
||||
|
||||
/**
|
||||
* \def U_FORCE_INLINE
|
||||
* Forces function inlining on compilers that are known to support it.
|
||||
* Place this before specifiers like "static" and "explicit".
|
||||
*
|
||||
* This does not replace the "inline" keyword which suspends the One Definition Rule (ODR)
|
||||
* in addition to optionally serving as an inlining hint to the compiler.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_FORCE_INLINE
|
||||
// already defined
|
||||
#elif defined(U_IN_DOXYGEN)
|
||||
# define U_FORCE_INLINE inline
|
||||
#elif (defined(__clang__) && __clang__) || U_GCC_MAJOR_MINOR != 0
|
||||
# define U_FORCE_INLINE [[gnu::always_inline]]
|
||||
#elif defined(U_REAL_MSVC)
|
||||
# define U_FORCE_INLINE __forceinline
|
||||
#else
|
||||
# define U_FORCE_INLINE inline
|
||||
#endif
|
||||
|
||||
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
|
||||
// series of statements wrapped in { } blocks and the caller could choose to
|
||||
// either treat them as if they were actual functions and end the invocation
|
||||
|
|
|
@ -517,7 +517,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
if(U8_IS_TRAIL(__t1)) { \
|
||||
++(i); \
|
||||
} \
|
||||
} else /* c>=0xf0 */ { \
|
||||
} else /* b>=0xf0 */ { \
|
||||
if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
|
||||
++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
|
||||
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
|
||||
|
@ -683,7 +683,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*/
|
||||
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(uint8_t)(s)[--(i)]; \
|
||||
if(U8_IS_TRAIL(c)) { \
|
||||
if(!U8_IS_SINGLE(c)) { \
|
||||
uint8_t __b, __count=1, __shift=6; \
|
||||
\
|
||||
/* c is a trail byte */ \
|
||||
|
|
2294
icu4c/source/common/unicode/utfiterator.h
Normal file
2294
icu4c/source/common/unicode/utfiterator.h
Normal file
File diff suppressed because it is too large
Load diff
|
@ -53,19 +53,19 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 77
|
||||
#define U_ICU_VERSION_MAJOR_NUM 78
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
#define U_ICU_VERSION_MINOR_NUM 0
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
|
||||
#define U_ICU_VERSION_PATCHLEVEL_NUM 1
|
||||
|
||||
/** The current ICU build level version as an integer.
|
||||
* This value is for use by ICU clients. It defaults to 0.
|
||||
|
@ -79,7 +79,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _77
|
||||
#define U_ICU_VERSION_SUFFIX _78
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
|
@ -132,7 +132,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "77.1"
|
||||
#define U_ICU_VERSION "78.0.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
|
@ -145,13 +145,13 @@
|
|||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "77"
|
||||
#define U_ICU_VERSION_SHORT "78"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "77.1"
|
||||
#define U_ICU_DATA_VERSION "78.0.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
|
|
|
@ -974,12 +974,12 @@ void UnicodeSet::_add(const UnicodeString& s) {
|
|||
setToBogus();
|
||||
return;
|
||||
}
|
||||
UnicodeString* t = new UnicodeString(s);
|
||||
if (t == nullptr) { // Check for memory allocation error.
|
||||
LocalPointer<UnicodeString> t(new UnicodeString(s));
|
||||
if (t.isNull()) { // Check for memory allocation error.
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
strings_->sortedInsert(t, compareUnicodeString, ec);
|
||||
strings_->sortedInsert(t.orphan(), compareUnicodeString, ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
}
|
||||
|
|
18
icu4c/source/configure
vendored
18
icu4c/source/configure
vendored
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.72 for ICU 77.1.
|
||||
# Generated by GNU Autoconf 2.72 for ICU 78.0.1.
|
||||
#
|
||||
# Report bugs to <https://icu.unicode.org/bugs>.
|
||||
#
|
||||
|
@ -606,8 +606,8 @@ MAKEFLAGS=
|
|||
# Identity of this package.
|
||||
PACKAGE_NAME='ICU'
|
||||
PACKAGE_TARNAME='icu4c'
|
||||
PACKAGE_VERSION='77.1'
|
||||
PACKAGE_STRING='ICU 77.1'
|
||||
PACKAGE_VERSION='78.0.1'
|
||||
PACKAGE_STRING='ICU 78.0.1'
|
||||
PACKAGE_BUGREPORT='https://icu.unicode.org/bugs'
|
||||
PACKAGE_URL='https://icu.unicode.org/'
|
||||
|
||||
|
@ -1387,7 +1387,7 @@ if test "$ac_init_help" = "long"; then
|
|||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
'configure' configures ICU 77.1 to adapt to many kinds of systems.
|
||||
'configure' configures ICU 78.0.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
|
@ -1453,7 +1453,7 @@ fi
|
|||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of ICU 77.1:";;
|
||||
short | recursive ) echo "Configuration of ICU 78.0.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
|
@ -1592,7 +1592,7 @@ fi
|
|||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
ICU configure 77.1
|
||||
ICU configure 78.0.1
|
||||
generated by GNU Autoconf 2.72
|
||||
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
@ -2184,7 +2184,7 @@ cat >config.log <<_ACEOF
|
|||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by ICU $as_me 77.1, which was
|
||||
It was created by ICU $as_me 78.0.1, which was
|
||||
generated by GNU Autoconf 2.72. Invocation command line was
|
||||
|
||||
$ $0$ac_configure_args_raw
|
||||
|
@ -9019,7 +9019,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by ICU $as_me 77.1, which was
|
||||
This file was extended by ICU $as_me 78.0.1, which was
|
||||
generated by GNU Autoconf 2.72. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
|
@ -9075,7 +9075,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config='$ac_cs_config_escaped'
|
||||
ac_cs_version="\\
|
||||
ICU config.status 77.1
|
||||
ICU config.status 78.0.1
|
||||
configured by $0, generated by GNU Autoconf 2.72,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
|
|
@ -12,11 +12,11 @@
|
|||
|
||||
##############################################################################
|
||||
# Keep the following in sync with the version - see common/unicode/uvernum.h
|
||||
U_ICUDATA_NAME=icudt77
|
||||
U_ICUDATA_NAME=icudt78
|
||||
##############################################################################
|
||||
!IF "$(UWP)" == "UWP"
|
||||
# Optionally change the name of the data file for the UWP version.
|
||||
U_ICUDATA_NAME=icudt77
|
||||
U_ICUDATA_NAME=icudt78
|
||||
!ENDIF
|
||||
U_ICUDATA_ENDIAN_SUFFIX=l
|
||||
UNICODE_VERSION=16.0
|
||||
|
|
|
@ -9,6 +9,6 @@
|
|||
// ***************************************************************************
|
||||
icuver:table(nofallback){
|
||||
CLDRVersion{"47"}
|
||||
DataVersion{"77.1.0.0"}
|
||||
ICUVersion{"77.1.0.0"}
|
||||
DataVersion{"78.0.1.0"}
|
||||
ICUVersion{"78.0.1.0"}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1705,6 +1705,13 @@ metaZones:table(nofallback){
|
|||
"America_Central",
|
||||
}
|
||||
}
|
||||
"America:Coyhaique"{
|
||||
{
|
||||
"Chile",
|
||||
"1970-01-01 00:00",
|
||||
"2025-03-19 21:00",
|
||||
}
|
||||
}
|
||||
"America:Creston"{
|
||||
{
|
||||
"America_Mountain",
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
timezoneTypes:table(nofallback){
|
||||
bcpTypeAlias{
|
||||
tz{
|
||||
aqams{"nzakl"}
|
||||
aqams{"aqmcm"}
|
||||
aukns{"auhba"}
|
||||
caffs{"cawnp"}
|
||||
camtr{"cator"}
|
||||
|
@ -80,7 +80,7 @@ timezoneTypes:table(nofallback){
|
|||
"America:Thunder_Bay"{"America/Toronto"}
|
||||
"America:Virgin"{"America/St_Thomas"}
|
||||
"America:Yellowknife"{"America/Edmonton"}
|
||||
"Antarctica:South_Pole"{"Pacific/Auckland"}
|
||||
"Antarctica:South_Pole"{"Antarctica/McMurdo"}
|
||||
"Asia:Ashkhabad"{"Asia/Ashgabat"}
|
||||
"Asia:Choibalsan"{"Asia/Ulaanbaatar"}
|
||||
"Asia:Chongqing"{"Asia/Shanghai"}
|
||||
|
@ -300,6 +300,7 @@ timezoneTypes:table(nofallback){
|
|||
"America:Coral_Harbour"{"cayzs"}
|
||||
"America:Cordoba"{"arcor"}
|
||||
"America:Costa_Rica"{"crsjo"}
|
||||
"America:Coyhaique"{"clcxq"}
|
||||
"America:Creston"{"cacfq"}
|
||||
"America:Cuiaba"{"brcgb"}
|
||||
"America:Curacao"{"ancur"}
|
||||
|
|
|
@ -362,7 +362,7 @@ windowsZones:table(nofallback){
|
|||
}
|
||||
"Magallanes Standard Time"{
|
||||
001{"America/Punta_Arenas"}
|
||||
CL{"America/Punta_Arenas"}
|
||||
CL{"America/Punta_Arenas America/Coyhaique"}
|
||||
}
|
||||
"Marquesas Standard Time"{
|
||||
001{"Pacific/Marquesas"}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -140,8 +140,8 @@ BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
|
|||
int32_t initialRaw, initialDst;
|
||||
UnicodeString initialName;
|
||||
|
||||
AnnualTimeZoneRule *ar1 = nullptr;
|
||||
AnnualTimeZoneRule *ar2 = nullptr;
|
||||
LocalPointer<AnnualTimeZoneRule> ar1;
|
||||
LocalPointer<AnnualTimeZoneRule> ar2;
|
||||
UnicodeString name;
|
||||
|
||||
UBool avail;
|
||||
|
@ -179,8 +179,8 @@ BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
|
|||
// zone to return wrong offset after the transition.
|
||||
// When we encounter such a case, we do not inspect the next next
|
||||
// transition for another rule.
|
||||
ar1 = new AnnualTimeZoneRule(name, initialRaw, tr.getTo()->getDSTSavings(),
|
||||
dtr, year, AnnualTimeZoneRule::MAX_YEAR);
|
||||
ar1.adoptInstead(new AnnualTimeZoneRule(name, initialRaw, tr.getTo()->getDSTSavings(),
|
||||
dtr, year, AnnualTimeZoneRule::MAX_YEAR));
|
||||
|
||||
if (tr.getTo()->getRawOffset() == initialRaw) {
|
||||
// Get the next next transition
|
||||
|
@ -200,8 +200,8 @@ BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
|
|||
// Generate another DOW rule
|
||||
dtr = new DateTimeRule(month, weekInMonth, dow, mid, DateTimeRule::WALL_TIME);
|
||||
tr.getTo()->getName(name);
|
||||
ar2 = new AnnualTimeZoneRule(name, tr.getTo()->getRawOffset(), tr.getTo()->getDSTSavings(),
|
||||
dtr, year - 1, AnnualTimeZoneRule::MAX_YEAR);
|
||||
ar2.adoptInstead(new AnnualTimeZoneRule(name, tr.getTo()->getRawOffset(), tr.getTo()->getDSTSavings(),
|
||||
dtr, year - 1, AnnualTimeZoneRule::MAX_YEAR));
|
||||
|
||||
// Make sure this rule can be applied to the specified date
|
||||
avail = ar2->getPreviousStart(date, tr.getFrom()->getRawOffset(), tr.getFrom()->getDSTSavings(), true, d);
|
||||
|
@ -209,13 +209,12 @@ BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
|
|||
|| initialRaw != tr.getTo()->getRawOffset()
|
||||
|| initialDst != tr.getTo()->getDSTSavings()) {
|
||||
// We cannot use this rule as the second transition rule
|
||||
delete ar2;
|
||||
ar2 = nullptr;
|
||||
ar2.adoptInstead(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ar2 == nullptr) {
|
||||
if (ar2.isNull()) {
|
||||
// Try previous transition
|
||||
avail = getPreviousTransition(date, true, tr);
|
||||
if (avail) {
|
||||
|
@ -234,23 +233,21 @@ BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
|
|||
|
||||
// second rule raw/dst offsets should match raw/dst offsets
|
||||
// at the given time
|
||||
ar2 = new AnnualTimeZoneRule(name, initialRaw, initialDst,
|
||||
dtr, ar1->getStartYear() - 1, AnnualTimeZoneRule::MAX_YEAR);
|
||||
ar2.adoptInstead(new AnnualTimeZoneRule(name, initialRaw, initialDst,
|
||||
dtr, ar1->getStartYear() - 1, AnnualTimeZoneRule::MAX_YEAR));
|
||||
|
||||
// Check if this rule starts after the first rule after the specified date
|
||||
avail = ar2->getNextStart(date, tr.getFrom()->getRawOffset(), tr.getFrom()->getDSTSavings(), false, d);
|
||||
if (!avail || d <= nextTransitionTime) {
|
||||
// We cannot use this rule as the second transition rule
|
||||
delete ar2;
|
||||
ar2 = nullptr;
|
||||
ar2.adoptInstead(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ar2 == nullptr) {
|
||||
if (ar2.isNull()) {
|
||||
// Cannot find a good pair of AnnualTimeZoneRule
|
||||
delete ar1;
|
||||
ar1 = nullptr;
|
||||
ar1.adoptInstead(nullptr);
|
||||
} else {
|
||||
// The initial rule should represent the rule before the previous transition
|
||||
ar1->getName(initialName);
|
||||
|
@ -278,13 +275,13 @@ BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
|
|||
initial = new InitialTimeZoneRule(initialName, initialRaw, initialDst);
|
||||
|
||||
// Set the standard and daylight saving rules
|
||||
if (ar1 != nullptr && ar2 != nullptr) {
|
||||
if (ar1.isValid() && ar2.isValid()) {
|
||||
if (ar1->getDSTSavings() != 0) {
|
||||
dst = ar1;
|
||||
std = ar2;
|
||||
dst = ar1.orphan();
|
||||
std = ar2.orphan();
|
||||
} else {
|
||||
std = ar1;
|
||||
dst = ar2;
|
||||
std = ar1.orphan();
|
||||
dst = ar2.orphan();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4251,17 +4251,20 @@ int32_t Calendar::internalGetMonth(UErrorCode& status) const {
|
|||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
if (resolveFields(kMonthPrecedence) == UCAL_MONTH) {
|
||||
return internalGet(UCAL_MONTH, status);
|
||||
if (resolveFields(kMonthPrecedence) == UCAL_ORDINAL_MONTH) {
|
||||
return internalGet(UCAL_ORDINAL_MONTH);
|
||||
}
|
||||
return internalGet(UCAL_ORDINAL_MONTH, status);
|
||||
return internalGet(UCAL_MONTH);
|
||||
}
|
||||
|
||||
int32_t Calendar::internalGetMonth(int32_t defaultValue, UErrorCode& /* status */) const {
|
||||
if (resolveFields(kMonthPrecedence) == UCAL_MONTH) {
|
||||
return internalGet(UCAL_MONTH, defaultValue);
|
||||
int32_t Calendar::internalGetMonth(int32_t defaultValue, UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
return internalGet(UCAL_ORDINAL_MONTH);
|
||||
if (resolveFields(kMonthPrecedence) == UCAL_ORDINAL_MONTH) {
|
||||
return internalGet(UCAL_ORDINAL_MONTH);
|
||||
}
|
||||
return internalGet(UCAL_MONTH, defaultValue);
|
||||
}
|
||||
|
||||
BasicTimeZone*
|
||||
|
|
|
@ -1177,10 +1177,14 @@ int32_t ChineseCalendar::internalGetMonth(int32_t defaultValue, UErrorCode& stat
|
|||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
if (resolveFields(kMonthPrecedence) == UCAL_MONTH) {
|
||||
return internalGet(UCAL_MONTH, defaultValue);
|
||||
switch (resolveFields(kMonthPrecedence)) {
|
||||
case UCAL_MONTH:
|
||||
return internalGet(UCAL_MONTH);
|
||||
case UCAL_ORDINAL_MONTH:
|
||||
return internalGetMonth(status);
|
||||
default:
|
||||
return defaultValue;
|
||||
}
|
||||
return internalGetMonth(status);
|
||||
}
|
||||
|
||||
ChineseCalendar::Setting ChineseCalendar::getSetting(UErrorCode&) const {
|
||||
|
|
|
@ -446,8 +446,7 @@ int32_t startOfYear(int32_t year, UErrorCode &status)
|
|||
// If the 1st is on Sun, Wed, or Fri, postpone to the next day
|
||||
day += 1;
|
||||
wd = (day % 7);
|
||||
}
|
||||
if (wd == 1 && frac > 15*HOUR_PARTS+204 && !HebrewCalendar::isLeapYear(year) ) {
|
||||
} else if (wd == 1 && frac > 15*HOUR_PARTS+204 && !HebrewCalendar::isLeapYear(year) ) {
|
||||
// If the new moon falls after 3:11:20am (15h204p from the previous noon)
|
||||
// on a Tuesday and it is not a leap year, postpone by 2 days.
|
||||
// This prevents 356-day years.
|
||||
|
|
|
@ -40,7 +40,7 @@ U_NAMESPACE_BEGIN
|
|||
* solar year (approximately 365.24 days) is not an even multiple of
|
||||
* the lunar month (approximately 29.53 days) an extra "leap month" is
|
||||
* inserted in 7 out of every 19 years. To make matters even more
|
||||
* interesting, the start of a year can be delayed by up to three days
|
||||
* interesting, the start of a year can be delayed by up to two days
|
||||
* in order to prevent certain holidays from falling on the Sabbath and
|
||||
* to prevent certain illegal year lengths. Finally, the lengths of certain
|
||||
* months can vary depending on the number of days in the year.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "messageformat2_allocation.h"
|
||||
#include "messageformat2_checker.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "messageformat2_function_registry_internal.h"
|
||||
#include "messageformat2_macros.h"
|
||||
|
||||
|
||||
|
@ -36,29 +37,62 @@ static Formattable evalLiteral(const Literal& lit) {
|
|||
}
|
||||
|
||||
// Assumes that `var` is a message argument; returns the argument's value.
|
||||
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
|
||||
[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const UnicodeString& fallback,
|
||||
const VariableName& var,
|
||||
MessageContext& context,
|
||||
UErrorCode& errorCode) const {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
// The fallback for a variable name is itself.
|
||||
UnicodeString str(DOLLAR);
|
||||
str += var;
|
||||
const Formattable* val = context.getGlobal(*this, var, errorCode);
|
||||
const Formattable* val = context.getGlobal(var, errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
return (FormattedPlaceholder(*val, str));
|
||||
// Note: the fallback string has to be passed in because in a declaration like:
|
||||
// .local $foo = {$bar :number}
|
||||
// the fallback for $bar is "$foo".
|
||||
UnicodeString fallbackToUse = fallback;
|
||||
if (fallbackToUse.isEmpty()) {
|
||||
fallbackToUse += DOLLAR;
|
||||
fallbackToUse += var;
|
||||
}
|
||||
return (FormattedPlaceholder(*val, fallbackToUse));
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
// Returns the contents of the literal
|
||||
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
|
||||
// The fallback for a literal is itself.
|
||||
return FormattedPlaceholder(evalLiteral(lit), lit.quoted());
|
||||
// Helper function to re-escape any escaped-char characters
|
||||
static UnicodeString reserialize(const UnicodeString& s) {
|
||||
UnicodeString result(PIPE);
|
||||
for (int32_t i = 0; i < s.length(); i++) {
|
||||
switch(s[i]) {
|
||||
case BACKSLASH:
|
||||
case PIPE:
|
||||
case LEFT_CURLY_BRACE:
|
||||
case RIGHT_CURLY_BRACE: {
|
||||
result += BACKSLASH;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
result += s[i];
|
||||
}
|
||||
result += PIPE;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[nodiscard]] InternalValue* MessageFormatter::formatOperand(const Environment& env,
|
||||
const Operand& rand,
|
||||
MessageContext& context,
|
||||
UErrorCode &status) const {
|
||||
// Returns the contents of the literal
|
||||
[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const UnicodeString& fallback,
|
||||
const Literal& lit) const {
|
||||
// The fallback for a literal is itself, unless another fallback is passed in
|
||||
// (same reasoning as evalArgument())
|
||||
UnicodeString fallbackToUse = fallback.isEmpty() ? reserialize(lit.unquoted()) : fallback;
|
||||
return FormattedPlaceholder(evalLiteral(lit), fallbackToUse);
|
||||
}
|
||||
|
||||
[[nodiscard]] InternalValue* MessageFormatter::formatOperand(const UnicodeString& fallback,
|
||||
const Environment& env,
|
||||
const Operand& rand,
|
||||
MessageContext& context,
|
||||
UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
@ -77,17 +111,20 @@ static Formattable evalLiteral(const Literal& lit) {
|
|||
|
||||
// NFC-normalize the variable name. See
|
||||
// https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers
|
||||
const VariableName normalized = normalizeNFC(var);
|
||||
const VariableName normalized = StandardFunctions::normalizeNFC(var);
|
||||
|
||||
// Look up the variable in the environment
|
||||
if (env.has(normalized)) {
|
||||
// `var` is a local -- look it up
|
||||
const Closure& rhs = env.lookup(normalized);
|
||||
// Format the expression using the environment from the closure
|
||||
return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
|
||||
// The name of this local variable is the fallback for its RHS.
|
||||
UnicodeString newFallback(DOLLAR);
|
||||
newFallback += var;
|
||||
return formatExpression(newFallback, rhs.getEnv(), rhs.getExpr(), context, status);
|
||||
}
|
||||
// Variable wasn't found in locals -- check if it's global
|
||||
FormattedPlaceholder result = evalArgument(normalized, context, status);
|
||||
FormattedPlaceholder result = evalArgument(fallback, normalized, context, status);
|
||||
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
// Unbound variable -- set a resolution error
|
||||
|
@ -101,7 +138,7 @@ static Formattable evalLiteral(const Literal& lit) {
|
|||
return create<InternalValue>(InternalValue(std::move(result)), status);
|
||||
} else {
|
||||
U_ASSERT(rand.isLiteral());
|
||||
return create<InternalValue>(InternalValue(formatLiteral(rand.asLiteral())), status);
|
||||
return create<InternalValue>(InternalValue(formatLiteral(fallback, rand.asLiteral())), status);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,7 +159,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O
|
|||
|
||||
// Options are fully evaluated before calling the function
|
||||
// Format the operand
|
||||
LocalPointer<InternalValue> rhsVal(formatOperand(env, v, context, status));
|
||||
LocalPointer<InternalValue> rhsVal(formatOperand({}, env, v, context, status));
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
@ -132,7 +169,8 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O
|
|||
FormattedPlaceholder optValue = rhsVal->forceFormatting(context.getErrors(), status);
|
||||
resolvedOpt.adoptInstead(create<ResolvedFunctionOption>
|
||||
(ResolvedFunctionOption(k,
|
||||
optValue.asFormattable()),
|
||||
optValue.asFormattable(),
|
||||
v.isLiteral()),
|
||||
status));
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
|
@ -227,17 +265,18 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O
|
|||
}
|
||||
|
||||
// Formats an expression using `globalEnv` for the values of variables
|
||||
[[nodiscard]] InternalValue* MessageFormatter::formatExpression(const Environment& globalEnv,
|
||||
const Expression& expr,
|
||||
MessageContext& context,
|
||||
UErrorCode &status) const {
|
||||
[[nodiscard]] InternalValue* MessageFormatter::formatExpression(const UnicodeString& fallback,
|
||||
const Environment& globalEnv,
|
||||
const Expression& expr,
|
||||
MessageContext& context,
|
||||
UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const Operand& rand = expr.getOperand();
|
||||
// Format the operand (formatOperand handles the case of a null operand)
|
||||
LocalPointer<InternalValue> randVal(formatOperand(globalEnv, rand, context, status));
|
||||
LocalPointer<InternalValue> randVal(formatOperand(fallback, globalEnv, rand, context, status));
|
||||
|
||||
FormattedPlaceholder maybeRand = randVal->takeArgument(status);
|
||||
|
||||
|
@ -281,7 +320,7 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment&
|
|||
} else {
|
||||
// Format the expression
|
||||
LocalPointer<InternalValue> partVal(
|
||||
formatExpression(globalEnv, part.contents(), context, status));
|
||||
formatExpression({}, globalEnv, part.contents(), context, status));
|
||||
FormattedPlaceholder partResult = partVal->forceFormatting(context.getErrors(),
|
||||
status);
|
||||
// Force full evaluation, e.g. applying default formatters to
|
||||
|
@ -315,7 +354,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme
|
|||
// 2. For each expression exp of the message's selectors
|
||||
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
|
||||
// 2i. Let rv be the resolved value of exp.
|
||||
LocalPointer<InternalValue> rv(formatOperand(env, Operand(selectors[i]), context, status));
|
||||
LocalPointer<InternalValue> rv(formatOperand({}, env, Operand(selectors[i]), context, status));
|
||||
if (rv->canSelect()) {
|
||||
// 2ii. If selection is supported for rv:
|
||||
// (True if this code has been reached)
|
||||
|
@ -444,7 +483,7 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res,
|
|||
// 2ii(b)(a) Assert that key is a literal.
|
||||
// (Not needed)
|
||||
// 2ii(b)(b) Let `ks` be the resolved value of `key` in Unicode Normalization Form C.
|
||||
ks = normalizeNFC(key.asLiteral().unquoted());
|
||||
ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
|
||||
// 2ii(b)(c) Append `ks` as the last element of the list `keys`.
|
||||
ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
|
||||
CHECK_ERROR(status);
|
||||
|
@ -505,7 +544,7 @@ void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UError
|
|||
// 2i(c). Assert that `key` is a literal.
|
||||
// (Not needed)
|
||||
// 2i(d). Let `ks` be the resolved value of `key`.
|
||||
UnicodeString ks = normalizeNFC(key.asLiteral().unquoted());
|
||||
UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
|
||||
// 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
|
||||
const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
|
||||
// 2i(f). If `matches` includes `ks`
|
||||
|
@ -567,7 +606,7 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo
|
|||
// 5iii(c)(a). Assert that `key` is a literal.
|
||||
// (Not needed)
|
||||
// 5iii(c)(b). Let `ks` be the resolved value of `key`.
|
||||
UnicodeString ks = normalizeNFC(key.asLiteral().unquoted());
|
||||
UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
|
||||
// 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
|
||||
matchpref = vectorFind(matches, ks);
|
||||
U_ASSERT(matchpref >= 0);
|
||||
|
@ -652,7 +691,7 @@ UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments
|
|||
formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
|
||||
} else {
|
||||
// Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
|
||||
// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
|
||||
// See https://www.unicode.org/reports/tr35/tr35-messageFormat.html#pattern-selection
|
||||
const DynamicErrors& err = context.getErrors();
|
||||
if (err.hasSyntaxError() || err.hasDataModelError()) {
|
||||
result += REPLACEMENT;
|
||||
|
@ -692,14 +731,14 @@ void MessageFormatter::check(MessageContext& context, const Environment& localEn
|
|||
|
||||
// Check that variable is in scope
|
||||
const VariableName& var = rand.asVariable();
|
||||
UnicodeString normalized = normalizeNFC(var);
|
||||
UnicodeString normalized = StandardFunctions::normalizeNFC(var);
|
||||
|
||||
// Check local scope
|
||||
if (localEnv.has(normalized)) {
|
||||
return;
|
||||
}
|
||||
// Check global scope
|
||||
context.getGlobal(*this, normalized, status);
|
||||
context.getGlobal(normalized, status);
|
||||
if (status == U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
context.getErrors().setUnresolvedVariable(var, status);
|
||||
|
@ -736,7 +775,7 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*&
|
|||
// memoizing the value of localEnv up to this point
|
||||
|
||||
// Add the LHS to the environment for checking the next declaration
|
||||
env = Environment::create(normalizeNFC(decl.getVariable()),
|
||||
env = Environment::create(StandardFunctions::normalizeNFC(decl.getVariable()),
|
||||
Closure(rhs, *env),
|
||||
env,
|
||||
status);
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "unicode/messageformat2_arguments.h"
|
||||
#include "unicode/messageformat2_data_model_names.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "messageformat2_function_registry_internal.h"
|
||||
#include "uvector.h" // U_ASSERT
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -26,13 +27,12 @@ namespace message2 {
|
|||
|
||||
using Arguments = MessageArguments;
|
||||
|
||||
const Formattable* Arguments::getArgument(const MessageFormatter& context,
|
||||
const VariableName& arg,
|
||||
const Formattable* Arguments::getArgument(const VariableName& arg,
|
||||
UErrorCode& errorCode) const {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
U_ASSERT(argsLen == 0 || arguments.isValid());
|
||||
for (int32_t i = 0; i < argsLen; i++) {
|
||||
UnicodeString normalized = context.normalizeNFC(argumentNames[i]);
|
||||
UnicodeString normalized = StandardFunctions::normalizeNFC(argumentNames[i]);
|
||||
// arg already assumed to be normalized
|
||||
if (normalized == arg) {
|
||||
return &arguments[i];
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "messageformat2_allocation.h"
|
||||
#include "messageformat2_checker.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "messageformat2_function_registry_internal.h"
|
||||
#include "messageformat2_macros.h"
|
||||
#include "uvector.h" // U_ASSERT
|
||||
|
||||
|
@ -113,7 +114,7 @@ Key Checker::normalizeNFC(const Key& k) const {
|
|||
return k;
|
||||
}
|
||||
return Key(Literal(k.asLiteral().isQuoted(),
|
||||
context.normalizeNFC(k.asLiteral().unquoted())));
|
||||
StandardFunctions::normalizeNFC(k.asLiteral().unquoted())));
|
||||
}
|
||||
|
||||
static bool areDefaultKeys(const Key* keys, int32_t len) {
|
||||
|
|
|
@ -834,13 +834,13 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
|
|||
const Variant* otherVariants = other.getVariantsInternal();
|
||||
int32_t numSelectors = other.numSelectors();
|
||||
int32_t numVariants = other.numVariants();
|
||||
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
|
||||
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
|
||||
LocalArray<VariableName> copiedSelectors(copyArray(otherSelectors, numSelectors, localErrorCode), localErrorCode);
|
||||
LocalArray<Variant> copiedVariants(copyArray(otherVariants, numVariants, localErrorCode), localErrorCode);
|
||||
if (U_FAILURE(localErrorCode)) {
|
||||
bogus = true;
|
||||
return;
|
||||
}
|
||||
body = Matcher(copiedSelectors, numSelectors, copiedVariants, numVariants);
|
||||
body = Matcher(copiedSelectors.orphan(), numSelectors, copiedVariants.orphan(), numVariants);
|
||||
}
|
||||
|
||||
bindingsLen = other.bindingsLen;
|
||||
|
|
|
@ -29,6 +29,14 @@ namespace message2 {
|
|||
addError(DynamicError(DynamicErrorType::FormattingError, UnicodeString("unknown formatter")), status);
|
||||
}
|
||||
|
||||
void DynamicErrors::setBadOption(const FunctionName& formatterName, UErrorCode& status) {
|
||||
addError(DynamicError(DynamicErrorType::BadOptionError, formatterName), status);
|
||||
}
|
||||
|
||||
void DynamicErrors::setRecoverableBadOption(const FunctionName& formatterName, UErrorCode& status) {
|
||||
addError(DynamicError(DynamicErrorType::RecoverableBadOptionError, formatterName), status);
|
||||
}
|
||||
|
||||
void DynamicErrors::setOperandMismatchError(const FunctionName& formatterName, UErrorCode& status) {
|
||||
addError(DynamicError(DynamicErrorType::OperandMismatchError, formatterName), status);
|
||||
}
|
||||
|
@ -137,6 +145,11 @@ namespace message2 {
|
|||
status = U_MF_FORMATTING_ERROR;
|
||||
break;
|
||||
}
|
||||
case DynamicErrorType::BadOptionError:
|
||||
case DynamicErrorType::RecoverableBadOptionError: {
|
||||
status = U_MF_BAD_OPTION;
|
||||
break;
|
||||
}
|
||||
case DynamicErrorType::OperandMismatchError: {
|
||||
status = U_MF_OPERAND_MISMATCH_ERROR;
|
||||
break;
|
||||
|
@ -228,6 +241,15 @@ namespace message2 {
|
|||
resolutionAndFormattingErrors->adoptElement(errorP, status);
|
||||
break;
|
||||
}
|
||||
case DynamicErrorType::BadOptionError: {
|
||||
badOptionError = true;
|
||||
resolutionAndFormattingErrors->adoptElement(errorP, status);
|
||||
break;
|
||||
}
|
||||
case DynamicErrorType::RecoverableBadOptionError: {
|
||||
resolutionAndFormattingErrors->adoptElement(errorP, status);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -66,6 +66,17 @@ namespace message2 {
|
|||
enum DynamicErrorType {
|
||||
UnresolvedVariable,
|
||||
FormattingError,
|
||||
BadOptionError,
|
||||
/**
|
||||
This is used to signal errors from :number and :integer when a
|
||||
bad `select` option is passed. In this case, fallback output
|
||||
is not used, so it must be distinguished from a regular bad
|
||||
option error (but it maps to a bad option error in the final
|
||||
error code).
|
||||
See https://github.com/unicode-org/message-format-wg/blob/main/spec/functions/number.md#number-selection
("The formatting of the _resolved value_ is not affected by the `select` option.")
|
||||
*/
|
||||
RecoverableBadOptionError,
|
||||
OperandMismatchError,
|
||||
SelectorError,
|
||||
UnknownFunction,
|
||||
|
@ -114,6 +125,7 @@ namespace message2 {
|
|||
const StaticErrors& staticErrors;
|
||||
LocalPointer<UVector> resolutionAndFormattingErrors;
|
||||
bool formattingError = false;
|
||||
bool badOptionError = false;
|
||||
bool selectorError = false;
|
||||
bool unknownFunctionError = false;
|
||||
bool unresolvedVariableError = false;
|
||||
|
@ -128,9 +140,12 @@ namespace message2 {
|
|||
void setFormattingError(const FunctionName&, UErrorCode&);
|
||||
// Used when the name of the offending formatter is unknown
|
||||
void setFormattingError(UErrorCode&);
|
||||
void setBadOption(const FunctionName&, UErrorCode&);
|
||||
void setRecoverableBadOption(const FunctionName&, UErrorCode&);
|
||||
void setOperandMismatchError(const FunctionName&, UErrorCode&);
|
||||
bool hasDataModelError() const { return staticErrors.hasDataModelError(); }
|
||||
bool hasFormattingError() const { return formattingError; }
|
||||
bool hasBadOptionError() const { return badOptionError; }
|
||||
bool hasSelectorError() const { return selectorError; }
|
||||
bool hasSyntaxError() const { return staticErrors.hasSyntaxError(); }
|
||||
bool hasUnknownFunctionError() const { return unknownFunctionError; }
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "messageformat2_allocation.h"
|
||||
#include "messageformat2_evaluation.h"
|
||||
#include "messageformat2_function_registry_internal.h"
|
||||
#include "messageformat2_macros.h"
|
||||
#include "uvector.h" // U_ASSERT
|
||||
|
||||
|
@ -28,6 +29,7 @@ using namespace data_model;
|
|||
// Move constructor: takes over the name and value of `other` and copies
// the flag recording whether the option was written as a literal.
ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) {
    // The three assignments are independent of one another.
    sourceIsLiteral = other.sourceIsLiteral;
    value = std::move(other.value);
    name = std::move(other.name);
}
|
||||
|
||||
ResolvedFunctionOption::~ResolvedFunctionOption() {}
|
||||
|
@ -46,7 +48,21 @@ FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) {
|
|||
options = moveVectorToArray<ResolvedFunctionOption>(optionsVector, status);
|
||||
}
|
||||
|
||||
UBool FunctionOptions::getFunctionOption(const UnicodeString& key, Formattable& option) const {
|
||||
// Returns false if option doesn't exist
|
||||
UBool FunctionOptions::wasSetFromLiteral(const UnicodeString& key) const {
|
||||
if (options == nullptr) {
|
||||
U_ASSERT(functionOptionsLen == 0);
|
||||
}
|
||||
for (int32_t i = 0; i < functionOptionsLen; i++) {
|
||||
const ResolvedFunctionOption& opt = options[i];
|
||||
if (opt.getName() == key) {
|
||||
return opt.isLiteral();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
UBool FunctionOptions::getFunctionOption(std::u16string_view key, Formattable& option) const {
|
||||
if (options == nullptr) {
|
||||
U_ASSERT(functionOptionsLen == 0);
|
||||
}
|
||||
|
@ -60,7 +76,7 @@ UBool FunctionOptions::getFunctionOption(const UnicodeString& key, Formattable&
|
|||
return false;
|
||||
}
|
||||
|
||||
UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const {
|
||||
UnicodeString FunctionOptions::getStringFunctionOption(std::u16string_view key) const {
|
||||
Formattable option;
|
||||
if (getFunctionOption(key, option)) {
|
||||
if (option.getType() == UFMT_STRING) {
|
||||
|
@ -211,10 +227,9 @@ PrioritizedVariant::~PrioritizedVariant() {}
|
|||
errors.checkErrors(status);
|
||||
}
|
||||
|
||||
const Formattable* MessageContext::getGlobal(const MessageFormatter& context,
|
||||
const VariableName& v,
|
||||
const Formattable* MessageContext::getGlobal(const VariableName& v,
|
||||
UErrorCode& errorCode) const {
|
||||
return arguments.getArgument(context, v, errorCode);
|
||||
return arguments.getArgument(v, errorCode);
|
||||
}
|
||||
|
||||
MessageContext::MessageContext(const MessageArguments& args,
|
||||
|
@ -304,12 +319,25 @@ PrioritizedVariant::~PrioritizedVariant() {}
|
|||
FunctionOptions opts;
|
||||
InternalValue* p = this;
|
||||
FunctionName selectorName = name;
|
||||
|
||||
bool operandSelect = false;
|
||||
while (std::holds_alternative<InternalValue*>(p->argument)) {
|
||||
if (p->name != selectorName) {
|
||||
// Can only compose calls to the same selector
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
// Very special case to detect something like:
|
||||
// .local $sel = {1 :integer select=exact} .local $bad = {$sel :integer} .match $bad 1 {{ONE}} * {{operand select {$bad}}}
|
||||
// This can be done better once function composition is fully implemented.
|
||||
if (p != this &&
|
||||
!p->options.getStringFunctionOption(options::SELECT).isEmpty()
|
||||
&& (selectorName == functions::NUMBER || selectorName == functions::INTEGER)) {
|
||||
// In this case, we want to call the selector normally but emit a
|
||||
// `bad-option` error, possibly with the outcome of normal-looking output (with relaxed
|
||||
// error handling) and an error (with strict error handling).
|
||||
operandSelect = true;
|
||||
}
|
||||
// First argument to mergeOptions takes precedence
|
||||
opts = opts.mergeOptions(std::move(p->options), errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
|
@ -320,15 +348,50 @@ PrioritizedVariant::~PrioritizedVariant() {}
|
|||
}
|
||||
FormattedPlaceholder arg = std::move(*std::get_if<FormattedPlaceholder>(&p->argument));
|
||||
|
||||
// This condition can't be checked in the selector.
|
||||
// Effectively, there are two different kinds of "bad option" errors:
|
||||
// one that can be recovered from (used for select=$var) and one that
|
||||
// can't (used for bad digit size options and other cases).
|
||||
// The checking of the recoverable error has to be done here; otherwise,
|
||||
// the "bad option" signaled by the selector implementation would cause
|
||||
// fallback output to be used when formatting the `*` pattern.
|
||||
bool badSelectOption = !checkSelectOption();
|
||||
|
||||
selector->selectKey(std::move(arg), std::move(opts),
|
||||
keys, keysLen,
|
||||
prefs, prefsLen, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
if (errorCode == U_MF_SELECTOR_ERROR) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
errs.setSelectorError(selectorName, errorCode);
|
||||
} else if (errorCode == U_MF_BAD_OPTION) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
errs.setBadOption(selectorName, errorCode);
|
||||
} else if (operandSelect || badSelectOption) {
|
||||
errs.setRecoverableBadOption(selectorName, errorCode);
|
||||
// In this case, only the `*` variant should match
|
||||
prefsLen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool InternalValue::checkSelectOption() const {
|
||||
if (name != UnicodeString("number") && name != UnicodeString("integer")) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Per the spec, if the "select" option is present, it must have been
|
||||
// set from a literal
|
||||
|
||||
Formattable opt;
|
||||
// Returns false if the `select` option is present and it was not set from a literal
|
||||
|
||||
// OK if the option wasn't present
|
||||
if (!options.getFunctionOption(UnicodeString("select"), opt)) {
|
||||
return true;
|
||||
}
|
||||
// Otherwise, return true if the option was set from a literal
|
||||
return options.wasSetFromLiteral(UnicodeString("select"));
|
||||
}
|
||||
|
||||
FormattedPlaceholder InternalValue::forceFormatting(DynamicErrors& errs, UErrorCode& errorCode) {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return {};
|
||||
|
@ -356,6 +419,10 @@ PrioritizedVariant::~PrioritizedVariant() {}
|
|||
return {};
|
||||
}
|
||||
|
||||
if (arg.isFallback()) {
|
||||
return arg;
|
||||
}
|
||||
|
||||
// The fallback for a nullary function call is the function name
|
||||
UnicodeString fallback;
|
||||
if (arg.isNullOperand()) {
|
||||
|
@ -365,24 +432,45 @@ PrioritizedVariant::~PrioritizedVariant() {}
|
|||
fallback = arg.getFallback();
|
||||
}
|
||||
|
||||
// Very special case for :number select=foo and :integer select=foo
|
||||
// This check can't be done inside the function implementation because
|
||||
// it doesn't have a way to both signal an error and return usable output,
|
||||
// and the spec stipulates that fallback output shouldn't be used in the
|
||||
// case of a bad `select` option to a formatting call.
|
||||
bool badSelect = !checkSelectOption();
|
||||
|
||||
// Call the function with the argument
|
||||
FormattedPlaceholder result = formatter->format(std::move(arg), std::move(options), errorCode);
|
||||
if (U_SUCCESS(errorCode) && errorCode == U_USING_DEFAULT_WARNING) {
|
||||
// Ignore this warning
|
||||
errorCode = U_ZERO_ERROR;
|
||||
}
|
||||
if (U_FAILURE(errorCode)) {
|
||||
if (errorCode == U_MF_OPERAND_MISMATCH_ERROR) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
errs.setOperandMismatchError(name, errorCode);
|
||||
} else if (errorCode == U_MF_BAD_OPTION) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
errs.setBadOption(name, errorCode);
|
||||
} else {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
// Convey any error generated by the formatter
|
||||
// as a formatting error, except for operand mismatch errors
|
||||
// Convey any other error generated by the formatter
|
||||
// as a formatting error
|
||||
errs.setFormattingError(name, errorCode);
|
||||
}
|
||||
}
|
||||
// Ignore the output if any error occurred
|
||||
if (errs.hasFormattingError()) {
|
||||
// We don't ignore the output in the case of a Bad Option Error,
|
||||
// because of the select=bad case where we want both an error
|
||||
// and non-fallback output.
|
||||
if (errs.hasFormattingError() || errs.hasBadOptionError()) {
|
||||
return FormattedPlaceholder(fallback);
|
||||
}
|
||||
|
||||
if (badSelect) {
|
||||
// In this case, we want to set an error but not replace
|
||||
// the output with a fallback
|
||||
errs.setRecoverableBadOption(name, errorCode);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,18 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
namespace message2 {
|
||||
|
||||
// Names of the functions referred to by name in the MessageFormat 2
// implementation, kept in one place so call sites can share the constants
// instead of repeating string literals.
namespace functions {
    // Date/time functions
    static constexpr std::u16string_view DATETIME = u"datetime";
    static constexpr std::u16string_view DATE = u"date";
    static constexpr std::u16string_view TIME = u"time";

    // Numeric functions
    static constexpr std::u16string_view NUMBER = u"number";
    static constexpr std::u16string_view INTEGER = u"integer";

    // Functions in the `test:` namespace
    static constexpr std::u16string_view TEST_FUNCTION = u"test:function";
    static constexpr std::u16string_view TEST_FORMAT = u"test:format";
    static constexpr std::u16string_view TEST_SELECT = u"test:select";

    // String function
    static constexpr std::u16string_view STRING = u"string";
}
|
||||
|
||||
using namespace data_model;
|
||||
|
||||
// PrioritizedVariant
|
||||
|
@ -149,9 +161,7 @@ namespace message2 {
|
|||
public:
|
||||
MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&);
|
||||
|
||||
const Formattable* getGlobal(const MessageFormatter&,
|
||||
const VariableName&,
|
||||
UErrorCode&) const;
|
||||
const Formattable* getGlobal(const VariableName&, UErrorCode&) const;
|
||||
|
||||
// If any errors were set, update `status` accordingly
|
||||
void checkErrors(UErrorCode& status) const;
|
||||
|
@ -203,6 +213,7 @@ namespace message2 {
|
|||
FunctionName name;
|
||||
const Selector* selector; // May be null
|
||||
const Formatter* formatter; // May be null, but one or the other should be non-null unless argument is a FormattedPlaceholder
|
||||
bool checkSelectOption() const;
|
||||
}; // class InternalValue
|
||||
|
||||
} // namespace message2
|
||||
|
|
|
@ -119,24 +119,6 @@ namespace message2 {
|
|||
|
||||
// MessageFormatter
|
||||
|
||||
// Returns the NFC-normalized version of s, returning s itself
|
||||
// if it's already normalized.
|
||||
UnicodeString MessageFormatter::normalizeNFC(const UnicodeString& s) const {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Check if string is already normalized
|
||||
UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status);
|
||||
// If so, return it
|
||||
if (U_SUCCESS(status) && result == UNORM_YES) {
|
||||
return s;
|
||||
}
|
||||
// Otherwise, normalize it
|
||||
UnicodeString normalized = nfcNormalizer->normalize(s, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) {
|
||||
CHECK_ERROR(success);
|
||||
|
||||
|
@ -148,18 +130,18 @@ namespace message2 {
|
|||
FormatterFactory* time = StandardFunctions::DateTimeFactory::time(success);
|
||||
FormatterFactory* number = new StandardFunctions::NumberFactory();
|
||||
FormatterFactory* integer = new StandardFunctions::IntegerFactory();
|
||||
standardFunctionsBuilder.adoptFormatter(FunctionName(UnicodeString("datetime")), dateTime, success)
|
||||
.adoptFormatter(FunctionName(UnicodeString("date")), date, success)
|
||||
.adoptFormatter(FunctionName(UnicodeString("time")), time, success)
|
||||
.adoptFormatter(FunctionName(UnicodeString("number")), number, success)
|
||||
.adoptFormatter(FunctionName(UnicodeString("integer")), integer, success)
|
||||
.adoptFormatter(FunctionName(UnicodeString("test:function")), new StandardFunctions::TestFormatFactory(), success)
|
||||
.adoptFormatter(FunctionName(UnicodeString("test:format")), new StandardFunctions::TestFormatFactory(), success)
|
||||
.adoptSelector(FunctionName(UnicodeString("number")), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success)
|
||||
.adoptSelector(FunctionName(UnicodeString("integer")), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success)
|
||||
.adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success)
|
||||
.adoptSelector(FunctionName(UnicodeString("test:function")), new StandardFunctions::TestSelectFactory(), success)
|
||||
.adoptSelector(FunctionName(UnicodeString("test:select")), new StandardFunctions::TestSelectFactory(), success);
|
||||
standardFunctionsBuilder.adoptFormatter(FunctionName(functions::DATETIME), dateTime, success)
|
||||
.adoptFormatter(FunctionName(functions::DATE), date, success)
|
||||
.adoptFormatter(FunctionName(functions::TIME), time, success)
|
||||
.adoptFormatter(FunctionName(functions::NUMBER), number, success)
|
||||
.adoptFormatter(FunctionName(functions::INTEGER), integer, success)
|
||||
.adoptFormatter(FunctionName(functions::TEST_FUNCTION), new StandardFunctions::TestFormatFactory(), success)
|
||||
.adoptFormatter(FunctionName(functions::TEST_FORMAT), new StandardFunctions::TestFormatFactory(), success)
|
||||
.adoptSelector(FunctionName(functions::NUMBER), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success)
|
||||
.adoptSelector(FunctionName(functions::INTEGER), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success)
|
||||
.adoptSelector(FunctionName(functions::STRING), new StandardFunctions::TextFactory(), success)
|
||||
.adoptSelector(FunctionName(functions::TEST_FUNCTION), new StandardFunctions::TestSelectFactory(), success)
|
||||
.adoptSelector(FunctionName(functions::TEST_SELECT), new StandardFunctions::TestSelectFactory(), success);
|
||||
CHECK_ERROR(success);
|
||||
standardMFFunctionRegistry = standardFunctionsBuilder.build();
|
||||
CHECK_ERROR(success);
|
||||
|
@ -188,8 +170,6 @@ namespace message2 {
|
|||
errors = errorsNew.orphan();
|
||||
}
|
||||
|
||||
nfcNormalizer = Normalizer2::getNFCInstance(success);
|
||||
|
||||
// Note: we currently evaluate variables lazily,
|
||||
// without memoization. This call is still necessary
|
||||
// to check out-of-scope uses of local variables in
|
||||
|
@ -218,7 +198,6 @@ namespace message2 {
|
|||
signalErrors = other.signalErrors;
|
||||
errors = other.errors;
|
||||
other.errors = nullptr;
|
||||
nfcNormalizer = other.nfcNormalizer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,10 +10,13 @@
|
|||
#if !UCONFIG_NO_MF2
|
||||
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
|
||||
#include "unicode/dtptngen.h"
|
||||
#include "unicode/messageformat2.h"
|
||||
#include "unicode/messageformat2_data_model_names.h"
|
||||
#include "unicode/messageformat2_function_registry.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/smpdtfmt.h"
|
||||
#include "charstr.h"
|
||||
#include "double-conversion.h"
|
||||
|
@ -172,6 +175,28 @@ void MFFunctionRegistry::checkStandard() const {
|
|||
|
||||
// Formatter/selector helpers
|
||||
|
||||
// Returns the NFC-normalized version of s, returning s itself
|
||||
// if it's already normalized.
|
||||
/* static */ UnicodeString StandardFunctions::normalizeNFC(const UnicodeString& s) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const Normalizer2* nfcNormalizer = Normalizer2::getNFCInstance(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return s;
|
||||
}
|
||||
// Check if string is already normalized
|
||||
UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status);
|
||||
// If so, return it
|
||||
if (U_SUCCESS(status) && result == UNORM_YES) {
|
||||
return s;
|
||||
}
|
||||
// Otherwise, normalize it
|
||||
UnicodeString normalized = nfcNormalizer->normalize(s, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
// Converts `s` to a double, indicating failure via `errorCode`
|
||||
static void strToDouble(const UnicodeString& s, double& result, UErrorCode& errorCode) {
|
||||
CHECK_ERROR(errorCode);
|
||||
|
@ -261,6 +286,133 @@ MFFunctionRegistry::~MFFunctionRegistry() {
|
|||
|
||||
// --------- Number
|
||||
|
||||
bool inBounds(const UnicodeString& s, int32_t i) {
|
||||
return i < s.length();
|
||||
}
|
||||
|
||||
// True if `c` is one of the ASCII digits '0' through '9'.
bool isDigit(UChar32 c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
|
||||
|
||||
bool parseDigits(const UnicodeString& s, int32_t& i) {
|
||||
if (!isDigit(s[i])) {
|
||||
return false;
|
||||
}
|
||||
while (inBounds(s, i) && isDigit(s[i])) {
|
||||
i++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT]
|
||||
bool validateNumberLiteral(const UnicodeString& s) {
|
||||
int32_t i = 0;
|
||||
|
||||
if (s.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse optional sign
|
||||
// ["-"]
|
||||
if (s[0] == HYPHEN) {
|
||||
i++;
|
||||
}
|
||||
|
||||
if (!inBounds(s, i)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse integer digits
|
||||
// (%x30 / (%x31-39 *DIGIT))
|
||||
if (s[i] == '0') {
|
||||
if (!inBounds(s, i + 1) || s[i + 1] != PERIOD) {
|
||||
return false;
|
||||
}
|
||||
i++;
|
||||
} else {
|
||||
if (!parseDigits(s, i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// The rest is optional
|
||||
if (!inBounds(s, i)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parse optional decimal digits
|
||||
// ["." 1*DIGIT]
|
||||
if (s[i] == PERIOD) {
|
||||
i++;
|
||||
if (!parseDigits(s, i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!inBounds(s, i)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parse optional exponent
|
||||
// [%i"e" ["-" / "+"] 1*DIGIT]
|
||||
if (s[i] == 'e' || s[i] == 'E') {
|
||||
i++;
|
||||
if (!inBounds(s, i)) {
|
||||
return false;
|
||||
}
|
||||
// Parse optional sign
|
||||
if (s[i] == HYPHEN || s[i] == PLUS) {
|
||||
i++;
|
||||
}
|
||||
if (!inBounds(s, i)) {
|
||||
return false;
|
||||
}
|
||||
if (!parseDigits(s, i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (i != s.length()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isInteger(const Formattable& s) {
|
||||
switch (s.getType()) {
|
||||
case UFMT_DOUBLE:
|
||||
case UFMT_LONG:
|
||||
case UFMT_INT64:
|
||||
return true;
|
||||
case UFMT_STRING: {
|
||||
UErrorCode ignore = U_ZERO_ERROR;
|
||||
const UnicodeString& str = s.getString(ignore);
|
||||
return validateNumberLiteral(str);
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool isDigitSizeOption(const UnicodeString& s) {
|
||||
return s == UnicodeString("minimumIntegerDigits")
|
||||
|| s == UnicodeString("minimumFractionDigits")
|
||||
|| s == UnicodeString("maximumFractionDigits")
|
||||
|| s == UnicodeString("minimumSignificantDigits")
|
||||
|| s == UnicodeString("maximumSignificantDigits");
|
||||
}
|
||||
|
||||
/* static */ void StandardFunctions::validateDigitSizeOptions(const FunctionOptions& opts,
|
||||
UErrorCode& status) {
|
||||
CHECK_ERROR(status);
|
||||
|
||||
for (int32_t i = 0; i < opts.optionsCount(); i++) {
|
||||
const ResolvedFunctionOption& opt = opts.options[i];
|
||||
if (isDigitSizeOption(opt.getName()) && !isInteger(opt.getValue())) {
|
||||
status = U_MF_BAD_OPTION;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number,
|
||||
const FunctionOptions& opts,
|
||||
UErrorCode& status) {
|
||||
|
@ -268,6 +420,11 @@ MFFunctionRegistry::~MFFunctionRegistry() {
|
|||
|
||||
using namespace number;
|
||||
|
||||
validateDigitSizeOptions(opts, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
Formattable opt;
|
||||
nf = NumberFormatter::with();
|
||||
|
@ -283,14 +440,14 @@ MFFunctionRegistry::~MFFunctionRegistry() {
|
|||
|
||||
// Default notation is simple
|
||||
Notation notation = Notation::simple();
|
||||
UnicodeString notationOpt = opts.getStringFunctionOption(UnicodeString("notation"));
|
||||
if (notationOpt == UnicodeString("scientific")) {
|
||||
UnicodeString notationOpt = opts.getStringFunctionOption(options::NOTATION);
|
||||
if (notationOpt == options::SCIENTIFIC) {
|
||||
notation = Notation::scientific();
|
||||
} else if (notationOpt == UnicodeString("engineering")) {
|
||||
} else if (notationOpt == options::ENGINEERING) {
|
||||
notation = Notation::engineering();
|
||||
} else if (notationOpt == UnicodeString("compact")) {
|
||||
UnicodeString displayOpt = opts.getStringFunctionOption(UnicodeString("compactDisplay"));
|
||||
if (displayOpt == UnicodeString("long")) {
|
||||
} else if (notationOpt == options::COMPACT) {
|
||||
UnicodeString displayOpt = opts.getStringFunctionOption(options::COMPACT_DISPLAY);
|
||||
if (displayOpt == options::LONG) {
|
||||
notation = Notation::compactLong();
|
||||
} else {
|
||||
// Default is short
|
||||
|
@ -354,15 +511,15 @@ MFFunctionRegistry::~MFFunctionRegistry() {
|
|||
nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits));
|
||||
|
||||
// signDisplay
|
||||
UnicodeString sd = opts.getStringFunctionOption(UnicodeString("signDisplay"));
|
||||
UnicodeString sd = opts.getStringFunctionOption(options::SIGN_DISPLAY);
|
||||
UNumberSignDisplay signDisplay;
|
||||
if (sd == UnicodeString("always")) {
|
||||
if (sd == options::ALWAYS) {
|
||||
signDisplay = UNumberSignDisplay::UNUM_SIGN_ALWAYS;
|
||||
} else if (sd == UnicodeString("exceptZero")) {
|
||||
} else if (sd == options::EXCEPT_ZERO) {
|
||||
signDisplay = UNumberSignDisplay::UNUM_SIGN_EXCEPT_ZERO;
|
||||
} else if (sd == UnicodeString("negative")) {
|
||||
} else if (sd == options::NEGATIVE) {
|
||||
signDisplay = UNumberSignDisplay::UNUM_SIGN_NEGATIVE;
|
||||
} else if (sd == UnicodeString("never")) {
|
||||
} else if (sd == options::NEVER) {
|
||||
signDisplay = UNumberSignDisplay::UNUM_SIGN_NEVER;
|
||||
} else {
|
||||
signDisplay = UNumberSignDisplay::UNUM_SIGN_AUTO;
|
||||
|
@ -370,13 +527,13 @@ MFFunctionRegistry::~MFFunctionRegistry() {
|
|||
nf = nf.sign(signDisplay);
|
||||
|
||||
// useGrouping
|
||||
UnicodeString ug = opts.getStringFunctionOption(UnicodeString("useGrouping"));
|
||||
UnicodeString ug = opts.getStringFunctionOption(options::USE_GROUPING);
|
||||
UNumberGroupingStrategy grp;
|
||||
if (ug == UnicodeString("always")) {
|
||||
if (ug == options::ALWAYS) {
|
||||
grp = UNumberGroupingStrategy::UNUM_GROUPING_ON_ALIGNED;
|
||||
} else if (ug == UnicodeString("never")) {
|
||||
} else if (ug == options::NEVER) {
|
||||
grp = UNumberGroupingStrategy::UNUM_GROUPING_OFF;
|
||||
} else if (ug == UnicodeString("min2")) {
|
||||
} else if (ug == options::MIN2) {
|
||||
grp = UNumberGroupingStrategy::UNUM_GROUPING_MIN2;
|
||||
} else {
|
||||
// Default is "auto"
|
||||
|
@ -385,7 +542,7 @@ MFFunctionRegistry::~MFFunctionRegistry() {
|
|||
nf = nf.grouping(grp);
|
||||
|
||||
// numberingSystem
|
||||
UnicodeString ns = opts.getStringFunctionOption(UnicodeString("numberingSystem"));
|
||||
UnicodeString ns = opts.getStringFunctionOption(options::NUMBERING_SYSTEM);
|
||||
if (ns.length() > 0) {
|
||||
ns = ns.toLower(Locale("en-US"));
|
||||
CharString buffer;
|
||||
|
@ -444,22 +601,19 @@ static double parseNumberLiteral(const Formattable& input, UErrorCode& errorCode
|
|||
return {};
|
||||
}
|
||||
|
||||
// Hack: Check for cases that are forbidden by the MF2 grammar
|
||||
// but allowed by StringToDouble
|
||||
int32_t len = inputStr.length();
|
||||
|
||||
if (len > 0 && ((inputStr[0] == '+')
|
||||
|| (inputStr[0] == '0' && len > 1 && inputStr[1] != '.')
|
||||
|| (inputStr[len - 1] == '.')
|
||||
|| (inputStr[0] == '.'))) {
|
||||
// Validate string according to `number-literal` production
|
||||
// in the spec for `:number`. This is because some cases are
|
||||
// forbidden by this grammar, but allowed by StringToDouble.
|
||||
if (!validateNumberLiteral(inputStr)) {
|
||||
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Otherwise, convert to double using double_conversion::StringToDoubleConverter
|
||||
// Convert to double using double_conversion::StringToDoubleConverter
|
||||
using namespace double_conversion;
|
||||
int processedCharactersCount = 0;
|
||||
StringToDoubleConverter converter(0, 0, 0, "", "");
|
||||
int32_t len = inputStr.length();
|
||||
double result =
|
||||
converter.StringToDouble(reinterpret_cast<const uint16_t*>(inputStr.getBuffer()),
|
||||
len,
|
||||
|
@ -504,21 +658,6 @@ static UChar32 digitToChar(int32_t val, UErrorCode errorCode) {
|
|||
}
|
||||
}
|
||||
|
||||
static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
|
||||
double numberValue = parseNumberLiteral(input.asFormattable(), errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return notANumber(input);
|
||||
}
|
||||
|
||||
UErrorCode savedStatus = errorCode;
|
||||
number::FormattedNumber result = nf.formatDouble(numberValue, errorCode);
|
||||
// Ignore U_USING_DEFAULT_WARNING
|
||||
if (errorCode == U_USING_DEFAULT_WARNING) {
|
||||
errorCode = savedStatus;
|
||||
}
|
||||
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
|
||||
}
|
||||
|
||||
int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const {
|
||||
Formattable opt;
|
||||
|
||||
|
@ -526,7 +665,7 @@ int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions&
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (opts.getFunctionOption(UnicodeString("maximumFractionDigits"), opt)) {
|
||||
if (opts.getFunctionOption(options::MAXIMUM_FRACTION_DIGITS, opt)) {
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
int64_t val = getInt64Value(locale, opt, localErrorCode);
|
||||
if (U_SUCCESS(localErrorCode)) {
|
||||
|
@ -543,7 +682,7 @@ int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions&
|
|||
Formattable opt;
|
||||
|
||||
if (!isInteger) {
|
||||
if (opts.getFunctionOption(UnicodeString("minimumFractionDigits"), opt)) {
|
||||
if (opts.getFunctionOption(options::MINIMUM_FRACTION_DIGITS, opt)) {
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
int64_t val = getInt64Value(locale, opt, localErrorCode);
|
||||
if (U_SUCCESS(localErrorCode)) {
|
||||
|
@ -560,21 +699,21 @@ int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions&
|
|||
int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const {
|
||||
Formattable opt;
|
||||
|
||||
if (opts.getFunctionOption(UnicodeString("minimumIntegerDigits"), opt)) {
|
||||
if (opts.getFunctionOption(options::MINIMUM_INTEGER_DIGITS, opt)) {
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
int64_t val = getInt64Value(locale, opt, localErrorCode);
|
||||
if (U_SUCCESS(localErrorCode)) {
|
||||
return static_cast<int32_t>(val);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const {
|
||||
Formattable opt;
|
||||
|
||||
if (!isInteger) {
|
||||
if (opts.getFunctionOption(UnicodeString("minimumSignificantDigits"), opt)) {
|
||||
if (opts.getFunctionOption(options::MINIMUM_SIGNIFICANT_DIGITS, opt)) {
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
int64_t val = getInt64Value(locale, opt, localErrorCode);
|
||||
if (U_SUCCESS(localErrorCode)) {
|
||||
|
@ -591,7 +730,7 @@ int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOption
|
|||
int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const {
|
||||
Formattable opt;
|
||||
|
||||
if (opts.getFunctionOption(UnicodeString("maximumSignificantDigits"), opt)) {
|
||||
if (opts.getFunctionOption(options::MAXIMUM_SIGNIFICANT_DIGITS, opt)) {
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
int64_t val = getInt64Value(locale, opt, localErrorCode);
|
||||
if (U_SUCCESS(localErrorCode)) {
|
||||
|
@ -607,14 +746,14 @@ int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOption
|
|||
bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const {
|
||||
Formattable opt;
|
||||
if (isInteger
|
||||
|| !opts.getFunctionOption(UnicodeString("style"), opt)
|
||||
|| !opts.getFunctionOption(options::STYLE, opt)
|
||||
|| opt.getType() != UFMT_STRING) {
|
||||
return false;
|
||||
}
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
const UnicodeString& style = opt.getString(localErrorCode);
|
||||
U_ASSERT(U_SUCCESS(localErrorCode));
|
||||
return (style == UnicodeString("percent"));
|
||||
return (style == options::PERCENT_STRING);
|
||||
}
|
||||
|
||||
/* static */ StandardFunctions::Number StandardFunctions::Number::integer(const Locale& loc) {
|
||||
|
@ -636,6 +775,8 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
|
|||
realFormatter = formatterForOptions(*this, opts, errorCode);
|
||||
|
||||
number::FormattedNumber numberResult;
|
||||
int64_t integerValue = 0;
|
||||
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
// Already checked that contents can be formatted
|
||||
const Formattable& toFormat = arg.asFormattable();
|
||||
|
@ -644,23 +785,31 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
|
|||
double d = toFormat.getDouble(errorCode);
|
||||
U_ASSERT(U_SUCCESS(errorCode));
|
||||
numberResult = realFormatter.formatDouble(d, errorCode);
|
||||
integerValue = static_cast<int64_t>(std::round(d));
|
||||
break;
|
||||
}
|
||||
case UFMT_LONG: {
|
||||
int32_t l = toFormat.getLong(errorCode);
|
||||
U_ASSERT(U_SUCCESS(errorCode));
|
||||
numberResult = realFormatter.formatInt(l, errorCode);
|
||||
integerValue = l;
|
||||
break;
|
||||
}
|
||||
case UFMT_INT64: {
|
||||
int64_t i = toFormat.getInt64(errorCode);
|
||||
U_ASSERT(U_SUCCESS(errorCode));
|
||||
numberResult = realFormatter.formatInt(i, errorCode);
|
||||
integerValue = i;
|
||||
break;
|
||||
}
|
||||
case UFMT_STRING: {
|
||||
// Try to parse the string as a number
|
||||
return tryParsingNumberLiteral(realFormatter, arg, errorCode);
|
||||
double d = parseNumberLiteral(toFormat, errorCode);
|
||||
if (U_FAILURE(errorCode))
|
||||
return {};
|
||||
numberResult = realFormatter.formatDouble(d, errorCode);
|
||||
integerValue = static_cast<int64_t>(std::round(d));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Other types can't be parsed as a number
|
||||
|
@ -670,7 +819,13 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
|
|||
}
|
||||
}
|
||||
|
||||
return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult)));
|
||||
// Need to return the integer value if invoked as :integer
|
||||
if (isInteger) {
|
||||
return FormattedPlaceholder(FormattedPlaceholder(Formattable(integerValue), arg.getFallback()),
|
||||
std::move(opts),
|
||||
FormattedValue(std::move(numberResult)));
|
||||
}
|
||||
return FormattedPlaceholder(arg, std::move(opts), FormattedValue(std::move(numberResult)));
|
||||
}
|
||||
|
||||
StandardFunctions::Number::~Number() {}
|
||||
|
@ -678,18 +833,17 @@ StandardFunctions::NumberFactory::~NumberFactory() {}
|
|||
|
||||
// --------- PluralFactory
|
||||
|
||||
|
||||
StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const {
|
||||
Formattable opt;
|
||||
|
||||
if (opts.getFunctionOption(UnicodeString("select"), opt)) {
|
||||
if (opts.getFunctionOption(options::SELECT, opt)) {
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
UnicodeString val = opt.getString(localErrorCode);
|
||||
if (U_SUCCESS(localErrorCode)) {
|
||||
if (val == UnicodeString("ordinal")) {
|
||||
if (val == options::ORDINAL) {
|
||||
return PluralType::PLURAL_ORDINAL;
|
||||
}
|
||||
if (val == UnicodeString("exact")) {
|
||||
if (val == options::EXACT) {
|
||||
return PluralType::PLURAL_EXACT;
|
||||
}
|
||||
}
|
||||
|
@ -842,7 +996,7 @@ StandardFunctions::PluralFactory::~PluralFactory() {}
|
|||
// --------- DateTimeFactory
|
||||
|
||||
/* static */ UnicodeString StandardFunctions::getStringOption(const FunctionOptions& opts,
|
||||
const UnicodeString& optionName,
|
||||
std::u16string_view optionName,
|
||||
UErrorCode& errorCode) {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
Formattable opt;
|
||||
|
@ -857,11 +1011,11 @@ StandardFunctions::PluralFactory::~PluralFactory() {}
|
|||
}
|
||||
|
||||
// Date/time options only
|
||||
static UnicodeString defaultForOption(const UnicodeString& optionName) {
|
||||
if (optionName == UnicodeString("dateStyle")
|
||||
|| optionName == UnicodeString("timeStyle")
|
||||
|| optionName == UnicodeString("style")) {
|
||||
return UnicodeString("short");
|
||||
static UnicodeString defaultForOption(std::u16string_view optionName) {
|
||||
if (optionName == options::DATE_STYLE
|
||||
|| optionName == options::TIME_STYLE
|
||||
|| optionName == options::STYLE) {
|
||||
return UnicodeString(options::SHORT);
|
||||
}
|
||||
return {}; // Empty string is default
|
||||
}
|
||||
|
@ -875,7 +1029,7 @@ static UnicodeString defaultForOption(const UnicodeString& optionName) {
|
|||
// which works for datetime options but not necessarily in general.
|
||||
UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat,
|
||||
const FunctionOptions& opts,
|
||||
const UnicodeString& optionName) const {
|
||||
std::u16string_view optionName) const {
|
||||
// Options passed to the current function invocation take priority
|
||||
Formattable opt;
|
||||
UnicodeString s;
|
||||
|
@ -897,7 +1051,7 @@ UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlac
|
|||
// Used for options that don't have defaults
|
||||
UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat,
|
||||
const FunctionOptions& opts,
|
||||
const UnicodeString& optionName,
|
||||
std::u16string_view optionName,
|
||||
UErrorCode& errorCode) const {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
// Options passed to the current function invocation take priority
|
||||
|
@ -922,19 +1076,19 @@ UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlac
|
|||
static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
UnicodeString upper = option.toUpper();
|
||||
if (upper == UnicodeString("FULL")) {
|
||||
if (upper == options::FULL_UPPER) {
|
||||
return DateFormat::EStyle::kFull;
|
||||
}
|
||||
if (upper == UnicodeString("LONG")) {
|
||||
if (upper == options::LONG_UPPER) {
|
||||
return DateFormat::EStyle::kLong;
|
||||
}
|
||||
if (upper == UnicodeString("MEDIUM")) {
|
||||
if (upper == options::MEDIUM_UPPER) {
|
||||
return DateFormat::EStyle::kMedium;
|
||||
}
|
||||
if (upper == UnicodeString("SHORT")) {
|
||||
if (upper == options::SHORT_UPPER) {
|
||||
return DateFormat::EStyle::kShort;
|
||||
}
|
||||
if (upper.isEmpty() || upper == UnicodeString("DEFAULT")) {
|
||||
if (upper.isEmpty() || upper == options::DEFAULT_UPPER) {
|
||||
return DateFormat::EStyle::kDefault;
|
||||
}
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
@ -1053,95 +1207,95 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&&
|
|||
#define ADD_PATTERN(s) skeleton += UnicodeString(s)
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
// Year
|
||||
UnicodeString year = getFunctionOption(toFormat, opts, UnicodeString("year"), errorCode);
|
||||
UnicodeString year = getFunctionOption(toFormat, opts, options::YEAR, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useDate = true;
|
||||
if (year == UnicodeString("2-digit")) {
|
||||
if (year == options::TWO_DIGIT) {
|
||||
ADD_PATTERN("YY");
|
||||
} else if (year == UnicodeString("numeric")) {
|
||||
} else if (year == options::NUMERIC) {
|
||||
ADD_PATTERN("YYYY");
|
||||
}
|
||||
}
|
||||
// Month
|
||||
UnicodeString month = getFunctionOption(toFormat, opts, UnicodeString("month"), errorCode);
|
||||
UnicodeString month = getFunctionOption(toFormat, opts, options::MONTH, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useDate = true;
|
||||
/* numeric, 2-digit, long, short, narrow */
|
||||
if (month == UnicodeString("long")) {
|
||||
if (month == options::LONG) {
|
||||
ADD_PATTERN("MMMM");
|
||||
} else if (month == UnicodeString("short")) {
|
||||
} else if (month == options::SHORT) {
|
||||
ADD_PATTERN("MMM");
|
||||
} else if (month == UnicodeString("narrow")) {
|
||||
} else if (month == options::NARROW) {
|
||||
ADD_PATTERN("MMMMM");
|
||||
} else if (month == UnicodeString("numeric")) {
|
||||
} else if (month == options::NUMERIC) {
|
||||
ADD_PATTERN("M");
|
||||
} else if (month == UnicodeString("2-digit")) {
|
||||
} else if (month == options::TWO_DIGIT) {
|
||||
ADD_PATTERN("MM");
|
||||
}
|
||||
}
|
||||
// Weekday
|
||||
UnicodeString weekday = getFunctionOption(toFormat, opts, UnicodeString("weekday"), errorCode);
|
||||
UnicodeString weekday = getFunctionOption(toFormat, opts, options::WEEKDAY, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useDate = true;
|
||||
if (weekday == UnicodeString("long")) {
|
||||
if (weekday == options::LONG) {
|
||||
ADD_PATTERN("EEEE");
|
||||
} else if (weekday == UnicodeString("short")) {
|
||||
} else if (weekday == options::SHORT) {
|
||||
ADD_PATTERN("EEEEE");
|
||||
} else if (weekday == UnicodeString("narrow")) {
|
||||
} else if (weekday == options::NARROW) {
|
||||
ADD_PATTERN("EEEEE");
|
||||
}
|
||||
}
|
||||
// Day
|
||||
UnicodeString day = getFunctionOption(toFormat, opts, UnicodeString("day"), errorCode);
|
||||
UnicodeString day = getFunctionOption(toFormat, opts, options::DAY, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useDate = true;
|
||||
if (day == UnicodeString("numeric")) {
|
||||
if (day == options::NUMERIC) {
|
||||
ADD_PATTERN("d");
|
||||
} else if (day == UnicodeString("2-digit")) {
|
||||
} else if (day == options::TWO_DIGIT) {
|
||||
ADD_PATTERN("dd");
|
||||
}
|
||||
}
|
||||
// Hour
|
||||
UnicodeString hour = getFunctionOption(toFormat, opts, UnicodeString("hour"), errorCode);
|
||||
UnicodeString hour = getFunctionOption(toFormat, opts, options::HOUR, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useTime = true;
|
||||
if (hour == UnicodeString("numeric")) {
|
||||
if (hour == options::NUMERIC) {
|
||||
ADD_PATTERN("h");
|
||||
} else if (hour == UnicodeString("2-digit")) {
|
||||
} else if (hour == options::TWO_DIGIT) {
|
||||
ADD_PATTERN("hh");
|
||||
}
|
||||
}
|
||||
// Minute
|
||||
UnicodeString minute = getFunctionOption(toFormat, opts, UnicodeString("minute"), errorCode);
|
||||
UnicodeString minute = getFunctionOption(toFormat, opts, options::MINUTE, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useTime = true;
|
||||
if (minute == UnicodeString("numeric")) {
|
||||
if (minute == options::NUMERIC) {
|
||||
ADD_PATTERN("m");
|
||||
} else if (minute == UnicodeString("2-digit")) {
|
||||
} else if (minute == options::TWO_DIGIT) {
|
||||
ADD_PATTERN("mm");
|
||||
}
|
||||
}
|
||||
// Second
|
||||
UnicodeString second = getFunctionOption(toFormat, opts, UnicodeString("second"), errorCode);
|
||||
UnicodeString second = getFunctionOption(toFormat, opts, options::SECOND, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
useTime = true;
|
||||
if (second == UnicodeString("numeric")) {
|
||||
if (second == options::NUMERIC) {
|
||||
ADD_PATTERN("s");
|
||||
} else if (second == UnicodeString("2-digit")) {
|
||||
} else if (second == options::TWO_DIGIT) {
|
||||
ADD_PATTERN("ss");
|
||||
}
|
||||
}
|
||||
|
@ -1263,9 +1417,11 @@ void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat,
|
|||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
// Normalize result
|
||||
UnicodeString normalized = normalizeNFC(formattedValue);
|
||||
|
||||
for (int32_t i = 0; i < keysLen; i++) {
|
||||
if (keys[i] == formattedValue) {
|
||||
if (keys[i] == normalized) {
|
||||
prefs[0] = keys[i];
|
||||
prefsLen = 1;
|
||||
break;
|
||||
|
@ -1370,7 +1526,7 @@ double formattableToNumber(const Formattable& arg, UErrorCode& status) {
|
|||
}
|
||||
// 8. If the decimalPlaces option is set, then
|
||||
Formattable opt;
|
||||
if (options.getFunctionOption(UnicodeString("decimalPlaces"), opt)) {
|
||||
if (options.getFunctionOption(options::DECIMAL_PLACES, opt)) {
|
||||
// 8i. If its value resolves to a numerical integer value 0 or 1
|
||||
// or their corresponding string representations '0' or '1', then
|
||||
double decimalPlacesInput = formattableToNumber(opt, status);
|
||||
|
@ -1389,7 +1545,7 @@ double formattableToNumber(const Formattable& arg, UErrorCode& status) {
|
|||
}
|
||||
// 9. If the fails option is set, then
|
||||
Formattable failsOpt;
|
||||
if (options.getFunctionOption(UnicodeString("fails"), failsOpt)) {
|
||||
if (options.getFunctionOption(options::FAILS, failsOpt)) {
|
||||
UnicodeString failsString = failsOpt.getString(status);
|
||||
if (U_SUCCESS(status)) {
|
||||
// 9i. If its value resolves to the string 'always', then
|
||||
|
|
|
@ -23,6 +23,54 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
namespace message2 {
|
||||
|
||||
// Constants for option names
|
||||
namespace options {
|
||||
static constexpr std::u16string_view ALWAYS = u"always";
|
||||
static constexpr std::u16string_view COMPACT = u"compact";
|
||||
static constexpr std::u16string_view COMPACT_DISPLAY = u"compactDisplay";
|
||||
static constexpr std::u16string_view DATE_STYLE = u"dateStyle";
|
||||
static constexpr std::u16string_view DAY = u"day";
|
||||
static constexpr std::u16string_view DECIMAL_PLACES = u"decimalPlaces";
|
||||
static constexpr std::u16string_view DEFAULT_UPPER = u"DEFAULT";
|
||||
static constexpr std::u16string_view ENGINEERING = u"engineering";
|
||||
static constexpr std::u16string_view EXACT = u"exact";
|
||||
static constexpr std::u16string_view EXCEPT_ZERO = u"exceptZero";
|
||||
static constexpr std::u16string_view FAILS = u"fails";
|
||||
static constexpr std::u16string_view FULL_UPPER = u"FULL";
|
||||
static constexpr std::u16string_view HOUR = u"hour";
|
||||
static constexpr std::u16string_view LONG = u"long";
|
||||
static constexpr std::u16string_view LONG_UPPER = u"LONG";
|
||||
static constexpr std::u16string_view MAXIMUM_FRACTION_DIGITS = u"maximumFractionDigits";
|
||||
static constexpr std::u16string_view MAXIMUM_SIGNIFICANT_DIGITS = u"maximumSignificantDigits";
|
||||
static constexpr std::u16string_view MEDIUM_UPPER = u"MEDIUM";
|
||||
static constexpr std::u16string_view MIN2 = u"min2";
|
||||
static constexpr std::u16string_view MINIMUM_FRACTION_DIGITS = u"minimumFractionDigits";
|
||||
static constexpr std::u16string_view MINIMUM_INTEGER_DIGITS = u"minimumIntegerDigits";
|
||||
static constexpr std::u16string_view MINIMUM_SIGNIFICANT_DIGITS = u"minimumSignificantDigits";
|
||||
static constexpr std::u16string_view MINUTE = u"minute";
|
||||
static constexpr std::u16string_view MONTH = u"month";
|
||||
static constexpr std::u16string_view NARROW = u"narrow";
|
||||
static constexpr std::u16string_view NEGATIVE = u"negative";
|
||||
static constexpr std::u16string_view NEVER = u"never";
|
||||
static constexpr std::u16string_view NOTATION = u"notation";
|
||||
static constexpr std::u16string_view NUMBERING_SYSTEM = u"numberingSystem";
|
||||
static constexpr std::u16string_view NUMERIC = u"numeric";
|
||||
static constexpr std::u16string_view ORDINAL = u"ordinal";
|
||||
static constexpr std::u16string_view PERCENT_STRING = u"percent";
|
||||
static constexpr std::u16string_view SCIENTIFIC = u"scientific";
|
||||
static constexpr std::u16string_view SECOND = u"second";
|
||||
static constexpr std::u16string_view SELECT = u"select";
|
||||
static constexpr std::u16string_view SHORT = u"short";
|
||||
static constexpr std::u16string_view SHORT_UPPER = u"SHORT";
|
||||
static constexpr std::u16string_view SIGN_DISPLAY = u"signDisplay";
|
||||
static constexpr std::u16string_view STYLE = u"style";
|
||||
static constexpr std::u16string_view TIME_STYLE = u"timeStyle";
|
||||
static constexpr std::u16string_view TWO_DIGIT = u"2-digit";
|
||||
static constexpr std::u16string_view USE_GROUPING = u"useGrouping";
|
||||
static constexpr std::u16string_view WEEKDAY = u"weekday";
|
||||
static constexpr std::u16string_view YEAR = u"year";
|
||||
} // namespace options
|
||||
|
||||
// Built-in functions
|
||||
/*
|
||||
The standard functions are :datetime, :date, :time,
|
||||
|
@ -33,8 +81,15 @@ namespace message2 {
|
|||
class StandardFunctions {
|
||||
friend class MessageFormatter;
|
||||
|
||||
public:
|
||||
// Used for normalizing variable names and keys for comparison
|
||||
static UnicodeString normalizeNFC(const UnicodeString&);
|
||||
|
||||
private:
|
||||
static void validateDigitSizeOptions(const FunctionOptions&, UErrorCode&);
|
||||
static void checkSelectOption(const FunctionOptions&, UErrorCode&);
|
||||
static UnicodeString getStringOption(const FunctionOptions& opts,
|
||||
const UnicodeString& optionName,
|
||||
std::u16string_view optionName,
|
||||
UErrorCode& errorCode);
|
||||
|
||||
class DateTime;
|
||||
|
@ -81,12 +136,12 @@ namespace message2 {
|
|||
*/
|
||||
UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat,
|
||||
const FunctionOptions& opts,
|
||||
const UnicodeString& optionName) const;
|
||||
std::u16string_view optionName) const;
|
||||
// Version for options that don't have defaults; sets the error
|
||||
// code instead of returning a default value
|
||||
UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat,
|
||||
const FunctionOptions& opts,
|
||||
const UnicodeString& optionName,
|
||||
std::u16string_view optionName,
|
||||
UErrorCode& errorCode) const;
|
||||
|
||||
};
|
||||
|
|
|
@ -188,25 +188,42 @@ UnicodeSet* initNameStartChars(UErrorCode& status) {
|
|||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
UnicodeSet* result = new UnicodeSet(*isAlpha);
|
||||
UnicodeSet* result = new UnicodeSet();
|
||||
if (result == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
};
|
||||
result->add(UNDERSCORE);
|
||||
result->add(0x00C0, 0x00D6);
|
||||
result->add(0x00D8, 0x00F6);
|
||||
result->add(0x00F8, 0x02FF);
|
||||
result->add(0x0370, 0x037D);
|
||||
result->add(0x037F, 0x061B);
|
||||
result->add(0x061D, 0x1FFF);
|
||||
result->add(0x200C, 0x200D);
|
||||
result->add(0x2070, 0x218F);
|
||||
result->add(0x2C00, 0x2FEF);
|
||||
|
||||
result->addAll(*isAlpha);
|
||||
result->add(0x002B);
|
||||
result->add(0x005F);
|
||||
result->add(0x00A1, 0x061B);
|
||||
result->add(0x061D, 0x167F);
|
||||
result->add(0x1681, 0x1FFF);
|
||||
result->add(0x200B, 0x200D);
|
||||
result->add(0x2010, 0x2027);
|
||||
result->add(0x2030, 0x205E);
|
||||
result->add(0x2060, 0x2065);
|
||||
result->add(0x206A, 0x2FFF);
|
||||
result->add(0x3001, 0xD7FF);
|
||||
result->add(0xF900, 0xFDCF);
|
||||
result->add(0xE000, 0xFDCF);
|
||||
result->add(0xFDF0, 0xFFFD);
|
||||
result->add(0x100000, 0xEFFFF);
|
||||
result->add(0x10000, 0x1FFFD);
|
||||
result->add(0x20000, 0x2FFFD);
|
||||
result->add(0x30000, 0x3FFFD);
|
||||
result->add(0x40000, 0x4FFFD);
|
||||
result->add(0x50000, 0x5FFFD);
|
||||
result->add(0x60000, 0x6FFFD);
|
||||
result->add(0x70000, 0x7FFFD);
|
||||
result->add(0x80000, 0x8FFFD);
|
||||
result->add(0x90000, 0x9FFFD);
|
||||
result->add(0xA0000, 0xAFFFD);
|
||||
result->add(0xB0000, 0xBFFFD);
|
||||
result->add(0xC0000, 0xCFFFD);
|
||||
result->add(0xD0000, 0xDFFFD);
|
||||
result->add(0xE0000, 0xEFFFD);
|
||||
result->add(0xF0000, 0xFFFFD);
|
||||
result->add(0x100000, 0x10FFFD);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
@ -230,9 +247,6 @@ UnicodeSet* initNameChars(UErrorCode& status) {
|
|||
result->addAll(*digit);
|
||||
result->add(HYPHEN);
|
||||
result->add(PERIOD);
|
||||
result->add(0x00B7);
|
||||
result->add(0x0300, 0x036F);
|
||||
result->add(0x203F, 0x2040);
|
||||
result->freeze();
|
||||
return result;
|
||||
}
|
||||
|
@ -742,6 +756,29 @@ void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
|
|||
CHECK_BOUNDS(errorCode);
|
||||
}
|
||||
|
||||
/*
|
||||
Consumes a possibly-empty sequence of name-chars. Appends to `str`
|
||||
and returns `str`.
|
||||
*/
|
||||
UnicodeString Parser::parseNameChars(UnicodeString& str, UErrorCode& errorCode) {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
while (isNameChar(peek())) {
|
||||
UChar32 c = peek();
|
||||
str += c;
|
||||
normalizedInput += c;
|
||||
next();
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/*
|
||||
Consumes a non-empty sequence of `name-char`s, the first of which is
|
||||
also a `name-start`.
|
||||
|
@ -767,16 +804,7 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
|
|||
parseOptionalBidi();
|
||||
|
||||
// name-start *name-char
|
||||
while (isNameChar(peek())) {
|
||||
UChar32 c = peek();
|
||||
name += c;
|
||||
normalizedInput += c;
|
||||
next();
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
break;
|
||||
}
|
||||
}
|
||||
parseNameChars(name, errorCode);
|
||||
|
||||
// [bidi]
|
||||
parseOptionalBidi();
|
||||
|
@ -999,91 +1027,15 @@ Literal Parser::parseUnquotedLiteral(UErrorCode& errorCode) {
|
|||
if (U_FAILURE(errorCode)) {
|
||||
return {};
|
||||
}
|
||||
// unquoted-literal = 1*name-char
|
||||
|
||||
// unquoted -> name
|
||||
if (isNameStart(peek())) {
|
||||
return Literal(false, parseName(errorCode));
|
||||
if (!(isNameChar(peek()))) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
|
||||
// unquoted -> number
|
||||
// Parse the contents
|
||||
UnicodeString contents;
|
||||
|
||||
// Parse the sign
|
||||
if (peek() == HYPHEN) {
|
||||
contents += peek();
|
||||
normalizedInput += peek();
|
||||
next();
|
||||
}
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
|
||||
// Parse the integer part
|
||||
if (peek() == ((UChar32)0x0030) /* 0 */) {
|
||||
contents += peek();
|
||||
normalizedInput += peek();
|
||||
next();
|
||||
} else if (isDigit(peek())) {
|
||||
contents += parseDigits(errorCode);
|
||||
} else {
|
||||
// Error -- nothing else can start a number literal
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
|
||||
// Parse the decimal point if present
|
||||
if (peek() == PERIOD) {
|
||||
contents += peek();
|
||||
normalizedInput += peek();
|
||||
next();
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
// Parse the fraction part
|
||||
if (isDigit(peek())) {
|
||||
contents += parseDigits(errorCode);
|
||||
} else {
|
||||
// '.' not followed by digit is a parse error
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
|
||||
// Parse the exponent part if present
|
||||
if (peek() == UPPERCASE_E || peek() == LOWERCASE_E) {
|
||||
contents += peek();
|
||||
normalizedInput += peek();
|
||||
next();
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
// Parse sign if present
|
||||
if (peek() == PLUS || peek() == HYPHEN) {
|
||||
contents += peek();
|
||||
normalizedInput += peek();
|
||||
next();
|
||||
if (!inBounds()) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
}
|
||||
// Parse exponent digits
|
||||
if (!isDigit(peek())) {
|
||||
ERROR(errorCode);
|
||||
return {};
|
||||
}
|
||||
contents += parseDigits(errorCode);
|
||||
}
|
||||
|
||||
parseNameChars(contents, errorCode);
|
||||
return Literal(false, contents);
|
||||
}
|
||||
|
||||
|
|
|
@ -164,6 +164,7 @@ namespace message2 {
|
|||
void parseToken(const std::u16string_view&, UErrorCode&);
|
||||
void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&);
|
||||
bool nextIs(const std::u16string_view&) const;
|
||||
UnicodeString parseNameChars(UnicodeString&, UErrorCode&);
|
||||
UnicodeString parseName(UErrorCode&);
|
||||
UnicodeString parseIdentifier(UErrorCode&);
|
||||
UnicodeString parseDigits(UErrorCode&);
|
||||
|
|
|
@ -46,8 +46,9 @@ RequireDecimalSeparatorValidator::RequireDecimalSeparatorValidator(bool patternH
|
|||
}
|
||||
|
||||
void RequireDecimalSeparatorValidator::postProcess(ParsedNumber& result) const {
|
||||
bool parseIsInfNaN = 0 != (result.flags & FLAG_INFINITY) || 0 != (result.flags & FLAG_NAN);
|
||||
bool parseHasDecimalSeparator = 0 != (result.flags & FLAG_HAS_DECIMAL_SEPARATOR);
|
||||
if (parseHasDecimalSeparator != fPatternHasDecimalSeparator) {
|
||||
if (!parseIsInfNaN && parseHasDecimalSeparator != fPatternHasDecimalSeparator) {
|
||||
result.flags |= FLAG_FAIL;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -535,15 +535,14 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
|||
// in buf, starting at offset bufSegStart. Extract them
|
||||
// into a string matcher, and replace them with a
|
||||
// standin for that matcher.
|
||||
StringMatcher* m =
|
||||
new StringMatcher(buf, bufSegStart, buf.length(),
|
||||
segmentNumber, *parser.curData);
|
||||
if (m == nullptr) {
|
||||
LocalPointer<StringMatcher> m(new StringMatcher(buf, bufSegStart, buf.length(),
|
||||
segmentNumber, *parser.curData), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
|
||||
// Record and associate object and segment number
|
||||
parser.setSegmentObject(segmentNumber, m, status);
|
||||
parser.setSegmentObject(segmentNumber, m.orphan(), status);
|
||||
buf.truncate(bufSegStart);
|
||||
buf.append(parser.getSegmentStandin(segmentNumber, status));
|
||||
}
|
||||
|
@ -577,15 +576,15 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
|||
// in buf, starting at offset bufSegStart.
|
||||
UnicodeString output;
|
||||
buf.extractBetween(bufSegStart, buf.length(), output);
|
||||
FunctionReplacer *r =
|
||||
new FunctionReplacer(t, new StringReplacer(output, parser.curData));
|
||||
if (r == nullptr) {
|
||||
LocalPointer<FunctionReplacer> r(
|
||||
new FunctionReplacer(t, new StringReplacer(output, parser.curData)), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
|
||||
// Replace the buffer contents with a stand-in
|
||||
buf.truncate(bufSegStart);
|
||||
buf.append(parser.generateStandInFor(r, status));
|
||||
buf.append(parser.generateStandInFor(r.orphan(), status));
|
||||
}
|
||||
break;
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
|
@ -671,9 +670,9 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
|||
qlimit = qstart + 1;
|
||||
}
|
||||
|
||||
UnicodeFunctor *m =
|
||||
new StringMatcher(buf, qstart, qlimit, 0, *parser.curData);
|
||||
if (m == nullptr) {
|
||||
LocalPointer<UnicodeFunctor> m(
|
||||
new StringMatcher(buf, qstart, qlimit, 0, *parser.curData), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
int32_t min = 0;
|
||||
|
@ -689,12 +688,16 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
|||
// case KLEENE_STAR:
|
||||
// do nothing -- min, max already set
|
||||
}
|
||||
m = new Quantifier(m, min, max);
|
||||
if (m == nullptr) {
|
||||
LocalPointer<UnicodeFunctor> m2(new Quantifier(m.getAlias(), min, max), status);
|
||||
if (m2.isValid()) {
|
||||
m.orphan();
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
m = std::move(m2);
|
||||
buf.truncate(qstart);
|
||||
buf.append(parser.generateStandInFor(m, status));
|
||||
buf.append(parser.generateStandInFor(m.orphan(), status));
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -921,7 +924,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
|
||||
dotStandIn = static_cast<char16_t>(-1);
|
||||
|
||||
UnicodeString *tempstr = nullptr; // used for memory allocation error checking
|
||||
LocalPointer<UnicodeString> tempstr; // used for memory allocation error checking
|
||||
UnicodeString str; // scratch
|
||||
UnicodeString idBlockResult;
|
||||
int32_t pos = 0;
|
||||
|
@ -1029,17 +1032,16 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
pos = p;
|
||||
} else {
|
||||
if (parsingIDs) {
|
||||
tempstr = new UnicodeString(idBlockResult);
|
||||
tempstr.adoptInsteadAndCheckErrorCode(new UnicodeString(idBlockResult), status);
|
||||
// nullptr pointer check
|
||||
if (tempstr == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
U_ASSERT(idBlockVector.hasDeleter());
|
||||
if (direction == UTRANS_FORWARD)
|
||||
idBlockVector.adoptElement(tempstr, status);
|
||||
idBlockVector.adoptElement(tempstr.orphan(), status);
|
||||
else
|
||||
idBlockVector.insertElementAt(tempstr, 0, status);
|
||||
idBlockVector.insertElementAt(tempstr.orphan(), 0, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
@ -1074,18 +1076,17 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
}
|
||||
|
||||
if (parsingIDs && idBlockResult.length() > 0) {
|
||||
tempstr = new UnicodeString(idBlockResult);
|
||||
tempstr.adoptInsteadAndCheckErrorCode(new UnicodeString(idBlockResult), status);
|
||||
// nullptr pointer check
|
||||
if (tempstr == nullptr) {
|
||||
if (U_FAILURE(status)) {
|
||||
// TODO: Testing, forcing this path, shows many memory leaks. ICU-21701
|
||||
// intltest translit/TransliteratorTest/TestInstantiation
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
if (direction == UTRANS_FORWARD)
|
||||
idBlockVector.adoptElement(tempstr, status);
|
||||
idBlockVector.adoptElement(tempstr.orphan(), status);
|
||||
else
|
||||
idBlockVector.insertElementAt(tempstr, 0, status);
|
||||
idBlockVector.insertElementAt(tempstr.orphan(), 0, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
@ -1365,12 +1366,12 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
|||
return syntaxError(U_MALFORMED_VARIABLE_DEFINITION, rule, start, status);
|
||||
}
|
||||
// We allow anything on the right, including an empty string.
|
||||
UnicodeString* value = new UnicodeString(right->text);
|
||||
LocalPointer<UnicodeString> value(new UnicodeString(right->text), status);
|
||||
// nullptr pointer check
|
||||
if (value == nullptr) {
|
||||
if (U_FAILURE(status)) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
variableNames.put(undefinedVariableName, value, status);
|
||||
variableNames.put(undefinedVariableName, value.orphan(), status);
|
||||
++variableLimit;
|
||||
return pos;
|
||||
}
|
||||
|
@ -1451,30 +1452,32 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
|||
}
|
||||
|
||||
// Flatten segment objects vector to an array
|
||||
UnicodeFunctor** segmentsArray = nullptr;
|
||||
LocalMemory<UnicodeFunctor*> segmentsArray;
|
||||
if (segmentObjects.size() > 0) {
|
||||
segmentsArray = static_cast<UnicodeFunctor**>(uprv_malloc(segmentObjects.size() * sizeof(UnicodeFunctor*)));
|
||||
segmentsArray.adoptInstead(static_cast<UnicodeFunctor**>(uprv_malloc(segmentObjects.size() * sizeof(UnicodeFunctor*))));
|
||||
// Null pointer check
|
||||
if (segmentsArray == nullptr) {
|
||||
if (segmentsArray.isNull()) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
segmentObjects.toArray(reinterpret_cast<void**>(segmentsArray));
|
||||
segmentObjects.toArray(reinterpret_cast<void**>(segmentsArray.getAlias()));
|
||||
}
|
||||
TransliterationRule* temptr = new TransliterationRule(
|
||||
LocalPointer<TransliterationRule> temptr(new TransliterationRule(
|
||||
left->text, left->ante, left->post,
|
||||
right->text, right->cursor, right->cursorOffset,
|
||||
segmentsArray,
|
||||
segmentsArray.getAlias(),
|
||||
segmentObjects.size(),
|
||||
left->anchorStart, left->anchorEnd,
|
||||
curData,
|
||||
status);
|
||||
status), status);
|
||||
//Null pointer check
|
||||
if (temptr == nullptr) {
|
||||
uprv_free(segmentsArray);
|
||||
if (temptr.isValid()) {
|
||||
segmentsArray.orphan();
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
|
||||
}
|
||||
|
||||
curData->ruleSet.addRule(temptr, status);
|
||||
curData->ruleSet.addRule(temptr.orphan(), status);
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
@ -1620,13 +1623,12 @@ void TransliteratorParser::setSegmentObject(int32_t seg, StringMatcher* adopted,
|
|||
*/
|
||||
char16_t TransliteratorParser::getDotStandIn(UErrorCode& status) {
|
||||
if (dotStandIn == static_cast<char16_t>(-1)) {
|
||||
UnicodeSet* tempus = new UnicodeSet(UnicodeString(true, DOT_SET, -1), status);
|
||||
LocalPointer<UnicodeSet> tempus(new UnicodeSet(UnicodeString(true, DOT_SET, -1), status), status);
|
||||
// Null pointer check.
|
||||
if (tempus == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
if (U_FAILURE(status)) {
|
||||
return static_cast<char16_t>(0x0000);
|
||||
}
|
||||
dotStandIn = generateStandInFor(tempus, status);
|
||||
dotStandIn = generateStandInFor(tempus.orphan(), status);
|
||||
}
|
||||
return dotStandIn;
|
||||
}
|
||||
|
|
|
@ -1172,9 +1172,8 @@ TimeZoneGenericNames::createInstance(const Locale& locale, UErrorCode& status) {
|
|||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
TimeZoneGenericNames* instance = new TimeZoneGenericNames();
|
||||
if (instance == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
LocalPointer<TimeZoneGenericNames> instance(new TimeZoneGenericNames(), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1193,7 +1192,6 @@ TimeZoneGenericNames::createInstance(const Locale& locale, UErrorCode& status) {
|
|||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
delete instance;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1252,12 +1250,11 @@ TimeZoneGenericNames::createInstance(const Locale& locale, UErrorCode& status) {
|
|||
} // End of mutex locked block
|
||||
|
||||
if (cacheEntry == nullptr) {
|
||||
delete instance;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
instance->fRef = cacheEntry;
|
||||
return instance;
|
||||
return instance.orphan();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -715,15 +715,15 @@ private:
|
|||
for (int32_t i = 0; i < UTZNM_INDEX_COUNT; i++) {
|
||||
const char16_t* name = fNames[i];
|
||||
if (name != nullptr) {
|
||||
ZNameInfo* nameinfo = static_cast<ZNameInfo*>(uprv_malloc(sizeof(ZNameInfo)));
|
||||
if (nameinfo == nullptr) {
|
||||
LocalMemory<ZNameInfo> nameinfo(static_cast<ZNameInfo*>(uprv_malloc(sizeof(ZNameInfo))));
|
||||
if (nameinfo.isNull()) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
nameinfo->mzID = mzID;
|
||||
nameinfo->tzID = tzID;
|
||||
nameinfo->type = getTZNameType(static_cast<UTimeZoneNameTypeIndex>(i));
|
||||
trie.put(name, nameinfo, status); // trie.put() takes ownership of the key
|
||||
trie.put(name, nameinfo.orphan(), status); // trie.put() takes ownership of the key
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -2847,29 +2847,25 @@ class U_I18N_API MeasureUnit: public UObject {
|
|||
*/
|
||||
static MeasureUnit getTonne();
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Returns by pointer, unit of mass: metric-ton
|
||||
* (renamed to tonne in CLDR 42 / ICU 72).
|
||||
* Caller owns returned value and must free it.
|
||||
* Note: In ICU 74 this will be deprecated in favor of
|
||||
* createTonne(), which is currently draft but will
|
||||
* become stable in ICU 74, and which uses the preferred naming.
|
||||
* Also see {@link #getMetricTon()} and {@link #createTonne()}.
|
||||
* @param status ICU error code.
|
||||
* @stable ICU 54
|
||||
* @deprecated ICU 78 use createTonne(UErrorCode &status)
|
||||
*/
|
||||
static MeasureUnit *createMetricTon(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns by value, unit of mass: metric-ton
|
||||
* (renamed to tonne in CLDR 42 / ICU 72).
|
||||
* Note: In ICU 74 this will be deprecated in favor of
|
||||
* getTonne(), which is currently draft but will
|
||||
* become stable in ICU 74, and which uses the preferred naming.
|
||||
* Also see {@link #createMetricTon()} and {@link #getTonne()}.
|
||||
* @stable ICU 64
|
||||
* @deprecated ICU 78 use getTonne()
|
||||
*/
|
||||
static MeasureUnit getMetricTon();
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Returns by pointer, unit of power: gigawatt.
|
||||
|
|
|
@ -355,9 +355,7 @@ namespace message2 {
|
|||
|
||||
// Formatting methods
|
||||
|
||||
// Used for normalizing variable names and keys for comparison
|
||||
UnicodeString normalizeNFC(const UnicodeString&) const;
|
||||
[[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
|
||||
[[nodiscard]] FormattedPlaceholder formatLiteral(const UnicodeString&, const data_model::Literal&) const;
|
||||
void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
|
||||
// Evaluates a function call
|
||||
// Dispatches on argument type
|
||||
|
@ -371,13 +369,21 @@ namespace message2 {
|
|||
MessageContext& context,
|
||||
UErrorCode& status) const;
|
||||
// Formats an expression that appears in a pattern or as the definition of a local variable
|
||||
[[nodiscard]] InternalValue* formatExpression(const Environment&,
|
||||
const data_model::Expression&,
|
||||
MessageContext&,
|
||||
UErrorCode&) const;
|
||||
[[nodiscard]] InternalValue* formatExpression(const UnicodeString&,
|
||||
const Environment&,
|
||||
const data_model::Expression&,
|
||||
MessageContext&,
|
||||
UErrorCode&) const;
|
||||
[[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
|
||||
[[nodiscard]] InternalValue* formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
|
||||
[[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
|
||||
[[nodiscard]] InternalValue* formatOperand(const UnicodeString&,
|
||||
const Environment&,
|
||||
const data_model::Operand&,
|
||||
MessageContext&,
|
||||
UErrorCode&) const;
|
||||
[[nodiscard]] FormattedPlaceholder evalArgument(const UnicodeString&,
|
||||
const data_model::VariableName&,
|
||||
MessageContext&,
|
||||
UErrorCode&) const;
|
||||
void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
|
||||
|
||||
// Function registry methods
|
||||
|
@ -452,9 +458,6 @@ namespace message2 {
|
|||
// The default is false.
|
||||
bool signalErrors = false;
|
||||
|
||||
// Used for implementing normalizeNFC()
|
||||
const Normalizer2* nfcNormalizer = nullptr;
|
||||
|
||||
}; // class MessageFormatter
|
||||
|
||||
} // namespace message2
|
||||
|
|
|
@ -114,8 +114,7 @@ namespace message2 {
|
|||
private:
|
||||
friend class MessageContext;
|
||||
|
||||
const Formattable* getArgument(const MessageFormatter&,
|
||||
const data_model::VariableName&,
|
||||
const Formattable* getArgument(const data_model::VariableName&,
|
||||
UErrorCode&) const;
|
||||
|
||||
// Avoids using Hashtable so that code constructing a Hashtable
|
||||
|
|
|
@ -456,16 +456,23 @@ class U_I18N_API ResolvedFunctionOption : public UObject {
|
|||
|
||||
/* const */ UnicodeString name;
|
||||
/* const */ Formattable value;
|
||||
// True iff this option was represented in the syntax by a literal value.
|
||||
// This is necessary in order to implement the spec for the `select` option
|
||||
// of `:number` and `:integer`.
|
||||
/* const */ bool sourceIsLiteral;
|
||||
|
||||
public:
|
||||
const UnicodeString& getName() const { return name; }
|
||||
const Formattable& getValue() const { return value; }
|
||||
ResolvedFunctionOption(const UnicodeString& n, const Formattable& f) : name(n), value(f) {}
|
||||
bool isLiteral() const { return sourceIsLiteral; }
|
||||
ResolvedFunctionOption(const UnicodeString& n, const Formattable& f, bool s)
|
||||
: name(n), value(f), sourceIsLiteral(s) {}
|
||||
ResolvedFunctionOption() {}
|
||||
ResolvedFunctionOption(ResolvedFunctionOption&&);
|
||||
ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept {
|
||||
name = std::move(other.name);
|
||||
value = std::move(other.value);
|
||||
sourceIsLiteral = other.sourceIsLiteral;
|
||||
return *this;
|
||||
}
|
||||
virtual ~ResolvedFunctionOption();
|
||||
|
@ -558,9 +565,10 @@ class U_I18N_API FunctionOptions : public UObject {
|
|||
explicit FunctionOptions(UVector&&, UErrorCode&);
|
||||
|
||||
const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const;
|
||||
UBool getFunctionOption(const UnicodeString&, Formattable&) const;
|
||||
UBool getFunctionOption(std::u16string_view, Formattable&) const;
|
||||
UBool wasSetFromLiteral(const UnicodeString&) const;
|
||||
// Returns empty string if option doesn't exist
|
||||
UnicodeString getStringFunctionOption(const UnicodeString&) const;
|
||||
UnicodeString getStringFunctionOption(std::u16string_view) const;
|
||||
int32_t optionsCount() const { return functionOptionsLen; }
|
||||
|
||||
// Named options passed to functions
|
||||
|
@ -570,7 +578,10 @@ class U_I18N_API FunctionOptions : public UObject {
|
|||
ResolvedFunctionOption* options;
|
||||
int32_t functionOptionsLen = 0;
|
||||
|
||||
// Returns a new FunctionOptions
|
||||
/**
|
||||
* The original FunctionOptions isn't usable after this call.
|
||||
* @returns A new, merged FunctionOptions.
|
||||
*/
|
||||
FunctionOptions mergeOptions(FunctionOptions&& other, UErrorCode&);
|
||||
}; // class FunctionOptions
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ FUZZER_TARGETS = \
|
|||
collator_rulebased_fuzzer \
|
||||
converter_fuzzer date_format_fuzzer \
|
||||
date_time_pattern_generator_fuzzer \
|
||||
decimal_format_symbols_fuzzer \
|
||||
dtfmtsym_fuzzer \
|
||||
list_format_fuzzer locale_fuzzer \
|
||||
locale_morph_fuzzer \
|
||||
|
|
95
icu4c/source/test/fuzzer/decimal_format_symbols_fuzzer.cpp
Normal file
95
icu4c/source/test/fuzzer/decimal_format_symbols_fuzzer.cpp
Normal file
|
@ -0,0 +1,95 @@
|
|||
// © 2025 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// Fuzzer for DecimalFormatSymbols::DecimalFormatSymbols.
|
||||
|
||||
#include <cstring>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "fuzzer_utils.h"
|
||||
#include "unicode/dcfmtsym.h"
|
||||
#include "unicode/unum.h"
|
||||
#include "uassert.h"
|
||||
|
||||
IcuEnvironment* env = new IcuEnvironment();
|
||||
|
||||
void testMethods(
|
||||
const icu::DecimalFormatSymbols& dfs,
|
||||
icu::DecimalFormatSymbols::ENumberFormatSymbol symbol,
|
||||
UCurrencySpacing spacing,
|
||||
int32_t digit) {
|
||||
dfs.getLocale();
|
||||
dfs.getSymbol(symbol);
|
||||
dfs.getConstSymbol(symbol);
|
||||
dfs.getCurrencyPattern();
|
||||
dfs.getNumberingSystemName();
|
||||
dfs.isCustomCurrencySymbol();
|
||||
dfs.isCustomIntlCurrencySymbol();
|
||||
dfs.getCodePointZero();
|
||||
dfs.getConstDigitSymbol(digit);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
dfs.getPatternForCurrencySpacing(spacing, true, status);
|
||||
dfs.getPatternForCurrencySpacing(spacing, false, status);
|
||||
|
||||
}
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if (size < sizeof(uint16_t)) {
|
||||
return 0;
|
||||
}
|
||||
uint16_t rnd = *(reinterpret_cast<const uint16_t *>(data));
|
||||
const icu::Locale& locale = GetRandomLocale(rnd);
|
||||
data = data + sizeof(uint16_t);
|
||||
size = size - sizeof(uint16_t);
|
||||
|
||||
if (size < sizeof(uint16_t)) {
|
||||
return 0;
|
||||
}
|
||||
uint16_t rnd2 = *(reinterpret_cast<const uint16_t *>(data));
|
||||
std::unique_ptr<const icu::NumberingSystem> ns(CreateRandomNumberingSystem(rnd2, status));
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
data = data + sizeof(uint16_t);
|
||||
size = size - sizeof(uint16_t);
|
||||
|
||||
if (size < sizeof(int32_t)) {
|
||||
return 0;
|
||||
}
|
||||
int32_t digit = *(reinterpret_cast<const int32_t *>(data));
|
||||
data = data + sizeof(int32_t);
|
||||
size = size - sizeof(int32_t);
|
||||
|
||||
if (size < sizeof(uint8_t)) {
|
||||
return 0;
|
||||
}
|
||||
icu::DecimalFormatSymbols::ENumberFormatSymbol symbol =
|
||||
static_cast<icu::DecimalFormatSymbols::ENumberFormatSymbol>(
|
||||
*data % icu::DecimalFormatSymbols::ENumberFormatSymbol::kFormatSymbolCount);
|
||||
data = data + sizeof(uint8_t);
|
||||
size = size - sizeof(uint8_t);
|
||||
|
||||
if (size < sizeof(uint8_t)) {
|
||||
return 0;
|
||||
}
|
||||
UCurrencySpacing spacing =
|
||||
static_cast<UCurrencySpacing>(
|
||||
*data % UCurrencySpacing::UNUM_CURRENCY_SPACING_COUNT);
|
||||
data = data + sizeof(uint8_t);
|
||||
size = size - sizeof(uint8_t);
|
||||
|
||||
size_t unistr_size = size/2;
|
||||
std::unique_ptr<char16_t[]> fuzzbuff(new char16_t[unistr_size]);
|
||||
std::memcpy(fuzzbuff.get(), data, unistr_size * 2);
|
||||
|
||||
icu::UnicodeString fuzzstr(false, fuzzbuff.get(), unistr_size);
|
||||
icu::DecimalFormatSymbols dfs1(locale, status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
testMethods(dfs1, symbol, spacing, digit);
|
||||
|
||||
icu::DecimalFormatSymbols dfs2(locale, *ns, status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
testMethods(dfs2, symbol, spacing, digit);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -7,6 +7,8 @@
|
|||
#include <assert.h>
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/numsys.h"
|
||||
#include "unicode/strenum.h"
|
||||
|
||||
struct IcuEnvironment {
|
||||
IcuEnvironment() {
|
||||
|
@ -21,4 +23,20 @@ const icu::Locale& GetRandomLocale(uint16_t rnd) {
|
|||
return locales[rnd % num_locales];
|
||||
}
|
||||
|
||||
// Creates a NumberingSystem chosen by `rnd` from the names reported by
// NumberingSystem::getAvailableNames().
//
//   rnd    - fuzzer-supplied selector; reduced modulo the number of names
//   status - ICU error code; set to a failure code whenever nullptr is
//            returned because no numbering system could be selected
//
// Returns the result of NumberingSystem::createInstanceByName() for the
// selected name, or nullptr on failure.
const icu::NumberingSystem* CreateRandomNumberingSystem(uint16_t rnd, UErrorCode &status) {
    std::unique_ptr<icu::StringEnumeration> se(icu::NumberingSystem::getAvailableNames(status));
    if (U_FAILURE(status)) return nullptr;
    if (se == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    int32_t count = se->count(status);
    if (U_FAILURE(status)) return nullptr;
    if (count <= 0) {
        // Nothing to choose from; also avoids `rnd % 0`.
        status = U_MISSING_RESOURCE_ERROR;
        return nullptr;
    }
    int32_t index = rnd % count;
    se->reset(status);
    // Skip exactly `index` names so that the next() call below yields the
    // name at position `index`. (The bound used to be `index - 1`, which made
    // index 0 and 1 pick the same name and left the last name unreachable.)
    for (int32_t i = 0; i < index; i++) {
        se->next(nullptr, status);
    }
    const char* name = se->next(nullptr, status);
    if (U_FAILURE(status)) return nullptr;
    if (name == nullptr) {
        // The enumeration ended earlier than count() promised.
        status = U_MISSING_RESOURCE_ERROR;
        return nullptr;
    }
    return icu::NumberingSystem::createInstanceByName(name, status);
}
|
||||
|
||||
|
||||
#endif // FUZZER_UTILS_H_
|
||||
|
|
|
@ -75,7 +75,9 @@ numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
|
|||
static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
|
||||
formattedvaluetest.o formatted_string_builder_test.o numbertest_permutation.o \
|
||||
units_data_test.o units_router_test.o units_test.o displayoptions_test.o \
|
||||
numbertest_simple.o cplusplus_header_api_build_test.o uchar_type_build_test.o ucolheaderonlytest.o usetheaderonlytest.o
|
||||
numbertest_simple.o \
|
||||
cplusplus_header_api_build_test.o uchar_type_build_test.o \
|
||||
ucolheaderonlytest.o usetheaderonlytest.o utfiteratortest.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
|
|
@ -208,6 +208,7 @@ void CalendarTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
|
|||
TESTCASE_AUTO(Test22633RollTwiceGetTimeOverflow);
|
||||
|
||||
TESTCASE_AUTO(Test22633HebrewLargeNegativeDay);
|
||||
TESTCASE_AUTO(Test23069HebrewHanukkah);
|
||||
TESTCASE_AUTO(Test22730JapaneseOverflow);
|
||||
TESTCASE_AUTO(Test22730CopticOverflow);
|
||||
TESTCASE_AUTO(Test22962ComputeJulianDayOverflow);
|
||||
|
@ -5928,6 +5929,116 @@ void CalendarTest::Test22633HebrewLargeNegativeDay() {
|
|||
calendar->get(UCAL_HOUR, status);
|
||||
assertEquals("status return without hang", status, U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
void CalendarTest::Test23069HebrewHanukkah() {
|
||||
// Based on Hanukkah data in
|
||||
// https://en.wikipedia.org/wiki/Jewish_and_Israeli_holidays_2000%E2%80%932050
|
||||
struct TestCase {
|
||||
int32_t hebrewYear;
|
||||
int32_t gregorianYear;
|
||||
int32_t gregorianMonth;
|
||||
int32_t gregorianDate;
|
||||
} cases[] = {
|
||||
{ 5760, 1999, UCAL_DECEMBER, 4},
|
||||
{ 5761, 2000, UCAL_DECEMBER, 22},
|
||||
{ 5762, 2001, UCAL_DECEMBER, 10},
|
||||
{ 5763, 2002, UCAL_NOVEMBER, 30},
|
||||
{ 5764, 2003, UCAL_DECEMBER, 20},
|
||||
{ 5765, 2004, UCAL_DECEMBER, 8},
|
||||
{ 5766, 2005, UCAL_DECEMBER, 26},
|
||||
{ 5767, 2006, UCAL_DECEMBER, 16},
|
||||
{ 5768, 2007, UCAL_DECEMBER, 5},
|
||||
{ 5769, 2008, UCAL_DECEMBER, 22},
|
||||
{ 5770, 2009, UCAL_DECEMBER, 12},
|
||||
{ 5771, 2010, UCAL_DECEMBER, 2},
|
||||
{ 5772, 2011, UCAL_DECEMBER, 21},
|
||||
{ 5773, 2012, UCAL_DECEMBER, 9},
|
||||
{ 5774, 2013, UCAL_NOVEMBER, 28},
|
||||
{ 5775, 2014, UCAL_DECEMBER, 17},
|
||||
{ 5776, 2015, UCAL_DECEMBER, 7},
|
||||
{ 5777, 2016, UCAL_DECEMBER, 25},
|
||||
{ 5778, 2017, UCAL_DECEMBER, 13},
|
||||
{ 5779, 2018, UCAL_DECEMBER, 3},
|
||||
{ 5780, 2019, UCAL_DECEMBER, 23},
|
||||
{ 5781, 2020, UCAL_DECEMBER, 11},
|
||||
{ 5782, 2021, UCAL_NOVEMBER, 29},
|
||||
{ 5783, 2022, UCAL_DECEMBER, 19},
|
||||
{ 5784, 2023, UCAL_DECEMBER, 8},
|
||||
{ 5785, 2024, UCAL_DECEMBER, 26},
|
||||
{ 5786, 2025, UCAL_DECEMBER, 15},
|
||||
{ 5787, 2026, UCAL_DECEMBER, 5},
|
||||
{ 5788, 2027, UCAL_DECEMBER, 25},
|
||||
{ 5789, 2028, UCAL_DECEMBER, 13},
|
||||
{ 5790, 2029, UCAL_DECEMBER, 2},
|
||||
{ 5791, 2030, UCAL_DECEMBER, 21},
|
||||
{ 5792, 2031, UCAL_DECEMBER, 10},
|
||||
{ 5793, 2032, UCAL_NOVEMBER, 28},
|
||||
{ 5794, 2033, UCAL_DECEMBER, 17},
|
||||
{ 5795, 2034, UCAL_DECEMBER, 7},
|
||||
{ 5796, 2035, UCAL_DECEMBER, 26},
|
||||
{ 5797, 2036, UCAL_DECEMBER, 14},
|
||||
{ 5798, 2037, UCAL_DECEMBER, 3},
|
||||
{ 5799, 2038, UCAL_DECEMBER, 22},
|
||||
{ 5800, 2039, UCAL_DECEMBER, 12},
|
||||
{ 5801, 2040, UCAL_NOVEMBER, 30},
|
||||
{ 5802, 2041, UCAL_DECEMBER, 18},
|
||||
{ 5803, 2042, UCAL_DECEMBER, 8},
|
||||
{ 5804, 2043, UCAL_DECEMBER, 27},
|
||||
{ 5805, 2044, UCAL_DECEMBER, 15},
|
||||
{ 5806, 2045, UCAL_DECEMBER, 4},
|
||||
{ 5807, 2046, UCAL_DECEMBER, 24},
|
||||
{ 5808, 2047, UCAL_DECEMBER, 13},
|
||||
{ 5809, 2048, UCAL_NOVEMBER, 30},
|
||||
{ 5810, 2049, UCAL_DECEMBER, 20},
|
||||
{ 5811, 2050, UCAL_DECEMBER, 10},
|
||||
};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
LocalPointer<Calendar> hebrew(
|
||||
Calendar::createInstance(Locale("en-u-ca-hebrew"), status),
|
||||
status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
LocalPointer<GregorianCalendar> gregorian(
|
||||
new GregorianCalendar(hebrew->getTimeZone(), status), status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
for (auto& cas : cases) {
|
||||
hebrew->clear();
|
||||
// Test Hebrew Calendar to Gregorian Calendar.
|
||||
// Hanukkah is the 25th day of Kislev
|
||||
hebrew->set(UCAL_YEAR, cas.hebrewYear);
|
||||
hebrew->set(UCAL_MONTH, icu::HebrewCalendar::KISLEV);
|
||||
hebrew->set(UCAL_DATE, 25);
|
||||
gregorian->setTime(hebrew->getTime(status), status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
int32_t year = gregorian->get(UCAL_YEAR, status);
|
||||
int32_t month = gregorian->get(UCAL_MONTH, status);
|
||||
int32_t date = gregorian->get(UCAL_DATE, status);
|
||||
assertEquals("Hebrew to Gregorian Calendar year", year, cas.gregorianYear);
|
||||
assertEquals("Hebrew to Gregorian Calendar month", month, cas.gregorianMonth);
|
||||
assertEquals("Hebrew to Gregorian Calendar date", date, cas.gregorianDate);
|
||||
if (year != cas.gregorianYear || month != cas.gregorianMonth || date != cas.gregorianDate) {
|
||||
printf("Hebrew year %d Gregorain Date(%d/%d/%d) but should be Date(%d/%d/%d)\n",
|
||||
cas.hebrewYear, year, 1+month, date,
|
||||
cas.gregorianYear, 1+cas.gregorianMonth, cas.gregorianDate);
|
||||
}
|
||||
// Test Gregorian Calendar to Hebrew Calendar.
|
||||
gregorian->clear();
|
||||
gregorian->set(UCAL_YEAR, cas.gregorianYear);
|
||||
gregorian->set(UCAL_MONTH, cas.gregorianMonth);
|
||||
gregorian->set(UCAL_DATE, cas.gregorianDate);
|
||||
hebrew->setTime(gregorian->getTime(status), status);
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
year = hebrew->get(UCAL_YEAR, status);
|
||||
month = hebrew->get(UCAL_MONTH, status);
|
||||
date = hebrew->get(UCAL_DATE, status);
|
||||
assertEquals("Gregorian to Hebrew Calendar year", year, cas.hebrewYear);
|
||||
assertEquals("Gregorian to Hebrew Calendar month", month, icu::HebrewCalendar::KISLEV);
|
||||
assertEquals("Gregorian to Hebrew Calendar date", date, 25);
|
||||
if (year != cas.hebrewYear || month != icu::HebrewCalendar::KISLEV || date != 25) {
|
||||
printf("Gregorian year %d Hebrew Date(%d/%d/%d) but should be Date(%d/%d/25)\n",
|
||||
cas.gregorianYear, year, 1+month, date,
|
||||
cas.hebrewYear, 1+icu::HebrewCalendar::KISLEV);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CalendarTest::Test22730JapaneseOverflow() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
|
@ -363,6 +363,7 @@ public: // package
|
|||
|
||||
void TestChineseCalendarComputeMonthStart();
|
||||
void Test22633HebrewLargeNegativeDay();
|
||||
void Test23069HebrewHanukkah();
|
||||
|
||||
void RunChineseCalendarInTemporalLeapYearTest(Calendar* cal);
|
||||
void RunIslamicCalendarInTemporalLeapYearTest(Calendar* cal);
|
||||
|
|
|
@ -223,6 +223,7 @@
|
|||
<ClCompile Include="sfwdchit.cpp" />
|
||||
<ClCompile Include="strcase.cpp" />
|
||||
<ClCompile Include="ustrtest.cpp" />
|
||||
<ClCompile Include="utfiteratortest.cpp" />
|
||||
<ClCompile Include="utxttest.cpp" />
|
||||
<ClCompile Include="cpdtrtst.cpp" />
|
||||
<ClCompile Include="ittrans.cpp" />
|
||||
|
|
|
@ -490,6 +490,9 @@
|
|||
<ClCompile Include="ustrtest.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="utfiteratortest.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="utxttest.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -51,6 +51,7 @@ extern IntlTest *createPluralMapTest();
|
|||
extern IntlTest *createStaticUnicodeSetsTest();
|
||||
#endif
|
||||
static IntlTest *createUHashTest();
|
||||
extern IntlTest *createUTFIteratorTest();
|
||||
|
||||
void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
|
||||
{
|
||||
|
@ -90,6 +91,7 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
|
|||
TESTCASE_AUTO_CREATE_CLASS(UColHeaderOnlyTest);
|
||||
#endif
|
||||
TESTCASE_AUTO_CREATE_CLASS(USetHeaderOnlyTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(UTFIteratorTest);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ static UErrorCode getExpectedRuntimeErrorFromString(const std::string& errorName
|
|||
return U_MF_OPERAND_MISMATCH_ERROR;
|
||||
}
|
||||
if (errorName == "bad-option") {
|
||||
return U_MF_FORMATTING_ERROR;
|
||||
return U_MF_BAD_OPTION;
|
||||
}
|
||||
if (errorName == "unknown-function") {
|
||||
return U_MF_UNKNOWN_FUNCTION_ERROR;
|
||||
|
@ -112,10 +112,11 @@ static bool setArguments(TestMessageFormat2& t,
|
|||
return false; // For now, boolean and null arguments are unsupported
|
||||
}
|
||||
} else {
|
||||
schemaError = true;
|
||||
break;
|
||||
// Null argument -- not supported
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
t.logln("name is null");
|
||||
schemaError = true;
|
||||
break;
|
||||
}
|
||||
|
@ -138,6 +139,7 @@ static bool setArguments(TestMessageFormat2& t,
|
|||
static void runValidTest(TestMessageFormat2& icuTest,
|
||||
const std::string& testName,
|
||||
const std::string& defaultError,
|
||||
bool anyError,
|
||||
const json& j,
|
||||
IcuTestErrorCode& errorCode) {
|
||||
auto j_object = j.template get<json::object_t>();
|
||||
|
@ -196,15 +198,18 @@ static void runValidTest(TestMessageFormat2& icuTest,
|
|||
if (errorType.length() <= 0) {
|
||||
errorType = errors[0]["name"];
|
||||
}
|
||||
// See TODO(options); ignore these tests for now
|
||||
if (errorType == "bad-option") {
|
||||
return;
|
||||
}
|
||||
// // See TODO(options); ignore these tests for now
|
||||
// if (errorType == "bad-option") {
|
||||
// return;
|
||||
// }
|
||||
test.setExpectedError(getExpectedRuntimeErrorFromString(errorType));
|
||||
expectedError = true;
|
||||
} else if (defaultError.length() > 0) {
|
||||
test.setExpectedError(getExpectedRuntimeErrorFromString(defaultError));
|
||||
expectedError = true;
|
||||
} else if (anyError) {
|
||||
test.setExpectedAnyError();
|
||||
expectedError = true;
|
||||
}
|
||||
|
||||
// If no expected result and no error, then set the test builder to expect success
|
||||
|
@ -250,9 +255,13 @@ static void runTestsFromJsonFile(TestMessageFormat2& t,
|
|||
|
||||
// Some files have an expected error
|
||||
std::string defaultError;
|
||||
bool anyError = false;
|
||||
if (!j_object["defaultTestProperties"].is_null()
|
||||
&& !j_object["defaultTestProperties"]["expErrors"].is_null()) {
|
||||
auto expErrors = j_object["defaultTestProperties"]["expErrors"];
|
||||
// If expErrors is a boolean "true", that means we expect all tests
|
||||
// to emit errors but we don't care which ones.
|
||||
anyError = expErrors.is_boolean() && expErrors.template get<bool>();
|
||||
// expErrors might also be a boolean, in which case we ignore it --
|
||||
// so we have to check if it's an array
|
||||
if (expErrors.is_array()) {
|
||||
|
@ -270,10 +279,10 @@ static void runTestsFromJsonFile(TestMessageFormat2& t,
|
|||
for (auto iter = tests.begin(); iter != tests.end(); ++iter) {
|
||||
makeTestName(testName, sizeof(testName), fileName, ++testNum);
|
||||
t.logln(testName);
|
||||
// Use error_handler_t::ignore because of the patch to allow lone surrogates
|
||||
t.logln(u_str(iter->dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore)));
|
||||
|
||||
t.logln(u_str(iter->dump()));
|
||||
|
||||
runValidTest(t, testName, defaultError, *iter, errorCode);
|
||||
runValidTest(t, testName, defaultError, anyError, *iter, errorCode);
|
||||
}
|
||||
} else {
|
||||
// Test doesn't follow schema -- probably an error
|
||||
|
@ -301,6 +310,7 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
|
||||
// Do valid spec tests
|
||||
runTestsFromJsonFile(*this, "spec/syntax.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "spec/fallback.json", errorCode);
|
||||
|
||||
// Uncomment when test functions are implemented in the registry
|
||||
// See https://unicode-org.atlassian.net/browse/ICU-22907
|
||||
|
@ -315,9 +325,6 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode);
|
||||
|
||||
// Other tests (non-spec)
|
||||
// TODO: Delete this file after https://github.com/unicode-org/message-format-wg/pull/904
|
||||
// lands and the tests here are updated from the spec repo
|
||||
runTestsFromJsonFile(*this, "normalization.json", errorCode);
|
||||
// TODO: https://github.com/unicode-org/message-format-wg/pull/902 will
|
||||
// move the bidi tests into the spec
|
||||
runTestsFromJsonFile(*this, "bidi.json", errorCode);
|
||||
|
@ -350,15 +357,10 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
|
|||
// (This applies to the expected output for all the U_DUPLICATE_DECLARATION_ERROR tests)
|
||||
runTestsFromJsonFile(*this, "duplicate-declarations.json", errorCode);
|
||||
|
||||
// TODO(options):
|
||||
// Bad options. The spec is unclear about this
|
||||
// -- see https://github.com/unicode-org/message-format-wg/issues/738
|
||||
// The current behavior is to set a U_MF_FORMATTING_ERROR for any invalid options.
|
||||
runTestsFromJsonFile(*this, "invalid-options.json", errorCode);
|
||||
|
||||
runTestsFromJsonFile(*this, "syntax-errors-end-of-input.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "syntax-errors-diagnostics.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "invalid-number-literals-diagnostics.json", errorCode);
|
||||
runTestsFromJsonFile(*this, "syntax-errors-diagnostics-multiline.json", errorCode);
|
||||
|
||||
// ICU4J tests
|
||||
|
|
|
@ -28,6 +28,7 @@ class TestCase : public UMemory {
|
|||
/* const */ Locale locale;
|
||||
/* const */ std::map<UnicodeString, Formattable> arguments;
|
||||
/* const */ UErrorCode expectedError;
|
||||
/* const */ bool arbitraryError = false;
|
||||
/* const */ bool expectedNoSyntaxError;
|
||||
/* const */ bool hasExpectedOutput;
|
||||
/* const */ UnicodeString expected;
|
||||
|
@ -45,11 +46,14 @@ class TestCase : public UMemory {
|
|||
std::map<UnicodeString, Formattable> getArguments() const { return std::move(arguments); }
|
||||
const UnicodeString& getTestName() const { return testName; }
|
||||
bool expectSuccess() const {
|
||||
return (!ignoreError && U_SUCCESS(expectedError));
|
||||
return (!ignoreError && U_SUCCESS(expectedError) && !arbitraryError);
|
||||
}
|
||||
bool expectFailure() const {
|
||||
return (!ignoreError && U_FAILURE(expectedError));
|
||||
}
|
||||
// Returns true if this test case expects some error to occur without
// specifying which one (the flag is set via Builder::setExpectedAnyError()).
bool expectArbitraryError() const {
|
||||
return arbitraryError;
|
||||
}
|
||||
bool expectNoSyntaxError() const {
|
||||
return expectedNoSyntaxError;
|
||||
}
|
||||
|
@ -139,6 +143,10 @@ class TestCase : public UMemory {
|
|||
expectedError = U_SUCCESS(errorCode) ? U_ZERO_ERROR : errorCode;
|
||||
return *this;
|
||||
}
|
||||
// Marks the test case under construction as expecting an error of any kind.
// Returns this builder so calls can be chained.
Builder& setExpectedAnyError() {
|
||||
arbitraryError = true;
|
||||
return *this;
|
||||
}
|
||||
Builder& setNoSyntaxError() {
|
||||
expectNoSyntaxError = true;
|
||||
return *this;
|
||||
|
@ -182,6 +190,7 @@ class TestCase : public UMemory {
|
|||
bool hasExpectedOutput;
|
||||
UnicodeString expected;
|
||||
UErrorCode expectedError;
|
||||
bool arbitraryError;
|
||||
bool expectNoSyntaxError;
|
||||
bool hasLineNumberAndOffset;
|
||||
uint32_t lineNumber;
|
||||
|
@ -190,7 +199,7 @@ class TestCase : public UMemory {
|
|||
const MFFunctionRegistry* functionRegistry = nullptr; // Not owned
|
||||
|
||||
public:
|
||||
Builder() : pattern(""), locale(Locale::getDefault()), hasExpectedOutput(false), expected(""), expectedError(U_ZERO_ERROR), expectNoSyntaxError(false), hasLineNumberAndOffset(false), ignoreError(false) {}
|
||||
Builder() : pattern(""), locale(Locale::getDefault()), hasExpectedOutput(false), expected(""), expectedError(U_ZERO_ERROR), arbitraryError(false), expectNoSyntaxError(false), hasLineNumberAndOffset(false), ignoreError(false) {}
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -200,6 +209,7 @@ class TestCase : public UMemory {
|
|||
locale(builder.locale),
|
||||
arguments(builder.arguments),
|
||||
expectedError(builder.expectedError),
|
||||
arbitraryError(builder.arbitraryError),
|
||||
expectedNoSyntaxError(builder.expectNoSyntaxError),
|
||||
hasExpectedOutput(builder.hasExpectedOutput),
|
||||
expected(builder.expected),
|
||||
|
@ -270,6 +280,9 @@ class TestUtils {
|
|||
failExpectedSuccess(tmsg, testCase, errorCode, parseError.line, parseError.offset);
|
||||
return;
|
||||
}
|
||||
if (testCase.expectArbitraryError() && U_SUCCESS(errorCode)) {
|
||||
failExpectedArbitraryError(tmsg, testCase);
|
||||
}
|
||||
if (testCase.expectFailure() && errorCode != testCase.expectedErrorCode()) {
|
||||
failExpectedFailure(tmsg, testCase, errorCode);
|
||||
return;
|
||||
|
@ -323,6 +336,10 @@ class TestUtils {
|
|||
tmsg.errln(testCase.getTestName() + " failed test with wrong error code; pattern: " + testCase.getPattern() + " and error code " + UnicodeString(u_errorName(errorCode)) + " and expected error code: " + UnicodeString(u_errorName(testCase.expectedErrorCode())));
|
||||
errorCode.reset();
|
||||
}
|
||||
// Reports a failure for a test case that was expected to produce some error
// (any error code) but completed successfully. Logs the test name and the
// pattern that was being tested.
static void failExpectedArbitraryError(IntlTest& tmsg, const TestCase& testCase) {
|
||||
tmsg.dataerrln(testCase.getTestName());
|
||||
tmsg.errln(testCase.getTestName() + " succeeded although any error was expected; pattern: " + testCase.getPattern());
|
||||
}
|
||||
static void failWrongOutput(IntlTest& tmsg, const TestCase& testCase, const UnicodeString& result) {
|
||||
tmsg.dataerrln(testCase.getTestName());
|
||||
tmsg.logln(testCase.getTestName() + " failed test with wrong output; pattern: " + testCase.getPattern() + " and expected output = " + testCase.expectedOutput() + " and actual output = " + result);
|
||||
|
|
|
@ -227,6 +227,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
|
|||
TESTCASE_AUTO(Test11649_DecFmtCurrencies);
|
||||
TESTCASE_AUTO(Test13148_ParseGroupingSeparators);
|
||||
TESTCASE_AUTO(Test12753_PatternDecimalPoint);
|
||||
TESTCASE_AUTO(Test22303_PatternDecimalPoint_InfNaN);
|
||||
TESTCASE_AUTO(Test11647_PatternCurrencySymbols);
|
||||
TESTCASE_AUTO(Test11913_BigDecimal);
|
||||
TESTCASE_AUTO(Test11020_RoundingInScientificNotation);
|
||||
|
@ -9423,9 +9424,26 @@ void NumberFormatTest::Test12753_PatternDecimalPoint() {
|
|||
df.parse(u"123",result, status);
|
||||
assertEquals("Parsing integer succeeded even though setDecimalPatternMatchRequired was set",
|
||||
U_INVALID_FORMAT_ERROR, status);
|
||||
}
|
||||
}
|
||||
|
||||
void NumberFormatTest::Test11647_PatternCurrencySymbols() {
|
||||
void NumberFormatTest::Test22303_PatternDecimalPoint_InfNaN() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
DecimalFormatSymbols symbols(Locale::getUS(), status);
|
||||
symbols.setSymbol(DecimalFormatSymbols::kInfinitySymbol, u"infinity", false);
|
||||
symbols.setSymbol(DecimalFormatSymbols::kNaNSymbol, u"notanumber", false);
|
||||
DecimalFormat df(u"0.00", symbols, status);
|
||||
if (!assertSuccess("", status)) return;
|
||||
df.setDecimalPatternMatchRequired(true);
|
||||
Formattable result;
|
||||
df.parse(u"infinity", result, status);
|
||||
assertEquals("Should parse to +INF even though decimal is required", INFINITY, result.getDouble());
|
||||
df.parse(u"notanumber", result, status);
|
||||
assertEquals("Should parse to NaN even though decimal is required", NAN, result.getDouble());
|
||||
df.parse("-infinity", result, status);
|
||||
assertEquals("Should parse to -INF even though decimal is required", -INFINITY, result.getDouble());
|
||||
}
|
||||
|
||||
void NumberFormatTest::Test11647_PatternCurrencySymbols() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
DecimalFormat df(status);
|
||||
df.applyPattern(u"¤¤¤¤#", status);
|
||||
|
|
|
@ -283,6 +283,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
|
|||
void Test11649_DecFmtCurrencies();
|
||||
void Test13148_ParseGroupingSeparators();
|
||||
void Test12753_PatternDecimalPoint();
|
||||
void Test22303_PatternDecimalPoint_InfNaN();
|
||||
void Test11647_PatternCurrencySymbols();
|
||||
void Test11913_BigDecimal();
|
||||
void Test11020_RoundingInScientificNotation();
|
||||
|
|
1197
icu4c/source/test/intltest/utfiteratortest.cpp
Normal file
1197
icu4c/source/test/intltest/utfiteratortest.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -297,7 +297,7 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
|
|||
|
||||
if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
error(*line, "invariant characters required for table keys, binary data, etc.");
|
||||
error((line == nullptr) ? 0 : *line, "invariant characters required for table keys, binary data, etc.");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -7757,22 +7757,22 @@ class lexer : public lexer_base<BasicJsonType>
|
|||
}
|
||||
else
|
||||
{
|
||||
error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
|
||||
return token_type::parse_error;
|
||||
// ICU PATCH - See ICU-23090
|
||||
codepoint = codepoint1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
|
||||
return token_type::parse_error;
|
||||
// ICU PATCH - See ICU-23090
|
||||
codepoint = codepoint1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF))
|
||||
{
|
||||
error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
|
||||
return token_type::parse_error;
|
||||
// ICU PATCH - See ICU-23090
|
||||
codepoint = codepoint1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
<parent>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j-root</artifactId>
|
||||
<version>77.1</version>
|
||||
<version>78.0.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>demos</artifactId>
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
<parent>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j-root</artifactId>
|
||||
<version>77.1</version>
|
||||
<version>78.0.1-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
<parent>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j-root</artifactId>
|
||||
<version>77.1</version>
|
||||
<version>78.0.1-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -387,7 +387,7 @@ public final class CollationKey implements Comparable<CollationKey>
|
|||
* src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
|
||||
* </a>
|
||||
* <p>
|
||||
* Collation keys produced may be compared using the <TT>compare</TT> API.
|
||||
* Collation keys produced may be compared using the {@code compare} API.
|
||||
* @param boundType Mode of bound required. It can be BoundMode.LOWER, which
|
||||
* produces a lower inclusive bound, BoundMode.UPPER, that
|
||||
* produces upper bound that matches strings of the same
|
||||
|
@ -510,7 +510,7 @@ public final class CollationKey implements Comparable<CollationKey>
|
|||
* @param source CollationKey to merge with
|
||||
* @return a CollationKey that contains the valid merged sort keys
|
||||
* with a null String representation,
|
||||
* i.e. <tt>new CollationKey(null, merged_sort_keys)</tt>
|
||||
* i.e. {@code new CollationKey(null, merged_sort_keys)}
|
||||
* @exception IllegalArgumentException thrown if source CollationKey
|
||||
* argument is null or of 0 length.
|
||||
* @stable ICU 2.6
|
||||
|
|
|
@ -1487,9 +1487,9 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
|
|||
* {@icu} Returns the locale that was used to create this object, or null.
|
||||
* This may differ from the locale requested at the time of
|
||||
* this object's creation. For example, if an object is created
|
||||
* for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
|
||||
* drawn from <tt>en</tt> (the <i>actual</i> locale), and
|
||||
* <tt>en_US</tt> may be the most specific locale that exists (the
|
||||
* for locale {@code en_US_CALIFORNIA}, the actual data may be
|
||||
* drawn from {@code en} (the <i>actual</i> locale), and
|
||||
* {@code en_US} may be the most specific locale that exists (the
|
||||
* <i>valid</i> locale).
|
||||
*
|
||||
* <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
|
||||
|
|
|
@ -12,27 +12,27 @@ package com.ibm.icu.text;
|
|||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* <tt>SearchIterator</tt> is an abstract base class that provides
|
||||
* {@code SearchIterator} is an abstract base class that provides
|
||||
* methods to search for a pattern within a text string. Instances of
|
||||
* <tt>SearchIterator</tt> maintain a current position and scan over the
|
||||
* {@code SearchIterator} maintain a current position and scan over the
|
||||
* target text, returning the indices the pattern is matched and the length
|
||||
* of each match.
|
||||
* <p>
|
||||
* <tt>SearchIterator</tt> defines a protocol for text searching.
|
||||
* {@code SearchIterator} defines a protocol for text searching.
|
||||
* Subclasses provide concrete implementations of various search algorithms.
|
||||
* For example, <tt>StringSearch</tt> implements language-sensitive pattern
|
||||
* For example, {@code StringSearch} implements language-sensitive pattern
|
||||
* matching based on the comparison rules defined in a
|
||||
* <tt>RuleBasedCollator</tt> object.
|
||||
* {@code RuleBasedCollator} object.
|
||||
* <p>
|
||||
* Other options for searching include using a BreakIterator to restrict
|
||||
* the points at which matches are detected.
|
||||
* <p>
|
||||
* <tt>SearchIterator</tt> provides an API that is similar to that of
|
||||
* other text iteration classes such as <tt>BreakIterator</tt>. Using
|
||||
* {@code SearchIterator} provides an API that is similar to that of
|
||||
* other text iteration classes such as {@code BreakIterator}. Using
|
||||
* this class, it is easy to scan through text looking for all occurrences of
|
||||
* a given pattern. The following example uses a <tt>StringSearch</tt>
|
||||
* a given pattern. The following example uses a {@code StringSearch}
|
||||
* object to find all instances of "fox" in the target string. Any other
|
||||
* subclass of <tt>SearchIterator</tt> can be used in an identical
|
||||
* subclass of {@code SearchIterator} can be used in an identical
|
||||
* manner.
|
||||
* <pre><code>
|
||||
* String target = "The quick brown fox jumped over the lazy fox";
|
||||
|
@ -220,7 +220,7 @@ public abstract class SearchIterator
|
|||
* found, but the match's start or end index is not a
|
||||
* boundary as determined by the {@link BreakIterator},
|
||||
* the match will be rejected and another will be searched
|
||||
* for. If this parameter is <tt>null</tt>, no break
|
||||
* for. If this parameter is {@code null}, no break
|
||||
* detection is attempted.
|
||||
* @see BreakIterator
|
||||
* @stable ICU 2.0
|
||||
|
@ -329,7 +329,7 @@ public abstract class SearchIterator
|
|||
* Returns the BreakIterator that is used to restrict the indexes at which
|
||||
* matches are detected. This will be the same object that was passed to
|
||||
* the constructor or to {@link #setBreakIterator}.
|
||||
* If the {@link BreakIterator} has not been set, <tt>null</tt> will be returned.
|
||||
* If the {@link BreakIterator} has not been set, {@code null} will be returned.
|
||||
* See {@link #setBreakIterator} for more information.
|
||||
*
|
||||
* @return the BreakIterator set to restrict logic matches
|
||||
|
@ -546,7 +546,7 @@ public abstract class SearchIterator
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the first index equal or greater than <tt>position</tt> at which the
|
||||
* Returns the first index equal or greater than {@code position} at which the
|
||||
* string text matches the search pattern. The iterator is adjusted so
|
||||
* that its current index (as returned by {@link #getIndex()}) is the
|
||||
* match position if one was found.
|
||||
|
@ -555,7 +555,7 @@ public abstract class SearchIterator
|
|||
*
|
||||
* @param position where search is to start from.
|
||||
* @return The character index of the first match following
|
||||
* <tt>position</tt>, or {@link #DONE} if there are no matches.
|
||||
* {@code position}, or {@link #DONE} if there are no matches.
|
||||
* @throws IndexOutOfBoundsException If position is less than or greater
|
||||
* than the text range for searching.
|
||||
* @see #getIndex
|
||||
|
@ -586,7 +586,7 @@ public abstract class SearchIterator
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the first index less than <tt>position</tt> at which the string
|
||||
* Returns the first index less than {@code position} at which the string
|
||||
* text matches the search pattern. The iterator is adjusted so that its
|
||||
* current index (as returned by {@link #getIndex}) is the match
|
||||
* position if one was found. If a match is not found,
|
||||
|
@ -594,13 +594,13 @@ public abstract class SearchIterator
|
|||
* adjusted to the index {@link #DONE}
|
||||
* <p>
|
||||
* When the overlapping option ({@link #isOverlapping}) is off, the last index of the
|
||||
* result match is always less than <tt>position</tt>.
|
||||
* result match is always less than {@code position}.
|
||||
* When the overlapping option is on, the result match may span across
|
||||
* <tt>position</tt>.
|
||||
* {@code position}.
|
||||
*
|
||||
* @param position where search is to start from.
|
||||
* @return The character index of the first match preceding
|
||||
* <tt>position</tt>, or {@link #DONE} if there are
|
||||
* {@code position}, or {@link #DONE} if there are
|
||||
* no matches.
|
||||
* @throws IndexOutOfBoundsException If position is less than or greater than
|
||||
* the text range for searching
|
||||
|
|
|
@ -34,7 +34,7 @@ import com.ibm.icu.util.ULocale;
|
|||
|
||||
/**
|
||||
*
|
||||
* <tt>StringSearch</tt> is a {@link SearchIterator} that provides
|
||||
* {@code StringSearch} is a {@link SearchIterator} that provides
|
||||
* language-sensitive text searching based on the comparison rules defined
|
||||
* in a {@link RuleBasedCollator} object.
|
||||
* StringSearch ensures that language eccentricity can be
|
||||
|
@ -107,20 +107,20 @@ import com.ibm.icu.util.ULocale;
|
|||
* performing matches, there are no APIs here for setting and getting the
|
||||
* attributes. These attributes can be set by getting the collator
|
||||
* from {@link #getCollator} and using the APIs in {@link RuleBasedCollator}.
|
||||
* Lastly to update <tt>StringSearch</tt> to the new collator attributes,
|
||||
* Lastly to update {@code StringSearch} to the new collator attributes,
|
||||
* {@link #reset} has to be called.
|
||||
* <p>
|
||||
* Restriction: <br>
|
||||
* Currently there are no composite characters that consists of a
|
||||
* character with combining class > 0 before a character with combining
|
||||
* class == 0. However, if such a character exists in the future,
|
||||
* <tt>StringSearch</tt> does not guarantee the results for option 1.
|
||||
* {@code StringSearch} does not guarantee the results for option 1.
|
||||
* <p>
|
||||
* Consult the {@link SearchIterator} documentation for information on
|
||||
* and examples of how to use instances of this class to implement text
|
||||
* searching.
|
||||
* <p>
|
||||
* Note, <tt>StringSearch</tt> is not to be subclassed.
|
||||
* Note, {@code StringSearch} is not to be subclassed.
|
||||
* </p>
|
||||
* @see SearchIterator
|
||||
* @see RuleBasedCollator
|
||||
|
@ -290,12 +290,12 @@ public final class StringSearch extends SearchIterator {
|
|||
/**
|
||||
* Gets the {@link RuleBasedCollator} used for the language rules.
|
||||
* <p>
|
||||
* Since <tt>StringSearch</tt> depends on the returned {@link RuleBasedCollator}, any
|
||||
* Since {@code StringSearch} depends on the returned {@link RuleBasedCollator}, any
|
||||
* changes to the {@link RuleBasedCollator} result should follow with a call to
|
||||
* either {@link #reset()} or {@link #setCollator(RuleBasedCollator)} to ensure the correct
|
||||
* search behavior.
|
||||
* </p>
|
||||
* @return {@link RuleBasedCollator} used by this <tt>StringSearch</tt>
|
||||
* @return {@link RuleBasedCollator} used by this {@code StringSearch}
|
||||
* @see RuleBasedCollator
|
||||
* @see #setCollator
|
||||
* @stable ICU 2.0
|
||||
|
@ -308,7 +308,7 @@ public final class StringSearch extends SearchIterator {
|
|||
* Sets the {@link RuleBasedCollator} to be used for language-specific searching.
|
||||
* <p>
|
||||
* The iterator's position will not be changed by this method.
|
||||
* @param collator to use for this <tt>StringSearch</tt>
|
||||
* @param collator to use for this {@code StringSearch}
|
||||
* @throws IllegalArgumentException thrown when collator is null
|
||||
* @see #getCollator
|
||||
* @stable ICU 2.0
|
||||
|
@ -334,7 +334,7 @@ public final class StringSearch extends SearchIterator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the pattern for which <tt>StringSearch</tt> is searching for.
|
||||
* Returns the pattern that {@code StringSearch} is searching for.
|
||||
* @return the pattern searched for
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
|
|
@ -1,78 +0,0 @@
|
|||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
public class LocaleAliasCollationTest extends com.ibm.icu.dev.test.TestFmwk {
|
||||
private static final ULocale[][] _LOCALES = {
|
||||
{new ULocale("en", "RH"), new ULocale("en", "ZW")},
|
||||
{new ULocale("in"), new ULocale("id")},
|
||||
{new ULocale("in", "ID"), new ULocale("id", "ID")},
|
||||
{new ULocale("iw"), new ULocale("he")},
|
||||
{new ULocale("iw", "IL"), new ULocale("he", "IL")},
|
||||
{new ULocale("ji"), new ULocale("yi")},
|
||||
|
||||
{new ULocale("en", "BU"), new ULocale("en", "MM")},
|
||||
{new ULocale("en", "DY"), new ULocale("en", "BJ")},
|
||||
{new ULocale("en", "HV"), new ULocale("en", "BF")},
|
||||
{new ULocale("en", "NH"), new ULocale("en", "VU")},
|
||||
{new ULocale("en", "TP"), new ULocale("en", "TL")},
|
||||
{new ULocale("en", "ZR"), new ULocale("en", "CD")}
|
||||
};
|
||||
|
||||
private static final int _LOCALE_NUMBER = _LOCALES.length;
|
||||
private ULocale[] available = null;
|
||||
private HashMap<String, String> availableMap = new HashMap<>();
|
||||
private static final ULocale _DEFAULT_LOCALE = ULocale.US;
|
||||
|
||||
public LocaleAliasCollationTest() {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void init() {
|
||||
available = ULocale.getAvailableLocales();
|
||||
for(int i=0; i<available.length;i++){
|
||||
availableMap.put(available[i].toString(),"");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestCollation() {
|
||||
ULocale defLoc = ULocale.getDefault();
|
||||
ULocale.setDefault(_DEFAULT_LOCALE);
|
||||
for (int i=0; i<_LOCALE_NUMBER; i++) {
|
||||
ULocale oldLoc = _LOCALES[i][0];
|
||||
ULocale newLoc = _LOCALES[i][1];
|
||||
if(availableMap.get(_LOCALES[i][1])==null){
|
||||
logln(_LOCALES[i][1]+" is not available. Skipping!");
|
||||
continue;
|
||||
}
|
||||
Collator c1 = Collator.getInstance(oldLoc);
|
||||
Collator c2 = Collator.getInstance(newLoc);
|
||||
|
||||
if (!c1.equals(c2)) {
|
||||
errln("CollationTest: c1!=c2: newLoc= "+newLoc +" oldLoc= "+oldLoc);
|
||||
}
|
||||
|
||||
logln("Collation old:"+oldLoc+" new:"+newLoc);
|
||||
}
|
||||
ULocale.setDefault(defLoc);
|
||||
}
|
||||
}
|
|
@ -8,7 +8,7 @@
|
|||
<parent>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j-root</artifactId>
|
||||
<version>77.1</version>
|
||||
<version>78.0.1-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -5511,6 +5511,22 @@ public class NumberFormatTest extends CoreTestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Verifies that the infinity and NaN symbols are still parsed when
 * {@code setDecimalPatternMatchRequired(true)} is set, even though those
 * inputs contain no decimal separator. Custom symbol strings are installed
 * on the {@code DecimalFormatSymbols} before parsing.
 */
@Test
|
||||
public void Test22303() throws ParseException {
|
||||
ULocale locale = new ULocale("en-US");
|
||||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
|
||||
symbols.setInfinity("infinity");
|
||||
symbols.setNaN("notanumber");
|
||||
DecimalFormat df = new DecimalFormat("0.00", symbols);
|
||||
df.setDecimalPatternMatchRequired(true);
|
||||
Number result = df.parse("infinity");
|
||||
assertEquals("Should parse to +INF even though decimal is required", Double.POSITIVE_INFINITY, result);
|
||||
result = df.parse("notanumber");
|
||||
assertEquals("Should parse to NaN even though decimal is required", Double.NaN, result);
|
||||
result = df.parse("-infinity");
|
||||
assertEquals("Should parse to -INF even though decimal is required", Double.NEGATIVE_INFINITY, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void Test12962() {
|
||||
String pat = "**0.00";
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
<parent>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j-root</artifactId>
|
||||
<version>77.1</version>
|
||||
<version>78.0.1-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
@ -19,6 +19,12 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>tools_taglets</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>framework</artifactId>
|
||||
|
|
|
@ -615,8 +615,8 @@ public class CalendarAstronomer {
|
|||
* longitude will have the desired value.
|
||||
* <p>
|
||||
* @param desired The desired longitude.
|
||||
* @param next <tt>true</tt> if the next occurrance of the phase
|
||||
* is desired, <tt>false</tt> for the previous occurrance.
|
||||
* @param next {@code true} if the next occurrence of the phase
|
||||
* is desired, {@code false} for the previous occurrence.
|
||||
* @internal
|
||||
*/
|
||||
public long getMoonTime(double desired, boolean next)
|
||||
|
@ -635,8 +635,8 @@ public class CalendarAstronomer {
|
|||
* desired phase.
|
||||
* <p>
|
||||
* @param desired The desired phase of the moon.
|
||||
* @param next <tt>true</tt> if the next occurrance of the phase
|
||||
* is desired, <tt>false</tt> for the previous occurrance.
|
||||
* @param next {@code true} if the next occurrence of the phase
|
||||
* is desired, {@code false} for the previous occurrence.
|
||||
* @internal
|
||||
*/
|
||||
public long getMoonTime(MoonAge desired, boolean next) {
|
||||
|
|
|
@ -27,7 +27,7 @@ import com.ibm.icu.text.UTF16;
|
|||
* when you have a lot of redundance, such as where all 21,000 Han ideographs
|
||||
* have the same value. However, lookup is much faster than a hash table.
|
||||
* A trie of any primitive data type serves two purposes:
|
||||
* <UL type = round>
|
||||
* <UL>
|
||||
* <LI>Fast access of the indexed values.
|
||||
* <LI>Smaller memory footprint.
|
||||
* </UL>
|
||||
|
|
|
@ -23,7 +23,7 @@ import com.ibm.icu.lang.UCharacter;
|
|||
* when you have a lot of redundance, such as where all 21,000 Han ideographs
|
||||
* have the same value. However, lookup is much faster than a hash table.
|
||||
* A trie of any primitive data type serves two purposes:
|
||||
* <UL type = round>
|
||||
* <UL>
|
||||
* <LI>Fast access of the indexed values.
|
||||
* <LI>Smaller memory footprint.
|
||||
* </UL>
|
||||
|
|
|
@ -49,32 +49,32 @@ import com.ibm.icu.text.UTF16;
|
|||
public class UnicodeMapIterator<T> {
|
||||
|
||||
/**
|
||||
* Value of <tt>codepoint</tt> if the iterator points to a string.
|
||||
* If <tt>codepoint == IS_STRING</tt>, then examine
|
||||
* <tt>string</tt> for the current iteration result.
|
||||
* Value of {@code codepoint} if the iterator points to a string.
|
||||
* If {@code codepoint == IS_STRING}, then examine
|
||||
* {@code string} for the current iteration result.
|
||||
*/
|
||||
public static int IS_STRING = -1;
|
||||
|
||||
/**
|
||||
* Current code point, or the special value <tt>IS_STRING</tt>, if
|
||||
* Current code point, or the special value {@code IS_STRING}, if
|
||||
* the iterator points to a string.
|
||||
*/
|
||||
public int codepoint;
|
||||
|
||||
/**
|
||||
* When iterating over ranges using <tt>nextRange()</tt>,
|
||||
* <tt>codepointEnd</tt> contains the inclusive end of the
|
||||
* iteration range, if <tt>codepoint != IS_STRING</tt>. If
|
||||
* iterating over code points using <tt>next()</tt>, or if
|
||||
* <tt>codepoint == IS_STRING</tt>, then the value of
|
||||
* <tt>codepointEnd</tt> is undefined.
|
||||
* When iterating over ranges using {@code nextRange()},
|
||||
* {@code codepointEnd} contains the inclusive end of the
|
||||
* iteration range, if {@code codepoint != IS_STRING}. If
|
||||
* iterating over code points using {@code next()}, or if
|
||||
* {@code codepoint == IS_STRING}, then the value of
|
||||
* {@code codepointEnd} is undefined.
|
||||
*/
|
||||
public int codepointEnd;
|
||||
|
||||
/**
|
||||
* If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
|
||||
* to the current string. If <tt>codepoint != IS_STRING</tt>, the
|
||||
* value of <tt>string</tt> is undefined.
|
||||
* If {@code codepoint == IS_STRING}, then {@code string} points
|
||||
* to the current string. If {@code codepoint != IS_STRING}, the
|
||||
* value of {@code string} is undefined.
|
||||
*/
|
||||
public String string;
|
||||
|
||||
|
@ -92,8 +92,8 @@ public class UnicodeMapIterator<T> {
|
|||
}
|
||||
|
||||
/**
|
||||
* Create an iterator over nothing. <tt>next()</tt> and
|
||||
* <tt>nextRange()</tt> return false. This is a convenience
|
||||
* Create an iterator over nothing. {@code next()} and
|
||||
* {@code nextRange()} return false. This is a convenience
|
||||
* constructor allowing the target to be set later.
|
||||
*/
|
||||
public UnicodeMapIterator() {
|
||||
|
@ -103,16 +103,16 @@ public class UnicodeMapIterator<T> {
|
|||
/**
|
||||
* Returns the next element in the set, either a single code point
|
||||
* or a string. If there are no more elements in the set, return
|
||||
* false. If <tt>codepoint == IS_STRING</tt>, the value is a
|
||||
* string in the <tt>string</tt> field. Otherwise the value is a
|
||||
* single code point in the <tt>codepoint</tt> field.
|
||||
* false. If {@code codepoint == IS_STRING}, the value is a
|
||||
* string in the {@code string} field. Otherwise the value is a
|
||||
* single code point in the {@code codepoint} field.
|
||||
*
|
||||
* <p>The order of iteration is all code points in sorted order,
|
||||
* followed by all strings sorted order. <tt>codepointEnd</tt> is
|
||||
* undefined after calling this method. <tt>string</tt> is
|
||||
* undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
|
||||
* calls to <tt>next()</tt> and <tt>nextRange()</tt> without
|
||||
* calling <tt>reset()</tt> between them. The results of doing so
|
||||
* followed by all strings sorted order. {@code codepointEnd} is
|
||||
* undefined after calling this method. {@code string} is
|
||||
* undefined unless {@code codepoint == IS_STRING}. Do not mix
|
||||
* calls to {@code next()} and {@code nextRange()} without
|
||||
* calling {@code reset()} between them. The results of doing so
|
||||
* are undefined.
|
||||
*
|
||||
* @return true if there was another element in the set and this
|
||||
|
@ -143,17 +143,17 @@ public class UnicodeMapIterator<T> {
|
|||
/**
|
||||
* Returns the next element in the set, either a code point range
|
||||
* or a string. If there are no more elements in the set, return
|
||||
* false. If <tt>codepoint == IS_STRING</tt>, the value is a
|
||||
* string in the <tt>string</tt> field. Otherwise the value is a
|
||||
* range of one or more code points from <tt>codepoint</tt> to
|
||||
* <tt>codepointeEnd</tt> inclusive.
|
||||
* false. If {@code codepoint == IS_STRING}, the value is a
|
||||
* string in the {@code string} field. Otherwise the value is a
|
||||
* range of one or more code points from {@code codepoint} to
|
||||
* {@code codepointEnd} inclusive.
|
||||
*
|
||||
* <p>The order of iteration is all code point ranges in sorted
|
||||
* order, followed by all strings in sorted order. Ranges are
|
||||
* disjoint and non-contiguous. <tt>string</tt> is undefined
|
||||
* unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
|
||||
* <tt>next()</tt> and <tt>nextRange()</tt> without calling
|
||||
* <tt>reset()</tt> between them. The results of doing so are
|
||||
* disjoint and non-contiguous. {@code string} is undefined
|
||||
* unless {@code codepoint == IS_STRING}. Do not mix calls to
|
||||
* {@code next()} and {@code nextRange()} without calling
|
||||
* {@code reset()} between them. The results of doing so are
|
||||
* undefined.
|
||||
*
|
||||
* @return true if there was another element in the set and this
|
||||
|
@ -188,7 +188,7 @@ public class UnicodeMapIterator<T> {
|
|||
/**
|
||||
* Sets this iterator to visit the elements of the given set and
|
||||
* resets it to the start of that set. The iterator is valid only
|
||||
* so long as <tt>set</tt> is valid.
|
||||
* so long as {@code set} is valid.
|
||||
* @param set the set to iterate over.
|
||||
*/
|
||||
public void reset(UnicodeMap set) {
|
||||
|
|
|
@ -75,7 +75,7 @@ public final class InternalLocaleBuilder {
|
|||
_variant = "";
|
||||
} else {
|
||||
// normalize separators to "_"
|
||||
String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
|
||||
String var = variant.replace(LanguageTag.SEP, BaseLocale.SEP);
|
||||
int errIdx = checkVariants(var, BaseLocale.SEP);
|
||||
if (errIdx != -1) {
|
||||
throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
|
||||
|
@ -121,7 +121,7 @@ public final class InternalLocaleBuilder {
|
|||
} else {
|
||||
if (type.length() != 0) {
|
||||
// normalize separator to "-"
|
||||
String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
|
||||
String tp = type.replace(BaseLocale.SEP, LanguageTag.SEP);
|
||||
// validate
|
||||
StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
|
||||
while (!itr.isDone()) {
|
||||
|
@ -166,7 +166,7 @@ public final class InternalLocaleBuilder {
|
|||
}
|
||||
} else {
|
||||
// validate value
|
||||
String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
|
||||
String val = value.replace(BaseLocale.SEP, LanguageTag.SEP);
|
||||
StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
|
||||
while (!itr.isDone()) {
|
||||
String s = itr.current();
|
||||
|
@ -202,7 +202,7 @@ public final class InternalLocaleBuilder {
|
|||
clearExtensions();
|
||||
return this;
|
||||
}
|
||||
subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
|
||||
subtags = subtags.replace(BaseLocale.SEP, LanguageTag.SEP);
|
||||
StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);
|
||||
|
||||
List<String> extensions = null;
|
||||
|
@ -485,7 +485,7 @@ public final class InternalLocaleBuilder {
|
|||
if (sb.length() != 0) {
|
||||
sb.append(BaseLocale.SEP);
|
||||
}
|
||||
sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP));
|
||||
sb.append(privuse.substring(privVarStart).replace(LanguageTag.SEP, BaseLocale.SEP));
|
||||
variant = sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,9 +2,12 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
|
||||
public final class LSR {
|
||||
|
@ -147,44 +150,96 @@ public final class LSR {
|
|||
return (encodeLanguageToInt() + (27*27*27) * encodeRegionToInt(m49)) |
|
||||
(encodeScriptToInt() << 24);
|
||||
}
|
||||
private static String toLanguage(int encoded) {
|
||||
if (encoded == 0) return "";
|
||||
if (encoded == 1) return "skip";
|
||||
encoded &= 0x00ffffff;
|
||||
encoded %= 27*27*27;
|
||||
StringBuilder res = new StringBuilder(3);
|
||||
res.append((char)('a' + ((encoded % 27) - 1)));
|
||||
res.append((char)('a' + (((encoded / 27 ) % 27) - 1)));
|
||||
if (encoded / (27 * 27) != 0) {
|
||||
res.append((char)('a' + ((encoded / (27 * 27)) - 1)));
|
||||
|
||||
/**
|
||||
* CachedDecoder uses string pools to reduce memory needed for creating
|
||||
* strings representing lang, region and script.
|
||||
*/
|
||||
private static class CachedDecoder {
|
||||
private static final String[] DECODED_ZERO =
|
||||
new String[] {/*lang=*/ "", /*script=*/ "", /*region=*/ ""};
|
||||
private static final String[] DECODED_ONE =
|
||||
new String[] {/*lang=*/ "skip", /*script=*/ "script", /*region=*/ ""};
|
||||
|
||||
// Use local string pools instead of String.intern(), because a java runtime may put interned
|
||||
// string into the GC root, and never get released if ICU4J needs to be unloaded.
|
||||
// String.intern() could also be slower than a simple java.util.HashMap.
|
||||
private final HashMap<Integer, String> langsCache;
|
||||
private final HashMap<Integer, String> scriptsCache;
|
||||
private final HashMap<Integer, String> regionsCache;
|
||||
|
||||
private final String[] m49;
|
||||
|
||||
CachedDecoder(String[] m49) {
|
||||
int estLangCacheCapacity = 556; // ~= LocaleIDs._languages.length
|
||||
langsCache = new HashMap<>(estLangCacheCapacity);
|
||||
scriptsCache = new HashMap<>(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT));
|
||||
int estRegionCacheCapacity = 253; // ~= LocaleIDs._countries.length
|
||||
regionsCache = new HashMap<>(estRegionCacheCapacity);
|
||||
this.m49 = m49;
|
||||
}
|
||||
return res.toString();
|
||||
}
|
||||
private static String toScript(int encoded) {
|
||||
if (encoded == 0) return "";
|
||||
if (encoded == 1) return "script";
|
||||
encoded = (encoded >> 24) & 0x000000ff;
|
||||
return UScript.getShortName(encoded);
|
||||
}
|
||||
private static String toRegion(int encoded, String[] m49) {
|
||||
if (encoded == 0 || encoded == 1) return "";
|
||||
encoded &= 0x00ffffff;
|
||||
encoded /= 27 * 27 * 27;
|
||||
encoded %= 27 * 27;
|
||||
if (encoded < 27) {
|
||||
return m49[encoded];
|
||||
|
||||
/**
|
||||
* @return a String[3] object where the first element is a language code, the second element
|
||||
* is a script code, and the third element is a region code.
|
||||
*/
|
||||
String[] decode(int encoded) {
|
||||
if (encoded == 0) {
|
||||
return DECODED_ZERO;
|
||||
}
|
||||
if (encoded == 1) {
|
||||
return DECODED_ONE;
|
||||
}
|
||||
|
||||
int encodedLang = encoded & 0x00ffffff;
|
||||
encodedLang %= 27*27*27;
|
||||
String lang = langsCache.computeIfAbsent(encodedLang, CachedDecoder::toLanguage);
|
||||
|
||||
int encodedScript = (encoded >> 24) & 0x000000ff;
|
||||
String script = scriptsCache.computeIfAbsent(encodedScript, UScript::getShortName);
|
||||
|
||||
int encodedRegion = encoded & 0x00ffffff;
|
||||
encodedRegion /= 27 * 27 * 27;
|
||||
encodedRegion %= 27 * 27;
|
||||
|
||||
String region;
|
||||
if (encodedRegion < 27) {
|
||||
region = m49[encodedRegion];
|
||||
} else {
|
||||
region = regionsCache.computeIfAbsent(encodedRegion, CachedDecoder::toRegion);
|
||||
}
|
||||
|
||||
return new String[] {lang, script, region};
|
||||
}
|
||||
|
||||
private static String toLanguage(int encoded) {
|
||||
StringBuilder res = new StringBuilder(3);
|
||||
res.append((char)('a' + ((encoded % 27) - 1)));
|
||||
res.append((char)('a' + (((encoded / 27 ) % 27) - 1)));
|
||||
if (encoded / (27 * 27) != 0) {
|
||||
res.append((char)('a' + ((encoded / (27 * 27)) - 1)));
|
||||
}
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
private static String toRegion(int encoded) {
|
||||
StringBuilder res = new StringBuilder(3);
|
||||
res.append((char)('A' + ((encoded % 27) - 1)));
|
||||
res.append((char)('A' + (((encoded / 27) % 27) - 1)));
|
||||
return res.toString();
|
||||
}
|
||||
StringBuilder res = new StringBuilder(3);
|
||||
res.append((char)('A' + ((encoded % 27) - 1)));
|
||||
res.append((char)('A' + (((encoded / 27) % 27) - 1)));
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
public static LSR[] decodeInts(int[] nums, String[] m49) {
|
||||
LSR[] lsrs = new LSR[nums.length];
|
||||
|
||||
// The decoder uses string pools to reduce memory impact.
|
||||
// At least 7k LSR instances are created from this path.
|
||||
CachedDecoder decoder = new CachedDecoder(m49);
|
||||
for (int i = 0; i < nums.length; ++i) {
|
||||
int n = nums[i];
|
||||
lsrs[i] = new LSR(toLanguage(n), toScript(n), toRegion(n, m49), LSR.IMPLICIT_LSR);
|
||||
int encoded = nums[i];
|
||||
String[] lsrStrings = decoder.decode(encoded);
|
||||
lsrs[i] = new LSR(lsrStrings[0], lsrStrings[1], lsrStrings[2], LSR.IMPLICIT_LSR);
|
||||
}
|
||||
return lsrs;
|
||||
}
|
||||
|
|
|
@ -355,11 +355,27 @@ public final class LikelySubtags {
|
|||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value > 0;
|
||||
assert value != 0;
|
||||
if (value < 0) {
|
||||
retainLanguage = !language.isEmpty();
|
||||
retainScript = !script.isEmpty();
|
||||
retainRegion = !region.isEmpty();
|
||||
// Fallback to und_$region =>
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value == 0;
|
||||
long trieUndEmptyState = iter.getState64();
|
||||
value = trieNext(iter, region, 0);
|
||||
// Fallback to und =>
|
||||
if (value < 0) {
|
||||
iter.resetToState64(trieUndEmptyState);
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value > 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
LSR result = lsrs[value];
|
||||
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.isEmpty())))) {
|
||||
|
@ -370,17 +386,21 @@ public final class LikelySubtags {
|
|||
}
|
||||
|
||||
if (! (retainLanguage || retainScript || retainRegion)) {
|
||||
assert result.flags == LSR.IMPLICIT_LSR;
|
||||
return result;
|
||||
assert value >= 0;
|
||||
assert lsrs[value].flags == LSR.IMPLICIT_LSR;
|
||||
return lsrs[value];
|
||||
}
|
||||
if (!retainLanguage) {
|
||||
language = result.language;
|
||||
assert value >= 0;
|
||||
language = lsrs[value].language;
|
||||
}
|
||||
if (!retainScript) {
|
||||
script = result.script;
|
||||
assert value >= 0;
|
||||
script = lsrs[value].script;
|
||||
}
|
||||
if (!retainRegion) {
|
||||
region = result.region;
|
||||
assert value >= 0;
|
||||
region = lsrs[value].region;
|
||||
}
|
||||
int retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
|
|
|
@ -23,8 +23,9 @@ public class RequireDecimalSeparatorValidator extends ValidationMatcher {
|
|||
|
||||
@Override
|
||||
public void postProcess(ParsedNumber result) {
|
||||
boolean parseIsInfNaN = 0 != (result.flags & ParsedNumber.FLAG_INFINITY) || 0 != (result.flags & ParsedNumber.FLAG_NAN);
|
||||
boolean parseHasDecimalSeparator = 0 != (result.flags & ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR);
|
||||
if (parseHasDecimalSeparator != patternHasDecimalSeparator) {
|
||||
if (!parseIsInfNaN && parseHasDecimalSeparator != patternHasDecimalSeparator) {
|
||||
result.flags |= ParsedNumber.FLAG_FAIL;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -183,7 +183,7 @@ public class ConversionRates {
|
|||
|
||||
|
||||
String keyString = key.toString();
|
||||
String valueString = value.toString().replaceAll(" ", "");
|
||||
String valueString = value.toString().replace(" ", "");
|
||||
if ("target".equals(keyString)) {
|
||||
target = valueString;
|
||||
} else if ("factor".equals(keyString)) {
|
||||
|
|
|
@ -11,7 +11,7 @@ import com.ibm.icu.lang.UCharacter;
|
|||
/* ------------------------------------------------------------------ */
|
||||
/* BigDecimal -- Decimal arithmetic for Java */
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Copyright IBM Corporation, 1996-2016. All Rights Reserved. */
|
||||
/* Copyright IBM Corporation, 1996-2016. All Rights Reserved. */
|
||||
/* */
|
||||
/* The BigDecimal class provides immutable arbitrary-precision */
|
||||
/* floating point (including integer) decimal numbers. */
|
||||
|
@ -1237,15 +1237,15 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable
|
|||
* and returns a result of type <code>int</code>.
|
||||
* <p>
|
||||
* The result will be:
|
||||
* <table cellpadding=2>
|
||||
* <table style="padding:2px;">
|
||||
* <tr>
|
||||
* <td align=right><b>-1</b></td> <td>if the current object is less than the first parameter</td>
|
||||
* <td style="text-align:right"><b>-1</b></td> <td>if the current object is less than the first parameter</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td align=right><b>0</b></td> <td>if the current object is equal to the first parameter</td>
|
||||
* <td style="text-align:right"><b>0</b></td> <td>if the current object is equal to the first parameter</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td align=right><b>1</b></td> <td>if the current object is greater than the first parameter.</td>
|
||||
* <td style="text-align:right"><b>1</b></td> <td>if the current object is greater than the first parameter.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
* <p>
|
||||
|
|
|
@ -992,9 +992,9 @@ public abstract class BreakIterator implements Cloneable
|
|||
* {@icu} Returns the locale that was used to create this object, or null.
|
||||
* This may differ from the locale requested at the time of
|
||||
* this object's creation. For example, if an object is created
|
||||
* for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
|
||||
* drawn from <tt>en</tt> (the <i>actual</i> locale), and
|
||||
* <tt>en_US</tt> may be the most specific locale that exists (the
|
||||
* for locale {@code en_US_CALIFORNIA}, the actual data may be
|
||||
* drawn from {@code en} (the <i>actual</i> locale), and
|
||||
* {@code en_US} may be the most specific locale that exists (the
|
||||
* <i>valid</i> locale).
|
||||
*
|
||||
* <p>Note: The <i>actual</i> locale is returned correctly, but the <i>valid</i>
|
||||
|
|
|
@ -15,7 +15,7 @@ import com.ibm.icu.impl.Normalizer2Impl;
|
|||
* One problem is that this class is not designed to return supplementary characters.
|
||||
* Use the Normalizer2 and UCharacter classes instead.
|
||||
* <p>
|
||||
* <tt>ComposedCharIter</tt> is an iterator class that returns all
|
||||
* {@code ComposedCharIter} is an iterator class that returns all
|
||||
* of the precomposed characters defined in the Unicode standard, along
|
||||
* with their decomposed forms. This is often useful when building
|
||||
* data tables (<i>e.g.</i> collation tables) which need to treat composed
|
||||
|
@ -30,17 +30,17 @@ import com.ibm.icu.impl.Normalizer2Impl;
|
|||
* <p>
|
||||
* You can avoid this problem by ensuring that the collation table contains
|
||||
* rules for both the decomposed <i>and</i> composed versions of each character.
|
||||
* To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
|
||||
* To do so, use a {@code ComposedCharIter} to iterate through all of the
|
||||
* composed characters in Unicode. If the decomposition for that character
|
||||
* consists solely of characters that are listed in your ruleset, you can
|
||||
* add a new rule for the composed character that makes it equivalent to
|
||||
* its decomposition sequence.
|
||||
* <p>
|
||||
* Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
|
||||
* Note that {@code ComposedCharIter} iterates over a <em>static</em> table
|
||||
* of the composed characters in Unicode. If you want to iterate over the
|
||||
* composed characters in a particular string, use {@link Normalizer} instead.
|
||||
* <p>
|
||||
* When constructing a <tt>ComposedCharIter</tt> there is one
|
||||
* When constructing a {@code ComposedCharIter} there is one
|
||||
* optional feature that you can enable or disable:
|
||||
* <ul>
|
||||
* <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
|
||||
|
@ -50,7 +50,7 @@ import com.ibm.icu.impl.Normalizer2Impl;
|
|||
* is a canonical decomposition.
|
||||
* </ul>
|
||||
* <p>
|
||||
* <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
|
||||
* {@code ComposedCharIter} is currently based on version 2.1.8 of the
|
||||
* <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
|
||||
* It will be updated as later versions of Unicode are released.
|
||||
* @deprecated ICU 2.2
|
||||
|
@ -68,7 +68,7 @@ public final class ComposedCharIter {
|
|||
public static final char DONE = (char) Normalizer.DONE;
|
||||
|
||||
/**
|
||||
* Construct a new <tt>ComposedCharIter</tt>. The iterator will return
|
||||
* Construct a new {@code ComposedCharIter}. The iterator will return
|
||||
* all Unicode characters with canonical decompositions, including Korean
|
||||
* Hangul characters.
|
||||
* @deprecated ICU 2.2
|
||||
|
@ -79,10 +79,10 @@ public final class ComposedCharIter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
|
||||
* Constructs a non-default {@code ComposedCharIter} with optional behavior.
|
||||
* <p>
|
||||
* @param compat <tt>false</tt> for canonical decompositions only;
|
||||
* <tt>true</tt> for both canonical and compatibility
|
||||
* @param compat {@code false} for canonical decompositions only;
|
||||
* {@code true} for both canonical and compatibility
|
||||
* decompositions.
|
||||
*
|
||||
* @param options Optional decomposition features. None are supported, so this is ignored.
|
||||
|
@ -112,10 +112,10 @@ public final class ComposedCharIter {
|
|||
|
||||
/**
|
||||
* Returns the next precomposed Unicode character.
|
||||
* Repeated calls to <tt>next</tt> return all of the precomposed characters defined
|
||||
* Repeated calls to {@code next} return all of the precomposed characters defined
|
||||
* by Unicode, in ascending order. After all precomposed characters have
|
||||
* been returned, {@link #hasNext} will return <tt>false</tt> and further calls
|
||||
* to <tt>next</tt> will return {@link #DONE}.
|
||||
* been returned, {@link #hasNext} will return {@code false} and further calls
|
||||
* to {@code next} will return {@link #DONE}.
|
||||
* @deprecated ICU 2.2
|
||||
*/
|
||||
@Deprecated
|
||||
|
|
|
@ -2244,7 +2244,7 @@ public class DateFormatSymbols implements Serializable, Cloneable {
|
|||
}
|
||||
}
|
||||
if (calType == null) {
|
||||
calType = className.replaceAll("Calendar", "").toLowerCase(Locale.ENGLISH);
|
||||
calType = className.replace("Calendar", "").toLowerCase(Locale.ENGLISH);
|
||||
}
|
||||
|
||||
initializeData(locale, calType);
|
||||
|
@ -2354,9 +2354,9 @@ public class DateFormatSymbols implements Serializable, Cloneable {
|
|||
* Returns the locale that was used to create this object, or null.
|
||||
* This may differ from the locale requested at the time of
|
||||
* this object's creation. For example, if an object is created
|
||||
* for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
|
||||
* drawn from <tt>en</tt> (the <i>actual</i> locale), and
|
||||
* <tt>en_US</tt> may be the most specific locale that exists (the
|
||||
* for locale {@code en_US_CALIFORNIA}, the actual data may be
|
||||
* drawn from {@code en} (the <i>actual</i> locale), and
|
||||
* {@code en_US} may be the most specific locale that exists (the
|
||||
* <i>valid</i> locale).
|
||||
*
|
||||
* <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue