Merge branch 'main' into breakiter-api-modern

This commit is contained in:
Elango Cheran 2025-01-08 15:19:29 -08:00
commit 19364023c3
1456 changed files with 17360 additions and 22053 deletions

View file

@ -1,6 +1,6 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
// Generated using tools/cldr/cldr-to-icu/
//
// Include Japanese adaboost model.
{

View file

@ -1,6 +1,6 @@
// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
// Generated using tools/cldr/cldr-to-icu/
//
// Include Burmese and Thai lstm models.
{

View file

@ -1,25 +1,25 @@
TODO: Please describe your changes here.
TODO: Delete the following lines but _keep and fill out_ (!) the checklist below.
TODO: Please read the following on ICU Contributing, and then delete these instructions.
Thank you for your pull request!\
General info on contributing: Please see https://github.com/unicode-org/icu/blob/main/CONTRIBUTING.md
Thank you for your pull request!
Contributors license agreement (CLA):
- You will be automatically asked to sign the CLA before the PR is accepted.
- To sign the CLA: https://cla-assistant.io/unicode-org/icu
* For general info on contributing: https://github.com/unicode-org/icu/blob/main/CONTRIBUTING.md
* Associating PRs with Jira issues
- We require each pull request to be associated with a [Jira issue](https://icu.unicode.org/bugs).
- Reuse existing issues for minor changes:
* ICU 77 docs minor fixes: ICU-22921 — User Guide & API docs typos etc., and version updates (e.g., dependabot for User Guide)
* ICU 77 code warnings/version updates: ICU-22920 — Fix compiler warnings. Update versions of code-related dependencies (e.g., dependabot).
* Contributors license agreement (CLA):
- You will be automatically asked to sign the CLA before the PR is accepted.
- To sign the CLA: https://cla-assistant.io/unicode-org/icu
- For terms of use and license, see https://www.unicode.org/terms_of_use.html
For terms of use and license, see https://www.unicode.org/terms_of_use.html
TODO: End of lines to delete.
TODO: Fill out the checklist below.
#### Checklist
- [ ] Required: Issue filed: https://unicode-org.atlassian.net/browse/ICU-NNNNN
- For minor changes you can use one of the following per-release tickets:
- ICU 77 code warnings/version updates: ICU-22920 — Fix compiler warnings. Update versions of code-related dependencies (e.g., dependabot).
- ICU 77 docs minor fixes: ICU-22921 — User Guide & API docs typos etc., and version updates (e.g., dependabot for User Guide)
- [ ] Required: Issue filed: ICU-NNNNN
- [ ] Required: The PR title must be prefixed with a JIRA Issue number. Example: "ICU-1234 Fix xyz"
- [ ] Required: The PR description must include the link to the Jira Issue, for example by completing the URL in the first checklist item
- [ ] Required: Each commit message must be prefixed with a JIRA Issue number. Example: "ICU-1234 Fix xyz"
- [ ] Issue accepted (done by Technical Committee after discussion)
- [ ] Tests included, if applicable

View file

@ -35,7 +35,11 @@ permissions:
jobs:
retain-maven-cache:
name: Run all tests with Maven
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
# Only run this on the upstream repo. Otherwise, running in a personal fork will cause
# Github to disable the personal fork copy of the workflow
# (Github complains about running a scheduled workflow on a repo with > 60 days of inactivity)
if: github.ref == 'refs/heads/main' && github.repository == 'unicode-org/unicodetools'
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

View file

@ -26,7 +26,7 @@ on:
permissions: {}
jobs:
Fuzzing:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
permissions:
security-events: write
strategy:

View file

@ -42,7 +42,7 @@ jobs:
# ICU4C docs build using doxygen..
icu4c-docs-build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -65,7 +65,7 @@ jobs:
#
# Invokes test/hdrtst to check public headers compliance.
gcc-debug-build-and-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -85,7 +85,7 @@ jobs:
#gcc 11 with c++ 20
gcc11-cpp20:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Install GCC-11
@ -110,7 +110,7 @@ jobs:
# (FORCE guards make this tool pass but won't compile to working code.
# See the testtagsguards.sh script for details.)
clang-release-build-and-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -158,7 +158,7 @@ jobs:
build_option:
[ --enable-static, --enable-static --disable-shared ]
# --disable-shared has a build problem.
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -174,7 +174,7 @@ jobs:
# Out of source build with gcc 10, c++14, and extra warnings; executes icuinfo.
gcc-10-stdlib17:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -195,7 +195,7 @@ jobs:
# Clang Linux with address sanitizer.
clang-asan:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -209,7 +209,7 @@ jobs:
LDFLAGS: -fsanitize=address
# Clang Linux with leak sanitizer.
clang-lsan:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -224,7 +224,7 @@ jobs:
ASAN_OPTIONS: detect_leaks=1
# Clang Linux with undefined-behavior sanitizer.
clang-ubsan:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -239,7 +239,7 @@ jobs:
LDFLAGS: -fsanitize=undefined -fsanitize=alignment -fno-sanitize-recover=undefined,alignment
# Control Flow Integrity.
clang-cfi:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -258,7 +258,7 @@ jobs:
# Clang Linux with thread sanitizer.
clang-tsan:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -280,7 +280,7 @@ jobs:
# this check became flaky. The build apparently was not done copying one or another .ucm file before
# calling makeconv for it, although the Makefile has appropriate dependencies.
clang-datafilter:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Extract ICU version
@ -302,7 +302,7 @@ jobs:
# Clang Linux with CPP 17
clang-cpp17:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Build ICU4C with CPP 17
@ -313,7 +313,7 @@ jobs:
# Clang Linux with LANG: en_US@calendar=gregorian;hours=h12
clang-lang-with-extn-tags:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Linux Clang - LANG has extension tags
@ -324,7 +324,7 @@ jobs:
# Clang Linux 18 with CPP20 and treat warnings as errors
clang18-cpp20-warning-as-errors:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
strategy:
fail-fast: false
matrix:
@ -349,7 +349,7 @@ jobs:
# MacOS with clang
macos-clang:
runs-on: macos-latest
runs-on: macos-14 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: ICU4C with clang on MacOS
@ -362,7 +362,7 @@ jobs:
# Windows MSVC builds
windows-msvc:
runs-on: windows-2022
runs-on: windows-2022 # Updated in BRS
strategy:
fail-fast: false
matrix:
@ -383,10 +383,10 @@ jobs:
uses: microsoft/setup-msbuild@v2
- name: Build Solution x64
if: contains(matrix.test_flags, 'arm Release')
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=Release /p:Platform=x64
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=Release /p:Platform=x64 /p:SkipUWP=true
- name: Build Solution
run: |
msbuild icu4c/source/allinone/allinone.sln ${{ matrix.build_flags }}
msbuild icu4c/source/allinone/allinone.sln ${{ matrix.build_flags }} /p:SkipUWP=true
- name: Run ${{ matrix.test_flags }} Tests (icucheck.bat)
if: contains(matrix.test_flags, 'arm Release') == false
run: |
@ -394,7 +394,7 @@ jobs:
# Windows data filter build
windows-msvc-datafilter:
runs-on: windows-latest
runs-on: windows-2022 # Updated in BRS
timeout-minutes: 30
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -410,7 +410,7 @@ jobs:
# Windows MSVC distribution release
windows-msvc-dist-release:
runs-on: windows-latest
runs-on: windows-2022 # Updated in BRS
permissions:
contents: write # So that we can upload to release
timeout-minutes: 30
@ -433,9 +433,9 @@ jobs:
uses: microsoft/setup-msbuild@v2
- name: Build Solution x64
if: contains(matrix.win_ver, 'ARM64')
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=Release /p:Platform=x64
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=Release /p:Platform=x64 /p:SkipUWP=true
- name: Build Solution
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=Release /p:Platform=${{ matrix.plat }}
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=Release /p:Platform=${{ matrix.plat }} /p:SkipUWP=true
- name: Run Tests (icucheck.bat)
if: contains(matrix.win_ver, 'ARM64') == false
run: icu4c/source/allinone/icucheck.bat ${{ matrix.arch }} Release
@ -482,7 +482,7 @@ jobs:
# Windows MSYS2 tests
windows-msys2-gcc-x86_64:
runs-on: windows-latest
runs-on: windows-2022 # Updated in BRS
timeout-minutes: 45
defaults:
run:
@ -519,7 +519,7 @@ jobs:
# Run ICU4C tests with stubdata.
run-with-stubdata:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -562,7 +562,7 @@ jobs:
# Test U_CHARSET_IS_UTF8
u-charset-is-utf8-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: |
@ -572,7 +572,7 @@ jobs:
# Test U_OVERRIDE_CXX_ALLOCATION-is-0-test
u-override-cxx-allocation-is-0-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: |
@ -583,7 +583,7 @@ jobs:
# Test LSTM
lstm-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: |
@ -594,7 +594,7 @@ jobs:
# Test adaboost
adaboost-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: |
@ -605,7 +605,7 @@ jobs:
# Build and run testmap
testmap:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: |
@ -618,28 +618,28 @@ jobs:
# Copyright scan
copyright-scan:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: perl tools/scripts/cpysearch/cpyscan.pl
# Check compilation of internal headers.
internal-header-compilation:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: cd icu4c/source; test/hdrtst/testinternalheaders.sh
# Check source files for valid UTF-8 and for absence of BOM.
valid-UTF-8-and-no-BOM-check:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: tools/scripts/icu-file-utf8-check.py
# Run unit tests with UCONFIG_NO_XXX variations.
uconfig-unit-tests:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
strategy:
# "fail-fast: false" lets other jobs keep running even if the test breaks in some other uconfig.
fail-fast: false
@ -686,7 +686,7 @@ jobs:
# Run header tests with UCONFIG_NO_XXX variations.
uconfig-header-tests:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
strategy:
# "fail-fast: false" lets other jobs keep running even if the test breaks in some other uconfig.
fail-fast: false
@ -737,7 +737,7 @@ jobs:
# Build Unicode update tools
unicode-update-tools:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: bazelbuild/setup-bazelisk@b39c379c82683a5f25d34f0d062761f62693e0b2 # v3.0.0
@ -776,7 +776,7 @@ jobs:
# Build and run ICU4C samples
icu4c-test-samples:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -801,7 +801,7 @@ jobs:
# https://unicode-org.github.io/icu/processes/release/tasks/integration.html#verify-that-icu4c-tests-pass-without-collation-rule-strings
icu4c-without-collation-rule-strings:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Install hjson dependency
@ -841,7 +841,7 @@ jobs:
# https://unicode-org.github.io/icu/processes/release/tasks/healthy-code.html#test-uconfig_no_conversion
icu4c-uconfig-no-conversion:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Set UCONFIG_NO_CONVERSION and configure ICU4C
@ -861,7 +861,7 @@ jobs:
# Workflow for ICU Export Data for ICU4X
icu4c-icuexportdata:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
permissions:
contents: write # So that we can upload to release
timeout-minutes: 30

View file

@ -44,7 +44,7 @@ jobs:
# This job is created according to the cache strategy of reuse from a single job:
# https://github.com/actions/cache/blob/main/caching-strategies.md#make-cache-read-only--reuse-cache-from-centralized-job
icu4j-mvn-init-cache:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@v4
@ -76,7 +76,7 @@ jobs:
fail-fast: false
matrix:
java-version: [ '8', '11', '17', '21' ]
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@v4
@ -109,7 +109,7 @@ jobs:
lstm-icu4j-build-and-test:
if: false # TODO(ICU-22505)
needs: icu4j-mvn-init-cache
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@v4
@ -150,7 +150,7 @@ jobs:
adaboost-icu4j-build-and-test:
if: false # Temporary disable, until we disable the .jar creation from C and distribute the individual files
needs: icu4j-mvn-init-cache
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@v4

View file

@ -33,14 +33,14 @@ jobs:
# Copyright scan
copyright-scan:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@v4
- run: perl tools/scripts/cpysearch/cpyscan.pl
# Check source files for valid UTF-8 and for absence of BOM.
valid-UTF-8-and-no-BOM-check:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@v4
- run: tools/scripts/icu-file-utf8-check.py
@ -50,7 +50,7 @@ jobs:
# This job is created according to the cache strategy of reuse from a single job:
# https://github.com/actions/cache/blob/main/caching-strategies.md#make-cache-read-only--reuse-cache-from-centralized-job
icu4j-mvn-init-cache:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@v4
@ -77,7 +77,7 @@ jobs:
# Verify icu4c release tools buildability.
icu4c-release-tools:
needs: icu4j-mvn-init-cache
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@v4
- name: Restore read-only cache of local Maven repository

View file

@ -37,7 +37,7 @@ jobs:
# Keep in sync with deploy workflow in `jekyll-gh-pages.yml`
test-docs-build:
name: Test build of User Guide docs
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout
uses: actions/checkout@v4

View file

@ -25,7 +25,7 @@ jobs:
#=================================================================
# locale env tests.
env-test-locale:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
strategy:
# "fail-fast: false" lets other jobs keep running even if the test breaks in some locales.
fail-fast: false
@ -82,7 +82,7 @@ jobs:
#=================================================================
# tz env tests.
env-test-tz:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
strategy:
# "fail-fast: false" lets other jobs keep running even if the test breaks in some timezones.
fail-fast: false

View file

@ -38,7 +38,7 @@ concurrency:
jobs:
# Runs exhaustive tests for ICU4J on Linux
icu4j-linux:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
timeout-minutes: 180
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -55,7 +55,7 @@ jobs:
icu4c-linux-clang:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
timeout-minutes: 120
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

View file

@ -36,7 +36,7 @@ jobs:
# This job is created according to the cache strategy of reuse from a single job:
# https://github.com/actions/cache/blob/main/caching-strategies.md#make-cache-read-only--reuse-cache-from-centralized-job
icu4j-mvn-init-cache:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -62,7 +62,7 @@ jobs:
# Test ICU4J with little-endian ICU4C data only
icu4j-little-endian-data-test:
needs: icu4j-mvn-init-cache
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -97,7 +97,7 @@ jobs:
icu4c-store-perf-libs:
# Run performance tests only on the main branch of the ICU repository.
if: github.repository == 'unicode-org/icu' && github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -138,7 +138,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Create directory for lib files
@ -212,7 +212,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Create directory for lib files
@ -277,7 +277,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Create directory for lib files
@ -326,7 +326,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -388,7 +388,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -452,7 +452,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -521,7 +521,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -660,7 +660,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -743,7 +743,7 @@ jobs:
contents: write
deployments: write
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout and setup
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -807,7 +807,7 @@ jobs:
if: github.repository == 'unicode-org/icu' && github.ref == 'refs/heads/main'
name: Copy perf data to remote repo for visualization
needs: [icu4c-performance-tests, icu4c-performance-tests-with-files, icu4c-strsrchperf, icu4j-unicodesetperf, icu4j-ucharacterperf, icu4j-decimalformatperf, icu4j-normperf, icu4j-converterperf, icu4j-dateformatperf]
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -827,7 +827,7 @@ jobs:
# Build ICU and tests sample on some windows configurations
icu4c-windows-msvc-postmerge:
runs-on: windows-2022
runs-on: windows-2022 # Updated in BRS
timeout-minutes: 30
strategy:
matrix:
@ -843,16 +843,16 @@ jobs:
- name: Set up MSBuild
uses: microsoft/setup-msbuild@v2
- name: Build Solution
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=${{ matrix.config }} /p:Platform=${{ matrix.platform }}
run: msbuild icu4c/source/allinone/allinone.sln /p:Configuration=${{ matrix.config }} /p:Platform=${{ matrix.platform }} /p:SkipUWP=true
- name: Run Tests (icucheck.bat)
run: icu4c/source/allinone/icucheck.bat ${{ matrix.arch }} ${{ matrix.config }}
- name: Build Sample Solution
run: msbuild icu4c/source/samples/all/all.sln /p:Configuration=${{ matrix.config }} /p:Platform=${{ matrix.arch }}
run: msbuild icu4c/source/samples/all/all.sln /p:Configuration=${{ matrix.config }} /p:Platform=${{ matrix.arch }} /p:SkipUWP=true
- name: Test Samples (samplecheck.bat)
run: icu4c/source/samples/all/samplecheck.bat ${{ matrix.arch }} ${{ matrix.config }}
icu4c-windows-cygwin-gcc:
runs-on: windows-latest
runs-on: windows-2022 # Updated in BRS
timeout-minutes: 50
env:
ICU_CI_CACHE: c:\icu-ci-cache

View file

@ -34,7 +34,7 @@ permissions:
jobs:
clang-valgrind-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Install valgrind
run: |
@ -72,7 +72,7 @@ jobs:
--show-reachable=yes ./icuinfo;
clang-valgrind-intltest:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
strategy:
# "fail-fast: false" lets other jobs keep running even if the test breaks in some other test.
fail-fast: false

View file

@ -34,7 +34,7 @@ jobs:
# Build job
# Keep in sync with docs test workflow in `icu_docs.yml`
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
steps:
- name: Checkout
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@ -71,7 +71,7 @@ jobs:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
needs: build
steps:
- name: Deploy to GitHub Pages

View file

@ -14,7 +14,7 @@ permissions:
jobs:
publish:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
permissions:
packages: write
outputs:

View file

@ -13,7 +13,7 @@ env:
jobs:
sign_and_checksums:
if: ${{ inputs.gitReleaseTag && startsWith(inputs.gitReleaseTag, 'release-') }}
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
environment: release-env
permissions:

View file

@ -16,7 +16,7 @@ env:
jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
environment: release-env
container:

View file

@ -16,7 +16,7 @@ env:
jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
environment: release-env
permissions:

View file

@ -39,7 +39,7 @@ env:
jobs:
publish:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
environment: release-env
permissions:

View file

@ -20,7 +20,7 @@ permissions: read-all
jobs:
analysis:
name: Scorecard analysis
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write

View file

@ -15,7 +15,7 @@ on:
jobs:
enforce-all-checks:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04 # Updated in BRS
permissions:
checks: read
steps:

33
KEYS
View file

@ -1312,3 +1312,36 @@ buAuLkKKI35T4Lz2U41GMVIMZ7drlhbfDGugk1qT1cXJPZbPnHt+uFNUY0pTJaf3
WapxBp2/lKhgsIE=
=ZWln
-----END PGP PUBLIC KEY BLOCK-----
pub ed25519 2024-08-19 [SC] [expires: 2027-08-19]
E52F07877A5805F9AF4AB0ACD46C5610D06E7001
uid [ultimate] ICU Release Robot <icu-robot@unicode.org>
sig 3 D46C5610D06E7001 2024-08-19 ICU Release Robot <icu-robot@unicode.org>
sig 3 9B432B27D1BA20D7 2024-10-01 Fredrik Roubert <fredrik@roubert.name>
sub cv25519 2024-08-19 [E] [expires: 2027-08-19]
sig D46C5610D06E7001 2024-08-19 ICU Release Robot <icu-robot@unicode.org>
-----BEGIN PGP PUBLIC KEY BLOCK-----
mDMEZsO4fRYJKwYBBAHaRw8BAQdAcxaiyqFbaECpSz7mhsXzopzN9Cxwv80WlWGN
gM3qpOi0KUlDVSBSZWxlYXNlIFJvYm90IDxpY3Utcm9ib3RAdW5pY29kZS5vcmc+
iJkEExYKAEEWIQTlLweHelgF+a9KsKzUbFYQ0G5wAQUCZsO4fQIbAwUJBaOagAUL
CQgHAgIiAgYVCgkICwIEFgIDAQIeBwIXgAAKCRDUbFYQ0G5wAQOWAQDs628TYI+F
oTaJ1jzqC3ObL6we4tmrW7yHL1KFR5xrhwD/ZioLjdrIUQZhcjV9rNt7mYZm/Ttl
hn06bx06eyjf0QyJAjMEEwEIAB0WIQT/qRKaGA12W3pb6hybQysn0bog1wUCZvwt
VgAKCRCbQysn0bog11wED/0dUw/riFIixjNRAoi+beQ1f42dG/60xm7qquZ4kE3n
KysN773YgE1Gl8wPsFBbAAznO++fJ2ONjTks4QNOV0MB5Rgv+LwK+5oOKDRA9LxE
QRCRYtFpErbCirIn/EGVJJCCbF+SoUY8rJL+PTfzHGDkJGK0lQIA49rdWjV9PW06
4FRufzNrg4jNMCsJ5i67mjSb267C7I2mcfM3xX2nqZdOFW+wdctm9fvAWpIMQEQ5
6J0wIQxwtF3GulKqr/7EV0LSB0Is0E4BBjwROMFiSa53Va/lcfZaQ7nJ8flsAfkO
gGmvGjM5VH1kO2hnhm5ps5x/fIqSbw7X2nYsUXjbL6YKHbUsbV1ybpS5XCnMcTyx
t5zmWkfZNLZk4+Qh/td6MyW7OmycY2GazRpMA28pX6gQYFBcSoXHNFqIyXU7wFJp
BmDqe8gbi7FIlewwS+XP5dWfNvP5EqfztO7p8hxB2O1ZIYxMKerr0Q6rvY9/fEL5
fd7OMdK5OkQWASF7NjDFG9I50N4W/1+1jUHytp6bBtjjdnebp4a8gidP88HeGidk
2ONV9Yf0hOZUpRfMEFxpLxd9Ker928iZpopXOZ7AipFcXghWrReyTMljqXYvzNtd
qRdH3WmnNx4zw2K/JgAtOw9Domc09UzEFFHgAscUVXvsoVX4g8soZnBWomtlCfU0
/bg4BGbDuH0SCisGAQQBl1UBBQEBB0Djm2qMxtO+jO+uhT5nScClgbSvdmWAjbt9
RIQh60+4QgMBCAeIfgQYFgoAJhYhBOUvB4d6WAX5r0qwrNRsVhDQbnABBQJmw7h9
AhsMBQkFo5qAAAoJENRsVhDQbnABkbUA/ib52SlXmDcBPsvCz+VQOsqt1OvmeioS
NSbBQ0KK5M66AQCs7zuWUQQhptmeB2l7sP528nsFv4FJfZ/McqauwTq7Bg==
=BHrV
-----END PGP PUBLIC KEY BLOCK-----

View file

@ -49,7 +49,7 @@ Please use the [icu-support mailing list](https://icu.unicode.org/contacts) and/
The initial release has library version number 76.1.
* Release date: _planned for_ 2024-10-24
* Release date: 2024-10-24
* [List of tickets fixed in ICU 76](https://unicode-org.atlassian.net/issues/?jql=project%20%3D%20ICU%20AND%20status%20%3D%20Done%20AND%20resolution%20in%20%28Fixed%2C%20%22Fixed%20by%20Other%20Ticket%22%29%20AND%20fixVersion%20%3D%2076.1%20ORDER%20BY%20component%20ASC%2C%20created%20DESC)
If there are maintenance releases, they will be 76.2, 76.3, etc. (During ICU 76 development, the library version number was 76.0.x.)
@ -100,6 +100,7 @@ Note: There may be additional commits on the [maint/maint-76](https://github.com
traditional vs. simplified forms of radicals are now distinguished on a lower level than the number of residual strokes.
In alphabetic indexes for radical-stroke sort orders,
only the traditional forms of radicals are now available as index characters.
* Updated IBM EBCDIC code page 1388 (Simplified Chinese) mapping data to ibm-cp1388_P100-2024 ([ICU-22596](https://unicode-org.atlassian.net/browse/ICU-22596))
* Time zone data (tzdata) version 2024b (2024-sep). Note that pre-1970 data for a number of time zones has been removed, as has been the case in the upstream [tzdata](https://www.iana.org/time-zones) release since 2021b.
* The Asia/Almaty time zone has become an alias following IANA TZ database changes.
* CLDR added support for deprecated timezone codes by remapping:
@ -145,7 +146,9 @@ Note: There may be additional commits on the [maint/maint-76](https://github.com
## Known Issues
* None yet
* One late CLDR 46 bug fix was accidentally omitted from ICU 76:
[CLDR-18002](https://unicode-org.atlassian.net/browse/CLDR-18002) Fix likely subtag inconsistency
([CLDR PR #4105](https://github.com/unicode-org/cldr/pull/4105))
## Migration Issues
@ -242,17 +245,16 @@ ICU4J should work on Android API level 21 and later but may require “[library
## Download
Source and binary downloads are available on the git/GitHub tag page: https://github.com/unicode-org/icu/releases/tag/release-76-rc
Source and binary downloads are available on the git/GitHub tag page: <https://github.com/unicode-org/icu/releases/tag/release-76-1>
See the [Source Code Setup](../devsetup/source/) page for how to download the ICU file tree directly from GitHub.
ICU locale data was generated from CLDR data equivalent to:
* https://github.com/unicode-org/cldr/releases/tag/release-46-beta3
* https://github.com/unicode-org/cldr-staging/releases/tag/release-46-beta3
* <https://github.com/unicode-org/cldr/releases/tag/release-46>
* <https://github.com/unicode-org/cldr-staging/releases/tag/release-46>
[Maven dependency](https://central.sonatype.com/artifact/com.ibm.icu/icu4j):
TODO
```
<dependency>
<groupId>com.ibm.icu</groupId>

View file

@ -14,29 +14,24 @@ License & terms of use: http://www.unicode.org/copyright.html
If you want to use ICU (as opposed to developing it), it is recommended that you download an official packaged version of the ICU source code. These versions are tested more thoroughly than day-to-day development builds of the system, and they are packaged in zip and tar files for convenient download. Here are several recent releases of ICU that are available:
## Release Candidate
***2024-09-30: ICU 76 release candidate is now available.***
## Latest Release
***2024-10-24: ICU 76 is now available.***
It updates to [Unicode 16](https://www.unicode.org/versions/Unicode16.0.0/) ([blog](https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html)), including new characters and scripts, emoji, collation & IDNA changes, and corresponding APIs and implementations. It also updates to [CLDR 46](https://cldr.unicode.org/downloads/cldr-46) ([beta blog](https://blog.unicode.org/2024/09/unicode-cldr-46-beta-available-for.html)) locale data with new locales, significant updates to existing locales, and various additions and corrections. For example, the CLDR and Unicode default sort orders are now very nearly the same.
Most of the java.time (Temporal) types can now be formatted directly. There are some new APIs to make ICU easier to use with modern C++ and Java patterns. The Java and C++ technology preview implementations of the CLDR MessageFormat 2.0 specification have been updated to match recent changes. See [ICU 76](76.md).
Please test this release candidate on your platforms and report bugs and regressions by Monday, 2024-oct-21, via the [icu-support](https://icu.unicode.org/contacts) mailing list, and/or please [find/submit error reports](https://icu.unicode.org/bugs).
Please do not use this release candidate in production.
## Latest Release
***2024-04-17: ICU 75 is now available.*** It updates to [CLDR 45](https://cldr.unicode.org/index/downloads/cldr-45) ([beta blog](https://blog.unicode.org/2024/04/unicode-cldr-v45-beta-available-for.html)) locale data with new locales and various additions and corrections. C++ code now requires C++17 and is being made more robust. The CLDR MessageFormat 2.0 specification is now in [technology preview](https://github.com/unicode-org/message-format-wg?tab=readme-ov-file#messageformat-2-technical-preview), together with a corresponding update of the ICU4J (Java) tech preview and a new ICU4C (C++) tech preview. See [Downloading ICU &gt; ICU 75](https://icu.unicode.org/download/75).
## Previous Releases
- 2024-04-17: **ICU 75** updates to [CLDR 45](https://cldr.unicode.org/index/downloads/cldr-45) ([beta blog](https://blog.unicode.org/2024/04/unicode-cldr-v45-beta-available-for.html)) locale data with new locales and various additions and corrections. C++ code now requires C++17 and is being made more robust. The CLDR MessageFormat 2.0 specification is now in [technology preview](https://github.com/unicode-org/message-format-wg?tab=readme-ov-file#messageformat-2-technical-preview), together with a corresponding update of the ICU4J (Java) tech preview and a new ICU4C (C++) tech preview. See [Downloading ICU &gt; ICU 75](https://icu.unicode.org/download/75).
- 2023-12-13: **ICU 74.2** released with date/time formatting bug fixes. See [Downloading ICU &gt; ICU 74](https://icu.unicode.org/download/74).
- 2023-10-31: **ICU 74** is now available. It updates to [Unicode 15.1](http://blog.unicode.org/2023/09/announcing-unicode-standard-version-151.html), including new characters, emoji, security mechanisms, and corresponding APIs and implementations. It also updates to [CLDR 44](https://cldr.unicode.org/index/downloads/cldr-44) ([blog](https://blog.unicode.org/2023/10/unicode-cldr-v44-available.html)) locale data with new locales and various additions and corrections. See [Downloading ICU &gt; ICU 74](https://icu.unicode.org/download/74).
- 2023-10-31: **ICU 74** updates to [Unicode 15.1](http://blog.unicode.org/2023/09/announcing-unicode-standard-version-151.html), including new characters, emoji, security mechanisms, and corresponding APIs and implementations. It also updates to [CLDR 44](https://cldr.unicode.org/index/downloads/cldr-44) ([blog](https://blog.unicode.org/2023/10/unicode-cldr-v44-available.html)) locale data with new locales and various additions and corrections. See [Downloading ICU &gt; ICU 74](https://icu.unicode.org/download/74).
- 2023-06-15: **ICU 73.2** is now available. Maintenance release with changes for GB18030 compliance, English AM/PM spaces, word segmentation around @ sign, etc. See [Downloading ICU &gt; ICU 73](https://icu.unicode.org/download/73).
- 2023-06-15: **ICU 73.2**: Maintenance release with changes for GB18030 compliance, English AM/PM spaces, word segmentation around @ sign, etc. See [Downloading ICU &gt; ICU 73](https://icu.unicode.org/download/73).
- 2023-04-13: **ICU 73** is now available. It updates to [CLDR 43](https://blog.unicode.org/2023/04/the-unicode-cldr-v43-released.html) locale data with various additions and corrections.
- 2023-04-13: **ICU 73** updates to [CLDR 43](https://blog.unicode.org/2023/04/the-unicode-cldr-v43-released.html) locale data with various additions and corrections.
ICU 73 improves Japanese and Korean short-text line breaking, reduces C++ memory use in date formatting, and promotes the Java person name formatter from tech preview to draft.
For details, see [Downloading ICU &gt; ICU 73](https://icu.unicode.org/download/73).

View file

@ -27,8 +27,8 @@ All Rights Reserved.
# Intro and setup
These instructions describe how to regenerate ICU4C locale and linguistic data from CLDR,
and then how to convert that ICU4 data for ICU4J (data jars and maven resources).
They apply to CLDR 44 / ICU 74 and later.
and then how to convert that ICU4C data for ICU4J (data jars and maven resources).
They apply to CLDR 47 / ICU 77 and later.
To use these instructions just for generating ICU4J data from ICU4C, you only need to use
steps 1, 8, and 12 in the Process section.
@ -37,22 +37,26 @@ The full process requires local copies of
* CLDR (the source of most of the data, and some Java tools)
* The complete ICU source tree, including:
* tools: includes the LdmlConverter build tool and associated config files
* icu4c: the target for converted CLDR data, and source for ICU4J data; includes tests for the converted data
* icu4j: the target for updated data jars; includes tests for the converted data
* `tools`: includes the `LdmlConverter` build tool and associated config files
* `icu4c`: the target for converted CLDR data, and source for ICU4J data; includes tests for the converted data
* `icu4j`: the target for updated data jars; includes tests for the converted data
For an official CLDR data integration into ICU, these should be clean, freshly
checked-out. For released CLDR sources, an alternative to checking out sources
for a given version is downloading the zipped sources for the common (core.zip)
and tools (tools.zip) directory subtrees from the Data column in
for a given version is downloading the zipped sources for the common (`core.zip`)
and tools (`tools.zip`) directory subtrees from the Data column in
[CLDR Releases/Downloads](https://cldr.unicode.org/index/downloads)
Besides a standard JDK, the process also requires [ant](https://ant.apache.org) and
Besides a standard JDK 11+, the process also requires [ant](https://ant.apache.org) and
[maven](https://maven.apache.org) plus the xml-apis.jar from the
[Apache xalan package](https://xalan.apache.org/xalan-j/downloads.html) _(Is this
latter requirement still true?)_. You will also need to have performed the
latter requirement still true?)_.
If you do CLDR development you can configure maven as documented at
[CLDR Maven setup](http://cldr.unicode.org/development/maven) (non-Eclipse version).
But for the CLDR to ICU data conversion, or for regular ICU development this is not needed.
Notes:
* Enough things can (and will) fail in this process that it is best to
@ -65,12 +69,12 @@ Notes:
files are used in addition to the CLDR files as inputs to the CLDR data build
process for ICU):
* The primary file to edit for adding/removing locales and/or collation and
rbnf data is<br>
`$TOOLS_ROOT/cldr/cldr-to-icu/build-icu-data.xml`.
`rbnf` data is \
`$ICU_DIR/tools/cldr/cldr-to-icu/config.xml`.
* There are some files in `icu4c/source/data/xml/` that may need editing for
certain additions. This is especially true for brkitr additions; however there
are rbnf files there that add some rules. The collation files there mainly
hook up the UCA collation rules in `icu4c/data/unidata/UCARules.txt` to the
certain additions. This is especially true for `brkitr` additions; however there
are `rbnf` files there that add some rules. The collation files there mainly
hook up the UCA collation rules in `icu4c/source/data/unidata/UCARules.txt` to the
collation data. To process these files, certain CLDR dtds are copied over to
ICU.
@ -88,14 +92,14 @@ considerations:
# CLDR prerequisites for BRS integrations
The following tasks should be done in the CLDR repo before beginning a CLDR-ICU
integration that ss part of the BRS process; handle each of these using a separate
integration that is part of the BRS process; handle each of these using a separate
ticket and a separate PR:
1. Generate updated CLDR test data (which is copied to ICU), using the process in
[Generating CLDR testData](https://docs.google.com/document/d/1-RC99npKcSSwUoYGkSzxaKOe76gYRkWhGdFzCdIBCu4/edit#heading=h.2rum9c6hrr4w)
2. Run CLDRModify with no options with no options and then with -fP. The webpage
for CLDRModify is currently being converted to markdown, a reference to it will
2. Run `CLDRModify` with no options and then with `-fP`. The web page
for `CLDRModify` is currently being converted to markdown, a reference to it will
be added when that process is complete.
# Environment variables
@ -120,61 +124,61 @@ There are several environment variables that need to be defined.
* `CLDR_TMP_DIR`: Parent of temporary CLDR production data. Defaults to
`$CLDR_DIR/../cldr-aux` (sibling to `CLDR_DIR`).
> **NOTE:** As of CLDR 36 and 37, the GenerateProductionData tool no longer
> **NOTE:** As of CLDR 36 and 37, the `GenerateProductionData` tool no longer
generates data by default into `$CLDR_TMP_DIR/production`; instead it
generates data into `$CLDR_DIR/../cldr-staging/production` (though there is
a command-line option to override this). However the rest of the build still
assumes that the generated data is in `$CLDR_TMP_DIR/production`.
So `CLDR_TMP_DIR` must be defined to be `$CLDR_DIR/../cldr-staging`.
3. ICU-related variables
* `ICU4C_DIR`: Path to root of ICU4C sources, below which is the source dir.
* `ICU_DIR`: Path to root of ICU directory, below which are (e.g.) the
`icu4c`, `icu4j`, and `tools` directories.
* `ICU4J_ROOT`: Path to root of ICU4J sources, below which is the main dir.
* `ICU4C_DIR`: Path to root of ICU4C sources, below which is the `source` dir.
* `ICU4J_ROOT`: Path to root of ICU4J sources, below which is the `main` dir.
* `TOOLS_ROOT`: Path to root of ICU tools directory, below which are (e.g.) the
cldr and unicodetools dirs.
# Process
## 1 Environment variables
1a. Java, ant, and maven variables, adjust for your system
```
```sh
export JAVA_HOME=/usr/libexec/java_home
export ANT_OPTS="-Xmx8192m"
export MAVEN_ARGS="--no-transfer-progress"
```
1b. CLDR variables, adjust for your setup; with cygwin it might be e.g.
```
```sh
CLDR_DIR=`cygpath -wp /build/cldr`
```
Note that for cldr-staging we do not use personal forks, we commit directly.
```
```sh
export CLDR_DIR=$HOME/cldr-myfork
export CLDR_TMP_DIR=$HOME/cldr-staging
export CLDR_DATA_DIR=$HOME/cldr-staging/production
```
1c. ICU variables
```
```sh
export ICU4C_DIR=$HOME/icu-myfork/icu4c
export ICU4J_ROOT=$HOME/icu-myfork/icu4j
export TOOLS_ROOT=$HOME/icu-myfork/tools
```
1d. Directory for logs/notes (create if does not exist)
```
```sh
export NOTES=...(some directory)...
mkdir -p $NOTES
```
1e. The name of the icu data directory for Java (for example `icudt74b`)
```
```sh
export ICU_DATA_VER=icudt(version)b
```
@ -182,10 +186,10 @@ export ICU_DATA_VER=icudt(version)b
2a. Configure ICU4C, build and test without new data first, to verify that
there are no pre-existing errors, and to build some tools needed for later
steps. Here `<platform>` is the runConfigureICU code for the platform you
steps. Here `<platform>` is the `runConfigureICU` code for the platform you
are building on, e.g. Linux, macOS, Cygwin.
(optionally build with debug enabled)
```
```sh
cd $ICU4C_DIR/source
./runConfigureICU [--enable-debug] <platform>
make clean
@ -195,7 +199,7 @@ make check 2>&1 | tee $NOTES/icu4c-oldData-makeCheck.txt
2b. Now with ICU4J, build and test without new data first, to verify that
there are no pre-existing errors (or at least to have the pre-existing errors
as a base for comparison):
```
```sh
cd $ICU4J_ROOT
mvn clean
mvn verify 2>&1 | tee $NOTES/icu4j-oldData-mvnCheck.txt
@ -210,31 +214,33 @@ cp -p $CLDR_DIR/common/dtd/ldmlICU.dtd $ICU4C_DIR/source/data/dtd/cldr/common/dt
```
3b. Update the cldr-icu tooling to use the latest tagged version of ICU
```
open $TOOLS_ROOT/cldr/cldr-to-icu/pom.xml
```sh
open $ICU_DIR/tools/cldr/cldr-to-icu/pom.xml
```
(search for `icu4j-for-cldr` and update to the latest tagged version per instructions)
3c. Update the build for any new icu version, added locales, etc.
```sh
# ICU version
open $ICU_DIR/tools/cldr/cldr-to-icu/pom.xml
# Locales and other configuration changes
open $ICU_DIR/tools/cldr/cldr-to-icu/config.xml
```
open $TOOLS_ROOT/cldr/cldr-to-icu/build-icu-data.xml
```
(update icuVersion, icuDataVersion if necessary; update lists of locales to include if necessary)
(update `icuVersion`, `icuDataVersion` if necessary; update lists of locales to include if necessary)
3d. If there are new data types or variants in CLDR, you may need to update the
files that specify mapping of CLDR data to ICU rseources:
```
open $TOOLS_ROOT/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt
open $TOOLS_ROOT/cldr/cldr-to-icu/src/main/resources/ldml2icu_supplemental.txt
files that specify mapping of CLDR data to ICU resources:
```sh
open $ICU_DIR/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt
open $ICU_DIR/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_supplemental.txt
```
## 4 Build and install CLDR jar
See `$TOOLS_ROOT/cldr/lib/README.txt` for more information on the CLDR
jar and the `install-cldr-jars.sh` script.
```
cd $TOOLS_ROOT/cldr
ant install-cldr-libs
See `$ICU_DIR/tools/cldr/cldr-to-icu/README.md` for more information on the CLDR jar.
```sh
cd "$CLDR_DIR"
mvn clean install -pl :cldr-all,:cldr-code -DskipTests -DskipITs
```
## 5 Generate CLDR production data and convert for ICU
@ -247,14 +253,15 @@ This process uses ant with ICU4C's `data/build.xml`
(usually `$CLDR_TMP_DIR/production`), required if any CLDR data has changed.
* Running `ant setup` is not required, but it will print useful errors to
debug issues with your path when it fails.
```
```sh
cd $ICU4C_DIR/source/data
ant cleanprod
ant setup
ant proddata 2>&1 | tee $NOTES/cldr-newData-proddataLog.txt
```
> Note, for CLDR development, at this point tests are sometimes run on the
> Note, for CLDR development, at this point tests are sometimes run on the
production data, see
[BRS: Run tests on production data](https://cldr.unicode.org/development/cldr-big-red-switch/brs-run-tests-on-production-data)
@ -262,26 +269,27 @@ ant proddata 2>&1 | tee $NOTES/cldr-newData-proddataLog.txt
These include .txt files and .py files. These new files will replace whatever was
already present in the ICU4C sources. This process uses the `LdmlConverter` in
`$TOOLS_ROOT/cldr/cldr-to-icu/`; see `$TOOLS_ROOT/cldr/cldr-to-icu/README.txt`.
`$ICU_DIR/tools/cldr/cldr-to-icu/`; see `$ICU_DIR/tools/cldr/cldr-to-icu/README.md`.
* This process will take several minutes, during most of which there will be no log
output (so do not assume nothing is happening). Keep a log so you can investigate
anything that looks suspicious.
* Note that `ant clean` should _not_ be run before this. The `build-icu-data.xml` process
* The conversion tool
will automatically run its own "clean" step to delete files it cannot determine to
be ones that it would generate, except for paths listed in `<retain>` elements such as
`coll/de__PHONEBOOK.txt`, `coll/de_.txt`, etc.
* Before running ant to regenerate the data, make any necessary changes to the
build-icu-data.xml file, such as adding new locales etc.
```
cd $TOOLS_ROOT/cldr/cldr-to-icu
ant -f build-icu-data.xml -DcldrDataDir="$CLDR_TMP_DIR/production" | tee $NOTES/cldr-newData-builddataLog.txt
* Before running the tool to regenerate the data, make any necessary changes to the
`config.xml` file, such as adding new locales etc.
```sh
cd $ICU_DIR/tools/cldr/cldr-to-icu
java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar --cldrDataDir="$CLDR_TMP_DIR/production" | tee $NOTES/cldr-newData-builddataLog.txt
```
5c. Update the CLDR testData files needed by ICU4C/J tests, ensuring
they are representative of the newest CLDR data.
```
cd $TOOLS_ROOT/cldr
```sh
cd $ICU_DIR/tools/cldr
ant copy-cldr-testdata
```
@ -289,7 +297,7 @@ ant copy-cldr-testdata
(This step has been subsumed into 5c above)
5e. For now, manually re-add the `lstm` entries in `data/brkitr/root.txt`
```
```sh
open $ICU4C_DIR/source/data/brkitr/root.txt
```
Paste the following block after the dictionaries block and before the final closing '}':
@ -302,20 +310,20 @@ Paste the following block after the dictionaries block and before the final clos
5f. Update hard-coded lists in ICU
ICU4 has some hard-coded lists of locale-related codes that may need updating. Ideally these should
ICU has some hard-coded lists of locale-related codes that may need updating. Ideally these should
be replaced by data converted from CLDR ([ICU-22839](https://unicode-org.atlassian.net/browse/ICU-22839)). In the
meantime these need to be updated manually.
| code type | icu4c/source library file(s) | icu4c/source test file(s) |
| -------------------------------------------------------------------------------------------- | ------------------------------------------- | ------------------------------------------- |
| language<BR>(at least all language codes in ICU locales or CLDR attributeValueValidity.xml) | common/uloc.cpp: LANGUAGES[], LANGUAGES_3[] | test/testdata/structLocale.txt: Languages |
| region<BR>(at least all region codes in ICU locales or CLDR attributeValueValidity.xml) | common/uloc.cpp: COUNTRIES[], COUNTRIES_3[] | test/testdata/structLocale.txt: Countries |
| currency (see note below)<BR>(at least everything in CLDR supplementalData.xml currencyData) | common/ucurr.cpp: gCurrencyList[]] | test/testdata/structLocale.txt: Currencies,CurrencyPlurals<BR>test/cintltst/currtest.c:TestEnumList() |
| timezone | (not currently aware of hard-coded list) | test/testdata/structLocale.txt: zoneStrings |
| language<BR>(at least all language codes in ICU locales or CLDR `attributeValueValidity.xml`) | `common/uloc.cpp`: `LANGUAGES[], LANGUAGES_3[]` | `test/testdata/structLocale.txt`: Languages |
| region<BR>(at least all region codes in ICU locales or CLDR `attributeValueValidity.xml`) | `common/uloc.cpp`: `COUNTRIES[], COUNTRIES_3[]` | `test/testdata/structLocale.txt`: Countries |
| currency (see note below)<BR>(at least everything in CLDR `supplementalData.xml` `currencyData`) | `common/ucurr.cpp`: `gCurrencyList[]` | `test/testdata/structLocale.txt`: `Currencies`,`CurrencyPlurals`<BR>`test/cintltst/currtest.c`:`TestEnumList()` |
| timezone | (not currently aware of hard-coded list) | `test/testdata/structLocale.txt`: `zoneStrings` |
Note: currency code lists are also in other code lists along with measurement units,
but these are re-generated using the procedure in
[Updating MeasureUnit with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html)
[Updating `MeasureUnit` with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html)
(also mentioned in step 14 below).
## 6 Check the results
@ -323,7 +331,7 @@ but these are re-generated using the procedure in
Check which data files have modifications, which have been added or removed
(if there are no changes, you may not need to proceed further). Make sure the
list seems reasonable. You may want to save logs, and possibly examine them...
```
```sh
cd $ICU4C_DIR/..
git status
git status > $NOTES/gitStatusDelta-data.txt
@ -332,7 +340,7 @@ open $NOTES/gitDiffDelta-data.txt
```
6a. You may also want to check which files were modified in CLDR production data:
```
```sh
cd $CLDR_TMP_DIR
git status
git status > $NOTES/gitStatusDelta-staging.txt
@ -342,25 +350,25 @@ git diff > $NOTES/gitDiffDelta-staging.txt
## 7 Fix data generation errors
Look for evident errors in the list of file changes, or in the file diffs.
Fixing them may entail modifying CLDR source data or `TOOLS_ROOT` config files or
Fixing them may entail modifying CLDR source data or `$ICU_DIR/tools/cldr/cldr-to-icu` config files or
tooling.
## 8 Rebuild ICU4C with new data, run tests
8a. Re-run configure and make clean, necessary to handle any files added or deleted:
```
```sh
cd $ICU4C_DIR/source
./runConfigureICU [--enable-debug] <platform>
make clean
```
8b. Do the rebuild, keeping a log as before:
```
```sh
make check 2>&1 | tee $NOTES/icu4c-newData-makeCheck.txt
```
To re-run a specific test if necessary when fixing bugs; for example:
```
```sh
cd test/intltest
DYLD_LIBRARY_PATH=../../lib:../../stubdata:../../tools/ctestfw:$DYLD_LIBRARY_PATH ./intltest -e -G format/NumberTest/NumberPermutationTest
cd ../..
@ -380,7 +388,8 @@ ticket under which you are performing the integration, if you have one), fix the
and regenerate from step 4.
If the data is OK, other sources of failure can include:
* Problems with the CLDR-ICU conversion process (pehaps some locale data is not getting
* Problems with the CLDR-ICU conversion process (perhaps some locale data is not getting
converted properly); go back to step 3, adjust and repeat from there.
* Problems with ICU library code that may not be using new resources properly. Fix and
repeat from step 8.
@ -390,9 +399,9 @@ If the data is OK , other sources of failure can include:
you will need to update `icu4c/test/testdata/structLocale.txt` (otherwise
`/tsutil/cldrtest/TestLocaleStructure` may fail).
## 10 Running ICU4C tests in exhaustive mode.
## 10 Running ICU4C tests in exhaustive mode
Exhautive tests should always be run for a CLDR-ICU integration PR before it is merged.
Exhaustive tests should always be run for a CLDR-ICU integration PR before it is merged.
Once you have a PR, you can do this for both C and J as part of the pre-merge CI tests
by manually running a workflow (the exhaustive tests are not run automatically on every PR).
See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustive-tests).
@ -400,7 +409,7 @@ See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustiv
The following instructions run the ICU4C exhaustive tests locally (which you may want to do
before even committing changes, or which may be necessary to diagnose failures in the
CI tests):
```
```sh
cd $ICU4C_DIR/source
export INTLTEST_OPTS="-e"
export CINTLTST_OPTS="-e"
@ -415,13 +424,13 @@ appropriate, and repeating from step 4 or 8 as appropriate.
## 12 Transfer the ICU4C data to ICU4J
12a. You need to reconfigure ICU4C to include the unicore data.
```
```sh
cd $ICU4C_DIR/source
ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data ./runConfigureICU <platform>
```
12b. Rebuild the data with the new config setting, then create the ICU4J data jar.
```
```sh
cd $ICU4C_DIR/source/data
make clean
make -j -l2.5
@ -429,13 +438,13 @@ make icu4j-data-install
```
12c. Create the test data jar
```
```sh
cd $ICU4C_DIR/source/test/testdata
make icu4j-data-install
```
12d. Update the extracted {main, test} data files in the Maven build
```
```sh
cd $ICU4J_ROOT
./extract-data-files.sh
```
@ -443,7 +452,7 @@ cd $ICU4J_ROOT
## 13 Rebuild ICU4J with new data, run tests
13a. Run the tests using the maven build
```
```sh
cd $ICU4J_ROOT
mvn clean
mvn install 2>&1 | tee $NOTES/icu4j-newData-mvnCheck.txt
@ -451,26 +460,29 @@ mvn install 2>&1 | tee $NOTES/icu4j-newData-mvnCheck.txt
It is possible to re-run a specific test class or method if necessary when fixing bugs.
For example (using artifactId, full class name, test all methods):
```
For example (using `artifactId`, full class name, test all methods):
```sh
mvn install -pl :core -Dtest=com.ibm.icu.dev.test.util.LocaleBuilderTest
```
or (example of using module path, class name, one method):
```
```sh
mvn install -pl main/common_tests -Dtest=MeasureUnitTest#TestGreek
```
13b. Optionally run the tests in exhautive mode
13b. Optionally run the tests in exhaustive mode
Optionally run before committing changes, or run to diagnose failures from
running exhastive CI tests in the PR using `/azp run CI-Exhaustive`:
```
Optionally run exhaustive tests locally before committing changes:
```sh
cd $ICU4J_ROOT
mvn install -DICU.exhaustive=10 2>&1 | tee $NOTES/icu4j-newData-mvnCheckEx.txt
```
Exhaustive tests in CI can be triggered by running the "Exhaustive Tests for ICU"
action from the GitHub web UI.
See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustive-tests).
Running a specific test is the same as above:
```
```sh
mvn install -pl :core -DICU.exhaustive=10 -Dtest=ExhaustiveNumberTest
```
@ -482,7 +494,7 @@ step 4, as appropriate, until there are no more failures in ICU4C or ICU4J.
Note that certain data changes and related test failures may require the
rebuilding of other kinds of data and/or code. For example:
### Updating MeasureUnit code and tests
### Updating `MeasureUnit` code and tests
If you see a failure such as
```
@ -490,7 +502,7 @@ MeasureUnitTest testCLDRUnitAvailability Failure (MeasureUnitTest.java:3410) : U
```
then you will need to update the C and J library and test code for new measurement
units, see the procedure at
[Updating MeasureUnit with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html)
[Updating `MeasureUnit` with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html)
### Updating plurals test data
@ -503,12 +515,12 @@ To address these requires updating the LOCALE_SNAPSHOT data in
```
$ICU4J_ROOT/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/PluralRulesTest.java
```
by modifying the TestLocales() test there to run `generateLOCALE_SNAPSHOT()` and
by modifying the `TestLocales()` test there to run `generateLOCALE_SNAPSHOT()` and
then copying in the updated data.
## 15 Check the ICU file changes and commit
```
```sh
cd $ICU4C_DIR/source
make clean
cd $ICU4J_ROOT
@ -528,13 +540,13 @@ git push origin ICU-nnnnn-branchname
(Only for an official integration from CLDR git repositories)
16a. Check cldr-staging changes, and commit
```
```sh
cd $CLDR_TMP_DIR
git status
```
Then `git add` or `git rm` files as necessary. Record the changes, commit and push.
```
```sh
git status > $NOTES/gitStatusDelta-production-afterAdd.txt
git commit -m 'CLDR-nnnnn production data corresponding to CLDR release-nn-stage'
git push origin main
@ -545,8 +557,8 @@ git push origin main
(There may be other cldr-staging changes unrelated to production data, such as charts
or spec; we want to include them in the tag, so pull first, but log to see what the
chnages are first)
```
changes are first)
```sh
cd $CLDR_TMP_DIR
git pull
git log
@ -559,7 +571,7 @@ git push --tags
We need to tag the main cldr repository. If $CLDR_DIR represents that repository,
this is easy:
```
```sh
cd $CLDR_DIR
git tag -a "release-nn-stage" -m "CLDR-nnnnn: tag CLDR release-nn-stage"
git push --tags
@ -567,7 +579,7 @@ git push --tags
However if $CLDR_DIR represents your personal fork or a branch from it, you need to
figure out what commit hash you have integrated, and tag that hash in the main repo.
```
```sh
cd $CLDR_DIR
git log
```
@ -575,7 +587,7 @@ Note the latest commit hash hhhhhhhh...
Then switch to the main repo, update it, and tag the appropriate hash (making sure
it is in that repo!):
```
```sh
cd $HOME/cldr
git pull
git log
@ -583,7 +595,7 @@ git tag -a "release-nn-stage" -m "CLDR-nnnnn: tag CLDR release-nn-stage" hhhhhhh
git push --tags
```
## 18 Pubish the cldr tags in github
## 18 Publish the cldr tags in github
You should publish the cldr and cldr-staging tags in github.

View file

@ -59,6 +59,10 @@ Also, please look out for this type of message: "\***\*\* WARNING Bad namespace
not defined inside the "icu" namespace. Consider adding **U_NAMESPACE_BEGIN**
and **U_NAMESPACE_END** around the class and member definitions.
## Update the runners
In all workflow yaml files, update macos-n, ubuntu-p.q, windows-yyyy to the version currently designated -latest on https://github.com/actions/runner-images?tab=readme-ov-file#available-images.
## Update the pool bundles
*Obsolete for ICU 64+*: The pool bundles are no longer checked in. Instead,

View file

@ -126,6 +126,37 @@ the UTF-8 signature byte sequence ("BOM").~~
## Clean up import statements
### From command line
This can be done from command line using the
[Google Java Format](https://github.com/google/google-java-format) tool.
**WARNING:** requires JDK 17 or newer (December 2024)
Download the latest Google Java Format from Maven Central:
```sh
mvn dependency:copy -Dartifact=com.google.googlejavaformat:google-java-format:LATEST:jar:all-deps \
-DoutputDirectory=/tmp \
-Dmdep.stripVersion=true \
-q -ntp
```
Clean up all Java files (only imports, nothing else):
```sh
find . -type f -name '*.java' | xargs java -jar /tmp/google-java-format-all-deps.jar -i --aosp --fix-imports-only --skip-sorting-imports
```
Remove the Google Java Format artifact from the temporary folder:
```sh
rm /tmp/google-java-format-all-deps.jar
```
You can (of course) download it from
[GitHub Releases](https://github.com/google/google-java-format/releases). \
Or save it in a personal tools folder and keep it around.
### From Eclipse
The Eclipse IDE provides a feature which allows you to organize import statements
for multiple files. Right click on projects/source folders/files, you can select
\[Source\] - \[Organize Imports\] which resolve all wildcard imports and sort

View file

@ -0,0 +1,150 @@
---
layout: default
title: Updating MeasureUnit with new CLDR data
parent: Release & Milestone Tasks
grand_parent: Contributors
nav_order: 120
---
<!--
© 2020 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html
-->
# Updating MeasureUnit with new CLDR data
{: .no_toc }
## Contents
{: .no_toc .text-delta }
1. TOC
{:toc}
---
This document explains how to update the C++ and Java versions of the MeasureUnit
class with new CLDR data.
Code is generated by running MeasureUnitTest.java unit tests, which writes
generated code to System.out. Two ways to access this:
1. Within **eclipse**:
- Open MeasureUnitTest.java, run it by clicking on the green play button on
menu bar.
- Copy the generated code from the eclipse console to the clipboard.
2. With **ant**:
- Run: `ant checkTest
-Dtestclass='com.ibm.icu.dev.test.format.MeasureUnitTest'`
- Open the checkTest output: `out/junit-results/checkTest/html/index.html`
- Navigate to "System.out" at the bottom of the MeasureUnitTest page to find
the generated code, and copy to the clipboard.
After syncing CLDR data with ICU do the following. This documentation assumes
that you are updating the MeasureUnit classes for ICU 68.
* Check out
$GIT_ROOT/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/MeasureUnitTest.java
* Open MeasureUnitTest.java.
* Find the `testZZZ` test, its code should all be commented out. This test will
execute last and will run the desired code.
Make sure DRAFT_VERSIONS at top of MeasureUnitTest.java is set correctly.
These are the ICU versions that have draft methods.
## Update MeasureUnit.java
* Change `testZZZ` to run `generateConstants("68"); // ICU 68.`
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Open MeasureUnit.java:
$GIT_ROOT/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java
* Look for line containing:
`// Start generated MeasureUnit constants`
* Look for line containing:
`// End generated MeasureUnit constants`
* Replace all the generated code in between with the contents of the clipboard
* Run the MeasureUnitTest.java to ensure that the new code is backward
compatible. These compatibility tests are called something like
`TestCompatible65`, which tests backward compatibility with ICU 65.
* Create a compatibility test for ICU 68. Change `testZZZ` to run
`generateBackwardCompatibilityTest("68")`
* Run tests.
* Copy generated test (see instructions above) into MeasureUnitTest.java
* Run tests again to ensure that new code is backward compatible with itself
## Update ICU4C
* checkout ICU4C
### Update measunit.h
* Change testZZZ to run `generateCXXHConstants("68"); // ICU 68`.
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Open $GIT_ROOT/icu4c/source/i18n/unicode/measunit.h. Look for line containing:
`// Start generated createXXX methods`
* Look for line:
`// End generated createXXX methods`
* Replace all the generated code in between with the contents of the clipboard
### Update measunit.cpp
* Change testZZZ to run generateCXXConstants();
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Open $GIT_ROOT/icu4c/source/i18n/measunit.cpp. Look for line containing:
`// Start generated code for measunit.cpp`
* Look for lines
`// End generated code for measunit.cpp`
* Replace all the generated code in between with the contents of the clipboard
### Run C++ tests
* Run `./intltest format/MeasureFormatTest` from `test/intltest` to ensure new
code is backward compatible.
* Create a compatibility test for ICU 68. Change `testZZZ` in eclipse to run
`generateCXXBackwardCompatibilityTest("68")`
* Run tests.
* Copy generated test (see instructions above) into
$GIT_ROOT/icu4c/source/test/intltest/measfmttest.cpp. Make other necessary
changes to make test compile. You can find these changes by searching for
`TestCompatible65()`
* Run tests again to ensure that new code is backward compatible with itself
## Finishing changes
These last changes are necessary to permanently record the ICU version number of
any new measure units. Without these changes any new functions for this release
will be considered new for the next release too.
* Change `testZZZ` to run `updateJAVAVersions("68");`
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Append the clipboard contents to the values of the JAVA_VERSIONS variable
near the top of MeasureUnitTest.java.
**Important:** what you are copying are just the new functions for the current
ICU version, in this case 68. Therefore append, do not replace.
## Updating units.txt and unitConstants
The standard ldml2icu process is used to update ICU's resource files (see
[cldr-icu-readme.txt](https://github.com/unicode-org/icu/blob/main/icu4c/source/data/cldr-icu-readme.txt)).
CLDR's units.xml defines conversion rates in terms of some constants defined in
`unitConstants`.
For efficiency and simplicity, ICU does not read `unitConstants` from the
resource file. If any new constants are added, some code changes would be
needed. This would be caught by `testUnitConstantFreshness` unit test in
`units_test.cpp`.
They are hard-coded:
* Java: `UnitConverter.java` has the constant names in
`UnitConverter.Factor.addEntity()` and constant values in
`UnitConverter.Factor.getConversionRate()`.
* C++: `units_converter.cpp` has the constant names in
`addSingleFactorConstant()`, with the constant values in `double
constantsValues[]` in the `units_converter.h` header file.

View file

@ -12,9 +12,11 @@ License & terms of use: http://www.unicode.org/copyright.html
-->
# Updating MeasureUnit with new CLDR data
{: .no_toc }
## Contents
{: .no_toc .text-delta }
1. TOC
@ -22,117 +24,94 @@ License & terms of use: http://www.unicode.org/copyright.html
---
This document explains how to update the C++ and Java version of the MeasureUnit
This document explains how to update the C++ and Java version of the `MeasureUnit`
class with new CLDR data.
Code is generated by running MeasureUnitTest.java unit tests, which writes
generated code to System.out. Two ways to access this:
This document applies to ICU 77 and later.
For older versions see updating-measure-unit-old.md
1. Within **eclipse**:
- Open MeasureUnitTest.java, run it by clicking on the green play button on
menu bar.
- Copy the generated code from the eclipse console to the clipboard.
Make sure `DRAFT_VERSION_SET` at top of
`./icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/MeasureUnitGeneratorTest.java`
is set correctly. \
These are the ICU versions that have draft methods.
2. With **ant**:
- Run: `ant checkTest
-Dtestclass='com.ibm.icu.dev.test.format.MeasureUnitTest'`
- Open the checkTest output: `out/junit-results/checkTest/html/index.html`
- Navigate to "System.out" at the bottom of the MeasureUnitTest page to find
the generated code, and copy to the clipboard.
The code is generated by running `MeasureUnitGeneratorTest.java` unit tests, which writes
generated code to various files.
After syncing CLDR data with ICU do the following. This documentation assumes
that you are updating the MeasureUnit clases for ICU 68.
1. With **maven** (command line):
- Change folder to `{icuRoot}/icu4j`
- run `mvn install -DskipTests -DskipITs`
- run `mvn install -q -Dtest=MeasureUnitGeneratorTest -DgenerateMeasureUnitUpdate -f main/common_tests`
* Check out
$GIT_ROOT/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/MeasureUnitTest.java
* Open MeasureUnitTest.java.
* Find the `testZZZ` test, its code should all be commented out. This test will
execute last and will run the desired code.
2. Within **Eclipse**:
- Open `MeasureUnitGeneratorTest.java`, find the `generateUnitTestsUpdate` method
and run it by clicking on the green play button on menu bar. \
Choose "JUnit Test" if asked. \
This will not generate the update, but it will run the test and create a "Run Configuration". \
Open it (Main menu -- "Run" -- "Run Configurations"), select the one named
`MeasureUnitGeneratorTest.generateUnitTestsUpdate`, go to the "Arguments" tab and add
`-DgenerateMeasureUnitUpdate` to the "VM Arguments" text area.
Make sure DRAFT_VERSIONS at top of MeasureUnitTest.java is set correctly.
These are the ICU versions that have draft methods.
Both methods will generate files in the `icu4j/main/common_tests/target/` folder. \
The file names and the logging to the standard output will guide you.
## Update MeasureUnit.java
It currently looks something like this:
```
Copy the generated code fragments from / to
/some/absolute/path/icu4j/main/common_tests/target/MeasureUnit.java \
/some/absolute/path/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java
* Change `testZZZ` to run `generateConstants(“68”); // ICU 68.`
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Open MeasureUnit.java:
$GIT_ROOT/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java
* Look for line containing:
Copy the generated code fragments from / to
/some/absolute/path/icu4j/main/common_tests/target/MeasureUnitCompatibilityTest.java \
/some/absolute/path/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/MeasureUnitCompatibilityTest.java
`// Start generated MeasureUnit constants`
* Look for line containing:
Copy the generated code fragments from / to
/some/absolute/path/icu4j/main/common_tests/target/measunit.h \
/some/absolute/path/icu4c/source/i18n/unicode/measunit.h
`// End generated MeasureUnit constants`
* Replace all the generated code in between with the contents of the clipboard
* Run the MeasureUnitTest.java to ensure that the new code is backward
compatible. These compatibility tests are called something like
`TestCompatible65`, which tests backward compatibility with ICU 65.
* Create a compatibility test for ICU 68. Change `testZZZ` to run
`generateBackwardCompatibilityTest(“68”)`
* Run tests.
* Copy generated test (see instructions above) into MeasureUnitTest.java
* Run tests again to ensure that new code is backward compatible with itself
Copy the generated code fragments from / to
/some/absolute/path/icu4j/main/common_tests/target/measunit.cpp \
/some/absolute/path/icu4c/source/i18n/measunit.cpp
## Update ICU4C
Copy the generated code fragments from / to
/some/absolute/path/icu4j/main/common_tests/target/measfmttest.cpp \
/some/absolute/path/icu4c/source/test/intltest/measfmttest.cpp
* checkout ICU4C
Copy the generated code fragments from / to
/some/absolute/path/icu4j/main/common_tests/target/MeasureUnitGeneratorTest.java \
/some/absolute/path/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/MeasureUnitGeneratorTest.java
```
### Update measunit.h
Some kind of diff tool or editor (for example `vi -d`) works nicely.
* Change testZZZ to run `generateCXXHConstants(“68”); // ICU 68`.
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Open $GIT_ROOT/icu4c/source/i18n/unicode/measunit.h. Look for line containing:
Look for the lines containing `// Start generated ...` and `// End generated ...`.
These lines exist in both the original files, and the generated one. \
Replace all the generated code in between with the contents of the clipboard.
`// Start generated createXXX methods`
* Look for line:
If the generated code has no `// Start` ... `// End ...` pair then the new
code should be appended at some fixed place (details below).
`// End generated createXXX methods`
* Replace all the generated code in between with the contents of the clipboard
* **`MeasureUnit.java`:** replace range.
* **`MeasureUnitCompatibilityTest.java`:** append the new generated method at the end. \
It is named something like `TestCompatible<version>()`. \
Don't add it if it already exists.
* **`measunit.h`:** replace range.
* **`measunit.cpp`:** replace range.
* **`measfmttest.cpp`:** append the new generated method after the last
`MeasureFormatTest::TestCompatible<version>()` method. \
Don't add it if it already exists. \
WARNING: here you should add the method in two places. The method proper, with code,
as generated, and the declaration in the class definition.
* **`MeasureUnitGeneratorTest.java`:** append the new pairs of measure + version at
the end of the `JAVA_VERSIONS` structure. \
Don't add them if they already exist.
### Update measunit.cpp
## Run tests for both `icu4c` and `icu4j`
* Change testZZZ to run generateCXXConstants();
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Open $GIT_ROOT/icu4c/source/i18n/measunit.cpp. Look for line containing:
## Updating `units.txt` and `unitConstants`
`// Start generated code for measunit.cpp`
* Look for lines
`// End generated code for measunit.cpp`
* Replace all the generated code in between with the contents of the clipboard
### Run C++ tests
* Run `./intltest format/MeasureFormatTest` from `test/intltest` to ensure new
code is backward compatible.
* Create a compatibility test for ICU 68. Change `testZZZ` in eclipse to run
`generateCXXBackwardCompatibilityTest(“68”)`
* Run tests.
* Copy generated test (see instructions above) into
$GIT_ROOT/icu4c/source/test/intltest/measfmttest.cpp. Make other necessary
changes to make test compile. You can find these changes by searching for
`TestCompatible65()`
* Run tests again to ensure that new code is backward compatible with itself
## Finishing changes
These last changes are necessary to permanently record the ICU version number of
any new measure units. Without these changes any new functions for this release
will be considered new for the next release too.
* Change `testZZZ` to run `updateJAVAVersions(“68”);`
* Run MeasureUnitTest.java, copy the generated code (see instructions above).
* Append the clipboard contents to the values of the JAVA_VERSIONS variable
near the top of MeasureUnitTest.java.
**Important:** what you are copying are just the new functions for the current
ICU version, in this case 68. Therefore append, do not replace.
## Updating units.txt and unitConstants
The standard ldml2icu process is used to update ICU's resource files (see
[cldr-icu-readme.txt](https://github.com/unicode-org/icu/blob/main/icu4c/source/data/cldr-icu-readme.txt)).
The standard `ldml2icu` process is used to update ICU's resource files (see
[`cldr-icu-readme.txt`](https://github.com/unicode-org/icu/blob/main/icu4c/source/data/cldr-icu-readme.txt)).
CLDR's units.xml defines conversion rates in terms of some constants defined in
`unitConstants`.
@ -142,6 +121,7 @@ needed. This would be caught by `testUnitConstantFreshness` unit test in
`units_test.cpp`.
They are hard-coded:
* Java: `UnitConverter.java` has the constant names in
`UnitConverter.Factor.addEntity()` and constant values in
`UnitConverter.Factor.getConversionRate()`.

View file

@ -53,6 +53,15 @@ need to be correspondingly updated. See below for more files to be updated and s
[icu4c/source/data/misc/icuver.txt](https://github.com/unicode-org/icu/blob/main/icu4c/source/data/misc/icuver.txt)
needs to be updated with the correct version number for ICU and its data.
### CLDR-to-ICU Conversion
#### Since ICU 77
The tool takes the `icuVersion` and `icuDataVersion` from the official ICU APIs.
(from the icu4j listed as a dependency of the tool, usually the one you just built from the `icu4j` folder).
If you need values different than that, you can specify them as the command line parameters (`--icuVersion` and `--icuDataVersion`).
#### Since ICU 68
In
@ -164,7 +173,7 @@ Changing the version for Java starting with ICU 74 requires a few steps:
2. [icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/DebugUtilitiesData.java](https://github.com/unicode-org/icu/blob/main/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/DebugUtilitiesData.java)
There is a public string named `ICU4C_VERSION` which should be updated accordingly.
'public static final String ICU4C_VERSION="74.0.1";'
`public static final String ICU4C_VERSION="74.0.1";`
3. When creating the final release of a major ICU version,
or a point release (minor version update on a maintenance branch),
@ -201,7 +210,7 @@ The command requires a version number string that follows the typical Java / Mav
This should happen at the same time and along with the work in the previous step for the version number
when the version number is updated on `main` in the commit after the release/maintenance branch is cut.
In other words, the above `versions:set-property` step should be executed at the same time
`mvn versions:set -DnewVersion=74.0.1-SNAPSHOT` is executed.
`mvn versions:set -DnewVersion=74.0.1-SNAPSHOT -DgenerateBackupPoms=false` is executed.
5. Update the following variables in `icu4j/releases_tools/shared.sh`
@ -212,8 +221,18 @@ The command requires a version number string that follows the typical Java / Mav
6. cldr-to-icu build tool has a dependency on the icu4j packages which needs to be updated in [`tools/cldr/cldr-to-icu/pom.xml`](https://github.com/unicode-org/icu/blob/main/tools/cldr/cldr-to-icu/pom.xml). Please update it to match the version that was updated in `icu4j/pom.xml` in the steps above.
`<version>74.0.1-SNAPSHOT</version>`
```xml
<version>74.0.1-SNAPSHOT</version>
```
Since ICU 77 this moved to a property:
```xml
<icu4j.version>77.0.1-SNAPSHOT</icu4j.version>
```
This can easily be set from the command line:
```sh
mvn versions:set-property -Dproperty=icu4j.version -DnewVersion=77.1 -f $ICU_DIR/tools/cldr/cldr-to-icu -DgenerateBackupPoms=false
```
#### Until ICU 73 (inclusive)
@ -274,7 +293,7 @@ For updating ICU version numbers, follow the steps below.
3. [icu4j/main/core/src/main/java/com/ibm/icu/util/VersionInfo.java](https://github.com/unicode-org/icu/blob/main/icu4j/main/core/src/main/java/com/ibm/icu/util/VersionInfo.java)
There is a static block starting at line 501 (as of 54.1) in the source file.
Update the `ICU_VERSION` value, where the first three arguments represent the
major, minor, and patch versions of a semantic version.
Use the `getInstance(major, 0, 1, 0)` as the version during pre-release development,

View file

@ -66,8 +66,7 @@ classes.*
When a calendar object is created, via either `Calendar::create()`, or
`ucal_open()`, or indirectly within a date formatter, ICU looks up the 'default'
calendar type for that locale. At present, all locales default to a Gregorian
calendar, except for the compatibility locales th_TH_TRADITIONAL and
ja_JP_TRADITIONAL. If the "calendar" keyword is supplied, this value will
calendar. If the "calendar" keyword is supplied, this value will
override the default for that locale.
For instance, `Calendar::createInstance("fr_FR", status)` will create a Gregorian calendar,

View file

@ -596,4 +596,4 @@ every Saturday (at 4:00 AM UTC) and post merging on the maintenance branches.
They do not run on pull-requests by default as they take 1-2 hours to run.
However, you can manually request the CI builds to run the exhaustive tests.
See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustive-tests).
See [Continuous Integration / Exhaustive Tests](../ci.md#exhaustive-tests).

View file

@ -212,6 +212,16 @@ The rule updates are done first for ICU4C, and then ported (code changes) or mov
Updating the test with new or revised rules requires changing the test source code, in `icu4c/source/test/intltest/rbbitst.cpp`. Look for the classes RBBICharMonkey, RBBIWordMonkey, RBBISentMonkey and RBBILineMonkey. The body of each class tracks the corresponding UAX-14 or UAX-29 specifications in defining the character classes and break rules.
The rules, as well as the partition of the code space used to generate the random sample strings,
are defined by regular expressions and Unicode sets generated by GenerateBreakTest in the
Unicode tools, which runs as part of MakeUnicodeFiles.
Copy the relevant lines from `Generated/UCD/17.0.0/extra/*BreakTest.cpp.txt` into `rbbitst.cpp`.
When developing changes to the line breaking algorithms that require changes to property assignments,
the generated rules and partition may need to be adjusted for testing.
However, the updated rules should only be merged into ICU once the property changes have actually been
made in the UCD and imported into ICU, at which point the unmodified generated partition and rules can
be used in `rbbitst.cpp`.
After making changes, as a final check, let the test run for an extended period of time, on the order of several hours.
Run it from a terminal, and just interrupt it (Ctrl-C) when it's gone long enough.

View file

@ -27,31 +27,33 @@ It will be a successor to the current [ICU MessageFormat](index.md).
MessageFormat 2.0 is being developed
[in a working group](https://github.com/unicode-org/message-format-wg),
which has created a [draft specification](https://github.com/unicode-org/message-format-wg/tree/main/spec).
A version of the specification is included in [LDML 45](https://unicode.org/reports/tr35/tr35-messageFormat.html#Contents).
Also see the
[API docs for `MessageFormatter`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/index.html?com/ibm/icu/message2/MessageFormatter.html).*
## Overview of `MessageFormatter`
In ICU4J, the `MessageFormatter` class is the next iteration of [MessageFormat](https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/MessageFormat.html).
This new version will build on the lessons learned from using MessageFormat for 25 years in various environments, when used directly or as a base for other public APIs.
The `MessageFormatter` class is the next iteration of MessageFormat, implemented in both [ICU4J](https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/MessageFormat.html) and [ICU4C](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1message2_1_1MessageFormatter.html).
This new version builds on the lessons learned from using MessageFormat for 25 years in various environments, when used directly or as a base for other public APIs.
The effort to design a succesor to `MessageFormat` will result in a specification referred to as MessageFormat 2.0.
The effort to design a successor to `MessageFormat` is a specification referred to as MessageFormat 2.0.
The reasoning for this effort is shared in the [“Why MessageFormat needs a successor”](https://github.com/unicode-org/message-format-wg/blob/main/docs/why_mf_next.md) document.
MessageFormat 2.0 will be more modular and easier to port and backport.
It will also provide extension points via interfaces to allow users to supply new formatters and selectors without having to modify the specification.
MessageFormat 2.0 is more modular and easier to port and backport.
It also provides extension points via interfaces to allow users to supply new formatters and selectors without having to modify the specification.
ICU will eventually include support for new formatters, such as intervals, relative time, lists, measurement units, personal names, and more, as well as the ability for users to supply their own custom implementations.
These will potentially support use cases like grammatical gender, inflection, markup regimes (such as those required for text-to-speech), and other complex message management needs.
The MessageFormat Working Group, which develops the new data model, semantics, and syntax, is hosted on [GitHub](https://github.com/unicode-org/message-format-wg).
The current specification for the syntax and data model can be found [here](https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md).
This technical preview implements enough functions for `MessageFormater` to be useful in many situations,
but the final set of functions and the parameters accepted by those functions is not yet finalized.
This technical preview implements functions according to the LDML 45 version of the specification.
## Examples
### ICU4J
### Basic usage
#### Examples
##### Basic usage
```java
import static org.junit.Assert.assertEquals;
@ -81,7 +83,7 @@ public void testMf2() {
}
```
### Placeholder examples
##### Placeholder examples
| Code to set runtime value for placeholder | Examples of placeholder in message pattern |
|----------------------------------------------------|------------------------------------------------------------------------------|
@ -91,7 +93,7 @@ public void testMf2() {
| No argument for fixed values known at build time | `{(123456789.531) :number}` |
### Plural selection message
#### Plural selection message
```java
@Test
@ -123,12 +125,11 @@ public void testMf2Selection() {
}
```
### Built-in formatter functions
#### Built-in formatter functions
The tech preview implementation comes with formatters for numbers (`number`),
date / time (`datetime`),
plural selectors (`plural` and `selectordinal`),
and general selector (`select`),
The tech preview implementation comes with a formatter and selector for numbers (`number`),
date / time formatters (`datetime`),
and a string selector (`string`),
very similar to what MessageFormat offers.
The [ICU test code](https://github.com/unicode-org/icu/tree/main/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2)
@ -136,9 +137,10 @@ covers most features, and has examples of how to make custom placeholder formatt
you can look for classes that implement `com.ibm.icu.message2.FormatterFactory`
(they are named `Custom*Test.java`).
## Functions currently implemented
#### Functions currently implemented
These are the functions implemented right now:
These are the functions implemented in ICU 75.1. The functions will change
in a future release to be consistent with the current MF2 specification.
<table border="1">
@ -191,19 +193,19 @@ TBD if this will be merged into <code>plural</code> (with some <code>kind</code>
<tr><td><code>select</code></td><td>Literal match, same as MessageFormat's <code>select</code>.</td></tr>
</table>
## Quickstart guide
#### Quickstart guide
If you don't have ICU set up, here are instructions for doing that using Maven or Gradle:
### Requirements
##### Requirements
- JDK (version 8 or newer)
- Maven or Gradle
- Your preferred IDE or text editor
### Maven
##### Maven
#### Create a new project
###### Create a new project
```
$ mvn archetype:generate -DgroupId=org.unicode -DartifactId=mf2 -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4 -DinteractiveMode=false
@ -211,7 +213,7 @@ $ mvn archetype:generate -DgroupId=org.unicode -DartifactId=mf2 -DarchetypeArtif
$ cd mf2
```
#### Add a dependency to ICU4J 72.1 (or newer)
###### Add a dependency to ICU4J 75.1 (or newer)
In the `pom.xml` find the `<dependencies>` element and add this:
@ -219,24 +221,24 @@ In the `pom.xml` find the `<dependencies>` element and add this:
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>72.1</version>
<version>75.1</version>
</dependency>
```
#### Add a bit of code
###### Add a bit of code
Open the test file (`src/test/java/org/unicode/AppTest.java`)
and copy / paste the include directives and the `testMf2()` method shown in the previous section.
#### Test it
###### Test it
```
$ mvn test
```
### Gradle
##### Gradle
#### Create a new project
###### Create a new project
```
$ mkdir mf2
@ -246,25 +248,25 @@ $ cd mf2
$ gradle init --dsl groovy --test-framework junit --type java-application --package org.unicode --project-name mf2
```
#### Add a dependency to ICU4J 72.1 (or newer)
###### Add a dependency to ICU4J 75.1 (or newer)
In the `app/build.gradle` file, find the `dependencies {...}` section and add this:
```
implementation 'com.ibm.icu:icu4j:75.1'
```
#### Add a bit of code
###### Add a bit of code
Open the test file (`src/test/java/org/unicode/AppTest.java`)
and copy / paste the include directives and the `testMf2()` method shown in the previous section.
#### Test it
###### Test it
```
$ gradle test
```
### Experiment from here
##### Experiment from here
At this point you have a basic application using MessageFormat 2.
@ -276,3 +278,8 @@ You can experiment with more messages using as inspiration:
You should be able to use your preferred IDE (Eclipse, IntelliJ, Visual Studio Code, more), use a different build system, etc.
### ICU4C
Some helpful documentation for ICU4C in MF2 is being developed at [messageformat.dev](https://messageformat.dev/docs/integration/cpp/),
as well as an unofficial [MF2 syntax documentation](https://messageformat.dev/docs/quick-start/)
and an interactive [playground](https://messageformat.dev/playground/) for testing messages.

View file

@ -163,6 +163,10 @@ example, even if ICU is built in "files" mode, you must still link against the
This option builds ICU data as a single (large) static library. This mode is
more complex to use. If you encounter errors, you may need to build ICU
multiple times.
* `--with-data-packaging=auto`
With this option, `configure` will pick `library` unless the options
`--enable-static` and `--disable-shared` are also given, in which case
it'll pick `static` instead.
* `--with-data-packaging=files`
With this option, ICU outputs separate individual files (.res, .cnv, etc)
which will be loaded at runtime. Read the rest of this document, especially

View file

@ -391,11 +391,7 @@ and `Locale::createCanonical`. The latter API exists in both C++ and Java.
es@collation=traditional, hi__DIRECT => hi@collation=direct, zh_TW_STROKE =>
zh_TW@collation=stroke, zh__PINYIN => zh@collation=pinyin.
9. Variants specifying a calendar are remapped to calendar keyword specifiers,
as follows: ja_JP_TRADITIONAL => ja_JP@calendar=japanese, th_TH_TRADITIONAL
=> th_TH@calendar=buddhist.
10. Special case: C => en_US_POSIX.
9. Special case: C => en_US_POSIX.
Certain other operations are not performed by either level 1 or level 2
canonicalization. These are listed here for completeness.

View file

@ -503,7 +503,7 @@ the position, for those cases:
The first rule will convert "x", when preceded by a vowel, into "ks". The
transform will then backup to the position before the vowel and continue. In the
next pass, the "ak" will match and be invoked. Thus, if the source text is "ax",
the result will be "ack".
the result will be "acks".
> :point_right: **Note**: *Although you can move the cursor forward or backward, it is limited in two
ways: (a) to the text that is matched, (b) within the original substring that is

View file

@ -449,7 +449,7 @@ AC_DEFUN([AC_CHECK_64BIT_LIBS],
AC_DEFUN([AC_CHECK_STRICT_COMPILE],
[
AC_MSG_CHECKING([whether strict compiling is on])
AC_ARG_ENABLE(strict,[ --enable-strict compile with strict compiler options [default=yes]], [
AC_ARG_ENABLE(strict,[ --disable-strict do not compile with strict compiler options], [
if test "$enableval" = no
then
ac_use_strict_options=no

View file

@ -3,6 +3,6 @@
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set the ICU Major Version number, which is used as a suffix on various file names in other Visual Studio projects. -->
<PropertyGroup>
<IcuMajorVersion>76</IcuMajorVersion>
<IcuMajorVersion>77</IcuMajorVersion>
</PropertyGroup>
</Project>

View file

@ -59,7 +59,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
{
char fnbuff[256];
char ext[4]={'\0'};
CharString actualLocale;
CharString actual;
int32_t size;
const char16_t* brkfname = nullptr;
UResourceBundle brkRulesStack;
@ -94,7 +94,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
// Use the string if we found it
if (U_SUCCESS(status) && brkfname) {
actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
actual.append(ures_getLocaleInternal(brkName, &status), -1, status);
char16_t* extStart=u_strchr(brkfname, 0x002e);
int len = 0;
@ -123,10 +123,9 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
if (U_SUCCESS(status) && result != nullptr) {
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY);
result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actual.data(), status);
LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status);
}
ures_close(b);
@ -206,26 +205,32 @@ BreakIterator::getAvailableLocales(int32_t& count)
// Default constructor.
// NOTE(review): the statement below writes 0 through validLocale, actualLocale
// and requestLocale. The destructor and getLocale()/getLocaleID() in this file
// treat those members as nullable CharString pointers, so this looks like the
// pre-change line of the diff hunk -- confirm it is absent from the final file.
BreakIterator::BreakIterator()
{
*validLocale = *actualLocale = *requestLocale = 0;
}
// Copy constructor: duplicates the valid, actual and requested locale IDs of
// `other` into this object.
// NOTE(review): this hunk shows two copy strategies back to back -- the
// uprv_strncpy() calls and the LocaleBased calls. Presumably only the
// LocaleBased form remains after the change; confirm against the final file.
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
UErrorCode status = U_ZERO_ERROR;
// Wrap this object's validLocale/actualLocale members so they can be set together.
U_LOCALE_BASED(locBased, *this);
locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
// requestLocale is not covered by the wrapper, so it is copied separately.
LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
// status is local to the constructor; a failure is only visible through this assertion.
U_ASSERT(U_SUCCESS(status));
}
// Copy assignment: copies the valid, actual and requested locale IDs of
// `other`; self-assignment is a no-op. Returns *this.
// NOTE(review): as in the copy constructor, this hunk shows both the
// uprv_strncpy() copies and the LocaleBased copies; presumably only the
// latter remain after the change -- confirm against the final file.
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
if (this != &other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
UErrorCode status = U_ZERO_ERROR;
// Wrap this object's validLocale/actualLocale members so they can be set together.
U_LOCALE_BASED(locBased, *this);
locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
// requestLocale is not covered by the wrapper, so it is copied separately.
LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
// operator= has no error-code parameter; a failure is only visible through this assertion.
U_ASSERT(U_SUCCESS(status));
}
return *this;
}
BreakIterator::~BreakIterator()
{
    // Release the three locale-ID strings held by this iterator.
    // delete on a null pointer is a no-op, so IDs that were never set need no check.
    delete requestLocale;
    delete actualLocale;
    delete validLocale;
}
// ------------------------------------------
@ -394,7 +399,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
// revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status);
}
return result;
}
@ -488,6 +493,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
}
if (U_FAILURE(status)) {
delete result;
return nullptr;
}
@ -496,20 +502,25 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
// Returns the requested, valid or actual locale of this iterator as a Locale.
// @param type   which locale to return; ULOC_REQUESTED_LOCALE is answered here,
//               any other value is forwarded to LocaleBased
// @param status input-output error code
// NOTE(review): this hunk interleaves two versions of the body (note the
// duplicated ULOC_REQUESTED_LOCALE test). In the pointer-based version an
// incoming failure status yields Locale::getRoot(), and an unset (null)
// requestLocale also yields Locale::getRoot().
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) {
return {requestLocale};
if (U_FAILURE(status)) {
return Locale::getRoot();
}
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale == nullptr ?
Locale::getRoot() : Locale(requestLocale->data());
}
// Valid/actual lookup (and rejection of other type values) is delegated to LocaleBased.
return LocaleBased::getLocale(validLocale, actualLocale, type, status);
}
// Returns the requested, valid or actual locale ID of this iterator as a C string.
// @param type   which locale ID to return; ULOC_REQUESTED_LOCALE is answered here,
//               any other value is forwarded to LocaleBased
// @param status input-output error code
// NOTE(review): this hunk interleaves two versions of the body (note the
// duplicated ULOC_REQUESTED_LOCALE test). In the pointer-based version an
// incoming failure status yields nullptr, and an unset (null) requestLocale
// yields the empty string rather than nullptr.
const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale;
if (U_FAILURE(status)) {
return nullptr;
}
U_LOCALE_BASED(locBased, *this);
return locBased.getLocaleID(type, status);
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale == nullptr ? "" : requestLocale->data();
}
// Valid/actual lookup (and rejection of other type values) is delegated to LocaleBased.
return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
}
@ -536,8 +547,10 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
}
// Constructs a BreakIterator whose valid and actual locale IDs are taken from
// the given Locale objects.
// NOTE(review): the hunk shows two setLocaleIDs() calls; the Locale-taking
// overload appears only as a removed declaration in locbased.h, so presumably
// only the getName()-based call remains -- confirm against the final file.
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, (*this));
locBased.setLocaleIDs(valid, actual);
locBased.setLocaleIDs(valid.getName(), actual.getName(), status);
// status is local to the constructor; a failure is only visible through this assertion.
U_ASSERT(U_SUCCESS(status));
}
U_NAMESPACE_END

View file

@ -70,6 +70,15 @@ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
return *this;
}
/**
 * Replaces the contents of this string with a copy of s.
 * Does nothing if errorCode already indicates a failure.
 * @param s the bytes to copy; may be empty
 * @param errorCode input-output error code, passed through to append()
 * @return *this
 */
CharString &CharString::copyFrom(StringPiece s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    len = 0;
    if (s.empty()) {
        // Nothing will be appended, so terminate explicitly; otherwise data()
        // could keep exposing the previous contents while length() reports 0.
        buffer[0] = 0;
    } else {
        append(s, errorCode);
    }
    return *this;
}
int32_t CharString::lastIndexOf(char c) const {
for(int32_t i=len; i>0;) {
if(buffer[--i]==c) {

View file

@ -74,6 +74,7 @@ public:
* use a UErrorCode where memory allocations might be needed.
*/
CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
CharString &copyFrom(StringPiece s, UErrorCode &errorCode);
UBool isEmpty() const { return len==0; }
int32_t length() const { return len; }

File diff suppressed because it is too large Load diff

View file

@ -12,44 +12,84 @@
*/
#include "locbased.h"
#include "cstring.h"
#include "charstr.h"
U_NAMESPACE_BEGIN
// Returns the valid or actual locale as a Locale object, built from the ID
// reported by getLocaleID(); a null ID is mapped to the empty string.
// NOTE(review): this hunk shows two signatures in a row -- a const member
// taking (type, status) and a form taking the valid/actual CharString
// pointers explicitly. Presumably only the second remains; confirm.
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
const char* id = getLocaleID(type, status);
Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status) {
const char* id = getLocaleID(valid, actual, type, status);
return Locale(id != nullptr ? id : "");
}
// Returns the valid or actual locale ID as a C string.
// @param type   ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE; any other value sets
//               status to U_ILLEGAL_ARGUMENT_ERROR and returns nullptr
// @param status input-output error code; an incoming failure returns nullptr
// NOTE(review): this hunk shows two signatures and, in each case branch, two
// return statements (raw pointer vs. CharString::data()). In the CharString
// form a null pointer is reported as the empty string, not nullptr.
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status) {
if (U_FAILURE(status)) {
return nullptr;
}
switch(type) {
case ULOC_VALID_LOCALE:
return valid;
return valid == nullptr ? "" : valid->data();
case ULOC_ACTUAL_LOCALE:
return actual;
return actual == nullptr ? "" : actual->data();
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
}
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
if (validID != nullptr) {
uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
}
if (actualID != nullptr) {
uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) {
setValidLocaleID(validID, status);
setActualLocaleID(actualID,status);
}
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) {
setValidLocaleID(validID, status);
setActualLocaleID(actualID,status);
}
// Stores the locale ID `id` in `dest`.
//  - null or empty id: dest is deleted and reset to nullptr
//  - dest already allocated: its contents are overwritten in place
//  - otherwise: a new CharString is allocated for dest
// @param id     NUL-terminated locale ID, may be nullptr
// @param dest   pointer slot to update
// @param status input-output error code; nothing happens on incoming failure,
//               U_MEMORY_ALLOCATION_ERROR is set when allocation returns nullptr
void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    const bool noId = (id == nullptr) || (*id == 0);
    if (noId) {
        delete dest;
        dest = nullptr;
        return;
    }
    if (dest != nullptr) {
        dest->copyFrom(id, status);
        return;
    }
    dest = new CharString(id, status);
    if (dest == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
uprv_strcpy(valid, validID.getName());
uprv_strcpy(actual, actualID.getName());
// Stores a copy of the locale ID `*id` in `dest`.
//  - null or empty id: dest is deleted and reset to nullptr
//  - dest already allocated: its contents are overwritten in place
//  - otherwise: a new CharString is allocated for dest
// @param id     source ID, may be nullptr
// @param dest   pointer slot to update
// @param status input-output error code; nothing happens on incoming failure,
//               U_MEMORY_ALLOCATION_ERROR is set when allocation returns nullptr
void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    const bool noId = (id == nullptr) || id->isEmpty();
    if (noId) {
        delete dest;
        dest = nullptr;
        return;
    }
    if (dest != nullptr) {
        dest->copyFrom(*id, status);
        return;
    }
    dest = new CharString(*id, status);
    if (dest == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
// Compares two optional locale IDs.
// Two null pointers are equal, a null and a non-null pointer are not, and two
// non-null pointers are equal when the strings they point to compare equal.
bool LocaleBased::equalIDs(const CharString* left, const CharString* right) {
    if (left == nullptr || right == nullptr) {
        // At least one side is unset: equal only when both are.
        return left == right;
    }
    return *left == *right;
}
U_NAMESPACE_END

View file

@ -19,13 +19,14 @@
/**
* Macro to declare a locale LocaleBased wrapper object for the given
* object, which must have two members named `validLocale' and
* `actualLocale' of size ULOC_FULLNAME_CAPACITY
* `actualLocale', both of which are pointers to an internal icu::CharString.
*/
#define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale)
U_NAMESPACE_BEGIN
class CharString;
/**
* A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the
@ -41,33 +42,35 @@ class U_COMMON_API LocaleBased : public UMemory {
* Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object.
*/
inline LocaleBased(char* validAlias, char* actualAlias);
/**
* Construct a LocaleBased wrapper around the two const pointers.
* These will be aliased for the lifetime of this object.
*/
inline LocaleBased(const char* validAlias, const char* actualAlias);
inline LocaleBased(CharString*& validAlias, CharString*& actualAlias);
/**
* Return locale meta-data for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param valid The valid locale.
* @param actual The actual locale.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale
*/
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
static Locale getLocale(
const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status);
/**
* Return the locale ID for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param valid The valid locale.
* @param actual The actual locale.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale ID
*/
const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
static const char* getLocaleID(
const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status);
/**
* Set the locale meta-data for the service object wrapped by this
@ -75,31 +78,40 @@ class U_COMMON_API LocaleBased : public UMemory {
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const char* valid, const char* actual);
void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status);
void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status);
/**
* Set the locale meta-data for the service object wrapped by this
* object.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const Locale& valid, const Locale& actual);
static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status);
static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status);
static bool equalIDs(const CharString* left, const CharString* right);
private:
char* valid;
char* actual;
void setValidLocaleID(const CharString* id, UErrorCode& status);
void setActualLocaleID(const CharString* id, UErrorCode& status);
void setValidLocaleID(const char* id, UErrorCode& status);
void setActualLocaleID(const char* id, UErrorCode& status);
CharString*& valid;
CharString*& actual;
};
inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
// Binds the wrapper to the caller's two pointer slots by reference, so that
// setLocaleIDs() can allocate, replace or reset the caller's own pointers.
inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias)
    : valid(validAlias),
      actual(actualAlias)
{
}
inline LocaleBased::LocaleBased(const char* validAlias,
const char* actualAlias) :
// ugh: cast away const
valid(const_cast<char*>(validAlias)), actual(const_cast<char*>(actualAlias)) {
// Stores `id` into the wrapped valid-locale slot.
inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) {
    LocaleBased::setLocaleID(id, valid, status);
}
// Stores `id` into the wrapped actual-locale slot.
inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) {
    LocaleBased::setLocaleID(id, actual, status);
}
// C-string overload: stores `id` into the wrapped valid-locale slot.
inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) {
    LocaleBased::setLocaleID(id, valid, status);
}
// C-string overload: stores `id` into the wrapped actual-locale slot.
inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) {
    LocaleBased::setLocaleID(id, actual, status);
}
U_NAMESPACE_END

View file

@ -19,6 +19,8 @@
* that then do not depend on resource bundle code and display name data.
*/
#include <string_view>
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/locid.h"
@ -359,7 +361,7 @@ _getStringOrCopyKey(const char *path, const char *locale,
return u_terminateUChars(dest, destCapacity, length, &errorCode);
}
using UDisplayNameGetter = icu::CharString(const char*, UErrorCode&);
using UDisplayNameGetter = icu::CharString(std::string_view, UErrorCode&);
int32_t
_getDisplayNameForComponent(const char *locale,
@ -377,6 +379,10 @@ _getDisplayNameForComponent(const char *locale,
return 0;
}
if (locale == nullptr) {
locale = uloc_getDefault();
}
localStatus = U_ZERO_ERROR;
icu::CharString localeBuffer = (*getter)(locale, localStatus);
if (U_FAILURE(localStatus)) {

View file

@ -349,7 +349,9 @@ uloc_isRightToLeft(const char *locale) {
UErrorCode errorCode = U_ZERO_ERROR;
icu::CharString lang;
icu::CharString script;
ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, errorCode);
ulocimp_getSubtags(
locale == nullptr ? uloc_getDefault() : locale,
&lang, &script, nullptr, nullptr, nullptr, errorCode);
if (U_FAILURE(errorCode) || script.isEmpty()) {
// Fastpath: We know the likely scripts and their writing direction
// for some common languages.
@ -369,7 +371,7 @@ uloc_isRightToLeft(const char *locale) {
if (U_FAILURE(errorCode)) {
return false;
}
ulocimp_getSubtags(likely.data(), nullptr, &script, nullptr, nullptr, nullptr, errorCode);
ulocimp_getSubtags(likely.toStringPiece(), nullptr, &script, nullptr, nullptr, nullptr, errorCode);
if (U_FAILURE(errorCode) || script.isEmpty()) {
return false;
}
@ -430,7 +432,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
icu::CharString rgBuf = GetRegionFromKey(localeID, "rg", status);
if (U_SUCCESS(status) && rgBuf.isEmpty()) {
// No valid rg keyword value, try for unicode_region_subtag
rgBuf = ulocimp_getRegion(localeID, status);
rgBuf = ulocimp_getRegion(localeID == nullptr ? uloc_getDefault() : localeID, status);
if (U_SUCCESS(status) && rgBuf.isEmpty() && inferRegion) {
// Second check for sd keyword value
rgBuf = GetRegionFromKey(localeID, "sd", status);
@ -439,7 +441,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
UErrorCode rgStatus = U_ZERO_ERROR;
icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus);
if (U_SUCCESS(rgStatus)) {
rgBuf = ulocimp_getRegion(locBuf.data(), status);
rgBuf = ulocimp_getRegion(locBuf.toStringPiece(), status);
}
}
}

View file

@ -527,7 +527,7 @@ LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
return {};
}
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
if (!returnInputIfUnmatch && uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
// Private use language tag x-subtag-subtag... which CLDR changes to
// und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR);

View file

@ -76,7 +76,7 @@
#include <float.h>
#ifndef U_COMMON_IMPLEMENTATION
#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html
#endif

View file

@ -2020,6 +2020,7 @@ static const struct CurrencyList {
{"ZRN", UCURR_COMMON|UCURR_DEPRECATED},
{"ZRZ", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWD", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWG", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"ZWL", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWR", UCURR_COMMON|UCURR_DEPRECATED},
{ nullptr, 0 } // Leave here to denote the end of the list.

View file

@ -482,8 +482,8 @@ constexpr CanonicalizationMap CANONICALIZE_MAP[] = {
/* ### BCP47 Conversion *******************************************/
/* Gets the size of the shortest subtag in the given localeID. */
int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
int32_t getShortestSubtagLength(std::string_view localeID) {
int32_t localeIDLength = static_cast<int32_t>(localeID.length());
int32_t length = localeIDLength;
int32_t tmpLength = 0;
int32_t i;
@ -507,8 +507,8 @@ int32_t getShortestSubtagLength(const char *localeID) {
return length;
}
/* Test if the locale id has BCP47 u extension and does not have '@' */
// Returns true when id contains no '@' and getShortestSubtagLength(id) is 1.
// NOTE(review): this hunk shows the const char* form and the std::string_view
// form back to back. The string_view form has no null check of its own; the
// call sites visible in this file test `localeID != nullptr` before calling it.
inline bool _hasBCP47Extension(const char *id) {
return id != nullptr && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(id) == 1;
inline bool _hasBCP47Extension(std::string_view id) {
return id.find('@') == std::string_view::npos && getShortestSubtagLength(id) == 1;
}
/* ### Keywords **************************************************/
@ -523,10 +523,9 @@ inline bool UPRV_OK_VALUE_PUNCTUATION(char c) { return c == '_' || c == '-' || c
#define ULOC_MAX_NO_KEYWORDS 25
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
const char *result = nullptr;
if((result = uprv_strchr(localeID, '@')) != nullptr) {
return result;
locale_getKeywordsStart(std::string_view localeID) {
if (size_t pos = localeID.find('@'); pos != std::string_view::npos) {
return localeID.data() + pos;
}
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
else {
@ -536,8 +535,8 @@ locale_getKeywordsStart(const char *localeID) {
static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
const uint8_t *charToFind = ebcdicSigns;
while(*charToFind) {
if((result = uprv_strchr(localeID, *charToFind)) != nullptr) {
return result;
if (size_t pos = localeID.find(*charToFind); pos != std::string_view::npos) {
return localeID.data() + pos;
}
charToFind++;
}
@ -784,7 +783,7 @@ ulocimp_getKeywordValue(const char* localeID,
return;
}
if (_hasBCP47Extension(localeID)) {
if (localeID != nullptr && _hasBCP47Extension(localeID)) {
tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
@ -889,7 +888,8 @@ uloc_setKeywordValue(const char* keywordName,
return 0;
}
char* keywords = const_cast<char*>(locale_getKeywordsStart(buffer));
char* keywords = const_cast<char*>(
locale_getKeywordsStart({buffer, static_cast<std::string_view::size_type>(bufLen)}));
int32_t baseLen = keywords == nullptr ? bufLen : keywords - buffer;
// Remove -1 from the capacity so that this function can guarantee NUL termination.
CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
@ -921,7 +921,7 @@ ulocimp_setKeywordValue(std::string_view keywordName,
{
if (U_FAILURE(status)) { return; }
std::string_view keywords;
if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) {
if (const char* start = locale_getKeywordsStart(localeID.toStringPiece()); start != nullptr) {
// This is safe because CharString::truncate() doesn't actually erase any
// data, but simply sets the position for where new data will be written.
int32_t size = start - localeID.data();
@ -1138,15 +1138,18 @@ inline bool _isPrefixLetter(char a) { return a == 'x' || a == 'X' || a == 'i' ||
/*returns true if one of the special prefixes is here (s=string)
'x-' or 'i-' */
inline bool _isIDPrefix(const char *s) { return _isPrefixLetter(s[0]) && _isIDSeparator(s[1]); }
inline bool _isIDPrefix(std::string_view s) {
    // Fewer than two characters cannot hold a prefix letter plus a separator;
    // reject before indexing.
    if (s.size() < 2) {
        return false;
    }
    return _isPrefixLetter(s[0]) && _isIDSeparator(s[1]);
}
/* Dot terminates it because of POSIX form where dot precedes the codepage
* except for variant
*/
// NOTE(review): two definitions appear here because this is a diff hunk; they
// differ only in whether NUL counts as a terminator. The std::string_view
// scanners in this file bound their loops with `len < localeID.size()`
// (or `index < localeID.size()`) instead of relying on a NUL sentinel.
inline bool _isTerminator(char a) { return a == 0 || a == '.' || a == '@'; }
inline bool _isTerminator(char a) { return a == '.' || a == '@'; }
inline bool _isBCP47Extension(const char* p) {
return p[0] == '-' &&
inline bool _isBCP47Extension(std::string_view p) {
return p.size() >= 3 &&
p[0] == '-' &&
(p[1] == 't' || p[1] == 'T' ||
p[1] == 'u' || p[1] == 'U' ||
p[1] == 'x' || p[1] == 'X') &&
@ -1202,49 +1205,44 @@ namespace {
* TODO try to use this in Locale
*/
void
_getLanguage(const char* localeID,
ByteSink* sink,
const char** pEnd,
UErrorCode& status) {
U_ASSERT(pEnd != nullptr);
*pEnd = localeID;
if (uprv_stricmp(localeID, "root") == 0) {
localeID += 4;
} else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
(localeID[3] == '\0' ||
size_t _getLanguage(std::string_view localeID, ByteSink* sink, UErrorCode& status) {
size_t skip = 0;
if (localeID.size() == 4 && uprv_strnicmp(localeID.data(), "root", 4) == 0) {
skip = 4;
localeID.remove_prefix(skip);
} else if (localeID.size() >= 3 && uprv_strnicmp(localeID.data(), "und", 3) == 0 &&
(localeID.size() == 3 ||
localeID[3] == '-' ||
localeID[3] == '_' ||
localeID[3] == '@')) {
localeID += 3;
skip = 3;
localeID.remove_prefix(skip);
}
constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1; // Minus NUL.
/* if it starts with i- or x- then copy that prefix */
int32_t len = _isIDPrefix(localeID) ? 2 : 0;
while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
size_t len = _isIDPrefix(localeID) ? 2 : 0;
while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
if (len == MAXLEN) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
return 0;
}
len++;
}
*pEnd = localeID + len;
if (sink == nullptr || len == 0) { return; }
if (sink == nullptr || len == 0) { return skip + len; }
int32_t minCapacity = uprv_max(len, 4); // Minimum 3 letters plus NUL.
int32_t minCapacity = uprv_max(static_cast<int32_t>(len), 4); // Minimum 3 letters plus NUL.
char scratch[MAXLEN];
int32_t capacity = 0;
char* buffer = sink->GetAppendBuffer(
minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
for (int32_t i = 0; i < len; ++i) {
for (size_t i = 0; i < len; ++i) {
buffer[i] = uprv_tolower(localeID[i]);
}
if (_isIDSeparator(localeID[1])) {
if (localeID.size() >= 2 && _isIDSeparator(localeID[1])) {
buffer[1] = '-';
}
@ -1256,32 +1254,26 @@ _getLanguage(const char* localeID,
if (offset.has_value()) {
const char* const alias = LANGUAGES[*offset];
sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
return;
return skip + len;
}
}
sink->Append(buffer, len);
sink->Append(buffer, static_cast<int32_t>(len));
return skip + len;
}
void
_getScript(const char* localeID,
ByteSink* sink,
const char** pEnd) {
U_ASSERT(pEnd != nullptr);
*pEnd = localeID;
size_t _getScript(std::string_view localeID, ByteSink* sink) {
constexpr int32_t LENGTH = 4;
int32_t len = 0;
while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
size_t len = 0;
while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
uprv_isASCIILetter(localeID[len])) {
if (len == LENGTH) { return; }
if (len == LENGTH) { return 0; }
len++;
}
if (len != LENGTH) { return; }
if (len != LENGTH) { return 0; }
*pEnd = localeID + LENGTH;
if (sink == nullptr) { return; }
if (sink == nullptr) { return len; }
char scratch[LENGTH];
int32_t capacity = 0;
@ -1294,27 +1286,21 @@ _getScript(const char* localeID,
}
sink->Append(buffer, LENGTH);
return len;
}
void
_getRegion(const char* localeID,
ByteSink* sink,
const char** pEnd) {
U_ASSERT(pEnd != nullptr);
*pEnd = localeID;
size_t _getRegion(std::string_view localeID, ByteSink* sink) {
constexpr int32_t MINLEN = 2;
constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1; // Minus NUL.
int32_t len = 0;
while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
if (len == MAXLEN) { return; }
size_t len = 0;
while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
if (len == MAXLEN) { return 0; }
len++;
}
if (len < MINLEN) { return; }
if (len < MINLEN) { return 0; }
*pEnd = localeID + len;
if (sink == nullptr) { return; }
if (sink == nullptr) { return len; }
char scratch[ULOC_COUNTRY_CAPACITY];
int32_t capacity = 0;
@ -1325,7 +1311,7 @@ _getRegion(const char* localeID,
UPRV_LENGTHOF(scratch),
&capacity);
for (int32_t i = 0; i < len; ++i) {
for (size_t i = 0; i < len; ++i) {
buffer[i] = uprv_toupper(localeID[i]);
}
@ -1337,26 +1323,25 @@ _getRegion(const char* localeID,
if (offset.has_value()) {
const char* const alias = COUNTRIES[*offset];
sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
return;
return len;
}
}
sink->Append(buffer, len);
sink->Append(buffer, static_cast<int32_t>(len));
return len;
}
/**
* @param needSeparator if true, then add leading '_' if any variants
* are added to 'variant'
*/
void
_getVariant(const char* localeID,
size_t
_getVariant(std::string_view localeID,
char prev,
ByteSink* sink,
const char** pEnd,
bool needSeparator,
UErrorCode& status) {
if (U_FAILURE(status)) return;
if (pEnd != nullptr) { *pEnd = localeID; }
if (U_FAILURE(status)) return 0;
// Reasonable upper limit for variants
// There are no strict limitation of the syntax of variant in the legacy
@ -1369,13 +1354,13 @@ _getVariant(const char* localeID,
constexpr int32_t MAX_VARIANTS_LENGTH = 179;
/* get one or more variant tags and separate them with '_' */
int32_t index = 0;
size_t index = 0;
if (_isIDSeparator(prev)) {
/* get a variant string after a '-' or '_' */
for (index=0; !_isTerminator(localeID[index]); index++) {
for (; index < localeID.size() && !_isTerminator(localeID[index]); index++) {
if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
return 0;
}
if (needSeparator) {
if (sink != nullptr) {
@ -1389,43 +1374,46 @@ _getVariant(const char* localeID,
sink->Append(&c, 1);
}
}
if (pEnd != nullptr) { *pEnd = localeID+index; }
}
/* if there is no variant tag after a '-' or '_' then look for '@' */
if (index == 0) {
if (prev=='@') {
/* keep localeID */
} else if((localeID=locale_getKeywordsStart(localeID))!=nullptr) {
++localeID; /* point after the '@' */
} else {
return;
}
for(; !_isTerminator(localeID[index]); index++) {
if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (needSeparator) {
if (sink != nullptr) {
sink->Append("_", 1);
}
needSeparator = false;
}
if (sink != nullptr) {
char c = uprv_toupper(localeID[index]);
if (c == '-' || c == ',') c = '_';
sink->Append(&c, 1);
}
}
if (pEnd != nullptr) { *pEnd = localeID + index; }
if (index > 0) {
return index;
}
size_t skip = 0;
/* if there is no variant tag after a '-' or '_' then look for '@' */
if (prev == '@') {
/* keep localeID */
} else if (const char* p = locale_getKeywordsStart(localeID); p != nullptr) {
skip = 1 + p - localeID.data(); /* point after the '@' */
localeID.remove_prefix(skip);
} else {
return 0;
}
for (; index < localeID.size() && !_isTerminator(localeID[index]); index++) {
if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
if (needSeparator) {
if (sink != nullptr) {
sink->Append("_", 1);
}
needSeparator = false;
}
if (sink != nullptr) {
char c = uprv_toupper(localeID[index]);
if (c == '-' || c == ',') c = '_';
sink->Append(&c, 1);
}
}
return skip + index;
}
} // namespace
U_EXPORT CharString
ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
ulocimp_getLanguage(std::string_view localeID, UErrorCode& status) {
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
@ -1441,7 +1429,7 @@ ulocimp_getLanguage(const char* localeID, UErrorCode& status) {
}
U_EXPORT CharString
ulocimp_getScript(const char* localeID, UErrorCode& status) {
ulocimp_getScript(std::string_view localeID, UErrorCode& status) {
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
@ -1457,7 +1445,7 @@ ulocimp_getScript(const char* localeID, UErrorCode& status) {
}
U_EXPORT CharString
ulocimp_getRegion(const char* localeID, UErrorCode& status) {
ulocimp_getRegion(std::string_view localeID, UErrorCode& status) {
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
@ -1473,7 +1461,7 @@ ulocimp_getRegion(const char* localeID, UErrorCode& status) {
}
U_EXPORT CharString
ulocimp_getVariant(const char* localeID, UErrorCode& status) {
ulocimp_getVariant(std::string_view localeID, UErrorCode& status) {
return ByteSinkUtil::viaByteSinkToCharString(
[&](ByteSink& sink, UErrorCode& status) {
ulocimp_getSubtags(
@ -1490,7 +1478,7 @@ ulocimp_getVariant(const char* localeID, UErrorCode& status) {
U_EXPORT void
ulocimp_getSubtags(
const char* localeID,
std::string_view localeID,
CharString* language,
CharString* script,
CharString* region,
@ -1521,7 +1509,7 @@ ulocimp_getSubtags(
U_EXPORT void
ulocimp_getSubtags(
const char* localeID,
std::string_view localeID,
ByteSink* language,
ByteSink* script,
ByteSink* region,
@ -1531,7 +1519,7 @@ ulocimp_getSubtags(
if (U_FAILURE(status)) { return; }
if (pEnd != nullptr) {
*pEnd = localeID;
*pEnd = localeID.data();
} else if (language == nullptr &&
script == nullptr &&
region == nullptr &&
@ -1539,62 +1527,61 @@ ulocimp_getSubtags(
return;
}
if (localeID.empty()) { return; }
bool hasRegion = false;
if (localeID == nullptr) {
localeID = uloc_getDefault();
{
size_t len = _getLanguage(localeID, language, status);
if (U_FAILURE(status)) { return; }
if (len > 0) {
localeID.remove_prefix(len);
}
}
_getLanguage(localeID, language, &localeID, status);
if (U_FAILURE(status)) { return; }
U_ASSERT(localeID != nullptr);
if (pEnd != nullptr) {
*pEnd = localeID;
*pEnd = localeID.data();
} else if (script == nullptr &&
region == nullptr &&
variant == nullptr) {
return;
}
if (_isIDSeparator(*localeID)) {
const char* begin = localeID + 1;
const char* end = nullptr;
_getScript(begin, script, &end);
U_ASSERT(end != nullptr);
if (end != begin) {
localeID = end;
if (pEnd != nullptr) { *pEnd = localeID; }
if (localeID.empty()) { return; }
if (_isIDSeparator(localeID.front())) {
std::string_view sub = localeID;
sub.remove_prefix(1);
size_t len = _getScript(sub, script);
if (len > 0) {
localeID.remove_prefix(len + 1);
if (pEnd != nullptr) { *pEnd = localeID.data(); }
}
}
if (region == nullptr && variant == nullptr && pEnd == nullptr) { return; }
if ((region == nullptr && variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }
if (_isIDSeparator(*localeID)) {
const char* begin = localeID + 1;
const char* end = nullptr;
_getRegion(begin, region, &end);
U_ASSERT(end != nullptr);
if (end != begin) {
if (_isIDSeparator(localeID.front())) {
std::string_view sub = localeID;
sub.remove_prefix(1);
size_t len = _getRegion(sub, region);
if (len > 0) {
hasRegion = true;
localeID = end;
if (pEnd != nullptr) { *pEnd = localeID; }
localeID.remove_prefix(len + 1);
if (pEnd != nullptr) { *pEnd = localeID.data(); }
}
}
if (variant == nullptr && pEnd == nullptr) { return; }
if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }
if (_isIDSeparator(*localeID) && !_isBCP47Extension(localeID)) {
if (_isIDSeparator(localeID.front()) && !_isBCP47Extension(localeID)) {
std::string_view sub = localeID;
/* If there was no country ID, skip a possible extra IDSeparator */
if (!hasRegion && _isIDSeparator(localeID[1])) {
localeID++;
}
const char* begin = localeID + 1;
const char* end = nullptr;
_getVariant(begin, *localeID, variant, &end, false, status);
size_t skip = !hasRegion && localeID.size() > 1 && _isIDSeparator(localeID[1]) ? 2 : 1;
sub.remove_prefix(skip);
size_t len = _getVariant(sub, localeID[0], variant, false, status);
if (U_FAILURE(status)) { return; }
U_ASSERT(end != nullptr);
if (end != begin && pEnd != nullptr) { *pEnd = end; }
if (len > 0 && pEnd != nullptr) { *pEnd = localeID.data() + skip + len; }
}
}
@ -1700,7 +1687,7 @@ uloc_openKeywords(const char* localeID,
CharString tempBuffer;
const char* tmpLocaleID;
if (_hasBCP47Extension(localeID)) {
if (localeID != nullptr && _hasBCP47Extension(localeID)) {
tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
@ -1769,7 +1756,7 @@ _canonicalize(const char* localeID,
const char* keywordAssign = nullptr;
const char* separatorIndicator = nullptr;
if (_hasBCP47Extension(localeID)) {
if (localeID != nullptr && _hasBCP47Extension(localeID)) {
const char* localeIDPtr = localeID;
// convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
@ -1895,7 +1882,7 @@ _canonicalize(const char* localeID,
}
CharStringByteSink s(&tag);
_getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty(), err);
_getVariant(tmpLocaleID+1, '@', &s, !variant.isEmpty(), err);
if (U_FAILURE(err)) { return; }
}
@ -1989,6 +1976,10 @@ uloc_getLanguage(const char* localeID,
int32_t languageCapacity,
UErrorCode* err)
{
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
/* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
return ByteSinkUtil::viaByteSinkToTerminatedChars(
language, languageCapacity,
@ -2011,6 +2002,10 @@ uloc_getScript(const char* localeID,
int32_t scriptCapacity,
UErrorCode* err)
{
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
return ByteSinkUtil::viaByteSinkToTerminatedChars(
script, scriptCapacity,
[&](ByteSink& sink, UErrorCode& status) {
@ -2032,6 +2027,10 @@ uloc_getCountry(const char* localeID,
int32_t countryCapacity,
UErrorCode* err)
{
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
return ByteSinkUtil::viaByteSinkToTerminatedChars(
country, countryCapacity,
[&](ByteSink& sink, UErrorCode& status) {
@ -2053,6 +2052,10 @@ uloc_getVariant(const char* localeID,
int32_t variantCapacity,
UErrorCode* err)
{
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
return ByteSinkUtil::viaByteSinkToTerminatedChars(
variant, variantCapacity,
[&](ByteSink& sink, UErrorCode& status) {

View file

@ -1043,7 +1043,7 @@ _initializeULanguageTag(ULanguageTag* langtag) {
}
void
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
_appendLanguageToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
UErrorCode tmpStatus = U_ZERO_ERROR;
if (U_FAILURE(status)) {
@ -1088,7 +1088,7 @@ _appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str
}
void
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
_appendScriptToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
UErrorCode tmpStatus = U_ZERO_ERROR;
if (U_FAILURE(status)) {
@ -1118,7 +1118,7 @@ _appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool stric
}
void
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
_appendRegionToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
UErrorCode tmpStatus = U_ZERO_ERROR;
if (U_FAILURE(status)) {
@ -1169,7 +1169,7 @@ void _sortVariants(VariantListEntry* first) {
}
void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {
_appendVariantsToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
UErrorCode tmpStatus = U_ZERO_ERROR;
@ -1872,7 +1872,7 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode& status)
}
void
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {
_appendPrivateuseToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
UErrorCode tmpStatus = U_ZERO_ERROR;
@ -2604,7 +2604,7 @@ ulocimp_toLanguageTag(const char* localeID,
}
/* For handling special case - private use only tag */
pKeywordStart = locale_getKeywordsStart(canonical.data());
pKeywordStart = locale_getKeywordsStart(canonical.toStringPiece());
if (pKeywordStart == canonical.data()) {
int kwdCnt = 0;
bool done = false;
@ -2642,12 +2642,12 @@ ulocimp_toLanguageTag(const char* localeID,
}
}
_appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
_appendScriptToLanguageTag(canonical.data(), sink, strict, status);
_appendRegionToLanguageTag(canonical.data(), sink, strict, status);
_appendVariantsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
_appendLanguageToLanguageTag(canonical.toStringPiece(), sink, strict, status);
_appendScriptToLanguageTag(canonical.toStringPiece(), sink, strict, status);
_appendRegionToLanguageTag(canonical.toStringPiece(), sink, strict, status);
_appendVariantsToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status);
_appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
_appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
_appendPrivateuseToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status);
}

View file

@ -119,16 +119,16 @@ ulocimp_getKeywordValue(const char* localeID,
UErrorCode& status);
U_EXPORT icu::CharString
ulocimp_getLanguage(const char* localeID, UErrorCode& status);
ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString
ulocimp_getScript(const char* localeID, UErrorCode& status);
ulocimp_getScript(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString
ulocimp_getRegion(const char* localeID, UErrorCode& status);
ulocimp_getRegion(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString
ulocimp_getVariant(const char* localeID, UErrorCode& status);
ulocimp_getVariant(std::string_view localeID, UErrorCode& status);
U_EXPORT void
ulocimp_setKeywordValue(std::string_view keywordName,
@ -145,7 +145,7 @@ ulocimp_setKeywordValue(std::string_view keywords,
U_EXPORT void
ulocimp_getSubtags(
const char* localeID,
std::string_view localeID,
icu::CharString* language,
icu::CharString* script,
icu::CharString* region,
@ -155,7 +155,7 @@ ulocimp_getSubtags(
U_EXPORT void
ulocimp_getSubtags(
const char* localeID,
std::string_view localeID,
icu::ByteSink* language,
icu::ByteSink* script,
icu::ByteSink* region,
@ -165,7 +165,7 @@ ulocimp_getSubtags(
inline void
ulocimp_getSubtags(
const char* localeID,
std::string_view localeID,
std::nullptr_t,
std::nullptr_t,
std::nullptr_t,
@ -364,7 +364,7 @@ ulocimp_minimizeSubtags(const char* localeID,
UErrorCode& err);
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);
locale_getKeywordsStart(std::string_view localeID);
bool
ultag_isExtensionSubtags(const char* s, int32_t len);

View file

@ -237,7 +237,7 @@ typedef HANDLE MemoryMap;
pData->map = (char *)data + length;
pData->pHeader=(const DataHeader *)data;
pData->mapAddr = data;
#if U_PLATFORM == U_PF_IPHONE
#if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID
posix_madvise(data, length, POSIX_MADV_RANDOM);
#endif
return true;

View file

@ -58,6 +58,8 @@ U_NAMESPACE_END
U_NAMESPACE_BEGIN
class CharString;
/**
* The BreakIterator class implements methods for finding the location
* of boundaries in text. BreakIterator is an abstract base class.
@ -646,9 +648,9 @@ protected:
private:
/** @internal (private) */
char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY];
char requestLocale[ULOC_FULLNAME_CAPACITY];
CharString* actualLocale = nullptr;
CharString* validLocale = nullptr;
CharString* requestLocale = nullptr;
};
#ifndef U_HIDE_DEPRECATED_API

View file

@ -9,10 +9,13 @@
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#include <cstddef>
#include <string_view>
#include <type_traits>
#endif
/**
* \file
@ -21,8 +24,6 @@
* Also conversion functions from char16_t * to UChar * and OldUChar *.
*/
U_NAMESPACE_BEGIN
/**
* \def U_ALIASING_BARRIER
* Barrier for pointer anti-aliasing optimizations even across function boundaries.
@ -36,6 +37,11 @@ U_NAMESPACE_BEGIN
# define U_ALIASING_BARRIER(ptr)
#endif
// ICU DLL-exported
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @stable ICU 59
@ -251,6 +257,34 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
U_NAMESPACE_END
#endif // U_SHOW_CPLUSPLUS_API
// Usable in header-only definitions
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
namespace U_ICU_NAMESPACE_OR_INTERNAL {
#ifndef U_FORCE_HIDE_INTERNAL_API
/**
* Converts a pointer to const UChar into a pointer to const char16_t.
*
* The enable_if constraint accepts only T == UChar, so other pointer types
* are rejected at compile time rather than being silently converted.
*
* If UChar is the same type as char16_t, the pointer is returned unchanged.
* Otherwise it is converted through ConstChar16Ptr when U_SHOW_CPLUSPLUS_API
* is enabled, or through reinterpret_cast (preceded by U_ALIASING_BARRIER
* when that macro is defined) when it is not.
*
* @param p the pointer to convert
* @return p viewed as a pointer to const char16_t
* @internal
*/
template<typename T, typename = std::enable_if_t<std::is_same_v<T, UChar>>>
inline const char16_t *uprv_char16PtrFromUChar(const T *p) {
if constexpr (std::is_same_v<UChar, char16_t>) {
// UChar already is char16_t: no conversion is needed.
return p;
} else {
#if U_SHOW_CPLUSPLUS_API
// The regular C++ API is visible: use the ConstChar16Ptr wrapper for the conversion.
return ConstChar16Ptr(p).get();
#else
// Header-only configuration: ConstChar16Ptr is not used on this path, so cast directly.
#ifdef U_ALIASING_BARRIER
// Invoke the aliasing barrier, when the macro is defined, before reinterpreting the pointer.
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<const char16_t *>(p);
#endif
}
}
#endif
/**
* Converts from const char16_t * to const UChar *.
* Includes an aliasing barrier if available.
@ -307,6 +341,15 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
return reinterpret_cast<OldUChar *>(p);
}
} // U_ICU_NAMESPACE_OR_INTERNAL
#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
// ICU DLL-exported
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
#ifndef U_FORCE_HIDE_INTERNAL_API
/**
* Is T convertible to a std::u16string_view or some other 16-bit string view?
@ -379,6 +422,6 @@ inline std::u16string_view toU16StringViewNullable(const T& text) {
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // U_SHOW_CPLUSPLUS_API
#endif // __CHAR16PTR_H__

View file

@ -1173,10 +1173,12 @@ public:
inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
}
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
* Returns a C++ iterator for iterating over all of the elements of this set.
* Convenient all-in one iteration, but creates a UnicodeString for each
* Convenient all-in-one iteration, but creates a std::u16string for each
* code point or string.
* (Similar to how Java UnicodeSet *is an* Iterable&lt;String&gt;.)
*
@ -1185,13 +1187,14 @@ public:
* \code
* UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
* for (auto el : set) {
* UnicodeString us(el);
* std::string u8;
* printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
* printf("set.string length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
* }
* \endcode
*
* @return an all-elements iterator.
* @draft ICU 76
* @draft ICU 77
* @see end
* @see codePoints
* @see ranges
@ -1203,7 +1206,7 @@ public:
/**
* @return an exclusive-end sentinel for iterating over all of the elements of this set.
* @draft ICU 76
* @draft ICU 77
* @see begin
* @see codePoints
* @see ranges

View file

@ -1820,7 +1820,6 @@
#define usnum_setMinimumIntegerDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumIntegerDigits)
#define usnum_setSign U_ICU_ENTRY_POINT_RENAME(usnum_setSign)
#define usnum_setToInt64 U_ICU_ENTRY_POINT_RENAME(usnum_setToInt64)
#define usnum_truncateStart U_ICU_ENTRY_POINT_RENAME(usnum_truncateStart)
#define usnumf_close U_ICU_ENTRY_POINT_RENAME(usnumf_close)
#define usnumf_format U_ICU_ENTRY_POINT_RENAME(usnumf_format)
#define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64)

View file

@ -32,12 +32,13 @@
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#if U_SHOW_CPLUSPLUS_API
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#include <string>
#include <string_view>
#include "unicode/char16ptr.h"
#include "unicode/localpointer.h"
#include "unicode/unistr.h"
#endif // U_SHOW_CPLUSPLUS_API
#include "unicode/utf16.h"
#endif
#ifndef USET_DEFINED
@ -1657,7 +1658,7 @@ public:
int32_t length;
const UChar *uchars = uset_getString(uset, index, &length);
// assert uchars != nullptr;
return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)};
return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
}
return {};
}
@ -1737,17 +1738,19 @@ private:
const USet *uset;
int32_t count;
};
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
* Iterator returned by USetElements.
* @draft ICU 76
* @draft ICU 77
*/
class USetElementIterator {
public:
/** @draft ICU 76 */
/** @draft ICU 77 */
USetElementIterator(const USetElementIterator &other) = default;
/** @draft ICU 76 */
/** @draft ICU 77 */
bool operator==(const USetElementIterator &other) const {
// No need to compare rangeCount & end given private constructor
// and assuming we don't compare iterators across the set being modified.
@ -1756,26 +1759,28 @@ public:
return uset == other.uset && c == other.c && index == other.index;
}
/** @draft ICU 76 */
/** @draft ICU 77 */
bool operator!=(const USetElementIterator &other) const {
// Inequality is simply the negation of operator==.
const bool same = operator==(other);
return !same;
}
/** @draft ICU 76 */
UnicodeString operator*() const {
/** @draft ICU 77 */
std::u16string operator*() const {
if (c >= 0) {
return UnicodeString(c);
return c <= 0xffff ?
std::u16string({static_cast<char16_t>(c)}) :
std::u16string({U16_LEAD(c), U16_TRAIL(c)});
} else if (index < totalCount) {
int32_t length;
const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
// assert uchars != nullptr;
return UnicodeString(uchars, length);
return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
} else {
return UnicodeString();
return {};
}
}
/**
* Pre-increment.
* @draft ICU 76
* @draft ICU 77
*/
USetElementIterator &operator++() {
if (c < end) {
@ -1800,7 +1805,7 @@ public:
/**
* Post-increment.
* @draft ICU 76
* @draft ICU 77
*/
USetElementIterator operator++(int) {
USetElementIterator result(*this);
@ -1840,7 +1845,7 @@ private:
/**
* A C++ "range" for iterating over all of the elements of a USet.
* Convenient all-in one iteration, but creates a UnicodeString for each
* Convenient all-in-one iteration, but creates a std::u16string for each
* code point or string.
*
* Code points are returned first, then empty and multi-character strings.
@ -1849,15 +1854,16 @@ private:
* using U_HEADER_NESTED_NAMESPACE::USetElements;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
* for (auto el : USetElements(uset.getAlias())) {
* UnicodeString us(el);
* std::string u8;
* printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
* printf("uset.string length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
* }
* \endcode
*
* C++ UnicodeSet has member functions for iteration, including begin() and end().
*
* @return an all-elements iterator.
* @draft ICU 76
* @draft ICU 77
* @see USetCodePoints
* @see USetRanges
* @see USetStrings
@ -1866,21 +1872,21 @@ class USetElements {
public:
/**
* Constructs a C++ "range" object over all of the elements of the USet.
* @draft ICU 76
* @draft ICU 77
*/
USetElements(const USet *uset)
: uset(uset), rangeCount(uset_getRangeCount(uset)),
stringCount(uset_getStringCount(uset)) {}
/** @draft ICU 76 */
/** @draft ICU 77 */
USetElements(const USetElements &other) = default;
/** @draft ICU 76 */
/** @draft ICU 77 */
USetElementIterator begin() const {
// The iterator receives 0, the range count, and the combined range+string count.
const auto totalCount = rangeCount + stringCount;
return USetElementIterator(uset, 0, rangeCount, totalCount);
}
/** @draft ICU 76 */
/** @draft ICU 77 */
USetElementIterator end() const {
// The sentinel is built with the combined range+string count in the same
// argument position where begin() passes 0.
const auto totalCount = rangeCount + stringCount;
return USetElementIterator(uset, totalCount, rangeCount, totalCount);
}

View file

@ -124,7 +124,7 @@
* @internal
*/
U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict);
/**
* Function for handling "append code point" with error-checking.
@ -148,7 +148,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
* @internal
*/
U_CAPI UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict);
/**
* Function for handling "skip backward one code point" with error-checking.

View file

@ -53,19 +53,19 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 76
#define U_ICU_VERSION_MAJOR_NUM 77
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_MINOR_NUM 1
#define U_ICU_VERSION_MINOR_NUM 0
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
#define U_ICU_VERSION_PATCHLEVEL_NUM 1
/** The current ICU build level version as an integer.
* This value is for use by ICU clients. It defaults to 0.
@ -79,7 +79,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _76
#define U_ICU_VERSION_SUFFIX _77
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -132,7 +132,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "76.1"
#define U_ICU_VERSION "77.0.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -145,13 +145,13 @@
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "76"
#define U_ICU_VERSION_SHORT "77"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "76.1"
#define U_ICU_DATA_VERSION "77.0.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View file

@ -125,7 +125,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
U_NAMESPACE_USE
# endif
#ifndef U_HIDE_DRAFT_API
#ifndef U_FORCE_HIDE_DRAFT_API
/**
* \def U_HEADER_NESTED_NAMESPACE
* Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
@ -150,22 +150,37 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* @draft ICU 76
*/
/**
* \def U_ICU_NAMESPACE_OR_INTERNAL
* Namespace used for header-only APIs that used to be regular C++ APIs.
* Different when used inside ICU to prevent public use of internal instantiations.
* Similar to U_HEADER_ONLY_NAMESPACE, but the public definition is the same as U_ICU_NAMESPACE.
* "U_ICU_NAMESPACE" or "U_ICU_NAMESPACE::internal".
*
* @draft ICU 77
*/
// The first test is the same as for defining U_EXPORT for Windows.
#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
# define U_HEADER_NESTED_NAMESPACE header
# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_HEADER_NESTED_NAMESPACE internal
# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE::internal
namespace U_ICU_NAMESPACE_OR_INTERNAL {}
using namespace U_ICU_NAMESPACE_OR_INTERNAL;
#else
# define U_HEADER_NESTED_NAMESPACE header
# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE
#endif
#define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE
namespace U_HEADER_ONLY_NAMESPACE {}
#endif // U_HIDE_DRAFT_API
#endif // U_FORCE_HIDE_DRAFT_API
#endif /* __cplusplus */

View file

@ -3244,7 +3244,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus);
if (U_SUCCESS(subStatus) && validLoc != nullptr && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) {
CharString validLang = ulocimp_getLanguage(validLoc, subStatus);
CharString parentLang = ulocimp_getLanguage(parent.data(), subStatus);
CharString parentLang = ulocimp_getLanguage(parent.toStringPiece(), subStatus);
if (U_SUCCESS(subStatus) && validLang != parentLang) {
// validLoc is not root and has a different language than parent, use it instead
found.clear().append(validLoc, subStatus);

View file

@ -59,6 +59,9 @@ getCodesFromLocale(const char *locale,
if (U_FAILURE(*err)) { return 0; }
icu::CharString lang;
icu::CharString script;
if (locale == nullptr) {
locale = uloc_getDefault();
}
ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err);
if (U_FAILURE(*err)) { return 0; }
// Multi-script languages, equivalent to the LocaleScript data

View file

@ -126,7 +126,7 @@ compareEntries(const UHashTok p1, const UHashTok p2) {
name2.pointer = b2->name;
path1.pointer = b1->path;
path2.pointer = b2->path;
return uhash_compareChars(name1, name2) & uhash_compareChars(path1, path2);
return uhash_compareChars(name1, name2) && uhash_compareChars(path1, path2);
}
static void

View file

@ -124,11 +124,9 @@ errorValue(int32_t count, int8_t strict) {
* >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., true):
* Same as the obsolete "safe" behavior, but non-characters are also treated
* like illegal sequences.
*
* Note that a UBool is the same as an int8_t.
*/
U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict) {
// *pi is one after byte c.
int32_t i=*pi;
// length can be negative for NUL-terminated strings: Read and validate one byte at a time.
@ -233,7 +231,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
}
U_CAPI UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) {
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict) {
// *pi is the index of byte c.
int32_t i=*pi;
if(U8_IS_TRAIL(c) && i>start) {

View file

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.72 for ICU 76.1.
# Generated by GNU Autoconf 2.72 for ICU 77.0.1.
#
# Report bugs to <https://icu.unicode.org/bugs>.
#
@ -606,8 +606,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ICU'
PACKAGE_TARNAME='icu4c'
PACKAGE_VERSION='76.1'
PACKAGE_STRING='ICU 76.1'
PACKAGE_VERSION='77.0.1'
PACKAGE_STRING='ICU 77.0.1'
PACKAGE_BUGREPORT='https://icu.unicode.org/bugs'
PACKAGE_URL='https://icu.unicode.org/'
@ -1387,7 +1387,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
'configure' configures ICU 76.1 to adapt to many kinds of systems.
'configure' configures ICU 77.0.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1453,7 +1453,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ICU 76.1:";;
short | recursive ) echo "Configuration of ICU 77.0.1:";;
esac
cat <<\_ACEOF
@ -1461,30 +1461,30 @@ Optional Features:
--disable-option-checking ignore unrecognized --enable/--with options
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--enable-icu-config install icu-config
--enable-debug build debug libraries and enable the U_DEBUG define default=no
--enable-release build release libraries default=yes
--enable-strict compile with strict compiler options default=yes
--disable-icu-config do not install icu-config
--enable-debug build debug libraries and enable the U_DEBUG define
--disable-release do not build release libraries
--disable-strict do not compile with strict compiler options
--enable-64bit-libs (deprecated, use --with-library-bits) build 64-bit libraries default= platform default
--enable-shared build shared libraries default=yes
--enable-static build static libraries default=no
--enable-auto-cleanup enable auto cleanup of libraries default=no
--enable-draft enable draft APIs (and internal APIs) default=yes
--enable-renaming add a version suffix to symbols default=yes
--enable-tracing enable function and data tracing default=no
--enable-plugins enable plugins default=no
--disable-dyload disable dynamic loading default=no
--disable-shared do not build shared libraries
--enable-static build static libraries
--enable-auto-cleanup enable auto cleanup of libraries
--disable-draft do not enable draft APIs (and internal APIs)
--disable-renaming do not add a version suffix to symbols
--enable-tracing enable function and data tracing
--enable-plugins enable plugins
--disable-dyload disable dynamic loading
--enable-rpath use rpath when linking default is only if necessary
--enable-weak-threads weakly reference the threading library default=no
--enable-extras build ICU extras default=yes
--enable-icuio build ICU's icuio library default=yes
--enable-layoutex build ICU's Paragraph Layout library default=no.
--enable-weak-threads weakly reference the threading library
--disable-extras do not build ICU extras
--disable-icuio do not build ICU's icuio library
--enable-layoutex build ICU's Paragraph Layout library.
icu-le-hb must be installed via pkg-config. See http://harfbuzz.org
--enable-tools build ICU's tools default=yes
--enable-fuzzer build ICU's fuzzer test targets default=no
--enable-tests build ICU tests default=yes
--enable-samples build ICU samples default=yes
--disable-tools do not build ICU's tools
--enable-fuzzer build ICU's fuzzer test targets
--disable-tests do not build ICU tests
--disable-samples do not build ICU samples
Additionally, the variable FORCE_LIBS may be set before calling configure.
If set, it will REPLACE any automatic list of libraries.
@ -1592,7 +1592,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ICU configure 76.1
ICU configure 77.0.1
generated by GNU Autoconf 2.72
Copyright (C) 2023 Free Software Foundation, Inc.
@ -2184,7 +2184,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ICU $as_me 76.1, which was
It was created by ICU $as_me 77.0.1, which was
generated by GNU Autoconf 2.72. Invocation command line was
$ $0$ac_configure_args_raw
@ -8481,7 +8481,7 @@ echo "CXXFLAGS=$CXXFLAGS"
# output the Makefiles
ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/icuexportdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/localecanperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/perf/leperf/Makefile test/fuzzer/Makefile samples/Makefile"
ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/icuexportdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/localecanperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/fuzzer/Makefile samples/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
@ -9019,7 +9019,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ICU $as_me 76.1, which was
This file was extended by ICU $as_me 77.0.1, which was
generated by GNU Autoconf 2.72. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -9075,7 +9075,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
ICU config.status 76.1
ICU config.status 77.0.1
configured by $0, generated by GNU Autoconf 2.72,
with options \\"\$ac_cs_config\\"
@ -9250,7 +9250,6 @@ do
"test/perf/ustrperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/ustrperf/Makefile" ;;
"test/perf/utfperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/utfperf/Makefile" ;;
"test/perf/utrie2perf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/utrie2perf/Makefile" ;;
"test/perf/leperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/leperf/Makefile" ;;
"test/fuzzer/Makefile") CONFIG_FILES="$CONFIG_FILES test/fuzzer/Makefile" ;;
"samples/Makefile") CONFIG_FILES="$CONFIG_FILES samples/Makefile" ;;

View file

@ -89,7 +89,7 @@ UCONFIG_CFLAGS=""
# Check whether to install icu-config
AC_ARG_ENABLE([icu-config],
AS_HELP_STRING([--enable-icu-config], [install icu-config]),
AS_HELP_STRING([--disable-icu-config], [do not install icu-config]),
[case "${enableval}" in
yes) enable_icu_config=true ;;
no) enable_icu_config=false ;;
@ -102,7 +102,7 @@ AC_MSG_CHECKING([whether to build debug libraries])
enabled=no
ENABLE_DEBUG=0
AC_ARG_ENABLE(debug,
[ --enable-debug build debug libraries and enable the U_DEBUG define [default=no]],
[ --enable-debug build debug libraries and enable the U_DEBUG define],
[ case "${enableval}" in
yes|"") enabled=yes; ENABLE_DEBUG=1; CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DU_DEBUG=1" ;;
*) ;;
@ -116,7 +116,7 @@ AC_MSG_CHECKING([whether to build release libraries])
enabled=yes
ENABLE_RELEASE=1
AC_ARG_ENABLE(release,
[ --enable-release build release libraries [default=yes]],
[ --disable-release do not build release libraries],
[ case "${enableval}" in
no) enabled=no; ENABLE_RELEASE=0 ;;
*) ;;
@ -270,7 +270,7 @@ AC_SUBST(LIB_M)
AC_MSG_CHECKING([whether to build shared libraries])
enabled=no
AC_ARG_ENABLE(shared,
[ --enable-shared build shared libraries [default=yes]],
[ --disable-shared do not build shared libraries],
[ case "${enableval}" in
yes|"") enabled=yes; ENABLE_SHARED=YES ;;
no);;
@ -285,7 +285,7 @@ AC_SUBST(ENABLE_SHARED)
AC_MSG_CHECKING([whether to build static libraries])
enabled=no
AC_ARG_ENABLE(static,
[ --enable-static build static libraries [default=no]],
[ --enable-static build static libraries],
[ case "${enableval}" in
yes|"") enabled=yes; ENABLE_STATIC=YES ;;
no) ;;
@ -332,7 +332,7 @@ AC_MSG_CHECKING([whether to enable auto cleanup of libraries])
enabled=no
UCLN_NO_AUTO_CLEANUP=1
AC_ARG_ENABLE(auto-cleanup,
[ --enable-auto-cleanup enable auto cleanup of libraries [default=no]],
[ --enable-auto-cleanup enable auto cleanup of libraries],
[ case "${enableval}" in
yes) enabled=yes;
CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DUCLN_NO_AUTO_CLEANUP=0";
@ -349,7 +349,7 @@ AC_MSG_CHECKING([whether to enable draft APIs])
enabled=yes
U_DEFAULT_SHOW_DRAFT=1
AC_ARG_ENABLE(draft,
[ --enable-draft enable draft APIs (and internal APIs) [default=yes]],
[ --disable-draft do not enable draft APIs (and internal APIs)],
[ case "${enableval}" in
no) enabled=no; U_DEFAULT_SHOW_DRAFT=0;
CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DU_DEFAULT_SHOW_DRAFT=0"
@ -376,7 +376,7 @@ AC_MSG_CHECKING([whether to enable renaming of symbols])
enabled=yes
U_DISABLE_RENAMING=0
AC_ARG_ENABLE(renaming,
[ --enable-renaming add a version suffix to symbols [default=yes]],
[ --disable-renaming do not add a version suffix to symbols],
[ case "${enableval}" in
yes|"") enabled=yes ;;
no) enabled=no; U_DISABLE_RENAMING=1;
@ -392,7 +392,7 @@ AC_MSG_CHECKING([whether to enable function and data tracing])
enabled=no
U_ENABLE_TRACING=0
AC_ARG_ENABLE(tracing,
[ --enable-tracing enable function and data tracing [default=no]],
[ --enable-tracing enable function and data tracing],
[ case "${enableval}" in
yes|"") enabled=yes;
CONFIG_CPPFLAGS="$CONFIG_CPPFLAGS -DU_ENABLE_TRACING=1";
@ -412,7 +412,7 @@ fi
# Enable/disable plugins
AC_ARG_ENABLE(plugins,
[ --enable-plugins enable plugins [default=no]],
[ --enable-plugins enable plugins],
[case "${enableval}" in
yes) plugins=true ;;
no) plugins=false ;;
@ -430,7 +430,7 @@ U_ENABLE_DYLOAD=1
enable=yes
AC_MSG_CHECKING([whether to enable dynamic loading of plugins. Ignored if plugins disabled.])
AC_ARG_ENABLE(dyload,
[ --disable-dyload disable dynamic loading [default=no]],
[ --disable-dyload disable dynamic loading],
[ case "${enableval}" in
yes|"")
U_ENABLE_DYLOAD=1
@ -577,7 +577,7 @@ case "${host}" in
esac
AC_ARG_ENABLE(weak-threads,
[ --enable-weak-threads weakly reference the threading library [default=no]],
[ --enable-weak-threads weakly reference the threading library],
[case "${enableval}" in
yes)
LIB_THREAD="${LIBS%${OLD_LIBS}}"
@ -974,7 +974,7 @@ AC_MSG_RESULT($CHECK_UTF16_STRING_RESULT)
# Enable/disable extras
AC_ARG_ENABLE(extras,
[ --enable-extras build ICU extras [default=yes]],
[ --disable-extras do not build ICU extras],
[case "${enableval}" in
yes) extras=true ;;
no) extras=false ;;
@ -983,7 +983,7 @@ AC_ARG_ENABLE(extras,
extras=true)
ICU_CONDITIONAL(EXTRAS, test "$extras" = true)
AC_ARG_ENABLE(icuio,
[ --enable-icuio build ICU's icuio library [default=yes]],
[ --disable-icuio do not build ICU's icuio library],
[case "${enableval}" in
yes) icuio=true ;;
no) icuio=false ;;
@ -994,7 +994,7 @@ ICU_CONDITIONAL(ICUIO, test "$icuio" = true)
# Enable/disable layoutex
AC_ARG_ENABLE(layoutex,
[ --enable-layoutex build ICU's Paragraph Layout library [default=no].
[ --enable-layoutex build ICU's Paragraph Layout library.
icu-le-hb must be installed via pkg-config. See http://harfbuzz.org],
[case "${enableval}" in
yes) layoutex=$have_icu_le_hb ;;
@ -1016,7 +1016,7 @@ AC_ARG_ENABLE(layout,
# Enable/disable tools
AC_ARG_ENABLE(tools,
[ --enable-tools build ICU's tools [default=yes]],
[ --disable-tools do not build ICU's tools],
[case "${enableval}" in
yes) tools=true ;;
no) tools=false ;;
@ -1027,7 +1027,7 @@ ICU_CONDITIONAL(TOOLS, test "$tools" = true)
# Enable/disable fuzzer target tests.
AC_ARG_ENABLE(fuzzer,
[ --enable-fuzzer build ICU's fuzzer test targets [default=no]],
[ --enable-fuzzer build ICU's fuzzer test targets],
[case "${enableval}" in
yes) fuzzer=true ;;
no) fuzzer=false ;;
@ -1144,7 +1144,7 @@ AC_SUBST(ICULIBSUFFIXCNAME)
# Enable/disable tests
AC_ARG_ENABLE(tests,
[ --enable-tests build ICU tests [default=yes]],
[ --disable-tests do not build ICU tests],
[case "${enableval}" in
yes) tests=true ;;
no) tests=false ;;
@ -1155,7 +1155,7 @@ ICU_CONDITIONAL(TESTS, test "$tests" = true)
# Enable/disable samples
AC_ARG_ENABLE(samples,
[ --enable-samples build ICU samples [default=yes]
[ --disable-samples do not build ICU samples
Additionally, the variable FORCE_LIBS may be set before calling configure.
If set, it will REPLACE any automatic list of libraries.],
@ -1304,7 +1304,6 @@ AC_CONFIG_FILES([icudefs.mk \
test/perf/ustrperf/Makefile \
test/perf/utfperf/Makefile \
test/perf/utrie2perf/Makefile \
test/perf/leperf/Makefile \
test/fuzzer/Makefile \
samples/Makefile])
AC_OUTPUT

View file

@ -1,8 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
fi{
boundaries{
word:process(dependency){"word_fi_sv.brk"}
}
}

View file

@ -25,5 +25,5 @@ root{
lstm{
Thai{"Thai_graphclust_model4_heavy.res"}
Mymr{"Burmese_graphclust_model5_heavy.res"}
}
}
}

View file

@ -24,13 +24,9 @@ $Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
$Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
#
# From cldr/common/properties/segments/
# and issue CLDR-10994
#
$Virama = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Virama}];
$LinkingConsonant = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Consonant}];
$ExtCccZwj = [[\p{gcb=Extend}-\p{ccc=0}] \p{gcb=ZWJ}];
$InCBConsonant = [\p{InCB=Consonant}];
$InCBExtend = [\p{InCB=Extend}];
$InCBLinker = [\p{InCB=Linker}];
# Korean Syllable Definitions
#
@ -64,8 +60,8 @@ $L ($L | $V | $LV | $LVT);
# GB 9b
$Prepend [^$Control $CR $LF];
# GB 9.3, from CLDR-10994
$LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* $LinkingConsonant;
# GB 9c
$InCBConsonant [ $InCBExtend $InCBLinker ]* $InCBLinker [ $InCBExtend $InCBLinker ]* $InCBConsonant;
# GB 11 Do not break within emoji modifier sequences or emoji zwj sequences.
$Extended_Pict $Extend* $ZWJ $Extended_Pict;

View file

@ -297,7 +297,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -298,7 +298,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -306,7 +306,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -318,7 +318,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -331,7 +331,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -299,7 +299,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -304,7 +304,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -317,7 +317,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -310,7 +310,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# and then to default UAX #14 behaviour (UTC-179-C32).
#
^($HY | $HH) $CM* $ALPlus;
$GL ($HY | $HH) $CM* $ALPlus;
$GL $CM* ($HY | $HH) $CM* $ALPlus;
# Non-breaking CB from LB8a:
$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
# Non-breaking SP from LB14:

View file

@ -11,7 +11,7 @@
# These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
#
# Note: Updates to word.txt will usually need to be merged into
# word_POSIX.txt and word_fi_sv.txt also.
# word_POSIX.txt also.
##############################################################################
#
@ -42,7 +42,7 @@ $ALetter = [\p{Word_Break = ALetter}];
$Single_Quote = [\p{Word_Break = Single_Quote}];
$Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidLetter = [\p{Word_Break = MidLetter} - [\: \uFE55 \uFF1A]];
$MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];

View file

@ -42,7 +42,7 @@ $ALetter = [\p{Word_Break = ALetter}];
$Single_Quote = [\p{Word_Break = Single_Quote}];
$Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
$MidLetter = [\p{Word_Break = MidLetter} - [\: \uFE55 \uFF1A]];
$MidLetter = [\p{Word_Break = MidLetter} - [\:]];
$MidNum = [\p{Word_Break = MidNum} [.]];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];

View file

@ -1,172 +0,0 @@
#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 2002-2016, International Business Machines Corporation
# and others. All Rights Reserved.
#
# file: word_fi_sv.txt
#
# ICU Word Break Rules
# See Unicode Standard Annex #29.
# These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
#
# Note: Updates to word.txt will usually need to be merged into
# word_fi_sv.txt also.
##############################################################################
#
# Character class definitions from TR 29
#
##############################################################################
!!chain;
!!quoted_literals_only;
#
# Character Class Definitions.
#
$Han = [:Han:];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline}];
$Extend = [\p{Word_Break = Extend}-$Han];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [\p{Word_Break = Format}];
$Katakana = [\p{Word_Break = Katakana}];
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
$ALetter = [\p{Word_Break = ALetter}];
$Single_Quote = [\p{Word_Break = Single_Quote}];
$Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$WSegSpace = [\p{Word_Break = WSegSpace}];
$Extended_Pict = [\p{Extended_Pictographic}];
$Hiragana = [:Hiragana:];
$Ideographic = [\p{Ideographic}];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
$Control = [\p{Grapheme_Cluster_Break = Control}];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana];
$dictionaryCJK = [$KanaKanji $HangulSyllable];
$dictionary = [$ComplexContext $dictionaryCJK];
# TODO: check if handling of katakana in dictionary makes rules incorrect/void
# leave CJK scripts out of ALetterPlus
$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
## -------------------------------------------------
# Rule 3 - CR x LF
#
$CR $LF;
# Rule 3c Do not break within emoji zwj sequences.
# ZWJ × \p{Extended_Pictographic}. Precedes WB4, so no intervening Extend chars allowed.
#
$ZWJ $Extended_Pict;
# Rule 3d - Keep horizontal whitespace together.
#
$WSegSpace $WSegSpace;
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
# of a region of Text.
$ExFm = [$Extend $Format $ZWJ];
^$ExFm+; # This rule fires only when there are format or extend characters at the
# start of text, or immediately following another boundary. It groups them, in
# the event there is more than one.
[^$CR $LF $Newline $ExFm] $ExFm*; # This rule attaches trailing format/extends to words,
# with no special rule status value.
$Numeric $ExFm* {100}; # This group of rules also attach trailing format/extends, but
$ALetterPlus $ExFm* {200}; # with rule status set based on the word's final base character.
$HangulSyllable {200};
$Hebrew_Letter $ExFm* {200};
$Katakana $ExFm* {400}; # note: these status values override those from rule 5
$Hiragana $ExFm* {400}; # by virtue of being numerically larger.
$Ideographic $ExFm* {400}; #
#
# rule 5
# Do not break between most letters.
#
($ALetterPlus | $Hebrew_Letter) $ExFm* ($ALetterPlus | $Hebrew_Letter);
# rule 6 and 7
($ALetterPlus | $Hebrew_Letter) $ExFm* ($MidLetter | $MidNumLet | $Single_Quote) $ExFm* ($ALetterPlus | $Hebrew_Letter) {200};
# rule 7a
$Hebrew_Letter $ExFm* $Single_Quote {200};
# rule 7b and 7c
$Hebrew_Letter $ExFm* $Double_Quote $ExFm* $Hebrew_Letter;
# rule 8
$Numeric $ExFm* $Numeric;
# rule 9
($ALetterPlus | $Hebrew_Letter) $ExFm* $Numeric;
# rule 10
$Numeric $ExFm* ($ALetterPlus | $Hebrew_Letter);
# rule 11 and 12
$Numeric $ExFm* ($MidNum | $MidNumLet | $Single_Quote) $ExFm* $Numeric;
# rule 13
# to be consistent with $KanaKanji $KanaKanji, changed
# from 300 to 400.
# See also TestRuleStatus in intltest/rbbiapts.cpp
$Katakana $ExFm* $Katakana {400};
# rule 13a/b
$ALetterPlus $ExFm* $ExtendNumLet {200}; # (13a)
$Hebrew_Letter $ExFm* $ExtendNumLet {200}; # (13a)
$Numeric $ExFm* $ExtendNumLet {100}; # (13a)
$Katakana $ExFm* $ExtendNumLet {400}; # (13a)
$ExtendNumLet $ExFm* $ExtendNumLet {200}; # (13a)
$ExtendNumLet $ExFm* $ALetterPlus {200}; # (13b)
$ExtendNumLet $ExFm* $Hebrew_Letter {200}; # (13b)
$ExtendNumLet $ExFm* $Numeric {100}; # (13b)
$ExtendNumLet $ExFm* $Katakana {400}; # (13b)
# rules 15 - 17
# Pairs of Regional Indicators stay together.
# With incoming rule chaining disabled by ^, this rule will match exactly two of them.
# No other rule begins with a Regional_Indicator, so chaining cannot extend the match.
#
^$Regional_Indicator $ExFm* $Regional_Indicator;
# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable {200};
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
# Rule 999
# Match a single code point if no other rule applies.
.;

View file

@ -1,8 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
sv{
boundaries{
word:process(dependency){"word_fi_sv.brk"}
}
}

View file

@ -42,7 +42,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<!ATTLIST version number CDATA #REQUIRED >
<!--@MATCH:regex/\$Revision.*\$-->
<!--@METADATA-->
<!ATTLIST version cldrVersion CDATA #FIXED "46" >
<!ATTLIST version cldrVersion CDATA #FIXED "46.1" >
<!--@MATCH:any-->
<!--@VALUE-->
<!ATTLIST version draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >

View file

@ -26,12 +26,15 @@ In the following,
*Windows*
- Add ICU_DATA_BUILDTOOL_OPTS as a system environment variable with value "--include_uni_core_data"
- Build $icu4c_root\source\allinone\allinone.sln in Visual Studio
set ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data
- Build %icu4c_root%\source\allinone\allinone.sln in Visual Studio
You can also do it from the command line (adjust Platform if needed):
msbuild %icu4c_root%\source\allinone\allinone.sln /p:Configuration=Release /p:Platform=x64 /p:SkipUWP=true
For more instructions on downloading and building ICU4C,
see the ICU4C readme at:
https://htmlpreview.github.io/?https://github.com/unicode-org/icu/blob/main/icu4c/readme.html#HowToBuild
(Windows: build as 'x86, Release' otherwise you will have to set 'CFG' differently below.)
(Windows: build as 'x64, Release' otherwise you will have to set 'CFG' differently below.)
*NOTE* You should do a full rebuild after any data changes.
@ -43,10 +46,13 @@ platform.
*Windows*
2a. On Developer Command Prompt for VS, cd to $icu4c_root\source\data.
2a. On Developer Command Prompt for VS do
cd %icu4c_root%\source\data
nmake -f makedata.mak ICUMAKE=%icu4c_root%\source\data\ CFG=x64\Release JAR="%jdk_bin%\jar" ICU4J_ROOT=%icu4j_root% icu4j-data-install
2b. On Developer Command Prompt for VS,
nmake -f makedata.mak ICUMAKE=$icu4c_root\source\data\ CFG=x86\Release JAR="$jdk_bin\jar" ICU4J_ROOT=$icu4j_root icu4j-data-install
2b. Do
cd %icu4j_root%
.\extract-data-files.bat
Continue with step 3 below, in Java:
@ -64,6 +70,11 @@ platform.
(You can omit the JAR if it's just jar.)
2e. Do
cd $icu4j_root
./extract-data-files.sh
Continue with step 3, in Java:
Step 2 on either platform will produce two files: icudata.jar and

View file

@ -283,8 +283,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GGGGyMEEEEd",
"GGGGyMd",
"GGGGyMMMEEEEd",
"GGGGyMMMd",
"GyMMdd",
"GyMMdd",
}
@ -761,8 +761,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GGGGGyMMdd",
"GGGGGyMd",
}
@ -1002,8 +1002,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"yMEEEEd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMdd",
"yMMdd",
}
@ -1326,8 +1326,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMdd",
"GyMMdd",
}
@ -1398,8 +1398,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMdd",
"GyMMdd",
}
@ -1466,10 +1466,10 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
{
"GyMd",
"GyMMMd",
"y=jpanyear",
}
"GGGGGyMd",
@ -2005,8 +2005,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMdd",
"GyMMdd",
}

View file

@ -856,8 +856,8 @@ ko{
"ahmsz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMd",
"GyMd",
}
@ -1085,8 +1085,8 @@ ko{
"ahmsz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMMMMEEEEd",
"yMMMMd",
"yMd",
"yyMd",
}

View file

@ -24,8 +24,8 @@ ko_CN{
"HHmmssz",
"HHmmss",
"HHmm",
"yMEEEEd",
"yMd",
"yMMMMEEEEd",
"yMMMMd",
"yMd",
"yyMd",
}

View file

@ -770,9 +770,9 @@ yue{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -981,9 +981,9 @@ yue{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
availableFormats{

View file

@ -206,9 +206,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -766,9 +766,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMMd",
"GyMMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -972,9 +972,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
appendItems{
@ -1287,9 +1287,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
eras{
@ -1364,9 +1364,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -1446,9 +1446,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMMdd",
}
availableFormats{
@ -1756,9 +1756,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -2,4 +2,33 @@
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
yue_Hant_CN{
calendar{
gregorian{
DateTimePatterns{
"HH:mm:ss [zzzz]",
"HH:mm:ss [z]",
"HH:mm:ss",
"HH:mm",
"y年M月d日 EEEE",
"y年M月d日",
"y年M月d日",
"y/M/d",
"{1} {0}",
"{1} {0}",
"{1} {0}",
"{1} {0}",
"{1} {0}",
}
DateTimeSkeletons{
"HHmmsszzzz",
"HHmmssz",
"HHmmss",
"HHmm",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
}
}
}

View file

@ -677,8 +677,8 @@ zh{
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMd",
"GyMd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
eras{
@ -744,8 +744,8 @@ zh{
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMd",
"GyMd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
eras{
@ -821,9 +821,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -1022,9 +1022,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
appendItems{
@ -1417,9 +1417,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMMd",
"GyMMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -1542,9 +1542,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMMdd",
}
availableFormats{

View file

@ -24,9 +24,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{
@ -107,9 +107,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GGGGGyyMd",
}
availableFormats{
@ -173,9 +173,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yyMd",
}
availableFormats{
@ -240,9 +240,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
}
@ -267,9 +267,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{
@ -298,9 +298,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -77,9 +77,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GGGGGyyMd",
}
availableFormats{
@ -149,9 +149,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yyMd",
}
availableFormats{
@ -224,9 +224,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
}
@ -251,9 +251,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -24,9 +24,9 @@ zh_Hans_MY{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
}

View file

@ -24,9 +24,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
intervalFormats{
@ -102,9 +102,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GGGGGyyMMdd",
}
availableFormats{
@ -171,9 +171,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yyMMdd",
}
availableFormats{
@ -244,9 +244,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
}
@ -271,9 +271,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{
@ -302,9 +302,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -807,9 +807,9 @@ zh_Hant{
"Bhmmssz",
"Bhmmss",
"Bhmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -967,9 +967,9 @@ zh_Hant{
"Bhmmssz",
"Bhmmss",
"Bhmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
availableFormats{

View file

@ -315,9 +315,9 @@ zh_Hant_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
availableFormats{

Some files were not shown because too many files have changed in this diff Show more