ICU-22221 update root collation again from CLDR 43

This commit is contained in:
Markus Scherer 2023-04-05 17:03:55 -07:00
parent 2c584abf7e
commit f4687fc25a
14 changed files with 79 additions and 75 deletions

View file

@ -1,5 +1,5 @@
# Fractional UCA Table, generated from the UCA DUCET
# 2023-02-01 [MS]
# 2023-04-03 [MS]
# VERSION: UCA=15.0.0, UCD=15.0.0
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
@ -2785,11 +2785,11 @@ A95F; [09 41 79, 05, 05]
0027; [09 6E, 05, 05]
FF07; [09 6E, 05, 20]
05F3; [09 6E, 70, 05]
2018; [09 6E, 70, 05]
2019; [09 6E, 70, 05]
201A; [09 6E, 70, 05]
201B; [09 6E, 70, 05]
2019; [09 6E, 73, 05]
201A; [09 6E, 76, 05]
201B; [09 6E, 79, 05]
05F3; [09 6E, 7C, 05]
2039; [09 70, 05, 05]
@ -2797,15 +2797,15 @@ FF07; [09 6E, 05, 20]
0022; [09 74, 05, 05]
FF02; [09 74, 05, 20]
05F4; [09 74, 70, 05]
201C; [09 74, 70, 05]
201D; [09 74, 70, 05]
201E; [09 74, 70, 05]
201F; [09 74, 70, 05]
2E42; [09 74, 70, 05]
301D; [09 74, 70, 05]
301E; [09 74, 70, 05]
301F; [09 74, 70, 05]
201D; [09 74, 72, 05]
201E; [09 74, 74, 05]
201F; [09 74, 76, 05]
2E42; [09 74, 78, 05]
301D; [09 74, 7A, 05]
301E; [09 74, 7C, 05]
301F; [09 74, 7E 02, 05]
05F4; [09 74, 7F 02, 05]
00AB; [09 76, 05, 05]
@ -65902,7 +65902,7 @@ FDD0 0052; [, , 3D 02] # CONSTRUCTED FAKE SECONDARY-IGNORABLE
[first secondary ignorable [,, 3D 02]] # CONSTRUCTED
[last secondary ignorable [,, 3D 02]] # CONSTRUCTED
[first secondary in primary non-ignorable [X, 05, X]] # U+0009 <CHARACTER TABULATION>
[last secondary in primary non-ignorable [X, 7C, X]] # U+A7D9 LATIN SMALL LETTER SIGMOID S
[last secondary in primary non-ignorable [X, 7F 02, X]] # U+05F4 HEBREW PUNCTUATION GERSHAYIM
[first primary ignorable [, 82, 05]] # U+0332 COMBINING LOW LINE
[last primary ignorable [, FB 94, 05]] # U+00B7 MIDDLE DOT
[first variable [03 04, 05, 05]] # U+0009 <CHARACTER TABULATION>

View file

@ -1,5 +1,5 @@
# UCA_Rules_SHORT.txt
# Date: 2023-02-01, 17:37:01 GMT
# Date: 2023-04-04, 00:45:16 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@ -1974,24 +1974,26 @@
< 𒿲
< ''
<<< ＇
<< ‘
<< ’
<< ‚
<< ‛
<< ׳
= ‘
= ’
= ‚
= ‛
< ‹
< ›
< '"'
# TODO: hand-patched rules until https://github.com/unicode-org/unicodetools/issues/429 is fixed
<<< ＂
<< ‎״‎
= “
= ”
= „
= ‟
= ⹂
= 〝
= 〞
= 〟
<< “
<< ”
<< „
<< ‟
<< ⹂
# TODO: hand-patched rules until https://github.com/unicode-org/unicodetools/issues/429 is fixed
<< 〝 # 1.1 [Ps] [0231.0020.0002] [0000.0121.0002] U+301D REVERSED DOUBLE PRIME QUOTATION MARK
<< 〞 # 1.1 [Pe] [0231.0020.0002] [0000.0122.0002] U+301E DOUBLE PRIME QUOTATION MARK
<< 〟 # 1.1 [Pe] [0231.0020.0002] [0000.0123.0002] U+301F LOW DOUBLE PRIME QUOTATION MARK
<< ‎״‎ # 1.1 [Po] [0231.0020.0002] [0000.0124.0002] U+05F4 HEBREW PUNCTUATION GERSHAYIM
< «
< »
< '('

View file

@ -46,6 +46,8 @@ See
Treat quote marks as equivalent when strength=UCOL_PRIMARY
- https://github.com/unicode-org/cldr/pull/2691
CLDR-15946 make fancy quotes primary-equal to ASCII fallbacks
- https://github.com/unicode-org/cldr/pull/2833
CLDR-15946 make fancy quotes secondary-different from each other
The related changes to tailorings were already integrated in an earlier PR for
https://unicode-org.atlassian.net/browse/ICU-22220 ICU 73rc BRS.
@ -153,7 +155,7 @@ copying that version number into the $ICU_SRC/.bazeliskrc config file.
cp -v com/ibm/icu/impl/data/$ICUDT/coll/* /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT/coll
jar uvf $ICU_SRC/icu4j/main/shared/data/icudata.jar -C /tmp/icu4j com/ibm/icu/impl/data/$ICUDT
- new for ICU 73: also copy the binary data files directly into the ICU4J tree
cp -v com/ibm/icu/impl/data/$ICUDT/coll/* $ICU_SRC/icu4j/maven-build/maven-icu4j-datafiles/src/main/resources/com/ibm/icu/impl/data/icudt73b/coll
cp -v com/ibm/icu/impl/data/$ICUDT/coll/* $ICU_SRC/icu4j/maven-build/maven-icu4j-datafiles/src/main/resources/com/ibm/icu/impl/data/$ICUDT/coll
* When refreshing all of ICU4J data from ICU4C
- $ICU_ROOT/dbg/icu4c$ make ICU4J_ROOT=/tmp/icu4j icu4j-data-install

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2023-02-01, 17:37:08 GMT
# Date: 2023-04-04, 00:45:24 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@ -6400,39 +6400,39 @@ A95F 0062
12FF2 0062
0027 0021
FF07 0021
05F3 0021
2018 0021
2019 0021
201A 0021
201B 0021
05F3 0021
0027 003F
FF07 003F
05F3 003F
2018 003F
2019 003F
201A 003F
201B 003F
05F3 003F
0027 0061
0027 0041
FF07 0061
FF07 0041
05F3 0061
2018 0061
2019 0061
201A 0061
201B 0061
05F3 0041
2018 0041
2019 0061
2019 0041
201A 0061
201A 0041
201B 0061
201B 0041
05F3 0061
05F3 0041
0027 0062
FF07 0062
05F3 0062
2018 0062
2019 0062
201A 0062
201B 0062
05F3 0062
2039 0021
2039 003F
2039 0061
@ -6445,7 +6445,6 @@ FF07 0062
203A 0062
0022 0021
FF02 0021
05F4 0021
201C 0021
201D 0021
201E 0021
@ -6454,9 +6453,9 @@ FF02 0021
301D 0021
301E 0021
301F 0021
05F4 0021
0022 003F
FF02 003F
05F4 003F
201C 003F
201D 003F
201E 003F
@ -6465,31 +6464,31 @@ FF02 003F
301D 003F
301E 003F
301F 003F
05F4 003F
0022 0061
0022 0041
FF02 0061
FF02 0041
05F4 0061
201C 0061
201D 0061
201E 0061
201F 0061
2E42 0061
301D 0061
301E 0061
301F 0061
05F4 0041
201C 0041
201D 0061
201D 0041
201E 0061
201E 0041
201F 0061
201F 0041
2E42 0061
2E42 0041
301D 0061
301D 0041
301E 0061
301E 0041
301F 0061
301F 0041
05F4 0061
05F4 0041
0022 0062
FF02 0062
05F4 0062
201C 0062
201D 0062
201E 0062
@ -6498,6 +6497,7 @@ FF02 0062
301D 0062
301E 0062
301F 0062
05F4 0062
00AB 0021
00AB 003F
00AB 0061

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2023-02-01, 17:37:10 GMT
# Date: 2023-04-04, 00:45:26 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed002e6f698efb2b50ad0e1d874783c7d2fe151f82793e6d417761420de7fb95
size 14330665
oid sha256:f572156e2da2758f94c5edf6de95d1642ccdcf7567182dd90c89e339b648d986
size 14330737

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2023-02-01, 17:37:08 GMT
# Date: 2023-04-04, 00:45:24 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@ -6400,39 +6400,39 @@ A95F 0062
12FF2 0062
0027 0021
FF07 0021
05F3 0021
2018 0021
2019 0021
201A 0021
201B 0021
05F3 0021
0027 003F
FF07 003F
05F3 003F
2018 003F
2019 003F
201A 003F
201B 003F
05F3 003F
0027 0061
0027 0041
FF07 0061
FF07 0041
05F3 0061
2018 0061
2019 0061
201A 0061
201B 0061
05F3 0041
2018 0041
2019 0061
2019 0041
201A 0061
201A 0041
201B 0061
201B 0041
05F3 0061
05F3 0041
0027 0062
FF07 0062
05F3 0062
2018 0062
2019 0062
201A 0062
201B 0062
05F3 0062
2039 0021
2039 003F
2039 0061
@ -6445,7 +6445,6 @@ FF07 0062
203A 0062
0022 0021
FF02 0021
05F4 0021
201C 0021
201D 0021
201E 0021
@ -6454,9 +6453,9 @@ FF02 0021
301D 0021
301E 0021
301F 0021
05F4 0021
0022 003F
FF02 003F
05F4 003F
201C 003F
201D 003F
201E 003F
@ -6465,31 +6464,31 @@ FF02 003F
301D 003F
301E 003F
301F 003F
05F4 003F
0022 0061
0022 0041
FF02 0061
FF02 0041
05F4 0061
201C 0061
201D 0061
201E 0061
201F 0061
2E42 0061
301D 0061
301E 0061
301F 0061
05F4 0041
201C 0041
201D 0061
201D 0041
201E 0061
201E 0041
201F 0061
201F 0041
2E42 0061
2E42 0041
301D 0061
301D 0041
301E 0061
301E 0041
301F 0061
301F 0041
05F4 0061
05F4 0041
0022 0062
FF02 0062
05F4 0062
201C 0062
201D 0062
201E 0062
@ -6498,6 +6497,7 @@ FF02 0062
301D 0062
301E 0062
301F 0062
05F4 0062
00AB 0021
00AB 003F
00AB 0061

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2023-02-01, 17:37:10 GMT
# Date: 2023-04-04, 00:45:26 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html