mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-12526 Unicode 9 data 2016-jun-09
X-SVN-Rev: 38821
This commit is contained in:
parent
1a6d70fa44
commit
e876c89842
6 changed files with 42 additions and 31 deletions
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2bd9654a895ed4a0aff9e9145040495dc7994732d50a1907e7a89c4caf01eedd
|
||||
size 11733166
|
||||
oid sha256:b882f018fe19d1d295817d91b5a99f1eb7a01d08e538adf83a6253e981ce95f5
|
||||
size 11732712
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
|
||||
# Date: 2016-05-06, 18:35:32 GMT
|
||||
# Date: 2016-06-03, 18:31:07 GMT
|
||||
# © 2016 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# CollationTest_CLDR_SHIFTED_SHORT.txt
|
||||
# Date: 2016-05-06, 18:35:34 GMT
|
||||
# Date: 2016-06-03, 18:31:11 GMT
|
||||
# © 2016 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
|
|
|
@ -20179,10 +20179,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
|
|||
11C34;BHAIKSUKI VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;;
|
||||
11C35;BHAIKSUKI VOWEL SIGN VOCALIC RR;Mn;0;NSM;;;;;N;;;;;
|
||||
11C36;BHAIKSUKI VOWEL SIGN VOCALIC L;Mn;0;NSM;;;;;N;;;;;
|
||||
11C38;BHAIKSUKI VOWEL SIGN E;Mc;0;L;;;;;N;;;;;
|
||||
11C39;BHAIKSUKI VOWEL SIGN AI;Mc;0;L;;;;;N;;;;;
|
||||
11C3A;BHAIKSUKI VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
|
||||
11C3B;BHAIKSUKI VOWEL SIGN AU;Mc;0;L;;;;;N;;;;;
|
||||
11C38;BHAIKSUKI VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;;
|
||||
11C39;BHAIKSUKI VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;;
|
||||
11C3A;BHAIKSUKI VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;;
|
||||
11C3B;BHAIKSUKI VOWEL SIGN AU;Mn;0;NSM;;;;;N;;;;;
|
||||
11C3C;BHAIKSUKI SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;;
|
||||
11C3D;BHAIKSUKI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;;
|
||||
11C3E;BHAIKSUKI SIGN VISARGA;Mc;0;L;;;;;N;;;;;
|
||||
|
|
|
@ -159,6 +159,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
int breakPos = -1;
|
||||
|
||||
int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3.
|
||||
int cBase; // for (X Extend*) patterns, the X character.
|
||||
|
||||
// Previous break at end of string. return DONE.
|
||||
if (prevPos >= fText.length()) {
|
||||
|
@ -166,7 +167,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
}
|
||||
/* p0 = */ p1 = p2 = p3 = prevPos;
|
||||
c3 = UTF16.charAt(fText, prevPos);
|
||||
c0 = c1 = c2 = 0;
|
||||
c0 = c1 = c2 = cBase = 0;
|
||||
|
||||
// Loop runs once per "significant" character position in the input text.
|
||||
for (;;) {
|
||||
|
@ -233,22 +234,11 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB8a) Regional_Indicator x Regional_Indicator
|
||||
// Note: The first if condition is a little tricky. We only need to force
|
||||
// a break if there are three or more contiguous RIs. If there are
|
||||
// only two, a break following will occur via other rules, and will include
|
||||
// any trailing extend characters, which is needed behavior.
|
||||
if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)
|
||||
&& fRegionalIndicatorSet.contains(c2)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB9) x (Extend | ZWJ)
|
||||
if (fExtendSet.contains(c2) || fZWJSet.contains(c2)) {
|
||||
if (!fExtendSet.contains(c1)) {
|
||||
cBase = c1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -261,16 +251,33 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
if (fPrependSet.contains(c1)) {
|
||||
continue;
|
||||
}
|
||||
// Rule (GB10) (Emoji_Base | EBG) x Emoji_Modifier
|
||||
// Rule (GB10) (Emoji_Base | EBG) Extend* x Emoji_Modifier
|
||||
if ((fEmojiBaseSet.contains(c1) || fEBGSet.contains(c1)) && fEmojiModifierSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
if ((fEmojiBaseSet.contains(cBase) || fEBGSet.contains(cBase)) &&
|
||||
fExtendSet.contains(c1) && fEmojiModifierSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB11) ZWJ x (Glue_After_Zwj | EBG)
|
||||
if (fZWJSet.contains(c1) && (fGAZSet.contains(c2) || fEBGSet.contains(c2))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB12-13) Regional_Indicator x Regional_Indicator
|
||||
// Note: The first if condition is a little tricky. We only need to force
|
||||
// a break if there are three or more contiguous RIs. If there are
|
||||
// only two, a break following will occur via other rules, and will include
|
||||
// any trailing extend characters, which is needed behavior.
|
||||
if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)
|
||||
&& fRegionalIndicatorSet.contains(c2)) {
|
||||
break;
|
||||
}
|
||||
if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rule (GB999) Any <break> Any
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -600,19 +600,23 @@ public class SpoofCheckerTest extends TestFmwk {
|
|||
String[][] tests = {
|
||||
// String, restriction-level, numerics, scripts, alternates, common-alternates
|
||||
{"a♥", "UNRESTRICTIVE", "[]", "Latn", "", ""},
|
||||
{"a〆", "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
|
||||
{"aー〆", "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hira Kana", "Hira Kana"},
|
||||
{"aー〆ア", "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
|
||||
{"アaー〆", "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
|
||||
{"a\u303c", "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
|
||||
{"aー\u303c", "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hira Kana", "Hira Kana"},
|
||||
{"aー\u303cア", "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
|
||||
{ "アaー\u303c", "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
|
||||
{"a1١", "UNRESTRICTIVE", "[0٠]", "Latn", "Arab Thaa", "Arab Thaa"},
|
||||
{"a1١۱", "UNRESTRICTIVE", "[0٠۰]", "Latn Arab", "", ""},
|
||||
{"١ー〆aア1१۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
|
||||
{"aアー〆1१١۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
|
||||
{"١ー\u303caア1१۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
|
||||
{"aアー\u303c1१١۱", "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
|
||||
};
|
||||
for (String[] test : tests) {
|
||||
String testString = test[0];
|
||||
IdentifierInfo idInfo = new IdentifierInfo();
|
||||
idInfo.setIdentifierProfile(SpoofChecker.RECOMMENDED);
|
||||
UnicodeSet allowedChars = new UnicodeSet();
|
||||
// Allowed Identifier Characters. In addition to the Recommended Set,
|
||||
// allow u303c, which has an interesting script extension of Hani Hira Kana.
|
||||
allowedChars.addAll(SpoofChecker.RECOMMENDED).add(0x303c);
|
||||
idInfo.setIdentifierProfile(allowedChars);
|
||||
idInfo.setIdentifier(testString);
|
||||
assertEquals("Identifier " + testString, testString, idInfo.getIdentifier());
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue