mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-10176 No line break in $SY $HL; update tests, including old missing updates for RBBITestMonkey (J)
X-SVN-Rev: 34143
This commit is contained in:
parent
b6dcdfcd25
commit
839ec55eab
4 changed files with 53 additions and 14 deletions
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ec461f02d26e5167a56539e4bf2aabb81c038cec0498eb2bbbc44378977b719b
|
||||
size 10966492
|
||||
oid sha256:0281eb436d3f76c50252cc66bbe357ba00aeb6db06839224cfefa18d386f3338
|
||||
size 10966706
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d08aec73aa20b1669db6e144039aae4bd6357e39e8445986c02be057c169e7f1
|
||||
oid sha256:45587463e8dcef07be8a580b224bfbc9ccafd9e9f14d435f401c7b08294c36d9
|
||||
size 725607
|
||||
|
|
|
@ -557,6 +557,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
UnicodeSet fSY;
|
||||
UnicodeSet fAI;
|
||||
UnicodeSet fAL;
|
||||
UnicodeSet fHL;
|
||||
UnicodeSet fID;
|
||||
UnicodeSet fSA;
|
||||
UnicodeSet fJL;
|
||||
|
@ -605,6 +606,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
fSY = new UnicodeSet("[\\p{Line_break=SY}]");
|
||||
fAI = new UnicodeSet("[\\p{Line_break=AI}]");
|
||||
fAL = new UnicodeSet("[\\p{Line_break=AL}]");
|
||||
fHL = new UnicodeSet("[\\p{Line_break=HL}]");
|
||||
fID = new UnicodeSet("[\\p{Line_break=ID}]");
|
||||
fSA = new UnicodeSet("[\\p{Line_break=SA}]");
|
||||
fJL = new UnicodeSet("[\\p{Line_break=JL}]");
|
||||
|
@ -657,6 +659,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
fSets.add(fSY);
|
||||
fSets.add(fAI);
|
||||
fSets.add(fAL);
|
||||
fSets.add(fHL);
|
||||
fSets.add(fID);
|
||||
fSets.add(fWJ);
|
||||
fSets.add(fSA);
|
||||
|
@ -679,6 +682,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
int prevChar; // Character at above position. Note that prevChar
|
||||
// and thisChar may not be adjacent because combining
|
||||
// characters between them will be ignored.
|
||||
int prevCharX2; // Character before prevChar, more context for LB 21a
|
||||
|
||||
int nextPos; // Index of the next character following pos.
|
||||
// Usually skips over combining marks.
|
||||
|
@ -695,7 +699,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
// while the invalid values shift out and the "this" and
|
||||
// "prev" positions are filled in with good values.
|
||||
pos = prevPos = -1; // Invalid value, serves as flag for initial loop iteration.
|
||||
thisChar = prevChar = 0;
|
||||
thisChar = prevChar = prevCharX2 = 0;
|
||||
nextPos = startPos;
|
||||
|
||||
|
||||
|
@ -706,6 +710,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
// "prevPos" can be arbitrarily far before "pos".
|
||||
for (;;) {
|
||||
// Advance to the next position to be tested.
|
||||
prevCharX2 = prevChar;
|
||||
prevPos = pos;
|
||||
prevChar = thisChar;
|
||||
pos = nextPos;
|
||||
|
@ -920,8 +925,19 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
continue;
|
||||
}
|
||||
|
||||
// LB 22
|
||||
// LB 21a, HL (HY | BA) x
|
||||
if (fHL.contains(prevCharX2) && (fHY.contains(prevChar) || fBA.contains(prevChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 21b, SY x HL
|
||||
if (fSY.contains(prevChar) && fHL.contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 22
|
||||
if (fAL.contains(prevChar) && fIN.contains(thisChar) ||
|
||||
fHL.contains(prevChar) && fIN.contains(thisChar) ||
|
||||
fID.contains(prevChar) && fIN.contains(thisChar) ||
|
||||
fIN.contains(prevChar) && fIN.contains(thisChar) ||
|
||||
fNU.contains(prevChar) && fIN.contains(thisChar) ) {
|
||||
|
@ -934,8 +950,10 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
// NU x AL
|
||||
if (fID.contains(prevChar) && fPO.contains(thisChar) ||
|
||||
fAL.contains(prevChar) && fNU.contains(thisChar) ||
|
||||
fNU.contains(prevChar) && fAL.contains(thisChar) ) {
|
||||
continue;
|
||||
fHL.contains(prevChar) && fNU.contains(thisChar) ||
|
||||
fNU.contains(prevChar) && fAL.contains(thisChar) ||
|
||||
fNU.contains(prevChar) && fHL.contains(thisChar) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 24 Do not break between prefix and letters or ideographs.
|
||||
|
@ -943,8 +961,8 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
// PR x (AL | HL)
|
||||
// PO x (AL | HL)
|
||||
if (fPR.contains(prevChar) && fID.contains(thisChar) ||
|
||||
fPR.contains(prevChar) && fAL.contains(thisChar) ||
|
||||
fPO.contains(prevChar) && fAL.contains(thisChar)) {
|
||||
fPR.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar)) ||
|
||||
fPO.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1011,22 +1029,22 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
|
||||
|
||||
// LB 28 Do not break between alphabetics
|
||||
if (fAL.contains(prevChar) && fAL.contains(thisChar)) {
|
||||
if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 29 Do not break between numeric punctuation and alphabetics
|
||||
if (fIS.contains(prevChar) && fAL.contains(thisChar)) {
|
||||
if (fIS.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 30 Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
|
||||
// (AL | HL | NU) x OP
|
||||
// CP x (AL | HL | NU)
|
||||
if ((fAL.contains(prevChar) || fNU.contains(prevChar)) && fOP.contains(thisChar)) {
|
||||
if ((fAL.contains(prevChar) || fHL.contains(prevChar) || fNU.contains(prevChar)) && fOP.contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
if (fCP.contains(prevChar) && (fAL.contains(thisChar) || fNU.contains(thisChar))) {
|
||||
if (fCP.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar) || fNU.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -550,7 +550,14 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
|||
<data>•\ufffc•\u30e3\u000c<100>\u1b39\u300a\u002f\u203a\u200b•\ufffc•\uaf64•\udcfb•</data>
|
||||
<data>•\u114d\u31f3•\ube44\u002d•\u0362\u24e2\u276e\u2014\u205f\ufe16•\uc877•\u0fd0\u000a<100>\u20a3•</data>
|
||||
<data>•\u080a\u215b\U0001d7d3\u002c•\u2025\U000e012e•\u02df\u118d\u0029\ua8d6\u0085<100>\u6cc4\u2024\u202f\ufffc•</data>
|
||||
|
||||
|
||||
# Test for #10176 (in root)
|
||||
<line>
|
||||
<data>•abc/•s •def•</data>
|
||||
<data>•abc/\u05D9 •def•</data>
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
|
@ -696,6 +703,13 @@ Bangkok)•</data>
|
|||
<word>
|
||||
<data>•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュータ<400>が<400>ある<400>。<0>奈<400>々<400>は<400>ワ<400>ー<400>ドで<400>あ<400>る<400>。•</data>
|
||||
|
||||
# Test for #10176 (in ja)
|
||||
<line>
|
||||
<data>•abc/•s •def•</data>
|
||||
<data>•abc/\u05D9 •def•</data>
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
||||
<locale root>
|
||||
<word>
|
||||
<data>•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュータ<400>が<400>ある<400>。<0>奈<400>々<400>は<400>ワ<400>ー<400>ドで<400>あ<400>る<400>。•</data>
|
||||
|
@ -772,3 +786,10 @@ Bangkok)•</data>
|
|||
|
||||
<data>•abc •- •def •abc •-def •abc- •def •</data> # With ASCII hyphen
|
||||
<data>•abc •‐ •def •abc •‐def •abc‐ •def •</data> # With Unicode u2010 hyphen
|
||||
|
||||
# Test for #10176 (in fi)
|
||||
<line>
|
||||
<data>•abc/•s •def•</data>
|
||||
<data>•abc/\u05D9 •def•</data>
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
|
Loading…
Add table
Reference in a new issue