From 782e4ff95e0cabbdf37f5fb612cfe87df9eed5ed Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 7 Mar 2017 22:57:46 +0000 Subject: [PATCH] ICU-12922 bidi explicit level 0: lift it up to resolved paragraph level except leave it at 0 for paragraph separators X-SVN-Rev: 39741 --- icu4c/source/common/ubidi.cpp | 52 +++++++--- icu4c/source/common/unicode/ubidi.h | 9 +- icu4c/source/test/cintltst/cbiditst.c | 23 +++++ .../core/src/com/ibm/icu/text/Bidi.java | 96 ++++++++++++------- .../com/ibm/icu/dev/test/bidi/TestBidi.java | 20 +++- 5 files changed, 145 insertions(+), 55 deletions(-) diff --git a/icu4c/source/common/ubidi.cpp b/icu4c/source/common/ubidi.cpp index 29153499b0d..8e2fc36e5f1 100644 --- a/icu4c/source/common/ubidi.cpp +++ b/icu4c/source/common/ubidi.cpp @@ -1348,18 +1348,20 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { static UBiDiDirection checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { DirProp *dirProps=pBiDi->dirProps; - DirProp dirProp; UBiDiLevel *levels=pBiDi->levels; int32_t isolateCount=0; - int32_t i, length=pBiDi->length; + int32_t length=pBiDi->length; Flags flags=0; /* collect all directionalities in the text */ - UBiDiLevel level; pBiDi->isolateCount=0; - for(i=0; iparas[0].limit; + int32_t currentParaLevel = pBiDi->paraLevel; + + for(int32_t i=0; ipBiDi->isolateCount) @@ -1369,21 +1371,41 @@ checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { isolateCount--; else if(dirProp==B) isolateCount=0; - if(level&UBIDI_LEVEL_OVERRIDE) { + + // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i); + if (pBiDi->defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) { + currentParaLevel = pBiDi->paras[++currentParaIndex].level; + currentParaLimit = pBiDi->paras[currentParaIndex].limit; + } + + UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE; + level &= ~UBIDI_LEVEL_OVERRIDE; + if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. + level = (UBiDiLevel)currentParaLevel; + levels[i] = level | overrideFlag; + } + } else { + // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level + /* level out of bounds */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UBIDI_LTR; + } + } + if (overrideFlag != 0) { /* keep the override flag in levels[i] but adjust the flags */ - level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ flags|=DIRPROP_FLAG_O(level); } else { /* set the flags */ flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); } - if((levelparaLevel); diff --git a/icu4c/source/common/unicode/ubidi.h b/icu4c/source/common/unicode/ubidi.h index 3f29ecdc47e..3b255f928b6 100644 --- a/icu4c/source/common/unicode/ubidi.h +++ b/icu4c/source/common/unicode/ubidi.h @@ -1196,11 +1196,12 @@ ubidi_setContext(UBiDi *pBiDi, * A level overrides the directional property of its corresponding * (same index) character if the level has the * #UBIDI_LEVEL_OVERRIDE bit set.

- * Except for that bit, it must be + * Aside from that bit, it must be * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL, - * with one exception: a level of zero may be specified for a paragraph - * separator even if paraLevel>0 when multiple paragraphs - * are submitted in the same call to ubidi_setPara().

+ * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.

* Caution: A copy of this pointer, not of the levels, * will be stored in the UBiDi object; * the embeddingLevels array must not be diff --git a/icu4c/source/test/cintltst/cbiditst.c b/icu4c/source/test/cintltst/cbiditst.c index e2a7d4e753a..a902da7a493 100644 --- a/icu4c/source/test/cintltst/cbiditst.c +++ b/icu4c/source/test/cintltst/cbiditst.c @@ -90,6 +90,7 @@ static void testContext(void); static void doTailTest(void); static void testBracketOverflow(void); +static void TestExplicitLevel0(); /* new BIDI API */ static void testReorderingMode(void); @@ -138,6 +139,7 @@ addComplexTest(TestNode** root) { addTest(root, testGetBaseDirection, "complex/bidi/testGetBaseDirection"); addTest(root, testContext, "complex/bidi/testContext"); addTest(root, testBracketOverflow, "complex/bidi/TestBracketOverflow"); + addTest(root, &TestExplicitLevel0, "complex/bidi/TestExplicitLevel0"); addTest(root, doArabicShapingTest, "complex/arabic-shaping/ArabicShapingTest"); addTest(root, doLamAlefSpecialVLTRArabicShapingTest, "complex/arabic-shaping/lamalef"); @@ -4922,3 +4924,24 @@ testBracketOverflow(void) { ubidi_close(bidi); } +static void TestExplicitLevel0() { + // The following used to fail with an error, see ICU ticket #12922. + static const UChar text[2] = { 0x202d, 0x05d0 }; + static UBiDiLevel embeddings[2] = { 0, 0 }; + UErrorCode errorCode = U_ZERO_ERROR; + UBiDi *bidi = ubidi_open(); + ubidi_setPara(bidi, text, 2, UBIDI_DEFAULT_LTR , embeddings, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("ubidi_setPara() - %s", u_errorName(errorCode)); + } else { + UBiDiLevel level0 = ubidi_getLevelAt(bidi, 0); + UBiDiLevel level1 = ubidi_getLevelAt(bidi, 1); + if (level0 != 1 || level1 != 1) { + log_err("resolved levels != 1: { %d, %d }\n", level0, level1); + } + if (embeddings[0] != 1 || embeddings[1] != 1) { + log_err("modified embeddings[] levels != 1: { %d, %d }\n", embeddings[0], embeddings[1]); + } + } + ubidi_close(bidi); +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java index 62a276dc723..623f0aa6d2e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java @@ -2674,28 +2674,29 @@ public class Bidi { return dirct; } - /* + /** * Use a pre-specified embedding levels array: * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + *

Adjust the directional properties for overrides (->LEVEL_OVERRIDE), * ignore all explicit codes (X9), * and check all the preset levels. * - * Recalculate the flags to have them reflect the real properties + *

Recalculate the flags to have them reflect the real properties * after taking the explicit embeddings into account. */ private byte checkExplicitLevels() { - byte dirProp; - int i; int isolateCount = 0; this.flags = 0; /* collect all directionalities in the text */ - byte level; this.isolateCount = 0; - for (i = 0; i < length; ++i) { - level = levels[i]; - dirProp = dirProps[i]; + int currentParaIndex = 0; + int currentParaLimit = paras_limit[0]; + byte currentParaLevel = paraLevel; + + for (int i = 0; i < length; ++i) { + byte level = levels[i]; + byte dirProp = dirProps[i]; if (dirProp == LRI || dirProp == RLI) { isolateCount++; if (isolateCount > this.isolateCount) @@ -2705,21 +2706,40 @@ public class Bidi { isolateCount--; else if (dirProp == B) isolateCount = 0; - if ((level & LEVEL_OVERRIDE) != 0) { + + // optimized version of byte currentParaLevel = GetParaLevelAt(i); + if (defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < paraCount) { + currentParaLevel = paras_level[++currentParaIndex]; + currentParaLimit = paras_limit[currentParaIndex]; + } + + int overrideFlag = level & LEVEL_OVERRIDE; + level &= ~LEVEL_OVERRIDE; + if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. + level = currentParaLevel; + levels[i] = (byte)(level | overrideFlag); + } + } else { + // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level + throw new IllegalArgumentException("level " + level + + " out of bounds at " + i); + } + } + if (overrideFlag != 0) { /* keep the override flag in levels[i] but adjust the flags */ - level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */ flags |= DirPropFlagO(level); } else { /* set the flags */ flags |= DirPropFlagE(level) | DirPropFlag(dirProp); } - if ((level < GetParaLevelAt(i) && - !((0 == level) && (dirProp == B))) || - (MAX_EXPLICIT_LEVEL < level)) { - /* level out of bounds */ - throw new IllegalArgumentException("level " + level + - " out of bounds at " + i); - } } if ((flags & MASK_EMBEDDING) != 0) flags |= DirPropFlagLR(paraLevel); @@ -3950,11 +3970,12 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * LEVEL_OVERRIDE bit set.

- * Except for that bit, it must be + * Aside from that bit, it must be * paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL, - * with one exception: a level of zero may be specified for a - * paragraph separator even if paraLevel>0 when multiple - * paragraphs are submitted in the same call to setPara().

+ * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.

* Caution: A reference to this array, not a copy * of the levels, will be stored in the Bidi object; * the embeddingLevels @@ -5294,13 +5315,19 @@ public class Bidi { /** * Create Bidi from the given text, embedding, and direction information. - * The embeddings array may be null. If present, the values represent - * embedding level information. Negative values from -1 to -61 indicate - * overrides at the absolute value of the level. Positive values from 1 to - * 61 indicate embeddings. Where values are zero, the base embedding level - * as determined by the base direction is assumed.

* - * Note: this constructor calls setPara() internally. + *

The embeddings array may be null. If present, the values represent + * embedding level information. + * Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL} + * indicate overrides at the absolute value of the level. + * Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings. + * Where values are zero, the base embedding level + * as determined by the base direction is assumed, + * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.

+ * + *

Note: This constructor calls setPara() internally, + * after converting the java.text.Bidi-style embeddings with negative overrides + * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level. * * @param text an array containing the paragraph of text to process. * @param textStart the index into the text array of the start of the @@ -5354,22 +5381,23 @@ public class Bidi { if (embeddings == null) { paraEmbeddings = null; } else { + // Convert from java.text.Bidi embeddings to ICU setPara() levels: + // Copy to the start of a new array and convert java.text negative overrides + // to ICU bit-field-and-mask overrides. + // A copy of the embeddings is always required because + // setPara() may modify its embeddings. paraEmbeddings = new byte[paragraphLength]; byte lev; for (int i = 0; i < paragraphLength; i++) { lev = embeddings[i + embStart]; if (lev < 0) { lev = (byte)((- lev) | LEVEL_OVERRIDE); - } else if (lev == 0) { - lev = paraLvl; - if (paraLvl > MAX_EXPLICIT_LEVEL) { - lev &= 1; - } } + // setPara() lifts level 0 up to the resolved paragraph level. paraEmbeddings[i] = lev; } } - if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { + if (textStart == 0 && paragraphLength == text.length) { setPara(text, paraLvl, paraEmbeddings); } else { char[] paraText = new char[paragraphLength]; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java index 9fe50a65a35..af155ed8d25 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java @@ -526,7 +526,7 @@ public class TestBidi extends BidiFmwk { bidi.setReorderingMode(Bidi.REORDER_RUNS_ONLY); bidi.setPara("a \u05d0 b \u05d1 c \u05d2 d ", Bidi.LTR, null); assertEquals("\nWrong number of runs #4", 14, bidi.countRuns()); - + /* test testGetBaseDirection to verify fast string direction detection function */ /* mixed start with L */ String mixedEnglishFirst = "\u0061\u0627\u0032\u06f3\u0061\u0034"; @@ -566,7 +566,7 @@ public class TestBidi extends BidiFmwk { assertEquals("\nWrong direction through fast detection #12", Bidi.NEUTRAL, Bidi.getBaseDirection(allArabicDigits)); /* null string */ String nullString = null; - assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString)); + assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString)); /* first L (English) others are R (Hebrew etc.) */ String startEnglishOthersHebrew = "\u0071\u0590\u05D5\u05EA\u05F1"; assertEquals("\nWrong direction through fast detection #14", Bidi.LTR, Bidi.getBaseDirection(startEnglishOthersHebrew)); @@ -574,4 +574,20 @@ public class TestBidi extends BidiFmwk { String lastHebrewOthersEnglishDigit = "\u0031\u0032\u0033\u05F1"; assertEquals("\nWrong direction through fast detection #15", Bidi.RTL, Bidi.getBaseDirection(lastHebrewOthersEnglishDigit)); } + + @Test + public void testExplicitLevel0() { + // The following used to fail with an error, see ICU ticket #12922. + String text = "\u202d\u05d0"; + byte[] embeddings = new byte[2]; // all 0 + int flags = Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // 0x7e + Bidi bidi = new Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags); + assertEquals("resolved level at 0", 1, bidi.getLevelAt(0)); + assertEquals("resolved level at 1", 1, bidi.getLevelAt(1)); + + flags = java.text.Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // -2 + java.text.Bidi jb = new java.text.Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags); + assertEquals("java.text resolved level at 0", 1, jb.getLevelAt(0)); + assertEquals("java.text resolved level at 1", 1, jb.getLevelAt(1)); + } }