diff --git a/.gitattributes b/.gitattributes index b7a57346b84..883f6102810 100644 --- a/.gitattributes +++ b/.gitattributes @@ -177,10 +177,12 @@ icu4c/source/tools/pkgdata/pkgdata.vcxproj -text icu4c/source/tools/tzcode/icuregions -text icu4j/ivy.xml -text icu4j/lib/.project -text +icu4j/main/classes/core/src/com/ibm/icu/text/BidiTransform.java -text icu4j/main/shared/data/icudata.jar -text icu4j/main/shared/data/icutzdata.jar -text icu4j/main/shared/data/testdata.jar -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/BidiFmwk.java -text +icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidiTransform.java -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarTestFmwk.java -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/duration/LanguageTestFmwk.java -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DataDrivenNumberFormatTestUtility.java -text diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BidiTransform.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BidiTransform.java new file mode 100644 index 00000000000..bb51462d0fe --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BidiTransform.java @@ -0,0 +1,669 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +package com.ibm.icu.text; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.RelativeDateTimeFormatter.Direction; + +/** + * Bidi Layout Transformation Engine. + * + * @author Lina Kemmel + * + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ +public class BidiTransform +{ + /** + * {@link Order} indicates the order of text. + *

+ * This bidi transformation engine supports all possible combinations (4 in + * total) of input and output text order: + *

+ * + * @see Bidi#setInverse + * @see Bidi#setReorderingMode + * @see Bidi#REORDER_DEFAULT + * @see Bidi#REORDER_INVERSE_LIKE_DIRECT + * @see Bidi#REORDER_RUNS_ONLY + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + public enum Order { + /** + * Constant indicating a logical order. + * + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + LOGICAL, + /** + * Constant indicating a visual order. + * + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + VISUAL; + } + + /** + * {@link Mirroring} indicates whether or not characters with + * the "mirrored" property in RTL runs should be replaced with their + * mirror-image counterparts. + * + * @see Bidi#DO_MIRRORING + * @see Bidi#setReorderingOptions + * @see Bidi#writeReordered + * @see Bidi#writeReverse + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + public enum Mirroring { + /** + * Constant indicating that character mirroring should not be + * performed. + * + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + OFF, + /** + * Constant indicating that character mirroring should be performed. + *

+ * This corresponds to calling {@link Bidi#writeReordered} + * or {@link Bidi#writeReverse} with the + * {@link Bidi#DO_MIRRORING} option bit set. + * + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + ON; + } + + private Bidi bidi; + private String text; + private int reorderingOptions; + private int shapingOptions; + + /** + * {@link BidiTransform} default constructor. + * + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + public BidiTransform() + { + } + + /** + * Performs transformation of text from the bidi layout defined by the + * input ordering scheme to the bidi layout defined by the output ordering + * scheme, and applies character mirroring and Arabic shaping operations. + *

+ * In terms of {@link Bidi} class, such a transformation + * implies: + *

+ * An "ordering scheme" encompasses the base direction and the order of + * text, and these characteristics must be defined by the caller for both + * input and output explicitly.

+ * There are 36 possible combinations of {input, output} ordering schemes, + * which are partially supported by {@link Bidi} already. + * Examples of the currently supported combinations: + *

+ * All combinations that involve the Visual RTL scheme are unsupported by + * {@link Bidi}, for instance: + *

+ *

Example of usage of the transformation engine:
+ *

+     * BidiTransform bidiTransform = new BidiTransform();
+     * String in = "abc \u06f0123"; // "abc \\u06f0123"
+     * // Run a transformation.
+     * String out = bidiTransform.transform(in,
+     *          Bidi.LTR, Order.VISUAL,
+     *          Bidi.RTL, Order.LOGICAL,
+     *          Mirroring.OFF,
+     *          ArabicShaping.DIGITS_AN2EN | ArabicShaping.DIGIT_TYPE_AN_EXTENDED);
+     * // Result: "0123 abc".
+     * // Do something with out.
+     * out = out.replace('0', '4');
+     * // Result: "4123 abc".
+     * // Run a reverse transformation.
+     * String inNew = bidiTransform.transform(out,
+     *          Bidi.RTL, Order.LOGICAL,
+     *          Bidi.LTR, Order.VISUAL,
+     *          Mirroring.OFF,
+     *          ArabicShaping.DIGITS_EN2AN | ArabicShaping.DIGIT_TYPE_AN_EXTENDED);
+     * // Result: "abc \\u06f4\\u06f1\\u06f2\\u06f3"
+     * 
+ *

+ * + * @param text An input character sequence that the Bidi layout + * transformations will be performed on. + * @param inParaLevel A base embedding level of the input as defined in + * {@link Bidi#setPara(String, byte, byte[])} + * documentation for the paraLevel parameter. + * @param inOrder An order of the input, which can be one of the + * {@link Order} values. + * @param outParaLevel A base embedding level of the output as defined in + * {@link Bidi#setPara(String, byte, byte[])} + * documentation for the paraLevel parameter. + * @param outOrder An order of the output, which can be one of the + * {@link Order} values. + * @param doMirroring Indicates whether or not to perform character + * mirroring, and can accept one of the + * {@link Mirroring} values. + * @param shapingOptions Arabic digit and letter shaping options defined in + * the {@link ArabicShaping} documentation. + *

Note: Direction indicator options are + * computed by the transformation engine based on the effective + * ordering schemes, so user-defined direction indicators will be + * ignored. + * @return The output string, which is the result of the layout + * transformation. + * @throws IllegalArgumentException if text, + * inOrder, outOrder, or + * doMirroring parameter is null. + * @draft ICU 58 + * @provisional This API might change or be removed in a future release. + */ + public String transform(CharSequence text, + byte inParaLevel, Order inOrder, + byte outParaLevel, Order outOrder, + Mirroring doMirroring, int shapingOptions) + { + if (text == null || inOrder == null || outOrder == null || doMirroring == null) { + throw new IllegalArgumentException(); + } + this.text = text.toString(); + + byte[] levels = {inParaLevel, outParaLevel}; + resolveBaseDirection(levels); + + ReorderingScheme currentScheme = findMatchingScheme(levels[0], inOrder, + levels[1], outOrder); + if (currentScheme != null) { + this.bidi = new Bidi(); + this.reorderingOptions = Mirroring.ON.equals(doMirroring) + ? Bidi.DO_MIRRORING : Bidi.REORDER_DEFAULT; + + /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the + text scheme at the time shaping is invoked. */ + this.shapingOptions = shapingOptions & ~ArabicShaping.TEXT_DIRECTION_MASK; + currentScheme.doTransform(this); + } + return this.text; + } + + /** + * When the direction option is + * {@link Bidi#LEVEL_DEFAULT_LTR} or + * {@link Bidi#LEVEL_DEFAULT_RTL}, resolves the base + * direction according to that of the first strong directional character in + * the text. + * + * @param levels Byte array, where levels[0] is an input level and levels[1] is + * an output level. Resolved levels override these. + */ + private void resolveBaseDirection(byte[] levels) { + if (Bidi.IsDefaultLevel(levels[0])) { + byte level = Bidi.getBaseDirection(text); + levels[0] = level != Bidi.NEUTRAL ? 
level + : levels[0] == Bidi.LEVEL_DEFAULT_RTL ? Bidi.RTL : Bidi.LTR; + } else { + levels[0] &= 1; + } + if (Bidi.IsDefaultLevel(levels[1])) { + levels[1] = levels[0]; + } else { + levels[1] &= 1; + } + } + + /** + * Finds a valid {@link ReorderingScheme} matching the + * caller-defined scheme. + * + * @return A valid ReorderingScheme object or null + */ + private ReorderingScheme findMatchingScheme(byte inLevel, Order inOrder, + byte outLevel, Order outOrder) { + for (ReorderingScheme scheme : ReorderingScheme.values()) { + if (scheme.matches(inLevel, inOrder, outLevel, outOrder)) { + return scheme; + } + } + return null; + } + + /** + * Performs bidi resolution of text. + * + * @param level Base embedding level + * @param options Reordering options + */ + private void resolve(byte level, int options) { + bidi.setInverse((options & Bidi.REORDER_INVERSE_LIKE_DIRECT) != 0); + bidi.setReorderingMode(options); + bidi.setPara(text, level, null); + } + + /** + * Performs basic reordering of text (Logical LTR or RTL to Visual LTR). + * + */ + private void reorder() { + text = bidi.writeReordered(reorderingOptions); + reorderingOptions = Bidi.REORDER_DEFAULT; + } + + /** + * Performs string reverse. + */ + private void reverse() { + text = Bidi.writeReverse(text, Bidi.OPTION_DEFAULT); + } + + /** + * Performs character mirroring without reordering. When this method is + * called, {@link #text} should be in a Logical form. 
+ */ + private void mirror() { + if ((reorderingOptions & Bidi.DO_MIRRORING) == 0) { + return; + } + StringBuffer sb = new StringBuffer(text); + byte[] levels = bidi.getLevels(); + for (int i = 0, n = levels.length; i < n;) { + int ch = UTF16.charAt(sb, i); + if ((levels[i] & 1) != 0) { + UTF16.setCharAt(sb, i, UCharacter.getMirror(ch)); + } + i += UTF16.getCharCount(ch); + } + text = sb.toString(); + reorderingOptions &= ~Bidi.DO_MIRRORING; + } + + /** + * Performs digit and letter shaping + * + * @param digitsDir Digit shaping option that indicates whether the text + * should be treated as logical or visual. + * @param lettersDir Letter shaping option that indicates whether the text + * should be treated as logical or visual form (can mismatch the digit + * option). + */ + private void shapeArabic(int digitsDir, int lettersDir) { + if (digitsDir == lettersDir) { + shapeArabic(shapingOptions | digitsDir); + } else { + /* Honor all shape options other than letters (not necessarily digits + only) */ + shapeArabic((shapingOptions & ~ArabicShaping.LETTERS_MASK) | digitsDir); + + /* Honor all shape options other than digits (not necessarily letters + only) */ + shapeArabic((shapingOptions & ~ArabicShaping.DIGITS_MASK) | lettersDir); + } + } + + /** + * Performs digit and letter shaping + * + * @param options Shaping options covering both letters and digits + */ + private void shapeArabic(int options) { + if (options != 0) { + ArabicShaping shaper = new ArabicShaping(options); + try { + text = shaper.shape(text); + } catch(ArabicShapingException e) { + } + } + } + + private enum ReorderingScheme { + LOG_LTR_TO_VIS_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsLogical(inOrder) + && IsLTR(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + 
transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.reorder(); + } + }, + LOG_RTL_TO_VIS_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsLogical(inOrder) + && IsLTR(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT); + transform.reorder(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + } + }, + LOG_LTR_TO_VIS_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsLogical(inOrder) + && IsRTL(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.reorder(); + transform.reverse(); + } + }, + LOG_RTL_TO_VIS_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsLogical(inOrder) + && IsRTL(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT); + transform.reorder(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + transform.reverse(); + } + }, + VIS_LTR_TO_LOG_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsVisual(inOrder) + && IsRTL(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + transform.resolve(Bidi.RTL, Bidi.REORDER_INVERSE_LIKE_DIRECT); + transform.reorder(); + } + }, + VIS_RTL_TO_LOG_RTL { + @Override + 
boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsVisual(inOrder) + && IsRTL(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.reverse(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + transform.resolve(Bidi.RTL, Bidi.REORDER_INVERSE_LIKE_DIRECT); + transform.reorder(); + } + }, + VIS_LTR_TO_LOG_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsVisual(inOrder) + && IsLTR(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.LTR, Bidi.REORDER_INVERSE_LIKE_DIRECT); + transform.reorder(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + } + }, + VIS_RTL_TO_LOG_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsVisual(inOrder) + && IsLTR(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.reverse(); + transform.resolve(Bidi.LTR, Bidi.REORDER_INVERSE_LIKE_DIRECT); + transform.reorder(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + } + }, + LOG_LTR_TO_LOG_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsLogical(inOrder) + && IsRTL(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.resolve(Bidi.LTR, Bidi.REORDER_RUNS_ONLY); + transform.reorder(); + } + }, + LOG_RTL_TO_LOG_LTR { + @Override + boolean matches(byte 
inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsLogical(inOrder) + && IsLTR(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.resolve(Bidi.RTL, Bidi.REORDER_RUNS_ONLY); + transform.reorder(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + } + }, + VIS_LTR_TO_VIS_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsVisual(inOrder) + && IsRTL(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + transform.reverse(); + } + }, + VIS_RTL_TO_VIS_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsVisual(inOrder) + && IsLTR(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.reverse(); + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + } + }, + LOG_LTR_TO_LOG_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsLogical(inOrder) + && IsLTR(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL); + } + }, + LOG_RTL_TO_LOG_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + 
return IsRTL(inLevel) && IsLogical(inOrder) + && IsRTL(outLevel) && IsLogical(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_VISUAL_LTR, ArabicShaping.TEXT_DIRECTION_LOGICAL); + } + }, + VIS_LTR_TO_VIS_LTR { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsLTR(inLevel) && IsVisual(inOrder) + && IsLTR(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + } + }, + VIS_RTL_TO_VIS_RTL { + @Override + boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) { + return IsRTL(inLevel) && IsVisual(inOrder) + && IsRTL(outLevel) && IsVisual(outOrder); + } + @Override + void doTransform(BidiTransform transform) { + transform.reverse(); + transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT); + transform.mirror(); + transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR); + transform.reverse(); + } + }; + + /** + * Indicates whether this scheme matches another one in terms of + * equality of base direction and ordering scheme. + * + * @param inLevel Base level of the input text + * @param inOrder Order of the input text + * @param outLevel Base level of the output text + * @param outOrder Order of the output text + * + * @return true if it's a match, false + * otherwise + */ + abstract boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder); + + /** + * Performs a series of bidi layout transformations unique for the current + * scheme. + + * @param transform Bidi transformation engine + */ + abstract void doTransform(BidiTransform transform); + } + + /** + * Is level LTR? 
convenience method + + * @param level Embedding level + */ + private static boolean IsLTR(byte level) { + return (level & 1) == 0; + } + + /** + * Is level RTL? convenience method + + * @param level Embedding level + */ + private static boolean IsRTL(byte level) { + return (level & 1) == 1; + } + + /** + * Is order logical? convenience method + + * @param order Order value + */ + private static boolean IsLogical(Order order) { + return Order.LOGICAL.equals(order); + } + + /** + * Is order visual? convenience method + + * @param order Order value + */ + private static boolean IsVisual(Order order) { + return Order.VISUAL.equals(order); + } + +} diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidiTransform.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidiTransform.java new file mode 100644 index 00000000000..3bb53aa4762 --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidiTransform.java @@ -0,0 +1,336 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html#License + +package com.ibm.icu.dev.test.bidi; + +import org.junit.Test; + +import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.ArabicShaping; +import com.ibm.icu.text.Bidi; +import com.ibm.icu.text.BidiTransform; +import com.ibm.icu.text.BidiTransform.Mirroring; +import com.ibm.icu.text.BidiTransform.Order; + +/** + * Verify Bidi Layout Transformations + * + * @author Lina Kemmel + * + */ +public class TestBidiTransform extends TestFmwk { + + static final char LATN_ZERO = '\u0030'; + static final char ARAB_ZERO = '\u0660'; + static final char MIN_HEB_LETTER = '\u05d0'; + static final char MIN_ARAB_LETTER = '\u0630'; // relevant to this test only + static final char MIN_SHAPED_LETTER = '\ufeab'; // relevant to this test only + + + private BidiTransform bidiTransform; + private Bidi bidi; + + public TestBidiTransform() {} + + @Test + public void testBidiTransform() { + logln("\nEntering TestBidiTransform\n"); + + bidi = new Bidi(); + bidiTransform = new BidiTransform(); + + autoDirectionTest(); + allTransformOptionsTest(); + + logln("\nExiting TestBidiTransform\n"); + } + + /** + * Tests various combinations of base directions, with the input either + * Bidi.LEVEL_DEFAULT_LTR or + * Bidi.LEVEL_DEFAULT_RTL, and the output either + * Bidi.LTR or Bidi.RTL. Order is + * always Order.LOGICAL for the input and + * Order.VISUAL for the output. + */ + private void autoDirectionTest() { + final String[] inTexts = { + "abc \u05d0\u05d1", + "... abc \u05d0\u05d1", + "\u05d0\u05d1 abc", + "... 
\u05d0\u05d1 abc", + ".*^" + }; + final byte[] inLevels = { + Bidi.LEVEL_DEFAULT_LTR, Bidi.LEVEL_DEFAULT_RTL + }; + final byte[] outLevels = { + Bidi.LTR, Bidi.RTL + }; + logln("\nEntering autoDirectionTest\n"); + + for (String inText : inTexts) { + for (byte inLevel : inLevels) { + for (byte outLevel : outLevels) { + String outText = bidiTransform.transform(inText, inLevel, Order.LOGICAL, + outLevel, Order.VISUAL, Mirroring.OFF, 0); + bidi.setPara(inText, inLevel, null); + String expectedText = bidi.writeReordered(Bidi.REORDER_DEFAULT); + if ((outLevel & 1) != 0) { + expectedText = Bidi.writeReverse(expectedText, Bidi.OUTPUT_REVERSE); + } + logResultsForDir(inText, outText, expectedText, inLevel, outLevel); + } + } + } + logln("\nExiting autoDirectionTest\n"); + } + + /** + * This method covers: + *

+ */ + private void allTransformOptionsTest() { + final String inText = "a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662"; + + final Object[][] testCases = { + { Bidi.LTR, Order.LOGICAL, Bidi.LTR, Order.LOGICAL, + inText, // reordering without mirroring + "a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", // mirroring + "a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u0662\u0663\u0660 e\u0631\u0664\u0665\u0666 f \ufeaf \u0661\u0662", // context digit shaping + "1: Logical LTR ==> Logical LTR" }, // message + { Bidi.LTR, Order.LOGICAL, Bidi.LTR, Order.VISUAL, + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d \u0662\u0663\u0660 \u0630 e\u0664\u0665\u0666\u0631 f \u0661\u0662 \ufeaf", + "2: Logical LTR ==> Visual LTR" }, + { Bidi.LTR, Order.LOGICAL, Bidi.RTL, Order.LOGICAL, + "\ufeaf \u0661\u0662 f \u0631e456 \u0630 23\u0660 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 a[b]c", + "\ufeaf \u0661\u0662 f \u0631e456 \u0630 23\u0660 d \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 a[b]c", + "\ufeaf \u0661\u0662 f \u0631e\u0664\u0665\u0666 \u0630 \u0662\u0663\u0660 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 a[b]c", + "3: Logical LTR ==> Logical RTL" }, + { Bidi.LTR, Order.LOGICAL, Bidi.RTL, Order.VISUAL, + "\ufeaf \u0662\u0661 f \u0631654e \u0630 \u066032 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 c]b[a", + "\ufeaf \u0662\u0661 f \u0631654e \u0630 \u066032 d \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 c]b[a", + "\ufeaf \u0662\u0661 f \u0631\u0666\u0665\u0664e \u0630 \u0660\u0663\u0662 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 c]b[a", + "4: Logical LTR ==> Visual RTL" }, + + { Bidi.RTL, Order.LOGICAL, Bidi.RTL, Order.LOGICAL, inText, + "a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", + 
"a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", + "5: Logical RTL ==> Logical RTL" }, + { Bidi.RTL, Order.LOGICAL, Bidi.RTL, Order.VISUAL, + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "6: Logical RTL ==> Visual RTL" }, + { Bidi.RTL, Order.LOGICAL, Bidi.LTR, Order.LOGICAL, + "\ufeaf \u0661\u0662 f 456\u0631e 23\u0630 \u0660 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 a[b]c", + "\ufeaf \u0661\u0662 f 456\u0631e 23\u0630 \u0660 d 1 \u05d0)\u05d1\u05d2 \u05d3(\u05d4 a[b]c", + "\ufeaf \u0661\u0662 f 456\u0631e 23\u0630 \u0660 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 a[b]c", + "7: Logical RTL ==> Logical LTR" }, + { Bidi.RTL, Order.LOGICAL, Bidi.LTR, Order.VISUAL, + "\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c", + "\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 a[b]c", + "\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c", + "8: Logical RTL ==> Visual LTR" }, + + { Bidi.LTR, Order.VISUAL, Bidi.LTR, Order.VISUAL, inText, + "a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", + "a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u0662\u0663\u0660 e\u0631\u0664\u0665\u0666 f \ufeaf \u0661\u0662", + "9: Visual LTR ==> Visual LTR" }, + { Bidi.LTR, Order.VISUAL, Bidi.LTR, Order.LOGICAL, + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "10: Visual LTR ==> Logical LTR" }, + { 
Bidi.LTR, Order.VISUAL, Bidi.RTL, Order.VISUAL, + "\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a", + "\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 c]b[a", + "\u0662\u0661 \ufeaf f \u0666\u0665\u0664\u0631e \u0660\u0663\u0662 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a", + "11: Visual LTR ==> Visual RTL" }, + { Bidi.LTR, Order.VISUAL, Bidi.RTL, Order.LOGICAL, + "\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c", + "\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 a[b]c", + "\u0661\u0662 \ufeaf f \u0664\u0665\u0666\u0631e \u0662\u0663\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c", + "12: Visual LTR ==> Logical RTL" }, + + { Bidi.RTL, Order.VISUAL, Bidi.RTL, Order.VISUAL, inText, + "a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", + "a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", + "13: Visual RTL ==> Visual RTL" }, + { Bidi.RTL, Order.VISUAL, Bidi.RTL, Order.LOGICAL, + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "14: Visual RTL ==> Logical RTL" }, + { Bidi.RTL, Order.VISUAL, Bidi.LTR, Order.VISUAL, + "\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a", + "\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 c]b[a", + "\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a", + "15: Visual RTL ==> Visual LTR" }, + { Bidi.RTL, Order.VISUAL, Bidi.LTR, Order.LOGICAL, + "\ufeaf \u0662\u0661 f 654\u0631e \u066032 \u0630 d 1 
\u05d0(\u05d1\u05d2 \u05d3)\u05d4 c]b[a", + "\ufeaf \u0662\u0661 f 654\u0631e \u066032 \u0630 d 1 \u05d0)\u05d1\u05d2 \u05d3(\u05d4 c]b[a", + "\ufeaf \u0662\u0661 f 654\u0631e \u066032 \u0630 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 c]b[a", + "16: Visual RTL ==> Logical LTR" }, + + { Bidi.LEVEL_DEFAULT_RTL, Order.LOGICAL, Bidi.LTR, Order.VISUAL, + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d \u0662\u0663\u0660 \u0630 e\u0664\u0665\u0666\u0631 f \u0661\u0662 \ufeaf", + "17: Logical DEFAULT_RTL ==> Visual LTR" }, + { Bidi.RTL, Order.LOGICAL, Bidi.LEVEL_DEFAULT_LTR, Order.VISUAL, + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "18: Logical RTL ==> Visual DEFAULT_LTR" }, + { Bidi.LEVEL_DEFAULT_LTR, Order.LOGICAL, Bidi.LTR, Order.VISUAL, + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf", + "a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d \u0662\u0663\u0660 \u0630 e\u0664\u0665\u0666\u0631 f \u0661\u0662 \ufeaf", + "19: Logical DEFAULT_LTR ==> Visual LTR" }, + { Bidi.RTL, Order.LOGICAL, Bidi.LEVEL_DEFAULT_RTL, Order.VISUAL, + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661", + "20: Logical RTL ==> Visual DEFAULT_RTL" }, + }; + + final int[] digits = { + 
ArabicShaping.DIGITS_NOOP, ArabicShaping.DIGITS_EN2AN, ArabicShaping.DIGITS_AN2EN, ArabicShaping.DIGITS_EN2AN_INIT_AL + }; + final int[] letters = { + ArabicShaping.LETTERS_NOOP, ArabicShaping.LETTERS_SHAPE, ArabicShaping.LETTERS_UNSHAPE + }; + + char[] expectedChars; + + logln("\nEntering allTransformOptionsTest\n"); + + // Test various combinations of base level, order, mirroring, digits and letters + for (Object[] test : testCases) { + expectedChars = ((String)test[5]).toCharArray(); + verifyResultsForAllOpts(test, inText, bidiTransform.transform(inText, (Byte)test[0], (Order)test[1], + (Byte)test[2], (Order)test[3], Mirroring.ON, 0), expectedChars, 0, 0); + + for (int digit : digits) { + expectedChars = ((String)(digit == ArabicShaping.DIGITS_EN2AN_INIT_AL ? test[6] : test[4])) + .toCharArray(); + for (int letter : letters) { + verifyResultsForAllOpts(test, inText, bidiTransform.transform(inText, (Byte)test[0], + (Order)test[1], (Byte)test[2], (Order)test[3], Mirroring.OFF, digit | letter), + expectedChars, digit, letter); + } + } + } + logln("\nExiting allTransformOptionsTest\n"); + } + + private void logResultsForDir(String inText, String outText, String expected, + byte inLevel, byte outLevel) { + + assertEquals("inLevel: " + inLevel + ", outLevel: " + outLevel + /* TODO: BidiFwk#u16ToPseudo isn't good for us, needs an update to be used here */ + + "\ninText: " + pseudoScript(inText) + "\noutText: " + pseudoScript(outText) + + "\nexpected: " + pseudoScript(expected) + "\n", expected, outText); + } + + private void verifyResultsForAllOpts(Object[] test, String inText, String outText, char[] expectedChars, int digits, int letters) { + switch (digits) { + case ArabicShaping.DIGITS_AN2EN: + shapeDigits(expectedChars, ARAB_ZERO, LATN_ZERO); + break; + case ArabicShaping.DIGITS_EN2AN: + shapeDigits(expectedChars, LATN_ZERO, ARAB_ZERO); + break; + default: + break; + } + switch (letters) { + case ArabicShaping.LETTERS_SHAPE: + shapeLetters(expectedChars, 0); + 
break; + case ArabicShaping.LETTERS_UNSHAPE: + shapeLetters(expectedChars, 1); + break; + default: + break; + } + String expected = new String(expectedChars); + assertEquals("\nTest " + test[7] + "\ndigits: " + digits + ", letters: " + letters + /* TODO: BidiFwk#u16ToPseudo isn't good for us, needs an update to be used here */ + + "\ninText: " + pseudoScript(inText) + "\noutText: " + pseudoScript(outText) + + "\nexpected: " + pseudoScript(expected) + "\n", expected, outText); + } + + /* + * Using the following conventions: + * AL unshaped: A-E + * AL shaped: F-J + * R: K-Z + * EN: 0-4 + * AN: 5-9 + */ + private static char substituteChar(char uch, char baseReal, + char basePseudo, char max) { + char dest = (char)(basePseudo + (uch - baseReal)); + return dest > max ? max : dest; + } + + private static String pseudoScript(String text) { + char[] uchars = text.toCharArray(); + for (int i = uchars.length; i-- > 0;) { + char uch = uchars[i]; + switch (UCharacter.getDirectionality(uch)) { + case UCharacter.RIGHT_TO_LEFT: + uchars[i] = substituteChar(uch, MIN_HEB_LETTER, 'K', 'Z'); + break; + case UCharacter.RIGHT_TO_LEFT_ARABIC: + if (uch > 0xFE00) { + uchars[i] = substituteChar(uch, MIN_SHAPED_LETTER, 'F', 'J'); + } else { + uchars[i] = substituteChar(uch, MIN_ARAB_LETTER, 'A', 'E'); + } + break; + case UCharacter.ARABIC_NUMBER: + uchars[i] = substituteChar(uch, ARAB_ZERO, '5', '9'); + break; + default: + break; + } + } + return new String(uchars); + } + + private static void shapeDigits(char[] chars, char srcZero, char destZero) { + for (int i = chars.length; i-- > 0;) { + if (chars[i] >= srcZero && chars[i] <= srcZero + 9) { + chars[i] = substituteChar(chars[i], srcZero, destZero, (char)(destZero + 9)); + } + } + } + + /* + * TODO: the goal is not to thoroughly test ArabicShaping, so the test can be quite trivial, + * but maybe still more sophisticated? 
+ */ + private static final String letters = "\u0630\ufeab\u0631\ufead\u0632\ufeaf"; + + private static void shapeLetters(char[] chars, int indexParity) { + for (int i = chars.length; i-- > 0;) { + int index = letters.indexOf(chars[i]); + if (index >= 0 && (index & 1) == indexParity) { + chars[i] = letters.charAt(index ^ 1); + } + } + } +}