ICU-11679 Merging BidiTransform work from Lina's work branch.

X-SVN-Rev: 39223
This commit is contained in:
Yoshito Umaoka 2016-09-14 20:58:05 +00:00
parent 8d59026fd4
commit 6fa604826e
3 changed files with 1007 additions and 0 deletions

2
.gitattributes vendored
View file

@ -177,10 +177,12 @@ icu4c/source/tools/pkgdata/pkgdata.vcxproj -text
icu4c/source/tools/tzcode/icuregions -text
icu4j/ivy.xml -text
icu4j/lib/.project -text
icu4j/main/classes/core/src/com/ibm/icu/text/BidiTransform.java -text
icu4j/main/shared/data/icudata.jar -text
icu4j/main/shared/data/icutzdata.jar -text
icu4j/main/shared/data/testdata.jar -text
icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/BidiFmwk.java -text
icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidiTransform.java -text
icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarTestFmwk.java -text
icu4j/main/tests/core/src/com/ibm/icu/dev/test/duration/LanguageTestFmwk.java -text
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DataDrivenNumberFormatTestUtility.java -text

View file

@ -0,0 +1,669 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.text;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.RelativeDateTimeFormatter.Direction;
/**
* Bidi Layout Transformation Engine.
*
* @author Lina Kemmel
*
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public class BidiTransform
{
/**
* <code>{@link Order}</code> indicates the order of text.
* <p>
* This bidi transformation engine supports all possible combinations (4 in
* total) of input and output text order:
* <ul>
* <li>{logical input, visual output}: unless the output direction is RTL,
* this corresponds to a normal operation of the Bidi algorithm as
* described in the Unicode Technical Report and implemented by
* <code>{@link Bidi}</code> when the reordering mode is set to
* <code>Bidi#REORDER_DEFAULT</code>. Visual RTL mode is not supported by
* <code>{@link Bidi}</code> and is accomplished through reversing a visual
* LTR string,</li>
* <li>{visual input, logical output}: unless the input direction is RTL,
* this corresponds to an "inverse bidi algorithm" in
* <code>{@link Bidi}</code> with the reordering mode set to
* <code>{@link Bidi#REORDER_INVERSE_LIKE_DIRECT}</code>. Visual RTL mode
* is not supported by <code>{@link Bidi}</code> and is accomplished
* through reversing a visual LTR string,</li>
* <li>{logical input, logical output}: if the input and output base
* directions mismatch, this corresponds to the <code>{@link Bidi}</code>
* implementation with the reordering mode set to
* <code>{@link Bidi#REORDER_RUNS_ONLY}</code>; and if the input and output
* base directions are identical, the transformation engine will only
* handle character mirroring and Arabic shaping operations without
* reordering,</li>
* <li>{visual input, visual output}: this reordering mode is not supported
* by the <code>{@link Bidi}</code> engine; it implies character mirroring,
* Arabic shaping, and - if the input/output base directions mismatch -
* string reverse operations.</li>
* </ul>
*
* @see Bidi#setInverse
* @see Bidi#setReorderingMode
* @see Bidi#REORDER_DEFAULT
* @see Bidi#REORDER_INVERSE_LIKE_DIRECT
* @see Bidi#REORDER_RUNS_ONLY
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public enum Order {
    /**
     * Marks text as being in logical order.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    LOGICAL,

    /**
     * Marks text as being in visual order.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    VISUAL
}
/**
* <code>{@link Mirroring}</code> indicates whether or not characters with
* the "mirrored" property in RTL runs should be replaced with their
* mirror-image counterparts.
*
* @see Bidi#DO_MIRRORING
* @see Bidi#setReorderingOptions
* @see Bidi#writeReordered
* @see Bidi#writeReverse
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public enum Mirroring {
    /**
     * Character mirroring is not to be performed.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    OFF,

    /**
     * Character mirroring is to be performed.
     * <p>
     * Equivalent to invoking <code>{@link Bidi#writeReordered}</code> or
     * <code>{@link Bidi#writeReverse}</code> with the
     * <code>{@link Bidi#DO_MIRRORING}</code> option bit set.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    ON
}
// Per-call working state. All four fields are (re)assigned by transform()
// and then read/updated by the individual transformation steps.

// Bidi engine used by resolve(), reorder() and mirror(); created anew in transform().
private Bidi bidi;
// Text being transformed; every step replaces it with its own result.
private String text;
// Options handed to Bidi.writeReordered(); the DO_MIRRORING bit is cleared
// once mirroring has been applied (see reorder() and mirror()).
private int reorderingOptions;
// Caller's shaping options with the TEXT_DIRECTION bits masked out.
private int shapingOptions;
/**
* Creates a <code>{@link BidiTransform}</code> engine.
* <p>
* The constructor itself performs no initialization; the working state is
* set up by each call to the <code>transform</code> method.
*
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public BidiTransform()
{
}
/**
* Performs transformation of text from the bidi layout defined by the
* input ordering scheme to the bidi layout defined by the output ordering
* scheme, and applies character mirroring and Arabic shaping operations.
* <p>
* In terms of <code>{@link Bidi}</code> class, such a transformation
* implies:
* <ul>
* <li>calling <code>{@link Bidi#setReorderingMode}</code> as needed (when
* the reordering mode is other than normal),</li>
* <li>calling <code>{@link Bidi#setInverse}</code> as needed (when text
* should be transformed from a visual to a logical form),</li>
* <li>resolving embedding levels of each character in the input text by
* calling <code>{@link Bidi#setPara}</code>,</li>
* <li>reordering the characters based on the computed embedding levels,
* also performing character mirroring as needed, and streaming the result
* to the output, by calling <code>{@link Bidi#writeReordered}</code>,</li>
* <li>performing Arabic digit and letter shaping on the output text by
* calling <code>{@link ArabicShaping#shape}</code>.</li>
* </ul><p>
* An "ordering scheme" encompasses the base direction and the order of
* text, and these characteristics must be defined by the caller for both
* input and output explicitly.<p>
* There are 36 possible combinations of {input, output} ordering schemes,
* which are partially supported by <code>{@link Bidi}</code> already.
* Examples of the currently supported combinations:
* <ul>
* <li>{Logical LTR, Visual LTR}: this is equivalent to calling
* <code>{@link Bidi#setPara}</code> with
* <code>paraLevel == {@link Bidi#LTR}</code>,</li>
* <li>{Logical RTL, Visual LTR}: this is equivalent to calling
* <code>{@link Bidi#setPara}</code> with
* <code>paraLevel == {@link Bidi#RTL}</code>,</li>
* <li>{Logical Default ("Auto") LTR, Visual LTR}: this is equivalent to
* calling <code>{@link Bidi#setPara}</code> with
* <code>paraLevel == {@link Bidi#LEVEL_DEFAULT_LTR}</code>,</li>
* <li>{Logical Default ("Auto") RTL, Visual LTR}: this is equivalent to
* calling <code>{@link Bidi#setPara}</code> with
* <code>paraLevel == {@link Bidi#LEVEL_DEFAULT_RTL}</code>,</li>
* <li>{Visual LTR, Logical LTR}: this is equivalent to
* calling <code>{@link Bidi#setInverse}(true)</code> and then
* <code>{@link Bidi#setPara}</code> with
* <code>paraLevel == {@link Bidi#LTR}</code>,</li>
* <li>{Visual LTR, Logical RTL}: this is equivalent to calling
* <code>{@link Bidi#setInverse}(true)</code> and then
* <code>{@link Bidi#setPara}</code> with
* <code>paraLevel == {@link Bidi#RTL}</code>.</li>
* </ul><p>
* All combinations that involve the Visual RTL scheme are unsupported by
* <code>{@link Bidi}</code>, for instance:
* <ul>
* <li>{Logical LTR, Visual RTL},</li>
* <li>{Visual RTL, Logical RTL}.</li>
* </ul>
* <p>Example of usage of the transformation engine:<br>
* <pre>
* BidiTransform bidiTransform = new BidiTransform();
* String in = "abc \u06f0123"; // "abc \\u06f0123"
* // Run a transformation.
* String out = bidiTransform.transform(in,
* Bidi.LTR, Order.VISUAL,
* Bidi.RTL, Order.LOGICAL,
* Mirroring.OFF,
* ArabicShaping.DIGITS_AN2EN | ArabicShaping.DIGIT_TYPE_AN_EXTENDED);
* // Result: "0123 abc".
* // Do something with out.
* out = out.replace('0', '4');
* // Result: "4123 abc".
* // Run a reverse transformation.
* String inNew = bidiTransform.transform(out,
* Bidi.RTL, Order.LOGICAL,
* Bidi.LTR, Order.VISUAL,
* Mirroring.OFF,
* ArabicShaping.DIGITS_EN2AN | ArabicShaping.DIGIT_TYPE_AN_EXTENDED);
* // Result: "abc \\u06f4\\u06f1\\u06f2\\u06f3"
* </pre>
* </p>
*
* @param text An input character sequence that the Bidi layout
* transformations will be performed on.
* @param inParaLevel A base embedding level of the input as defined in
* <code>{@link Bidi#setPara(String, byte, byte[])}</code>
* documentation for the <code>paraLevel</code> parameter.
* @param inOrder An order of the input, which can be one of the
* <code>{@link Order}</code> values.
* @param outParaLevel A base embedding level of the output as defined in
* <code>{@link Bidi#setPara(String, byte, byte[])}</code>
* documentation for the <code>paraLevel</code> parameter.
* @param outOrder An order of the output, which can be one of the
* <code>{@link Order}</code> values.
* @param doMirroring Indicates whether or not to perform character
* mirroring, and can accept one of the
* <code>{@link Mirroring}</code> values.
* @param shapingOptions Arabic digit and letter shaping options defined in
* the <code>{@link ArabicShaping}</code> documentation.
* <p><strong>Note:</strong> Direction indicator options are
* computed by the transformation engine based on the effective
* ordering schemes, so user-defined direction indicators will be
* ignored.
* @return The output string, which is the result of the layout
* transformation.
* @throws IllegalArgumentException if <code>text</code>,
* <code>inOrder</code>, <code>outOrder</code>, or
* <code>doMirroring</code> parameter is <code>null</code>.
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public String transform(CharSequence text,
        byte inParaLevel, Order inOrder,
        byte outParaLevel, Order outOrder,
        Mirroring doMirroring, int shapingOptions)
{
    // Reject null arguments up front and say which one was null, so that the
    // caller does not have to guess from a message-less exception.
    if (text == null) {
        throw new IllegalArgumentException("text must not be null");
    }
    if (inOrder == null) {
        throw new IllegalArgumentException("inOrder must not be null");
    }
    if (outOrder == null) {
        throw new IllegalArgumentException("outOrder must not be null");
    }
    if (doMirroring == null) {
        throw new IllegalArgumentException("doMirroring must not be null");
    }

    this.text = text.toString();

    // Replace "default" levels by the resolved ones and reduce explicit
    // levels to their low bit; the array is updated in place.
    byte[] levels = {inParaLevel, outParaLevel};
    resolveBaseDirection(levels);

    ReorderingScheme currentScheme = findMatchingScheme(levels[0], inOrder,
            levels[1], outOrder);
    if (currentScheme != null) {
        this.bidi = new Bidi();
        this.reorderingOptions = doMirroring == Mirroring.ON
                ? Bidi.DO_MIRRORING : Bidi.REORDER_DEFAULT;
        /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the
           text scheme at the time shaping is invoked. */
        this.shapingOptions = shapingOptions & ~ArabicShaping.TEXT_DIRECTION_MASK;
        currentScheme.doTransform(this);
    }
    // If no scheme matched, the input text is returned unchanged.
    return this.text;
}
/**
 * Turns the caller-supplied paragraph levels into plain LTR/RTL levels.
 * <p>
 * If the input level is <code>{@link Bidi#LEVEL_DEFAULT_LTR}</code> or
 * <code>{@link Bidi#LEVEL_DEFAULT_RTL}</code>, it is replaced by the value
 * of <code>{@link Bidi#getBaseDirection}</code> for the current text; when
 * that value is <code>{@link Bidi#NEUTRAL}</code>, the fallback is RTL for
 * <code>LEVEL_DEFAULT_RTL</code> and LTR otherwise. An explicit input level
 * is reduced to its lowest bit. A default output level takes the resolved
 * input level; an explicit output level is reduced to its lowest bit.
 *
 * @param levels Byte array, where levels[0] is the input level and
 *        levels[1] is the output level. The resolved levels overwrite
 *        these entries.
 */
private void resolveBaseDirection(byte[] levels) {
    final byte requestedIn = levels[0];
    if (!Bidi.IsDefaultLevel(requestedIn)) {
        levels[0] &= 1;
    } else {
        final byte detected = Bidi.getBaseDirection(text);
        if (detected != Bidi.NEUTRAL) {
            levels[0] = detected;
        } else if (requestedIn == Bidi.LEVEL_DEFAULT_RTL) {
            levels[0] = Bidi.RTL;
        } else {
            levels[0] = Bidi.LTR;
        }
    }

    if (!Bidi.IsDefaultLevel(levels[1])) {
        levels[1] &= 1;
    } else {
        levels[1] = levels[0];
    }
}
/**
 * Looks up the first <code>{@link ReorderingScheme}</code>, in declaration
 * order, whose <code>matches</code> method accepts the given levels and
 * orders.
 *
 * @param inLevel Resolved base level of the input text
 * @param inOrder Order of the input text
 * @param outLevel Resolved base level of the output text
 * @param outOrder Order of the output text
 * @return The matching <code>ReorderingScheme</code>, or null if none
 *         matches
 */
private ReorderingScheme findMatchingScheme(byte inLevel, Order inOrder,
        byte outLevel, Order outOrder) {
    final ReorderingScheme[] candidates = ReorderingScheme.values();
    ReorderingScheme found = null;
    for (int i = 0; i < candidates.length && found == null; i++) {
        if (candidates[i].matches(inLevel, inOrder, outLevel, outOrder)) {
            found = candidates[i];
        }
    }
    return found;
}
/**
 * Runs bidi resolution on the current text: configures the inverse flag
 * and the reordering mode of the Bidi object, then calls
 * <code>{@link Bidi#setPara}</code> with the given level and no explicit
 * embedding levels.
 * <p>
 * NOTE(review): <code>options</code> is used as a reordering mode, yet the
 * inverse flag is derived from it with a bitwise AND against
 * <code>REORDER_INVERSE_LIKE_DIRECT</code>; confirm whether an equality
 * test was intended.
 *
 * @param level Base embedding level
 * @param options Reordering mode passed to
 *        <code>{@link Bidi#setReorderingMode}</code>
 */
private void resolve(byte level, int options) {
    final boolean inverse = (options & Bidi.REORDER_INVERSE_LIKE_DIRECT) != 0;
    bidi.setInverse(inverse);
    bidi.setReorderingMode(options);
    bidi.setPara(text, level, null);
}
/**
 * Replaces the text with the output of
 * <code>{@link Bidi#writeReordered}</code>, called with the current
 * reordering options, and then resets those options so that they are not
 * applied a second time.
 */
private void reorder() {
    final String reordered = bidi.writeReordered(reorderingOptions);
    text = reordered;
    reorderingOptions = Bidi.REORDER_DEFAULT;
}
/**
 * Replaces the text with the result of
 * <code>{@link Bidi#writeReverse}</code> called with default options.
 */
private void reverse() {
    final String reversed = Bidi.writeReverse(text, Bidi.OPTION_DEFAULT);
    text = reversed;
}
/**
 * Applies character mirroring in place, without reordering. Does nothing
 * unless the <code>{@link Bidi#DO_MIRRORING}</code> bit is set in the
 * current reordering options. Every code point whose resolved level is odd
 * is replaced by <code>{@link UCharacter#getMirror}</code> of itself; the
 * mirroring bit is cleared afterwards. When this method is called,
 * <code>{@link #text}</code> should be in a Logical form.
 */
private void mirror() {
    final boolean mirroringRequested = (reorderingOptions & Bidi.DO_MIRRORING) != 0;
    if (mirroringRequested) {
        final StringBuffer buffer = new StringBuffer(text);
        final byte[] resolvedLevels = bidi.getLevels();
        final int limit = resolvedLevels.length;
        int index = 0;
        while (index < limit) {
            final int codePoint = UTF16.charAt(buffer, index);
            final boolean oddLevel = (resolvedLevels[index] & 1) != 0;
            if (oddLevel) {
                UTF16.setCharAt(buffer, index, UCharacter.getMirror(codePoint));
            }
            // Step over both code units of a supplementary code point.
            index += UTF16.getCharCount(codePoint);
        }
        text = buffer.toString();
        reorderingOptions &= ~Bidi.DO_MIRRORING;
    }
}
/**
 * Shapes digits and letters, each with its own text direction option.
 * When both directions are equal a single shaping pass is made; otherwise
 * two passes are made, the first with the letter options masked out and
 * the second with the digit options masked out.
 *
 * @param digitsDir Text direction option to use while shaping digits
 * @param lettersDir Text direction option to use while shaping letters
 *        (may differ from the digit option)
 */
private void shapeArabic(int digitsDir, int lettersDir) {
    if (digitsDir != lettersDir) {
        /* First pass: everything except letter shaping (not necessarily
           digits only) */
        final int withoutLetters = (shapingOptions & ~ArabicShaping.LETTERS_MASK) | digitsDir;
        shapeArabic(withoutLetters);
        /* Second pass: everything except digit shaping (not necessarily
           letters only) */
        final int withoutDigits = (shapingOptions & ~ArabicShaping.DIGITS_MASK) | lettersDir;
        shapeArabic(withoutDigits);
        return;
    }
    shapeArabic(shapingOptions | digitsDir);
}
/**
 * Runs one <code>{@link ArabicShaping}</code> pass over the text. Nothing
 * is done when <code>options</code> is zero. If shaping throws an
 * <code>ArabicShapingException</code>, the text is left as it was.
 *
 * @param options Shaping options covering both letters and digits
 */
private void shapeArabic(int options) {
    if (options == 0) {
        return;
    }
    try {
        text = new ArabicShaping(options).shape(text);
    } catch (ArabicShapingException ignored) {
        // Ignored on purpose (presumably best effort): the text keeps its
        // pre-shaping value and the remaining steps still run.
    }
}
/**
 * The sixteen {input, output} combinations of base direction (LTR/RTL) and
 * order (logical/visual). Each constant records the combination it stands
 * for and supplies the sequence of steps that performs the transformation.
 */
private enum ReorderingScheme {
    LOG_LTR_TO_VIS_LTR(false, Order.LOGICAL, false, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.reorder();
        }
    },
    LOG_RTL_TO_VIS_LTR(true, Order.LOGICAL, false, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            transform.reorder();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
        }
    },
    LOG_LTR_TO_VIS_RTL(false, Order.LOGICAL, true, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.reorder();
            transform.reverse();
        }
    },
    LOG_RTL_TO_VIS_RTL(true, Order.LOGICAL, true, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            transform.reorder();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            transform.reverse();
        }
    },
    VIS_LTR_TO_LOG_RTL(false, Order.VISUAL, true, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            transform.resolve(Bidi.RTL, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            transform.reorder();
        }
    },
    VIS_RTL_TO_LOG_RTL(true, Order.VISUAL, true, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.reverse();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            transform.resolve(Bidi.RTL, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            transform.reorder();
        }
    },
    VIS_LTR_TO_LOG_LTR(false, Order.VISUAL, false, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.LTR, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            transform.reorder();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    VIS_RTL_TO_LOG_LTR(true, Order.VISUAL, false, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.reverse();
            transform.resolve(Bidi.LTR, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            transform.reorder();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    LOG_LTR_TO_LOG_RTL(false, Order.LOGICAL, true, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.resolve(Bidi.LTR, Bidi.REORDER_RUNS_ONLY);
            transform.reorder();
        }
    },
    LOG_RTL_TO_LOG_LTR(true, Order.LOGICAL, false, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.resolve(Bidi.RTL, Bidi.REORDER_RUNS_ONLY);
            transform.reorder();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    VIS_LTR_TO_VIS_RTL(false, Order.VISUAL, true, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            transform.reverse();
        }
    },
    VIS_RTL_TO_VIS_LTR(true, Order.VISUAL, false, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.reverse();
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
        }
    },
    LOG_LTR_TO_LOG_LTR(false, Order.LOGICAL, false, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    LOG_RTL_TO_LOG_RTL(true, Order.LOGICAL, true, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            transform.mirror();
            // NOTE(review): the only scheme that shapes digits with VISUAL_LTR
            // and letters with LOGICAL; confirm that this is intended.
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_VISUAL_LTR, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    VIS_LTR_TO_VIS_LTR(false, Order.VISUAL, false, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
        }
    },
    VIS_RTL_TO_VIS_RTL(true, Order.VISUAL, true, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform transform) {
            transform.reverse();
            transform.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            transform.mirror();
            transform.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            transform.reverse();
        }
    };

    /** Whether this scheme expects an RTL (odd) input level. */
    private final boolean inputRtl;
    /** Input order this scheme expects. */
    private final Order inputOrder;
    /** Whether this scheme expects an RTL (odd) output level. */
    private final boolean outputRtl;
    /** Output order this scheme expects. */
    private final Order outputOrder;

    ReorderingScheme(boolean inputRtl, Order inputOrder, boolean outputRtl, Order outputOrder) {
        this.inputRtl = inputRtl;
        this.inputOrder = inputOrder;
        this.outputRtl = outputRtl;
        this.outputOrder = outputOrder;
    }

    /**
     * Tells whether the given base levels and orders are the ones this
     * scheme stands for. Levels are compared by direction only (lowest
     * bit); a null order never matches.
     *
     * @param inLevel Base level of the input text
     * @param inOrder Order of the input text
     * @param outLevel Base level of the output text
     * @param outOrder Order of the output text
     *
     * @return <code>true</code> if it's a match, <code>false</code>
     * otherwise
     */
    boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) {
        return IsRTL(inLevel) == inputRtl
                && inputOrder.equals(inOrder)
                && IsRTL(outLevel) == outputRtl
                && outputOrder.equals(outOrder);
    }

    /**
     * Performs the series of bidi layout transformation steps specific to
     * this scheme.
     *
     * @param transform Bidi transformation engine holding the text and
     *        options to work on
     */
    abstract void doTransform(BidiTransform transform);
}
/**
 * Convenience check: does the level denote an LTR direction, i.e. is its
 * lowest bit clear?
 *
 * @param level Embedding level
 * @return <code>true</code> if the level is even
 */
private static boolean IsLTR(byte level) {
    final int directionBit = level & 1;
    return directionBit == 0;
}
/**
 * Convenience check: does the level denote an RTL direction, i.e. is its
 * lowest bit set?
 *
 * @param level Embedding level
 * @return <code>true</code> if the level is odd
 */
private static boolean IsRTL(byte level) {
    final int directionBit = level & 1;
    return directionBit != 0;
}
/**
 * Convenience check: is the order logical?
 *
 * @param order Order value; <code>null</code> yields <code>false</code>
 * @return <code>true</code> if the order is <code>Order.LOGICAL</code>
 */
private static boolean IsLogical(Order order) {
    return order == Order.LOGICAL;
}
/**
 * Convenience check: is the order visual?
 *
 * @param order Order value; <code>null</code> yields <code>false</code>
 * @return <code>true</code> if the order is <code>Order.VISUAL</code>
 */
private static boolean IsVisual(Order order) {
    return order == Order.VISUAL;
}
}

View file

@ -0,0 +1,336 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.bidi;
import org.junit.Test;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.ArabicShaping;
import com.ibm.icu.text.Bidi;
import com.ibm.icu.text.BidiTransform;
import com.ibm.icu.text.BidiTransform.Mirroring;
import com.ibm.icu.text.BidiTransform.Order;
/**
* Verify Bidi Layout Transformations
*
* @author Lina Kemmel
*
*/
public class TestBidiTransform extends TestFmwk {
// U+0030 DIGIT ZERO: the zero of the European digits.
static final char LATN_ZERO = '\u0030';
// U+0660 ARABIC-INDIC DIGIT ZERO: the zero of the Arabic-Indic digits.
static final char ARAB_ZERO = '\u0660';
// U+05D0 HEBREW LETTER ALEF (presumably the lower bound used when mapping
// Hebrew letters in helpers defined further down this file - confirm).
static final char MIN_HEB_LETTER = '\u05d0';
// U+0630 ARABIC LETTER THAL.
static final char MIN_ARAB_LETTER = '\u0630'; // relevant to this test only
// U+FEAB ARABIC LETTER THAL ISOLATED FORM.
static final char MIN_SHAPED_LETTER = '\ufeab'; // relevant to this test only
// Engine under test; created in testBidiTransform().
private BidiTransform bidiTransform;
// Plain Bidi object used to compute expected results; created in testBidiTransform().
private Bidi bidi;
// No-argument constructor; the fields above are initialized by the test method.
public TestBidiTransform() {}
/**
 * Test entry point: creates fresh <code>Bidi</code> and
 * <code>BidiTransform</code> objects and then runs the auto-direction and
 * the all-options sub-tests, in that order.
 */
@Test
public void testBidiTransform() {
logln("\nEntering TestBidiTransform\n");
// Both sub-tests below rely on these two fields being initialized.
bidi = new Bidi();
bidiTransform = new BidiTransform();
autoDirectionTest();
allTransformOptionsTest();
logln("\nExiting TestBidiTransform\n");
}
/**
 * Tests various combinations of base directions, with the input level
 * being either <code>Bidi.LEVEL_DEFAULT_LTR</code> or
 * <code>Bidi.LEVEL_DEFAULT_RTL</code>, and the output level being either
 * <code>Bidi.LTR</code> or <code>Bidi.RTL</code>. Order is always
 * <code>Order.LOGICAL</code> for the input and <code>Order.VISUAL</code>
 * for the output. The expected text is computed with a plain
 * <code>Bidi</code> object and reversed when the output level is odd.
 */
private void autoDirectionTest() {
    final byte[] outLevels = {
        Bidi.LTR, Bidi.RTL
    };
    final byte[] inLevels = {
        Bidi.LEVEL_DEFAULT_LTR, Bidi.LEVEL_DEFAULT_RTL
    };
    final String[] inTexts = {
        "abc \u05d0\u05d1",
        "... abc \u05d0\u05d1",
        "\u05d0\u05d1 abc",
        "... \u05d0\u05d1 abc",
        ".*^"
    };

    logln("\nEntering autoDirectionTest\n");
    for (int t = 0; t < inTexts.length; t++) {
        final String inText = inTexts[t];
        for (int i = 0; i < inLevels.length; i++) {
            final byte inLevel = inLevels[i];
            for (int o = 0; o < outLevels.length; o++) {
                final byte outLevel = outLevels[o];
                final String actual = bidiTransform.transform(inText, inLevel, Order.LOGICAL,
                        outLevel, Order.VISUAL, Mirroring.OFF, 0);

                // Reference result: ordinary reordering, reversed for RTL output.
                bidi.setPara(inText, inLevel, null);
                String reference = bidi.writeReordered(Bidi.REORDER_DEFAULT);
                final boolean rtlOutput = (outLevel & 1) != 0;
                if (rtlOutput) {
                    reference = Bidi.writeReverse(reference, Bidi.OUTPUT_REVERSE);
                }
                logResultsForDir(inText, actual, reference, inLevel, outLevel);
            }
        }
    }
    logln("\nExiting autoDirectionTest\n");
}
/**
* This method covers:
* <ul>
* <li>all possible combinations of ordering schemes and <strong>explicit</strong>
* base levels, applied to both input and output,</li>
* <li>selected tests for auto direction (systematically, auto direction is
* covered in a dedicated test) applied on both input and output,</li>
* <li>all possible combinations of mirroring, digits and letters applied
* to output only.</li>
* </ul>
* <p>
* Layout of each row of the test table, as used by the loop below:
* [0] input level, [1] input order, [2] output level, [3] output order,
* [4] expected text without mirroring, [5] expected text with mirroring,
* [6] expected text for <code>DIGITS_EN2AN_INIT_AL</code>, [7] message.
*/
private void allTransformOptionsTest() {
final String inText = "a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662";
final Object[][] testCases = {
{ Bidi.LTR, Order.LOGICAL, Bidi.LTR, Order.LOGICAL,
inText, // reordering without mirroring
"a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662", // mirroring
"a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u0662\u0663\u0660 e\u0631\u0664\u0665\u0666 f \ufeaf \u0661\u0662", // context digit shaping
"1: Logical LTR ==> Logical LTR" }, // message
{ Bidi.LTR, Order.LOGICAL, Bidi.LTR, Order.VISUAL,
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d \u0662\u0663\u0660 \u0630 e\u0664\u0665\u0666\u0631 f \u0661\u0662 \ufeaf",
"2: Logical LTR ==> Visual LTR" },
{ Bidi.LTR, Order.LOGICAL, Bidi.RTL, Order.LOGICAL,
"\ufeaf \u0661\u0662 f \u0631e456 \u0630 23\u0660 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 a[b]c",
"\ufeaf \u0661\u0662 f \u0631e456 \u0630 23\u0660 d \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 a[b]c",
"\ufeaf \u0661\u0662 f \u0631e\u0664\u0665\u0666 \u0630 \u0662\u0663\u0660 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 a[b]c",
"3: Logical LTR ==> Logical RTL" },
{ Bidi.LTR, Order.LOGICAL, Bidi.RTL, Order.VISUAL,
"\ufeaf \u0662\u0661 f \u0631654e \u0630 \u066032 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 c]b[a",
"\ufeaf \u0662\u0661 f \u0631654e \u0630 \u066032 d \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 c]b[a",
"\ufeaf \u0662\u0661 f \u0631\u0666\u0665\u0664e \u0630 \u0660\u0663\u0662 d \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 c]b[a",
"4: Logical LTR ==> Visual RTL" },
{ Bidi.RTL, Order.LOGICAL, Bidi.RTL, Order.LOGICAL, inText,
"a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662",
"a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662",
"5: Logical RTL ==> Logical RTL" },
{ Bidi.RTL, Order.LOGICAL, Bidi.RTL, Order.VISUAL,
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"6: Logical RTL ==> Visual RTL" },
{ Bidi.RTL, Order.LOGICAL, Bidi.LTR, Order.LOGICAL,
"\ufeaf \u0661\u0662 f 456\u0631e 23\u0630 \u0660 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 a[b]c",
"\ufeaf \u0661\u0662 f 456\u0631e 23\u0630 \u0660 d 1 \u05d0)\u05d1\u05d2 \u05d3(\u05d4 a[b]c",
"\ufeaf \u0661\u0662 f 456\u0631e 23\u0630 \u0660 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 a[b]c",
"7: Logical RTL ==> Logical LTR" },
{ Bidi.RTL, Order.LOGICAL, Bidi.LTR, Order.VISUAL,
"\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c",
"\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 a[b]c",
"\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c",
"8: Logical RTL ==> Visual LTR" },
{ Bidi.LTR, Order.VISUAL, Bidi.LTR, Order.VISUAL, inText,
"a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662",
"a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u0662\u0663\u0660 e\u0631\u0664\u0665\u0666 f \ufeaf \u0661\u0662",
"9: Visual LTR ==> Visual LTR" },
{ Bidi.LTR, Order.VISUAL, Bidi.LTR, Order.LOGICAL,
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"10: Visual LTR ==> Logical LTR" },
{ Bidi.LTR, Order.VISUAL, Bidi.RTL, Order.VISUAL,
"\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a",
"\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 c]b[a",
"\u0662\u0661 \ufeaf f \u0666\u0665\u0664\u0631e \u0660\u0663\u0662 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a",
"11: Visual LTR ==> Visual RTL" },
{ Bidi.LTR, Order.VISUAL, Bidi.RTL, Order.LOGICAL,
"\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c",
"\u0661\u0662 \ufeaf f 456\u0631e 23\u0660 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 a[b]c",
"\u0661\u0662 \ufeaf f \u0664\u0665\u0666\u0631e \u0662\u0663\u0660 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 a[b]c",
"12: Visual LTR ==> Logical RTL" },
{ Bidi.RTL, Order.VISUAL, Bidi.RTL, Order.VISUAL, inText,
"a[b]c \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662",
"a[b]c \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 23\u0660 e\u0631456 f \ufeaf \u0661\u0662",
"13: Visual RTL ==> Visual RTL" },
{ Bidi.RTL, Order.VISUAL, Bidi.RTL, Order.LOGICAL,
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"14: Visual RTL ==> Logical RTL" },
{ Bidi.RTL, Order.VISUAL, Bidi.LTR, Order.VISUAL,
"\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a",
"\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 c]b[a",
"\u0662\u0661 \ufeaf f 654\u0631e \u066032 \u0630 d 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 c]b[a",
"15: Visual RTL ==> Visual LTR" },
{ Bidi.RTL, Order.VISUAL, Bidi.LTR, Order.LOGICAL,
"\ufeaf \u0662\u0661 f 654\u0631e \u066032 \u0630 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 c]b[a",
"\ufeaf \u0662\u0661 f 654\u0631e \u066032 \u0630 d 1 \u05d0)\u05d1\u05d2 \u05d3(\u05d4 c]b[a",
"\ufeaf \u0662\u0661 f 654\u0631e \u066032 \u0630 d 1 \u05d0(\u05d1\u05d2 \u05d3)\u05d4 c]b[a",
"16: Visual RTL ==> Logical LTR" },
{ Bidi.LEVEL_DEFAULT_RTL, Order.LOGICAL, Bidi.LTR, Order.VISUAL,
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d \u0662\u0663\u0660 \u0630 e\u0664\u0665\u0666\u0631 f \u0661\u0662 \ufeaf",
"17: Logical DEFAULT_RTL ==> Visual LTR" },
{ Bidi.RTL, Order.LOGICAL, Bidi.LEVEL_DEFAULT_LTR, Order.VISUAL,
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"18: Logical RTL ==> Visual DEFAULT_LTR" },
{ Bidi.LEVEL_DEFAULT_LTR, Order.LOGICAL, Bidi.LTR, Order.VISUAL,
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4(\u05d3 \u05d2\u05d1)\u05d0 d 23\u0660 \u0630 e456\u0631 f \u0661\u0662 \ufeaf",
"a[b]c 1 \u05d4)\u05d3 \u05d2\u05d1(\u05d0 d \u0662\u0663\u0660 \u0630 e\u0664\u0665\u0666\u0631 f \u0661\u0662 \ufeaf",
"19: Logical DEFAULT_LTR ==> Visual LTR" },
{ Bidi.RTL, Order.LOGICAL, Bidi.LEVEL_DEFAULT_RTL, Order.VISUAL,
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0)\u05d1\u05d2 \u05d3(\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"c]b[a \u05d0(\u05d1\u05d2 \u05d3)\u05d4 1 d \u0630 \u066032 e\u0631654 f \ufeaf \u0662\u0661",
"20: Logical RTL ==> Visual DEFAULT_RTL" },
};
// Digit shaping options to combine with every letter shaping option below.
final int[] digits = {
ArabicShaping.DIGITS_NOOP, ArabicShaping.DIGITS_EN2AN, ArabicShaping.DIGITS_AN2EN, ArabicShaping.DIGITS_EN2AN_INIT_AL
};
final int[] letters = {
ArabicShaping.LETTERS_NOOP, ArabicShaping.LETTERS_SHAPE, ArabicShaping.LETTERS_UNSHAPE
};
char[] expectedChars;
logln("\nEntering allTransformOptionsTest\n");
// Test various combinations of base level, order, mirroring, digits and letters
for (Object[] test : testCases) {
// Mirroring on, no shaping: compare against column [5].
expectedChars = ((String)test[5]).toCharArray();
verifyResultsForAllOpts(test, inText, bidiTransform.transform(inText, (Byte)test[0], (Order)test[1],
(Byte)test[2], (Order)test[3], Mirroring.ON, 0), expectedChars, 0, 0);
for (int digit : digits) {
// Mirroring off: column [6] for contextual digit shaping, column [4] otherwise.
// The same array is handed to every letter option of this digit option.
expectedChars = ((String)(digit == ArabicShaping.DIGITS_EN2AN_INIT_AL ? test[6] : test[4]))
.toCharArray();
for (int letter : letters) {
verifyResultsForAllOpts(test, inText, bidiTransform.transform(inText, (Byte)test[0],
(Order)test[1], (Byte)test[2], (Order)test[3], Mirroring.OFF, digit | letter),
expectedChars, digit, letter);
}
}
}
logln("\nExiting allTransformOptionsTest\n");
}
/**
 * Asserts that outText equals expected. The failure message carries the two
 * level values followed by the input, actual and expected strings, each
 * rendered through pseudoScript.
 *
 * @param inText   text reported as "inText" in the message
 * @param outText  actual text, compared against expected
 * @param expected text the comparison must match
 * @param inLevel  value reported as "inLevel" in the message
 * @param outLevel value reported as "outLevel" in the message
 */
private void logResultsForDir(String inText, String outText, String expected,
        byte inLevel, byte outLevel) {
    String levelPart = "inLevel: " + inLevel + ", outLevel: " + outLevel;
    /* TODO: BidiFwk#u16ToPseudo isn't good for us, needs an update to be used here */
    String textPart = "\ninText: " + pseudoScript(inText)
            + "\noutText: " + pseudoScript(outText)
            + "\nexpected: " + pseudoScript(expected) + "\n";
    assertEquals(levelPart + textPart, expected, outText);
}
/**
 * Checks one transform result against the expected text for a given
 * combination of digit and letter shaping options.
 * <p>
 * The expected text is derived from expectedChars by first applying the digit
 * substitution selected by digits and then the letter substitution selected by
 * letters. The derivation is done on a private copy, so the caller's array is
 * never modified and the outcome of one call cannot leak into the next one
 * when the same array is passed repeatedly.
 *
 * @param test          test-case row; element 7 is printed as its description
 * @param inText        text reported as "inText" in the failure message
 * @param outText       actual text, compared against the derived expectation
 * @param expectedChars baseline expected characters, before shaping adjustments
 * @param digits        digit option; only ArabicShaping.DIGITS_AN2EN and
 *                      ArabicShaping.DIGITS_EN2AN change the expectation
 * @param letters       letter option; only ArabicShaping.LETTERS_SHAPE and
 *                      ArabicShaping.LETTERS_UNSHAPE change the expectation
 */
private void verifyResultsForAllOpts(Object[] test, String inText, String outText, char[] expectedChars, int digits, int letters) {
    // Work on a copy: the shaping helpers edit their argument in place.
    char[] adjusted = expectedChars.clone();
    switch (digits) {
    case ArabicShaping.DIGITS_AN2EN:
        shapeDigits(adjusted, ARAB_ZERO, LATN_ZERO);
        break;
    case ArabicShaping.DIGITS_EN2AN:
        shapeDigits(adjusted, LATN_ZERO, ARAB_ZERO);
        break;
    default:
        // Any other digit option leaves the expectation as given.
        break;
    }
    switch (letters) {
    case ArabicShaping.LETTERS_SHAPE:
        shapeLetters(adjusted, 0);
        break;
    case ArabicShaping.LETTERS_UNSHAPE:
        shapeLetters(adjusted, 1);
        break;
    default:
        // Any other letter option leaves the expectation as given.
        break;
    }
    String expected = new String(adjusted);
    assertEquals("\nTest " + test[7] + "\ndigits: " + digits + ", letters: " + letters
            /* TODO: BidiFwk#u16ToPseudo isn't good for us, needs an update to be used here */
            + "\ninText: " + pseudoScript(inText) + "\noutText: " + pseudoScript(outText)
            + "\nexpected: " + pseudoScript(expected) + "\n", expected, outText);
}
/*
* Using the following conventions:
* AL unshaped: A-E
* AL shaped: F-J
* R: K-Z
* EN: 0-4
* AN: 5-9
*/
/**
 * Maps a character from one range onto another by preserving its offset from
 * the start of the source range, clamping the result to the target range.
 *
 * @param uch        character to map
 * @param baseReal   first character of the source range
 * @param basePseudo first character of the target range
 * @param max        last character of the target range
 * @return basePseudo + (uch - baseReal), limited to [basePseudo, max]
 */
private static char substituteChar(char uch, char baseReal,
        char basePseudo, char max) {
    // Compute in int so a character below baseReal cannot wrap around char range.
    int dest = basePseudo + (uch - baseReal);
    if (dest > max) {
        return max;
    }
    if (dest < basePseudo) {
        // Characters preceding the source range collapse onto the first target character.
        return basePseudo;
    }
    return (char) dest;
}
/**
 * Returns a copy of text in which characters of certain bidi classes are
 * replaced by stand-ins via substituteChar:
 * RIGHT_TO_LEFT characters map into 'K'..'Z' (offset from MIN_HEB_LETTER),
 * RIGHT_TO_LEFT_ARABIC characters above U+FE00 map into 'F'..'J' (offset from
 * MIN_SHAPED_LETTER), other RIGHT_TO_LEFT_ARABIC characters map into 'A'..'E'
 * (offset from MIN_ARAB_LETTER), and ARABIC_NUMBER characters map into
 * '5'..'9' (offset from ARAB_ZERO). All other characters are kept as they are.
 *
 * @param text text to convert
 * @return the converted text, same length as the input
 */
private static String pseudoScript(String text) {
    char[] result = text.toCharArray();
    for (int pos = 0; pos < result.length; pos++) {
        char current = result[pos];
        int direction = UCharacter.getDirectionality(current);
        if (direction == UCharacter.RIGHT_TO_LEFT) {
            result[pos] = substituteChar(current, MIN_HEB_LETTER, 'K', 'Z');
        } else if (direction == UCharacter.RIGHT_TO_LEFT_ARABIC) {
            // Code points above U+FE00 are treated as shaped letters.
            boolean shaped = current > 0xFE00;
            result[pos] = shaped
                    ? substituteChar(current, MIN_SHAPED_LETTER, 'F', 'J')
                    : substituteChar(current, MIN_ARAB_LETTER, 'A', 'E');
        } else if (direction == UCharacter.ARABIC_NUMBER) {
            result[pos] = substituteChar(current, ARAB_ZERO, '5', '9');
        }
    }
    return new String(result);
}
/**
 * Rewrites, in place, every character lying in the ten-character run that
 * starts at srcZero to the character at the same offset in the run that
 * starts at destZero. Characters outside the source run are not touched.
 *
 * @param chars    array to modify in place
 * @param srcZero  first character of the source digit run
 * @param destZero first character of the destination digit run
 */
private static void shapeDigits(char[] chars, char srcZero, char destZero) {
    final char destNine = (char)(destZero + 9);
    for (int pos = 0; pos < chars.length; pos++) {
        char current = chars[pos];
        boolean isSourceDigit = current >= srcZero && current <= srcZero + 9;
        if (!isSourceDigit) {
            continue;
        }
        chars[pos] = substituteChar(current, srcZero, destZero, destNine);
    }
}
/*
* TODO: the goal is not to thoroughly test ArabicShaping, so the test can be quite trivial,
* but maybe still more sophisticated?
*/
/*
 * Pairs of letters laid out as (even index, odd index); shapeLetters swaps a
 * character for the other member of its pair.
 */
private static final String letters = "\u0630\ufeab\u0631\ufead\u0632\ufeaf";

/**
 * Replaces, in place, each character found in the letters table at an index
 * whose parity equals indexParity with the other member of its pair.
 * Characters not in the table, or found at an index of the other parity, are
 * left unchanged.
 *
 * @param chars       array to modify in place
 * @param indexParity 0 to replace characters at even table indexes,
 *                    1 to replace characters at odd table indexes
 */
private static void shapeLetters(char[] chars, int indexParity) {
    for (int pos = 0; pos < chars.length; pos++) {
        int tableIndex = letters.indexOf(chars[pos]);
        if (tableIndex < 0) {
            continue; // not one of the known letters
        }
        if ((tableIndex & 1) != indexParity) {
            continue; // already in the requested form
        }
        // Flipping the lowest bit selects the partner within the pair.
        int partnerIndex = tableIndex ^ 1;
        chars[pos] = letters.charAt(partnerIndex);
    }
}
}