From 6ce57afd03cc4d85b529e48bb98e624bbe067851 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Fri, 9 Sep 2016 16:41:15 +0000 Subject: [PATCH] ICU-11679 merge C BiDi Transform from branch * fixed from branch: inadvertently removed Unicode attribution from Makefiles * fixed from branch: Unicode attribution * bonus: moved icuplug into the 'registration' filter on windows X-SVN-Rev: 39170 --- .gitattributes | 3 + .gitignore | 4 + icu4c/source/common/Makefile.in | 1 + icu4c/source/common/common.vcxproj | 2 + icu4c/source/common/common.vcxproj.filters | 12 +- icu4c/source/common/ubiditransform.c | 528 ++++++++++++++++++ icu4c/source/common/unicode/ubiditransform.h | 312 +++++++++++ icu4c/source/test/cintltst/Makefile.in | 1 + icu4c/source/test/cintltst/calltest.c | 2 + .../source/test/cintltst/cbiditransformtst.c | 430 ++++++++++++++ icu4c/source/test/cintltst/cintltst.vcxproj | 1 + .../test/cintltst/cintltst.vcxproj.filters | 5 +- 12 files changed, 1298 insertions(+), 3 deletions(-) create mode 100644 icu4c/source/common/ubiditransform.c create mode 100644 icu4c/source/common/unicode/ubiditransform.h create mode 100644 icu4c/source/test/cintltst/cbiditransformtst.c diff --git a/.gitattributes b/.gitattributes index b7a57346b84..dde3a76979a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -51,6 +51,8 @@ README text !eol icu4c/icu4c.css -text icu4c/source/aclocal.m4 -text icu4c/source/allinone/icucheck.bat -text +icu4c/source/common/ubiditransform.c -text +icu4c/source/common/unicode/ubiditransform.h -text icu4c/source/config/m4/icu-conditional.m4 -text icu4c/source/data/curr/pool.res -text icu4c/source/data/in/coll/ucadata-implicithan.icu -text @@ -132,6 +134,7 @@ icu4c/source/samples/ugrep/ugrep.vcxproj -text icu4c/source/samples/uresb/resources.vcxproj -text icu4c/source/samples/uresb/uresb.vcxproj -text icu4c/source/samples/ustring/ustring.vcxproj -text +icu4c/source/test/cintltst/cbiditransformtst.c -text icu4c/source/test/depstest/icu-dependencies-mode.el 
-text icu4c/source/test/iotest/iotest.vcxproj -text icu4c/source/test/letest/cletest.vcxproj -text diff --git a/.gitignore b/.gitignore index bb7a6f9ff66..23bd593b58a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,15 @@ icu4c/lib64 icu4c/source/Doxyfile icu4c/source/Makefile icu4c/source/README +icu4c/source/allinone/*.db icu4c/source/allinone/*.ncb +icu4c/source/allinone/*.opendb icu4c/source/allinone/*.opensdf icu4c/source/allinone/*.opt icu4c/source/allinone/*.sdf icu4c/source/allinone/*.suo +icu4c/source/allinone/.vs +icu4c/source/allinone/Debug icu4c/source/allinone/ipch icu4c/source/autom4te.cache icu4c/source/bin diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 25c042926d2..59ffb7377d7 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -109,6 +109,7 @@ uidna.o usprep.o uts46.o punycode.o \ util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \ ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \ sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \ +ubiditransform.o \ pluralmap.o ## Header files to install diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index 4574ddad9b3..e6c9acffb49 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -238,6 +238,7 @@ + @@ -586,6 +587,7 @@ + diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index aebb6cc404c..56e8c12f04d 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -586,13 +586,18 @@ collections - locales & resources break iteration + + registration + + + bidi + @@ -910,6 +915,9 @@ break iteration + + bidi + @@ -1173,4 +1181,4 @@ collections - + \ No newline at end of file diff --git a/icu4c/source/common/ubiditransform.c b/icu4c/source/common/ubiditransform.c new file mode 100644 index 
00000000000..c2d8f3c7afa --- /dev/null +++ b/icu4c/source/common/ubiditransform.c @@ -0,0 +1,528 @@ +/* +****************************************************************************** +* +* Copyright (C) 2016 and later: Unicode, Inc. and others. +* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#include "cmemory.h" +#include "unicode/ubidi.h" +#include "unicode/ustring.h" +#include "unicode/ushape.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "unicode/ubiditransform.h" + +/* Some convenience defines */ +#define LTR UBIDI_LTR +#define RTL UBIDI_RTL +#define LOGICAL UBIDI_LOGICAL +#define VISUAL UBIDI_VISUAL +#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL +#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR + +#define CHECK_LEN(STR, LEN, ERROR) { \ + if (LEN == 0) return 0; \ + if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ + if (LEN == -1) LEN = u_strlen(STR); \ + } + +#define MAX_ACTIONS 7 + +/** + * Typedef for a pointer to a function, which performs some operation (such as + * reordering, setting "inverse" mode, character mirroring, etc.). Return value + * indicates whether the text was changed in the course of this operation or + * not. 
+ */ +typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); + +/** + * Structure that holds a predefined reordering scheme. Each field is + * described by the comment next to it. + * + */ +typedef struct { + UBiDiLevel inLevel; /* input level */ + UBiDiOrder inOrder; /* input order */ + UBiDiLevel outLevel; /* output level */ + UBiDiOrder outOrder; /* output order */ + uint32_t digitsDir; /* digit shaping direction */ + uint32_t lettersDir; /* letter shaping direction */ + UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ + const UBiDiAction actions[MAX_ACTIONS]; /* NULL-terminated array of pointers to functions carrying out the transformation */ +} ReorderingScheme; + +/** + * State of one transformation engine instance. + */ +struct UBiDiTransform { + UBiDi *pBidi; /* pointer to a UBiDi object */ + const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ + UChar *src; /* input text: internal buffer, freed by ubiditransform_close */ + UChar *dest; /* output text: the caller's buffer, never freed here */ + uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ + uint32_t srcSize; /* capacity of src in UChars, as set by updateSrc */ + uint32_t destSize; /* output text capacity */ + uint32_t *pDestLength; /* points to the number of UChars written to dest */ + uint32_t reorderingOptions; /* reordering options - currently only UBIDI_DO_MIRRORING is supported */ + uint32_t digits; /* digit option for ArabicShaping */ + uint32_t letters; /* letter option for ArabicShaping */ +}; + +/* Allocates a zero-initialized UBiDiTransform. Returns NULL if *pErrorCode + already indicates a failure, or (with U_MEMORY_ALLOCATION_ERROR set) if the + allocation fails. */ +U_DRAFT UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode) +{ + UBiDiTransform *pBiDiTransform = NULL; + if (U_SUCCESS(*pErrorCode)) { + pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); + if (pBiDiTransform == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + return pBiDiTransform; +} + +/* Closes the UBiDi object and frees the src buffer, if present, then frees the + transform itself. A NULL argument is a no-op. */ +U_DRAFT void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBiDiTransform) +{ + if (pBiDiTransform != NULL) { + if (pBiDiTransform->pBidi != NULL) { + ubidi_close(pBiDiTransform->pBidi); + } + if (pBiDiTransform->src != NULL) { +
uprv_free(pBiDiTransform->src); + } + uprv_free(pBiDiTransform); + } +} + +/** + * Performs Bidi resolution of text: calls ubidi_setPara on the current source + * text with the base level of the active scheme. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Always FALSE, i.e. this function does not modify the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, + pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); + return FALSE; +} + +/** + * Performs basic reordering of text (Logical -> Visual LTR) by writing the + * reordered text to dest with the current reordering options. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Always TRUE, i.e. this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, + pTransform->reorderingOptions, pErrorCode); + + /* The length returned by ubidi_writeReordered is not used: the output length is recorded as the source length. */ + *pTransform->pDestLength = pTransform->srcLength; + /* Clear the options so that a later action in the same scheme does not mirror again. */ + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * Sets "inverse" mode on the UBiDi object and selects the + * UBIDI_REORDER_INVERSE_LIKE_DIRECT reordering mode. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value (not used by this action). + * + * @return Always FALSE, i.e. this function does not modify the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setInverse(pTransform->pBidi, TRUE); + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); + return FALSE; +} + +/** + * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL + * transformation.
+ * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value (not used by this action). + * + * @return Always FALSE, i.e. this function does not modify the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); + return FALSE; +} + +/** + * Performs string reverse: writes the current source text to dest in reverse + * order by calling ubidi_writeReverse. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Always TRUE, i.e. this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReverse(pTransform->src, pTransform->srcLength, + pTransform->dest, pTransform->destSize, + UBIDI_REORDER_DEFAULT, pErrorCode); + /* The length returned by ubidi_writeReverse is not used: the output length is recorded as the source length. */ + *pTransform->pDestLength = pTransform->srcLength; + return TRUE; +} + +/** + * Applies a new value to the text that serves as input at the current + * processing step. This value is identical to the original one when we begin + * the processing, but usually changes as the transformation progresses. + * The text is copied into the transform's own src buffer, which is + * reallocated when newSize exceeds the current capacity. + * + * @param pTransform A pointer to the UBiDiTransform structure. + * @param newSrc A pointer whose value is to be used as input text. + * @param newLength A length of the new text in UChars. + * @param newSize A new source capacity in UChars. + * @param pErrorCode Pointer to the error code value; set to + * U_BUFFER_OVERFLOW_ERROR when newSize is smaller than newLength, or to + * U_MEMORY_ALLOCATION_ERROR when the buffer cannot be allocated.
+ */ +static void +updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, + uint32_t newSize, UErrorCode *pErrorCode) +{ + if (newSize < newLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return; + } + if (newSize > pTransform->srcSize) { + newSize += 50; // allocate slightly more than needed right now + if (pTransform->src != NULL) { + uprv_free(pTransform->src); + pTransform->src = NULL; + } + pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); + if (pTransform->src == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + //pTransform->srcLength = pTransform->srcSize = 0; + return; + } + pTransform->srcSize = newSize; + } + u_strncpy(pTransform->src, newSrc, newLength); + pTransform->srcLength = u_terminateUChars(pTransform->src, + pTransform->srcSize, newLength, pErrorCode); +} + +/** + * Calls a lower level shaping function. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param options Shaping options. + * @param pErrorCode Pointer to the error code value. + */ +static void +doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) +{ + *pTransform->pDestLength = u_shapeArabic(pTransform->src, + pTransform->srcLength, pTransform->dest, pTransform->destSize, + options, pErrorCode); +} + +/** + * Performs digit and letter shaping. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). 
+ */ +static UBool +action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + /* No shaping option requested: nothing to do, text unchanged. */ + if ((pTransform->letters | pTransform->digits) == 0) { + return FALSE; + } + if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { + /* Same direction for letters and digits: shape both in a single pass. */ + doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } else { + /* Different directions: shape digits first, feed the result back as the source, then shape letters. */ + doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, + *pTransform->pDestLength, pErrorCode); + doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } + } + return TRUE; +} + +/** + * Performs character mirroring: copies the source text to dest, replacing each + * code point at an odd embedding level with the result of u_charMirror. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value; set to + * U_BUFFER_OVERFLOW_ERROR when destSize is smaller than srcLength. + * + * @return FALSE when mirroring is not requested or dest is too small, TRUE + * otherwise. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + UChar32 c; + uint32_t i = 0, j = 0; + if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { + return FALSE; + } + if (pTransform->destSize < pTransform->srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + /* NOTE(review): do-while runs the body at least once, so this presumably relies on srcLength > 0 - confirm. */ + do { + /* The level is read at index i before U16_NEXT advances i past the code point. */ + UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; + U16_NEXT(pTransform->src, i, pTransform->srcLength, c); + U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); + } while (i < pTransform->srcLength); + + *pTransform->pDestLength = pTransform->srcLength; + /* Clear the options so that a later action in the same scheme does not mirror again. */ + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * All possible reordering schemes.
+ * Each entry is {inLevel, inOrder, outLevel, outOrder, digitsDir, lettersDir, + * baseLevel, {actions..., NULL}}, in the field order of ReorderingScheme. + * The actions of the matching entry are run in the order listed. + * + */ +static const ReorderingScheme Schemes[] = +{ + /* 0: Logical LTR => Visual LTR */ + {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, NULL}}, + /* 1: Logical RTL => Visual LTR */ + {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 2: Logical LTR => Visual RTL */ + {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, + /* 3: Logical RTL => Visual RTL */ + {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, + /* 4: Visual LTR => Logical RTL */ + {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 5: Visual RTL => Logical RTL */ + {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 6: Visual LTR => Logical LTR */ + {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 7: Visual RTL => Logical LTR */ + {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 8: Logical LTR => Logical RTL */ + {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, + /* 9: Logical RTL => Logical LTR */ + {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 10: Visual LTR => Visual RTL */ + {LTR, VISUAL,
RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, + /* 11: Visual RTL => Visual LTR */ + {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, + /* 12: Logical LTR => Logical LTR */ + {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 13: Logical RTL => Logical RTL. NOTE(review): the only entry whose digitsDir is SHAPE_VISUAL - confirm this is intended. */ + {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 14: Visual LTR => Visual LTR */ + {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 15: Visual RTL => Visual RTL */ + {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} +}; + +/* Number of entries in Schemes. */ +static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); + +/** + * When the direction option is UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL, resolve the base direction according to that + * of the first strong bidi character. + * + * @param text The input text. + * @param length The length of the input text. + * @param pInLevel In/out: the input paragraph level. A default level is + * replaced by the direction reported by ubidi_getBaseDirection or, when that + * is UBIDI_NEUTRAL, by the direction the default stands for; any other level + * is reduced to its lowest bit. + * @param pOutLevel In/out: the output paragraph level. A default level is + * replaced by the resolved input level; any other level is reduced to its + * lowest bit. + */ +static void +resolveBaseDirection(const UChar *text, uint32_t length, + UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) +{ + switch (*pInLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: { + UBiDiLevel level = ubidi_getBaseDirection(text, length); + *pInLevel = level != UBIDI_NEUTRAL ? level + : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; + break; + } + default: + *pInLevel &= 1; + break; + } + switch (*pOutLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: + *pOutLevel = *pInLevel; + break; + default: + *pOutLevel &= 1; + break; + } +} + +/** + * Finds a valid ReorderingScheme matching the + * caller-defined scheme.
+ * + * @return A valid ReorderingScheme object or NULL + */ +static const ReorderingScheme* +findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, + UBiDiOrder inOrder, UBiDiOrder outOrder) +{ + uint32_t i; + for (i = 0; i < nSchemes; i++) { + const ReorderingScheme *pScheme = Schemes + i; + if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel + && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { + return pScheme; + } + } + return NULL; +} + +U_DRAFT uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode) +{ + uint32_t destLength = 0; + UBool textChanged = FALSE; + const UBiDiTransform *pOrigTransform = pBiDiTransform; + const UBiDiAction *action = NULL; + + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (src == NULL || dest == NULL) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + CHECK_LEN(src, srcLength, pErrorCode); + CHECK_LEN(dest, destSize, pErrorCode); + + if (pBiDiTransform == NULL) { + pBiDiTransform = ubiditransform_open(pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + } + /* Current limitation: in multiple paragraphs will be resolved according + to the 1st paragraph */ + resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); + + pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, + inOrder, outOrder); + if (pBiDiTransform->pActiveScheme == NULL || pBiDiTransform->pActiveScheme->actions == NULL) { + goto cleanup; + } + pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING + : UBIDI_REORDER_DEFAULT; + + /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text + scheme at the time shaping is invoked. 
*/ + shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; + pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; + pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; + + updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + if (pBiDiTransform->pBidi == NULL) { + pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + } + pBiDiTransform->dest = dest; + pBiDiTransform->destSize = destSize; + pBiDiTransform->pDestLength = &destLength; + + /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ + for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { + if ((*action)(pBiDiTransform, pErrorCode)) { + if (action + 1) { + updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, + *pBiDiTransform->pDestLength, pErrorCode); + } + textChanged = TRUE; + } + } + ubidi_setInverse(pBiDiTransform->pBidi, FALSE); + + if (!textChanged && U_SUCCESS(*pErrorCode)) { + /* Text was not changed - just copy src to dest */ + if (destSize < srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + } else { + u_strncpy(dest, src, srcLength); + destLength = srcLength; + } + } +cleanup: + if (pOrigTransform != pBiDiTransform) { + ubiditransform_close(pBiDiTransform); + } else { + pBiDiTransform->dest = NULL; + pBiDiTransform->pDestLength = NULL; + pBiDiTransform->srcLength = 0; + pBiDiTransform->destSize = 0; + } + return U_FAILURE(*pErrorCode) ? 0 : destLength; +} diff --git a/icu4c/source/common/unicode/ubiditransform.h b/icu4c/source/common/unicode/ubiditransform.h new file mode 100644 index 00000000000..d3fbe9c96fc --- /dev/null +++ b/icu4c/source/common/unicode/ubiditransform.h @@ -0,0 +1,312 @@ +/* +****************************************************************************** +* +* Copyright (C) 2016 and later: Unicode, Inc. 
and others. +* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#ifndef UBIDITRANSFORM_H +#define UBIDITRANSFORM_H + +#include "unicode/uchar.h" +#include "unicode/localpointer.h" + +/** + * UBiDiOrder indicates the order of text.

+ * This bidi transformation engine supports all possible combinations (4 in + * total) of input and output text order: + *

+ * @see ubidi_setInverse + * @see ubidi_setReorderingMode + * @see UBIDI_REORDER_DEFAULT + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_RUNS_ONLY + * @draft ICU 58 + */ +typedef enum { + /** 0: Constant indicating a logical order. + * This is the default for input text. + * @draft ICU 58 + */ + UBIDI_LOGICAL = 0, + /** 1: Constant indicating a visual order. + * This is a default for output text. + * @draft ICU 58 + */ + UBIDI_VISUAL +} UBiDiOrder; + +/** + * UBiDiMirroring indicates whether or not characters with the + * "mirrored" property in RTL runs should be replaced with their mirror-image + * counterparts. + * @see UBIDI_DO_MIRRORING + * @see ubidi_setReorderingOptions + * @see ubidi_writeReordered + * @see ubidi_writeReverse + * @draft ICU 58 + */ +typedef enum { + /** 0: Constant indicating that character mirroring should not be + * performed. + * This is the default. + * @draft ICU 58 + */ + UBIDI_MIRRORING_OFF = 0, + /** 1: Constant indicating that character mirroring should be performed. + * This corresponds to calling ubidi_writeReordered or + * ubidi_writeReverse with the + * UBIDI_DO_MIRRORING option bit set. + * @draft ICU 58 + */ + UBIDI_MIRRORING_ON +} UBiDiMirroring; + +/** + * Forward declaration of the UBiDiTransform structure that stores + * information used by the layout transformation engine. + * @draft ICU 58 + */ +typedef struct UBiDiTransform UBiDiTransform; + +/** + * Performs transformation of text from the bidi layout defined by the input + * ordering scheme to the bidi layout defined by the output ordering scheme, + * and applies character mirroring and Arabic shaping operations.

+ * In terms of UBiDi, such a transformation implies: + *

  • calling ubidi_setReorderingMode as needed (when the + * reordering mode is other than normal),
  • + *
  • calling ubidi_setInverse as needed (when text should be + * transformed from a visual to a logical form),
  • + *
  • resolving embedding levels of each character in the input text by + * calling ubidi_setPara,
  • + *
  • reordering the characters based on the computed embedding levels, also + * performing character mirroring as needed, and streaming the result to the + * output, by calling ubidi_writeReordered,
  • + *
  • performing Arabic digit and letter shaping on the output text by calling + * u_shapeArabic.
  • + * + * An "ordering scheme" encompasses the base direction and the order of text, + * and these characteristics must be defined by the caller for both input and + * output explicitly.

    + * There are 36 possible combinations of ordering schemes, + * which are partially supported by UBiDi already. Examples of the + * currently supported combinations: + *

    + * All combinations that involve the Visual RTL scheme are unsupported by + * UBiDi, for instance: + * + *

    Example of usage of the transformation engine:
    + *

    + * \code
    + * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
    + * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
    + * UErrorCode errorCode = U_ZERO_ERROR;
    + * // Run a transformation.
    + * ubiditransform_transform(pBidiTransform,
    + *          text1, -1, text2, -1,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_RTL, UBIDI_LOGICAL,
    + *          UBIDI_MIRRORING_OFF,
    + *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
    + *          &errorCode);
    + * // Do something with text2.
    + *  text2[4] = '2';
    + * // Run a reverse transformation.
    + * ubiditransform_transform(pBidiTransform,
    + *          text2, -1, text1, -1,
    + *          UBIDI_RTL, UBIDI_LOGICAL,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_MIRRORING_OFF,
    + *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
    + *          &errorCode);
    + *\endcode
    + * 
    + *

    + * + * @param pBiDiTransform A pointer to a UBiDiTransform object + * allocated with ubiditransform_open() or + * NULL.

    + * This object serves for one-time setup to amortize initialization + * overheads. Use of this object is not thread-safe. All other threads + * should allocate a new UBiDiTransform object by calling + * ubiditransform_open() before using it. Alternatively, + * a caller can set this parameter to NULL, in which case + * the object will be allocated by the engine on the fly.

    + * @param src A pointer to the text that the Bidi layout transformations will + * be performed on. + *

    Note: the text must be (at least) + * srcLength long.

    + * @param srcLength The length of the text, in number of UChars. If + * srcLength == -1 then the text must be zero-terminated. + * @param dest A pointer to where the processed text is to be copied. + * @param destSize The size of the dest buffer, in number of + * UChars. If the U_SHAPE_LETTERS_UNSHAPE option is set, + * then the destination length could be as large as + * srcLength * 2. Otherwise, the destination length will + * not exceed srcLength. If the caller reserves the last + * position for zero-termination, it should be excluded from + * destSize. + *

    destSize == -1 is allowed and makes sense when + * dest already holds some meaningful value, e.g. that of + * src. In this case dest must be + * zero-terminated.

    + * @param inParaLevel A base embedding level of the input as defined in + * ubidi_setPara documentation for the + * paraLevel parameter. + * @param inOrder An order of the input, which can be one of the + * UBiDiOrder values. + * @param outParaLevel A base embedding level of the output as defined in + * ubidi_setPara documentation for the + * paraLevel parameter. + * @param outOrder An order of the output, which can be one of the + * UBiDiOrder values. + * @param doMirroring Indicates whether or not to perform character mirroring, + * and can accept one of the UBiDiMirroring values. + * @param shapingOptions Arabic digit and letter shaping options defined in the + * ushape.h documentation. + *

    Note: Direction indicator options are computed by + * the transformation engine based on the effective ordering schemes, so + * user-defined direction indicators will be ignored.

    + * @param pErrorCode A pointer to an error code value. + * + * @return The destination length, i.e. the number of UChars written to + * dest. If the transformation fails, the return value + * will be 0 (and the error code will be written to + * pErrorCode). + * + * @see UBiDiLevel + * @see UBiDiOrder + * @see UBiDiMirroring + * @see ubidi_setPara + * @see u_shapeArabic + * @draft ICU 58 + */ +U_DRAFT uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode); + +/** + * Allocates a UBiDiTransform object. This object can be reused, + * e.g. with different ordering schemes, mirroring or shaping options.

    + * Note: The object can only be reused in the same thread. + * All other threads should allocate a new UBiDiTransform object + * before using it.

    + * Example of usage:

    + *

    + * \code
    + * UErrorCode errorCode = U_ZERO_ERROR;
    + * // Open a new UBiDiTransform.
    + * UBiDiTransform* transform = ubiditransform_open(&errorCode);
    + * // Run a transformation.
    + * ubiditransform_transform(transform,
    + *          text1, -1, text2, -1,
    + *          UBIDI_RTL, UBIDI_LOGICAL,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_MIRRORING_ON,
    + *          U_SHAPE_DIGITS_EN2AN,
    + *          &errorCode);
    + * // Do something with the output text and invoke another transformation using
    + * //   that text as input.
    + * ubiditransform_transform(transform,
    + *          text2, -1, text3, -1,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_RTL, UBIDI_VISUAL,
    + *          UBIDI_MIRRORING_ON,
    + *          0, &errorCode);
    + *\endcode
    + * 
    + *

    + * The UBiDiTransform object must be deallocated by calling + * ubiditransform_close(). + * + * @return An empty UBiDiTransform object. + * @draft ICU 58 + */ +U_DRAFT UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode); + +/** + * Deallocates the given UBiDiTransform object. + * @draft ICU 58 + */ +U_DRAFT void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBidiTransform); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUBiDiTransformPointer + * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 58 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); + +U_NAMESPACE_END + +#endif + +#endif diff --git a/icu4c/source/test/cintltst/Makefile.in b/icu4c/source/test/cintltst/Makefile.in index 904b2bec22a..f60bb66db83 100644 --- a/icu4c/source/test/cintltst/Makefile.in +++ b/icu4c/source/test/cintltst/Makefile.in @@ -53,6 +53,7 @@ uenumtst.o utmstest.o currtest.o \ idnatest.o nfsprep.o spreptst.o sprpdata.o \ hpmufn.o tracetst.o reapits.o uregiontest.o ulistfmttest.o\ utexttst.o ucsdetst.o spooftest.o \ +cbiditransformtst.o \ cgendtst.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/test/cintltst/calltest.c b/icu4c/source/test/cintltst/calltest.c index c35aae1268a..e759be8f6b7 100644 --- a/icu4c/source/test/cintltst/calltest.c +++ b/icu4c/source/test/cintltst/calltest.c @@ -25,6 +25,7 @@ void addFormatTest(TestNode** root); void addConvert(TestNode** root); void addCollTest(TestNode** root); void addComplexTest(TestNode** root); +void addBidiTransformTest(TestNode** root); void addUDataTest(TestNode** root); void addUTF16Test(TestNode** root); void addUTF8Test(TestNode** root); @@ -60,6 +61,7 @@ void addAllTests(TestNode** root) addStandardNamesTest(root); addUCsdetTest(root); addComplexTest(root); + 
addBidiTransformTest(root); addUSetTest(root); #if !UCONFIG_NO_IDNA addUStringPrepTest(root); diff --git a/icu4c/source/test/cintltst/cbiditransformtst.c b/icu4c/source/test/cintltst/cbiditransformtst.c new file mode 100644 index 00000000000..0e18d1e642a --- /dev/null +++ b/icu4c/source/test/cintltst/cbiditransformtst.c @@ -0,0 +1,430 @@ +/******************************************************************** + * Copyright (C) 2016 and later: Unicode, Inc. and others. + * License & terms of use: http://www.unicode.org/copyright.html + ********************************************************************/ +/* file name: cbiditransformtst.c + * encoding: US-ASCII + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2016aug21 + * created by: Lina Kemmel +*/ + +#include "cintltst.h" +#include "unicode/ubidi.h" +#include "unicode/ubiditransform.h" +#include "unicode/ushape.h" +#include "unicode/ustring.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LATN_ZERO 0x0030 +#define ARAB_ZERO 0x0660 +#define MIN_HEB_LETTER 0x05D0 +#define MIN_ARAB_LETTER 0x0630 +#define MIN_SHAPED_LETTER 0xFEAB + +#define STR_CAPACITY 100 + +typedef struct { + UBiDiLevel inLevel; + UBiDiOrder inOr; + UBiDiLevel outLevel; + UBiDiOrder outOr; + const char *pReorderNoMirror; + const char *pReorderAndMirror; + const char *pContextShapes; + const char *pMessage; +} UBidiTestCases; + +UChar src[STR_CAPACITY] = { 0 }; +UChar dest[STR_CAPACITY] = { 0 }; +UChar expected[STR_CAPACITY] = { 0 }; +UChar temp[STR_CAPACITY * 2] = { 0 }; +char pseudo[STR_CAPACITY] = { 0 }; + +void addBidiTransformTest(TestNode** root); + +static void testAutoDirection(void); + +static void testAllTransformOptions(void); + +static char* pseudoScript(const UChar *str); + +static void shapeDigits(UChar *str, uint32_t digits); + +static void logResultsForDir(const UChar *srcText, const UChar *destTxt, + const UChar *expectedTxt, UBiDiLevel inLevel, UBiDiLevel outLevel); + +static void 
verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt, + const UChar *destTxt, const char *expectedChars, uint32_t digits, + uint32_t letters); + +#if 0 +static void substituteByPseudoChar(const UChar *src, char *dest, + const UChar baseReal, const char basePseudo, const char max); + + +/* TODO: This code assumes the codepage is ASCII based. */ + +/* + * Using the following conventions: + * AL unshaped: A-E + * AL shaped: F-J + * R: K-Z + * EN: 0-4 + * AN: 5-9 +*/ +static void +substituteByPseudoChar(const UChar *src, char *dest, const UChar baseReal, + const char basePseudo, const char max) { + *dest = basePseudo + (*src - baseReal); /* (range math won't work on EBCDIC) */ + if (*dest > max) { + *dest = max; + } +} + +static char* +pseudoScript(const UChar *str) { + char *p; + if (!str) { + return "\0"; + } + for (p = pseudo; *str; str++, p++) { + switch (u_charDirection(*str)) { + case U_RIGHT_TO_LEFT: + substituteByPseudoChar(str, p, MIN_HEB_LETTER, 'K', 'Z'); + break; + case U_RIGHT_TO_LEFT_ARABIC: + if (*str > 0xFE00) { + substituteByPseudoChar(str, p, MIN_SHAPED_LETTER, 'F', 'J'); + } else { + substituteByPseudoChar(str, p, MIN_ARAB_LETTER, 'A', 'E'); + } + break; + case U_ARABIC_NUMBER: + substituteByPseudoChar(str, p, ARAB_ZERO, '5', '9'); + break; + default: + *p = (char)*str; + break; + } + } + *p = '\0'; + return pseudo; +} +#else +static char* +pseudoScript(const UChar *str) { + return aescstrdup(str, -1); +} +#endif + +static void +logResultsForDir(const UChar *srcTxt, const UChar *destTxt, const UChar *expectedTxt, + UBiDiLevel inLevel, UBiDiLevel outLevel) +{ + if (u_strcmp(expectedTxt, destTxt)) { + log_err("Unexpected transform Dest: inLevel: 0x%02x; outLevel: 0x%02x;\ninText: %s; outText: %s; expected: %s\n", + inLevel, outLevel, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expectedTxt)); + } +} + +/** + * Tests various combinations of base directions, with the input either + * UBIDI_DEFAULT_LTR or UBIDI_DEFAULT_RTL, 
and the + * output either UBIDI_LTR or UBIDI_RTL. Order is + * always UBIDI_LOGICAL for the input and UBIDI_VISUAL + * for the output. + */ +static void +testAutoDirection(void) +{ + static const UBiDiLevel inLevels[] = { + UBIDI_DEFAULT_LTR, UBIDI_DEFAULT_RTL + }; + static const UBiDiLevel outLevels[] = { + UBIDI_LTR, UBIDI_RTL + }; + static const char *srcTexts[] = { + "abc \\u05d0\\u05d1\0", + "... abc \\u05d0\\u05d1\0", + "\\u05d0\\u05d1 abc\0", + "... \\u05d0\\u05d1 abc\0", + ".*:" + }; + uint32_t nTexts = sizeof(srcTexts) / sizeof(srcTexts[0]); + uint32_t i, nInLevels = sizeof(inLevels) / sizeof(inLevels[0]); + uint32_t j, nOutLevels = sizeof(outLevels) / sizeof(outLevels[0]); + + UBiDi *pBidi = ubidi_open(); + + UErrorCode errorCode = U_ZERO_ERROR; + UBiDiTransform *pTransform = ubiditransform_open(&errorCode); + + while (nTexts-- > 0) { + uint32_t srcLen; + u_unescape(srcTexts[nTexts], src, STR_CAPACITY); + srcLen = u_strlen(src); + for (i = 0; i < nInLevels; i++) { + for (j = 0; j < nOutLevels; j++) { + ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY - 1, + inLevels[i], UBIDI_LOGICAL, outLevels[j], UBIDI_VISUAL, + UBIDI_MIRRORING_OFF, 0, &errorCode); + /* Use UBiDi as a model we compare to */ + ubidi_setPara(pBidi, src, srcLen, inLevels[i], NULL, &errorCode); + ubidi_writeReordered(pBidi, expected, STR_CAPACITY, UBIDI_REORDER_DEFAULT, &errorCode); + if (outLevels[j] == UBIDI_RTL) { + ubidi_writeReverse(expected, u_strlen(expected), temp, STR_CAPACITY, + UBIDI_OUTPUT_REVERSE, &errorCode); + logResultsForDir(src, dest, temp, inLevels[i], outLevels[j]); + } else { + logResultsForDir(src, dest, expected, inLevels[i], outLevels[j]); + } + } + } + } + ubidi_close(pBidi); + ubiditransform_close(pTransform); +} + +static void +shapeDigits(UChar *str, uint32_t digits) +{ + const UChar srcZero = (digits & U_SHAPE_DIGITS_EN2AN) ? LATN_ZERO : ARAB_ZERO; + const UChar extent = srcZero == ARAB_ZERO ? 
LATN_ZERO - ARAB_ZERO : ARAB_ZERO - LATN_ZERO; + UChar32 c = 0; + uint32_t i = 0, j, length = u_strlen(str); + while (i < length) { + j = i; + U16_NEXT(str, i, length, c); + if (c >= srcZero && c <= srcZero + 9) { + /* length of c here is always a single UChar16 */ + str[j] = c + extent; + } + } +} + +static void +verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt, + const UChar *destTxt, const char *expectedChars, uint32_t digits, uint32_t letters) +{ + switch (digits) { + case U_SHAPE_DIGITS_EN2AN: + case U_SHAPE_DIGITS_AN2EN: + u_unescape(expectedChars, expected, STR_CAPACITY); + shapeDigits(expected, digits); + break; + case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: + u_unescape(pTest->pContextShapes, expected, STR_CAPACITY); + break; + case U_SHAPE_DIGITS_NOOP: + u_unescape(expectedChars, expected, STR_CAPACITY); + break; + } + if (letters & U_SHAPE_LETTERS_SHAPE) { + uint32_t i = 0, j, length = u_strlen(expected); + UChar32 c = 0; + while (i < length) { + j = i; + U16_NEXT(expected, i, length, c); + /* below the length of old and new values is always a single + UChar16, so can just assign a new value to expected[j] */ + if (c == 0x0630) { + expected[j] = 0xfeab; + } else if (c == 0x0631) { + expected[j] = 0xfead; + } else if (c == 0x0632) { + expected[j] = 0xfeaf; + } + } + } + if (u_strcmp(expected, dest)) { + log_err("Unexpected transform Dest: Test: %s; Digits: 0x%08x; Letters: 0x%08x\ninText: %s; outText: %s; expected: %s\n", + pTest->pMessage, digits, letters, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expected)); + } +} + +/** + * This function covers: + *
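    + * <ul>
    + * <li>every combination of explicit base direction (UBIDI_LTR, UBIDI_RTL) and
    + * ordering scheme (UBIDI_LOGICAL, UBIDI_VISUAL) on both input and output,</li>
    + * <li>selected auto-direction cases (UBIDI_DEFAULT_LTR, UBIDI_DEFAULT_RTL) on the
    + * input; the cases that use them on the output are disabled with #if 0,</li>
    + * <li>one run per case with mirroring on, and every combination of the digit and
    + * letter shaping options listed in digits[] and letters[] with mirroring off.</li>
    + * </ul>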
    + */ +static void +testAllTransformOptions(void) +{ + static const char *inText = + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0"; + + static const UBidiTestCases testCases[] = { + { UBIDI_LTR, UBIDI_LOGICAL, + UBIDI_LTR, UBIDI_LOGICAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", // reordering no mirroring + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662", // context numeric shaping + "1: Logical LTR ==> Logical LTR" }, + { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", + "2: Logical LTR ==> Visual LTR" }, + { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL, + "\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c", + "\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 a[b]c", + "\\u0632 \\u0661\\u0662 f \\u0631e\\u0664 \\u0630 \\u0662\\u0663\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c", + "3: Logical LTR ==> Logical RTL" }, + { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL, + "\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 c]b[a", + "\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 c]b[a", + "\\u0632 \\u0662\\u0661 f \\u0631\\u0664e \\u0630 \\u0660\\u0663\\u0662 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 
c]b[a", + "4: Logical LTR ==> Visual RTL" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", + "5: Logical RTL ==> Logical RTL" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "6: Logical RTL ==> Visual RTL" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_LOGICAL, + "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c", + "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 a[b]c", + "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c", + "7: Logical RTL ==> Logical LTR" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "8: Logical RTL ==> Visual LTR" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring 
+ "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662", + "9: Visual LTR ==> Visual LTR" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "10: Visual LTR ==> Logical LTR" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL, + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f \\u0664\\u0631e \\u0660\\u0663\\u0662 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "11: Visual LTR ==> Visual RTL" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL, + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f \\u0664\\u0631e \\u0662\\u0663\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "12: Visual LTR ==> Logical RTL" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", + "13: Visual RTL ==> Visual RTL" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 
\\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "14: Visual RTL ==> Logical RTL" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL, + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "15: Visual RTL ==> Visual LTR" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL, + "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a", + "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 c]b[a", + "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a", + "16: Visual RTL ==> Logical LTR" }, + + { UBIDI_DEFAULT_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", + "17: Logical DEFAULT_RTL ==> Visual LTR" }, +#if 0 + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_LTR, UBIDI_VISUAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "18: Logical RTL ==> Visual DEFAULT_LTR" }, +#endif + { 
UBIDI_DEFAULT_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", + "19: Logical DEFAULT_LTR ==> Visual LTR" }, +#if 0 + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_RTL, UBIDI_VISUAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "20: Logical RTL ==> Visual DEFAULT_RTL" } +#endif + }; + static const uint32_t digits[] = { + U_SHAPE_DIGITS_NOOP, + U_SHAPE_DIGITS_AN2EN, + U_SHAPE_DIGITS_EN2AN, + U_SHAPE_DIGITS_ALEN2AN_INIT_LR + }; + static const uint32_t letters[] = { + U_SHAPE_LETTERS_UNSHAPE, + U_SHAPE_LETTERS_SHAPE + }; + uint32_t i, nTestCases = sizeof(testCases) / sizeof(testCases[0]); + uint32_t j, nDigits = sizeof(digits) / sizeof(digits[0]); + uint32_t k, nLetters = sizeof(letters) / sizeof(letters[0]); + + UErrorCode errorCode = U_ZERO_ERROR; + UBiDiTransform *pTransform = ubiditransform_open(&errorCode); + + u_unescape(inText, src, STR_CAPACITY); + + // Test various combinations of para level, order, mirroring, digits and letters + for (i = 0; i < nTestCases; i++) { + ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY, + testCases[i].inLevel, testCases[i].inOr, + testCases[i].outLevel, testCases[i].outOr, + UBIDI_MIRRORING_ON, 0, &errorCode); + verifyResultsForAllOpt(&testCases[i], src, dest, + testCases[i].pReorderAndMirror, U_SHAPE_DIGITS_NOOP, + U_SHAPE_LETTERS_NOOP); + + for (j = 0; j < nDigits; j++) { + for (k = 0; k < nLetters; k++) { + /* Use 
here NULL for pTransform */ + ubiditransform_transform(NULL, src, -1, dest, STR_CAPACITY, + testCases[i].inLevel, testCases[i].inOr, + testCases[i].outLevel, testCases[i].outOr, + UBIDI_MIRRORING_OFF, digits[j] | letters[k], + &errorCode); + verifyResultsForAllOpt(&testCases[i], src, dest, + testCases[i].pReorderNoMirror, digits[j], letters[k]); + } + } + } + ubiditransform_close(pTransform); +} + +void +addBidiTransformTest(TestNode** root) +{ + addTest(root, testAutoDirection, "complex/bidi-transform/TestAutoDirection"); + addTest(root, testAllTransformOptions, "complex/bidi-transform/TestAllTransformOptions"); +} + +#ifdef __cplusplus +} +#endif diff --git a/icu4c/source/test/cintltst/cintltst.vcxproj b/icu4c/source/test/cintltst/cintltst.vcxproj index 5f2b48e282b..81403f6c0ad 100644 --- a/icu4c/source/test/cintltst/cintltst.vcxproj +++ b/icu4c/source/test/cintltst/cintltst.vcxproj @@ -231,6 +231,7 @@ + diff --git a/icu4c/source/test/cintltst/cintltst.vcxproj.filters b/icu4c/source/test/cintltst/cintltst.vcxproj.filters index b8a340e76c9..e51a98f964f 100644 --- a/icu4c/source/test/cintltst/cintltst.vcxproj.filters +++ b/icu4c/source/test/cintltst/cintltst.vcxproj.filters @@ -303,6 +303,9 @@ spoof + + bidi + @@ -411,4 +414,4 @@ sprep & idna - + \ No newline at end of file