diff --git a/.gitattributes b/.gitattributes index dde3a76979a..b7a57346b84 100644 --- a/.gitattributes +++ b/.gitattributes @@ -51,8 +51,6 @@ README text !eol icu4c/icu4c.css -text icu4c/source/aclocal.m4 -text icu4c/source/allinone/icucheck.bat -text -icu4c/source/common/ubiditransform.c -text -icu4c/source/common/unicode/ubiditransform.h -text icu4c/source/config/m4/icu-conditional.m4 -text icu4c/source/data/curr/pool.res -text icu4c/source/data/in/coll/ucadata-implicithan.icu -text @@ -134,7 +132,6 @@ icu4c/source/samples/ugrep/ugrep.vcxproj -text icu4c/source/samples/uresb/resources.vcxproj -text icu4c/source/samples/uresb/uresb.vcxproj -text icu4c/source/samples/ustring/ustring.vcxproj -text -icu4c/source/test/cintltst/cbiditransformtst.c -text icu4c/source/test/depstest/icu-dependencies-mode.el -text icu4c/source/test/iotest/iotest.vcxproj -text icu4c/source/test/letest/cletest.vcxproj -text diff --git a/icu4c/source/common/ubiditransform.c b/icu4c/source/common/ubiditransform.c index c2d8f3c7afa..61bfd7763c7 100644 --- a/icu4c/source/common/ubiditransform.c +++ b/icu4c/source/common/ubiditransform.c @@ -1,528 +1,528 @@ -/* -****************************************************************************** -* -* Copyright (C) 2016 and later: Unicode, Inc. and others. 
-* License & terms of use: http://www.unicode.org/copyright.html -* -****************************************************************************** -* file name: ubiditransform.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2016jul24 -* created by: Lina Kemmel -* -*/ - -#include "cmemory.h" -#include "unicode/ubidi.h" -#include "unicode/ustring.h" -#include "unicode/ushape.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "unicode/ubiditransform.h" - -/* Some convenience defines */ -#define LTR UBIDI_LTR -#define RTL UBIDI_RTL -#define LOGICAL UBIDI_LOGICAL -#define VISUAL UBIDI_VISUAL -#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL -#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR - -#define CHECK_LEN(STR, LEN, ERROR) { \ - if (LEN == 0) return 0; \ - if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ - if (LEN == -1) LEN = u_strlen(STR); \ - } - -#define MAX_ACTIONS 7 - -/** - * Typedef for a pointer to a function, which performs some operation (such as - * reordering, setting "inverse" mode, character mirroring, etc.). Return value - * indicates whether the text was changed in the course of this operation or - * not. 
- */ -typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); - -/** - * Structure that holds a predefined reordering scheme, including the following - * information: - * - */ -typedef struct { - UBiDiLevel inLevel; /* input level */ - UBiDiOrder inOrder; /* input order */ - UBiDiLevel outLevel; /* output level */ - UBiDiOrder outOrder; /* output order */ - uint32_t digitsDir; /* digit shaping direction */ - uint32_t lettersDir; /* letter shaping direction */ - UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ - const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ -} ReorderingScheme; - -struct UBiDiTransform { - UBiDi *pBidi; /* pointer to a UBiDi object */ - const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ - UChar *src; /* input text */ - UChar *dest; /* output text */ - uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ - uint32_t srcSize; /* input text capacity excluding the trailing zero */ - uint32_t destSize; /* output text capacity */ - uint32_t *pDestLength; /* number of UChars written to dest */ - uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ - uint32_t digits; /* digit option for ArabicShaping */ - uint32_t letters; /* letter option for ArabicShaping */ -}; - -U_DRAFT UBiDiTransform* U_EXPORT2 -ubiditransform_open(UErrorCode *pErrorCode) -{ - UBiDiTransform *pBiDiTransform = NULL; - if (U_SUCCESS(*pErrorCode)) { - pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); - if (pBiDiTransform == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - } - } - return pBiDiTransform; -} - -U_DRAFT void U_EXPORT2 -ubiditransform_close(UBiDiTransform *pBiDiTransform) -{ - if (pBiDiTransform != NULL) { - if (pBiDiTransform->pBidi != NULL) { - ubidi_close(pBiDiTransform->pBidi); - } - if (pBiDiTransform->src != NULL) { - 
uprv_free(pBiDiTransform->src); - } - uprv_free(pBiDiTransform); - } -} - -/** - * Performs Bidi resolution of text. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, - pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); - return FALSE; -} - -/** - * Performs basic reordering of text (Logical -> Visual LTR). - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, - pTransform->reorderingOptions, pErrorCode); - - *pTransform->pDestLength = pTransform->srcLength; - pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; - return TRUE; -} - -/** - * Sets "inverse" mode on the UBiDi object. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setInverse(pTransform->pBidi, TRUE); - ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); - return FALSE; -} - -/** - * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL - * transformation. 
- * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); - return FALSE; -} - -/** - * Performs string reverse. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_writeReverse(pTransform->src, pTransform->srcLength, - pTransform->dest, pTransform->destSize, - UBIDI_REORDER_DEFAULT, pErrorCode); - *pTransform->pDestLength = pTransform->srcLength; - return TRUE; -} - -/** - * Applies a new value to the text that serves as input at the current - * processing step. This value is identical to the original one when we begin - * the processing, but usually changes as the transformation progresses. - * - * @param pTransform A pointer to the UBiDiTransform structure. - * @param newSrc A pointer whose value is to be used as input text. - * @param newLength A length of the new text in UChars. - * @param newSize A new source capacity in UChars. - * @param pErrorCode Pointer to the error code value. 
- */ -static void -updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, - uint32_t newSize, UErrorCode *pErrorCode) -{ - if (newSize < newLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return; - } - if (newSize > pTransform->srcSize) { - newSize += 50; // allocate slightly more than needed right now - if (pTransform->src != NULL) { - uprv_free(pTransform->src); - pTransform->src = NULL; - } - pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); - if (pTransform->src == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - //pTransform->srcLength = pTransform->srcSize = 0; - return; - } - pTransform->srcSize = newSize; - } - u_strncpy(pTransform->src, newSrc, newLength); - pTransform->srcLength = u_terminateUChars(pTransform->src, - pTransform->srcSize, newLength, pErrorCode); -} - -/** - * Calls a lower level shaping function. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param options Shaping options. - * @param pErrorCode Pointer to the error code value. - */ -static void -doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) -{ - *pTransform->pDestLength = u_shapeArabic(pTransform->src, - pTransform->srcLength, pTransform->dest, pTransform->destSize, - options, pErrorCode); -} - -/** - * Performs digit and letter shaping. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). 
- */ -static UBool -action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - if ((pTransform->letters | pTransform->digits) == 0) { - return FALSE; - } - if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { - doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, - pErrorCode); - } else { - doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); - if (U_SUCCESS(*pErrorCode)) { - updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, - *pTransform->pDestLength, pErrorCode); - doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, - pErrorCode); - } - } - return TRUE; -} - -/** - * Performs character mirroring. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - UChar32 c; - uint32_t i = 0, j = 0; - if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { - return FALSE; - } - if (pTransform->destSize < pTransform->srcLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return FALSE; - } - do { - UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; - U16_NEXT(pTransform->src, i, pTransform->srcLength, c); - U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); - } while (i < pTransform->srcLength); - - *pTransform->pDestLength = pTransform->srcLength; - pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; - return TRUE; -} - -/** - * All possible reordering schemes. 
- * - */ -static const ReorderingScheme Schemes[] = -{ - /* 0: Logical LTR => Visual LTR */ - {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_reorder, NULL}}, - /* 1: Logical RTL => Visual LTR */ - {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 2: Logical LTR => Visual RTL */ - {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, - /* 3: Logical RTL => Visual RTL */ - {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, - /* 4: Visual LTR => Logical RTL */ - {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, - /* 5: Visual RTL => Logical RTL */ - {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, - /* 6: Visual LTR => Logical LTR */ - {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 7: Visual RTL => Logical LTR */ - {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 8: Logical LTR => Logical RTL */ - {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, - /* 9: Logical RTL => Logical LTR */ - {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, - {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 10: Visual LTR => Visual RTL */ - {LTR, VISUAL, 
RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, - /* 11: Visual RTL => Visual LTR */ - {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, - /* 12: Logical LTR => Logical LTR */ - {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 13: Logical RTL => Logical RTL */ - {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 14: Visual LTR => Visual LTR */ - {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 15: Visual RTL => Visual RTL */ - {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} -}; - -static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); - -/** - * When the direction option is UBIDI_DEFAULT_LTR or - * UBIDI_DEFAULT_RTL, resolve the base direction according to that - * of the first strong bidi character. - */ -static void -resolveBaseDirection(const UChar *text, uint32_t length, - UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) -{ - switch (*pInLevel) { - case UBIDI_DEFAULT_LTR: - case UBIDI_DEFAULT_RTL: { - UBiDiLevel level = ubidi_getBaseDirection(text, length); - *pInLevel = level != UBIDI_NEUTRAL ? level - : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; - break; - } - default: - *pInLevel &= 1; - break; - } - switch (*pOutLevel) { - case UBIDI_DEFAULT_LTR: - case UBIDI_DEFAULT_RTL: - *pOutLevel = *pInLevel; - break; - default: - *pOutLevel &= 1; - break; - } -} - -/** - * Finds a valid ReorderingScheme matching the - * caller-defined scheme. 
- * - * @return A valid ReorderingScheme object or NULL - */ -static const ReorderingScheme* -findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, - UBiDiOrder inOrder, UBiDiOrder outOrder) -{ - uint32_t i; - for (i = 0; i < nSchemes; i++) { - const ReorderingScheme *pScheme = Schemes + i; - if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel - && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { - return pScheme; - } - } - return NULL; -} - -U_DRAFT uint32_t U_EXPORT2 -ubiditransform_transform(UBiDiTransform *pBiDiTransform, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - UBiDiLevel inParaLevel, UBiDiOrder inOrder, - UBiDiLevel outParaLevel, UBiDiOrder outOrder, - UBiDiMirroring doMirroring, uint32_t shapingOptions, - UErrorCode *pErrorCode) -{ - uint32_t destLength = 0; - UBool textChanged = FALSE; - const UBiDiTransform *pOrigTransform = pBiDiTransform; - const UBiDiAction *action = NULL; - - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (src == NULL || dest == NULL) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - CHECK_LEN(src, srcLength, pErrorCode); - CHECK_LEN(dest, destSize, pErrorCode); - - if (pBiDiTransform == NULL) { - pBiDiTransform = ubiditransform_open(pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return 0; - } - } - /* Current limitation: in multiple paragraphs will be resolved according - to the 1st paragraph */ - resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); - - pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, - inOrder, outOrder); - if (pBiDiTransform->pActiveScheme == NULL || pBiDiTransform->pActiveScheme->actions == NULL) { - goto cleanup; - } - pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING - : UBIDI_REORDER_DEFAULT; - - /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text - scheme at the time shaping is invoked. 
*/ - shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; - pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; - pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; - - updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - goto cleanup; - } - if (pBiDiTransform->pBidi == NULL) { - pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - goto cleanup; - } - } - pBiDiTransform->dest = dest; - pBiDiTransform->destSize = destSize; - pBiDiTransform->pDestLength = &destLength; - - /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ - for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { - if ((*action)(pBiDiTransform, pErrorCode)) { - if (action + 1) { - updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, - *pBiDiTransform->pDestLength, pErrorCode); - } - textChanged = TRUE; - } - } - ubidi_setInverse(pBiDiTransform->pBidi, FALSE); - - if (!textChanged && U_SUCCESS(*pErrorCode)) { - /* Text was not changed - just copy src to dest */ - if (destSize < srcLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - } else { - u_strncpy(dest, src, srcLength); - destLength = srcLength; - } - } -cleanup: - if (pOrigTransform != pBiDiTransform) { - ubiditransform_close(pBiDiTransform); - } else { - pBiDiTransform->dest = NULL; - pBiDiTransform->pDestLength = NULL; - pBiDiTransform->srcLength = 0; - pBiDiTransform->destSize = 0; - } - return U_FAILURE(*pErrorCode) ? 0 : destLength; -} +/* +****************************************************************************** +* +* Copyright (C) 2016 and later: Unicode, Inc. and others. 
+* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#include "cmemory.h" +#include "unicode/ubidi.h" +#include "unicode/ustring.h" +#include "unicode/ushape.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "unicode/ubiditransform.h" + +/* Some convenience defines */ +#define LTR UBIDI_LTR +#define RTL UBIDI_RTL +#define LOGICAL UBIDI_LOGICAL +#define VISUAL UBIDI_VISUAL +#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL +#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR + +#define CHECK_LEN(STR, LEN, ERROR) { \ + if (LEN == 0) return 0; \ + if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ + if (LEN == -1) LEN = u_strlen(STR); \ + } + +#define MAX_ACTIONS 7 + +/** + * Typedef for a pointer to a function, which performs some operation (such as + * reordering, setting "inverse" mode, character mirroring, etc.). Return value + * indicates whether the text was changed in the course of this operation or + * not. 
+ */ +typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); + +/** + * Structure that holds a predefined reordering scheme, including the following + * information: + * + */ +typedef struct { + UBiDiLevel inLevel; /* input level */ + UBiDiOrder inOrder; /* input order */ + UBiDiLevel outLevel; /* output level */ + UBiDiOrder outOrder; /* output order */ + uint32_t digitsDir; /* digit shaping direction */ + uint32_t lettersDir; /* letter shaping direction */ + UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ + const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ +} ReorderingScheme; + +struct UBiDiTransform { + UBiDi *pBidi; /* pointer to a UBiDi object */ + const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ + UChar *src; /* input text */ + UChar *dest; /* output text */ + uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ + uint32_t srcSize; /* input text capacity excluding the trailing zero */ + uint32_t destSize; /* output text capacity */ + uint32_t *pDestLength; /* number of UChars written to dest */ + uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ + uint32_t digits; /* digit option for ArabicShaping */ + uint32_t letters; /* letter option for ArabicShaping */ +}; + +U_DRAFT UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode) +{ + UBiDiTransform *pBiDiTransform = NULL; + if (U_SUCCESS(*pErrorCode)) { + pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); + if (pBiDiTransform == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + return pBiDiTransform; +} + +U_DRAFT void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBiDiTransform) +{ + if (pBiDiTransform != NULL) { + if (pBiDiTransform->pBidi != NULL) { + ubidi_close(pBiDiTransform->pBidi); + } + if (pBiDiTransform->src != NULL) { + 
uprv_free(pBiDiTransform->src); + } + uprv_free(pBiDiTransform); + } +} + +/** + * Performs Bidi resolution of text. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, + pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); + return FALSE; +} + +/** + * Performs basic reordering of text (Logical -> Visual LTR). + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, + pTransform->reorderingOptions, pErrorCode); + + *pTransform->pDestLength = pTransform->srcLength; + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * Sets "inverse" mode on the UBiDi object. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setInverse(pTransform->pBidi, TRUE); + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); + return FALSE; +} + +/** + * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL + * transformation. 
+ * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); + return FALSE; +} + +/** + * Performs string reverse. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReverse(pTransform->src, pTransform->srcLength, + pTransform->dest, pTransform->destSize, + UBIDI_REORDER_DEFAULT, pErrorCode); + *pTransform->pDestLength = pTransform->srcLength; + return TRUE; +} + +/** + * Applies a new value to the text that serves as input at the current + * processing step. This value is identical to the original one when we begin + * the processing, but usually changes as the transformation progresses. + * + * @param pTransform A pointer to the UBiDiTransform structure. + * @param newSrc A pointer whose value is to be used as input text. + * @param newLength A length of the new text in UChars. + * @param newSize A new source capacity in UChars. + * @param pErrorCode Pointer to the error code value. 
+ */ +static void +updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, + uint32_t newSize, UErrorCode *pErrorCode) +{ + if (newSize < newLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return; + } + if (newSize > pTransform->srcSize) { + newSize += 50; // allocate slightly more than needed right now + if (pTransform->src != NULL) { + uprv_free(pTransform->src); + pTransform->src = NULL; + } + pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); + if (pTransform->src == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + //pTransform->srcLength = pTransform->srcSize = 0; + return; + } + pTransform->srcSize = newSize; + } + u_strncpy(pTransform->src, newSrc, newLength); + pTransform->srcLength = u_terminateUChars(pTransform->src, + pTransform->srcSize, newLength, pErrorCode); +} + +/** + * Calls a lower level shaping function. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param options Shaping options. + * @param pErrorCode Pointer to the error code value. + */ +static void +doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) +{ + *pTransform->pDestLength = u_shapeArabic(pTransform->src, + pTransform->srcLength, pTransform->dest, pTransform->destSize, + options, pErrorCode); +} + +/** + * Performs digit and letter shaping. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). 
+ */ +static UBool +action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + if ((pTransform->letters | pTransform->digits) == 0) { + return FALSE; + } + if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { + doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } else { + doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, + *pTransform->pDestLength, pErrorCode); + doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } + } + return TRUE; +} + +/** + * Performs character mirroring. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + UChar32 c; + uint32_t i = 0, j = 0; + if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { + return FALSE; + } + if (pTransform->destSize < pTransform->srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + do { + UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; + U16_NEXT(pTransform->src, i, pTransform->srcLength, c); + U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); + } while (i < pTransform->srcLength); + + *pTransform->pDestLength = pTransform->srcLength; + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * All possible reordering schemes. 
+ * + */ +static const ReorderingScheme Schemes[] = +{ + /* 0: Logical LTR => Visual LTR */ + {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, NULL}}, + /* 1: Logical RTL => Visual LTR */ + {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 2: Logical LTR => Visual RTL */ + {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, + /* 3: Logical RTL => Visual RTL */ + {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, + /* 4: Visual LTR => Logical RTL */ + {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 5: Visual RTL => Logical RTL */ + {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 6: Visual LTR => Logical LTR */ + {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 7: Visual RTL => Logical LTR */ + {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 8: Logical LTR => Logical RTL */ + {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, + /* 9: Logical RTL => Logical LTR */ + {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 10: Visual LTR => Visual RTL */ + {LTR, VISUAL, 
RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, + /* 11: Visual RTL => Visual LTR */ + {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, + /* 12: Logical LTR => Logical LTR */ + {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 13: Logical RTL => Logical RTL */ + {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 14: Visual LTR => Visual LTR */ + {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 15: Visual RTL => Visual RTL */ + {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} +}; + +static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); + +/** + * When the direction option is UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL, resolve the base direction according to that + * of the first strong bidi character. + */ +static void +resolveBaseDirection(const UChar *text, uint32_t length, + UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) +{ + switch (*pInLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: { + UBiDiLevel level = ubidi_getBaseDirection(text, length); + *pInLevel = level != UBIDI_NEUTRAL ? level + : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; + break; + } + default: + *pInLevel &= 1; + break; + } + switch (*pOutLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: + *pOutLevel = *pInLevel; + break; + default: + *pOutLevel &= 1; + break; + } +} + +/** + * Finds a valid ReorderingScheme matching the + * caller-defined scheme. 
+ * + * @return A valid ReorderingScheme object or NULL + */ +static const ReorderingScheme* +findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, + UBiDiOrder inOrder, UBiDiOrder outOrder) +{ + uint32_t i; + for (i = 0; i < nSchemes; i++) { + const ReorderingScheme *pScheme = Schemes + i; + if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel + && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { + return pScheme; + } + } + return NULL; +} + +U_DRAFT uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode) +{ + uint32_t destLength = 0; + UBool textChanged = FALSE; + const UBiDiTransform *pOrigTransform = pBiDiTransform; + const UBiDiAction *action = NULL; + + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (src == NULL || dest == NULL) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + CHECK_LEN(src, srcLength, pErrorCode); + CHECK_LEN(dest, destSize, pErrorCode); + + if (pBiDiTransform == NULL) { + pBiDiTransform = ubiditransform_open(pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + } + /* Current limitation: in multiple paragraphs will be resolved according + to the 1st paragraph */ + resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); + + pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, + inOrder, outOrder); + if (pBiDiTransform->pActiveScheme == NULL || pBiDiTransform->pActiveScheme->actions == NULL) { + goto cleanup; + } + pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING + : UBIDI_REORDER_DEFAULT; + + /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text + scheme at the time shaping is invoked. 
*/ + shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; + pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; + pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; + + updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + if (pBiDiTransform->pBidi == NULL) { + pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + } + pBiDiTransform->dest = dest; + pBiDiTransform->destSize = destSize; + pBiDiTransform->pDestLength = &destLength; + + /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ + /* The action list is NULL-terminated: the output of an action is fed back as the source only when another action follows, so the slot after the current one is tested (the pointer action + 1 itself is never NULL). */ + for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { + if ((*action)(pBiDiTransform, pErrorCode)) { + if (*(action + 1)) { + updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, + *pBiDiTransform->pDestLength, pErrorCode); + } + textChanged = TRUE; + } + } + ubidi_setInverse(pBiDiTransform->pBidi, FALSE); + + if (!textChanged && U_SUCCESS(*pErrorCode)) { + /* Text was not changed - just copy src to dest */ + if (destSize < srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + } else { + u_strncpy(dest, src, srcLength); + destLength = srcLength; + } + } +cleanup: + /* A transform opened here on behalf of the caller is closed; a caller-owned one only has its per-call fields reset. */ + if (pOrigTransform != pBiDiTransform) { + ubiditransform_close(pBiDiTransform); + } else { + pBiDiTransform->dest = NULL; + pBiDiTransform->pDestLength = NULL; + pBiDiTransform->srcLength = 0; + pBiDiTransform->destSize = 0; + } + return U_FAILURE(*pErrorCode) ? 
0 : destLength; +} diff --git a/icu4c/source/common/unicode/ubiditransform.h b/icu4c/source/common/unicode/ubiditransform.h index d3fbe9c96fc..91a028bb3b1 100644 --- a/icu4c/source/common/unicode/ubiditransform.h +++ b/icu4c/source/common/unicode/ubiditransform.h @@ -1,312 +1,312 @@ -/* -****************************************************************************** -* -* Copyright (C) 2016 and later: Unicode, Inc. and others. -* License & terms of use: http://www.unicode.org/copyright.html -* -****************************************************************************** -* file name: ubiditransform.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2016jul24 -* created by: Lina Kemmel -* -*/ - -#ifndef UBIDITRANSFORM_H -#define UBIDITRANSFORM_H - -#include "unicode/uchar.h" -#include "unicode/localpointer.h" - -/** - * UBiDiOrder indicates the order of text.

- * This bidi transformation engine supports all possible combinations (4 in - * total) of input and output text order: - *

- * @see ubidi_setInverse - * @see ubidi_setReorderingMode - * @see UBIDI_REORDER_DEFAULT - * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT - * @see UBIDI_REORDER_RUNS_ONLY - * @draft ICU 58 - */ -typedef enum { - /** 0: Constant indicating a logical order. - * This is the default for input text. - * @draft ICU 58 - */ - UBIDI_LOGICAL = 0, - /** 1: Constant indicating a visual order. - * This is a default for output text. - * @draft ICU 58 - */ - UBIDI_VISUAL -} UBiDiOrder; - -/** - * UBiDiMirroring indicates whether or not characters with the - * "mirrored" property in RTL runs should be replaced with their mirror-image - * counterparts. - * @see UBIDI_DO_MIRRORING - * @see ubidi_setReorderingOptions - * @see ubidi_writeReordered - * @see ubidi_writeReverse - * @draft ICU 58 - */ -typedef enum { - /** 0: Constant indicating that character mirroring should not be - * performed. - * This is the default. - * @draft ICU 58 - */ - UBIDI_MIRRORING_OFF = 0, - /** 1: Constant indicating that character mirroring should be performed. - * This corresponds to calling ubidi_writeReordered or - * ubidi_writeReverse with the - * UBIDI_DO_MIRRORING option bit set. - * @draft ICU 58 - */ - UBIDI_MIRRORING_ON -} UBiDiMirroring; - -/** - * Forward declaration of the UBiDiTransform structure that stores - * information used by the layout transformation engine. - * @draft ICU 58 - */ -typedef struct UBiDiTransform UBiDiTransform; - -/** - * Performs transformation of text from the bidi layout defined by the input - * ordering scheme to the bidi layout defined by the output ordering scheme, - * and applies character mirroring and Arabic shaping operations.

- * In terms of UBiDi, such a transformation implies: - *

  • calling ubidi_setReorderingMode as needed (when the - * reordering mode is other than normal),
  • - *
  • calling ubidi_setInverse as needed (when text should be - * transformed from a visual to a logical form),
  • - *
  • resolving embedding levels of each character in the input text by - * calling ubidi_setPara,
  • - *
  • reordering the characters based on the computed embedding levels, also - * performing character mirroring as needed, and streaming the result to the - * output, by calling ubidi_writeReordered,
  • - *
  • performing Arabic digit and letter shaping on the output text by calling - * u_shapeArabic.
  • - * - * An "ordering scheme" encompasses the base direction and the order of text, - * and these characteristics must be defined by the caller for both input and - * output explicitly .

    - * There are 36 possible combinations of ordering schemes, - * which are partially supported by UBiDi already. Examples of the - * currently supported combinations: - *

    - * All combinations that involve the Visual RTL scheme are unsupported by - * UBiDi, for instance: - * - *

    Example of usage of the transformation engine:
    - *

    - * \code
    - * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
    - * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
    - * UErrorCode errorCode = U_ZERO_ERROR;
    - * // Run a transformation.
    - * ubiditransform_transform(pBidiTransform,
    - *          text1, -1, text2, -1,
    - *          UBIDI_LTR, UBIDI_VISUAL,
    - *          UBIDI_RTL, UBIDI_LOGICAL,
    - *          UBIDI_MIRRORING_OFF,
    - *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
    - *          &errorCode);
    - * // Do something with text2.
    - *  text2[4] = '2';
    - * // Run a reverse transformation.
    - * ubiditransform_transform(pBidiTransform,
    - *          text2, -1, text1, -1,
    - *          UBIDI_RTL, UBIDI_LOGICAL,
    - *          UBIDI_LTR, UBIDI_VISUAL,
    - *          UBIDI_MIRRORING_OFF,
    - *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
    - *          &errorCode);
    - *\endcode
    - * 
    - *

    - * - * @param pBiDiTransform A pointer to a UBiDiTransform object - * allocated with ubiditransform_open() or - * NULL.

    - * This object serves for one-time setup to amortize initialization - * overheads. Use of this object is not thread-safe. All other threads - * should allocate a new UBiDiTransform object by calling - * ubiditransform_open() before using it. Alternatively, - * a caller can set this parameter to NULL, in which case - * the object will be allocated by the engine on the fly.

    - * @param src A pointer to the text that the Bidi layout transformations will - * be performed on. - *

    Note: the text must be (at least) - * srcLength long.

    - * @param srcLength The length of the text, in number of UChars. If - * length == -1 then the text must be zero-terminated. - * @param dest A pointer to where the processed text is to be copied. - * @param destSize The size of the dest buffer, in number of - * UChars. If the U_SHAPE_LETTERS_UNSHAPE option is set, - * then the destination length could be as large as - * srcLength * 2. Otherwise, the destination length will - * not exceed srcLength. If the caller reserves the last - * position for zero-termination, it should be excluded from - * destSize. - *

    destSize == -1 is allowed and makes sense when - * dest was holds some meaningful value, e.g. that of - * src. In this case dest must be - * zero-terminated.

    - * @param inParaLevel A base embedding level of the input as defined in - * ubidi_setPara documentation for the - * paraLevel parameter. - * @param inOrder An order of the input, which can be one of the - * UBiDiOrder values. - * @param outParaLevel A base embedding level of the output as defined in - * ubidi_setPara documentation for the - * paraLevel parameter. - * @param outOrder An order of the output, which can be one of the - * UBiDiOrder values. - * @param doMirroring Indicates whether or not to perform character mirroring, - * and can accept one of the UBiDiMirroring values. - * @param shapingOptions Arabic digit and letter shaping options defined in the - * ushape.h documentation. - *

    Note: Direction indicator options are computed by - * the transformation engine based on the effective ordering schemes, so - * user-defined direction indicators will be ignored.

    - * @param pErrorCode A pointer to an error code value. - * - * @return The destination length, i.e. the number of UChars written to - * dest. If the transformation fails, the return value - * will be 0 (and the error code will be written to - * pErrorCode). - * - * @see UBiDiLevel - * @see UBiDiOrder - * @see UBiDiMirroring - * @see ubidi_setPara - * @see u_shapeArabic - * @draft ICU 58 - */ -U_DRAFT uint32_t U_EXPORT2 -ubiditransform_transform(UBiDiTransform *pBiDiTransform, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - UBiDiLevel inParaLevel, UBiDiOrder inOrder, - UBiDiLevel outParaLevel, UBiDiOrder outOrder, - UBiDiMirroring doMirroring, uint32_t shapingOptions, - UErrorCode *pErrorCode); - -/** - * Allocates a UBiDiTransform object. This object can be reused, - * e.g. with different ordering schemes, mirroring or shaping options.

    - * Note:The object can only be reused in the same thread. - * All other threads should allocate a new UBiDiTransform object - * before using it.

    - * Example of usage:

    - *

    - * \code
    - * UErrorCode errorCode = U_ZERO_ERROR;
    - * // Open a new UBiDiTransform.
    - * UBiDiTransform* transform = ubiditransform_open(&errorCode);
    - * // Run a transformation.
    - * ubiditransform_transform(transform,
    - *          text1, -1, text2, -1,
    - *          UBIDI_RTL, UBIDI_LOGICAL,
    - *          UBIDI_LTR, UBIDI_VISUAL,
    - *          UBIDI_MIRRORING_ON,
    - *          U_SHAPE_DIGITS_EN2AN,
    - *          &errorCode);
    - * // Do something with the output text and invoke another transformation using
    - * //   that text as input.
    - * ubiditransform_transform(transform,
    - *          text2, -1, text3, -1,
    - *          UBIDI_LTR, UBIDI_VISUAL,
    - *          UBIDI_RTL, UBIDI_VISUAL,
    - *          UBIDI_MIRRORING_ON,
    - *          0, &errorCode);
    - *\endcode
    - * 
    - *

    - * The UBiDiTransform object must be deallocated by calling - * ubiditransform_close(). - * - * @return An empty UBiDiTransform object. - * @draft ICU 58 - */ -U_DRAFT UBiDiTransform* U_EXPORT2 -ubiditransform_open(UErrorCode *pErrorCode); - -/** - * Deallocates the given UBiDiTransform object. - * @draft ICU 58 - */ -U_DRAFT void U_EXPORT2 -ubiditransform_close(UBiDiTransform *pBidiTransform); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUBiDiTransformPointer - * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @draft ICU 58 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); - -U_NAMESPACE_END - -#endif - -#endif +/* +****************************************************************************** +* +* Copyright (C) 2016 and later: Unicode, Inc. and others. +* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#ifndef UBIDITRANSFORM_H +#define UBIDITRANSFORM_H + +#include "unicode/uchar.h" +#include "unicode/localpointer.h" + +/** + * UBiDiOrder indicates the order of text.

    + * This bidi transformation engine supports all possible combinations (4 in + * total) of input and output text order: + *

    + * @see ubidi_setInverse + * @see ubidi_setReorderingMode + * @see UBIDI_REORDER_DEFAULT + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_RUNS_ONLY + * @draft ICU 58 + */ +typedef enum { + /** 0: Constant indicating a logical order. + * This is the default for input text. + * @draft ICU 58 + */ + UBIDI_LOGICAL = 0, + /** 1: Constant indicating a visual order. + * This is a default for output text. + * @draft ICU 58 + */ + UBIDI_VISUAL +} UBiDiOrder; + +/** + * UBiDiMirroring indicates whether or not characters with the + * "mirrored" property in RTL runs should be replaced with their mirror-image + * counterparts. + * @see UBIDI_DO_MIRRORING + * @see ubidi_setReorderingOptions + * @see ubidi_writeReordered + * @see ubidi_writeReverse + * @draft ICU 58 + */ +typedef enum { + /** 0: Constant indicating that character mirroring should not be + * performed. + * This is the default. + * @draft ICU 58 + */ + UBIDI_MIRRORING_OFF = 0, + /** 1: Constant indicating that character mirroring should be performed. + * This corresponds to calling ubidi_writeReordered or + * ubidi_writeReverse with the + * UBIDI_DO_MIRRORING option bit set. + * @draft ICU 58 + */ + UBIDI_MIRRORING_ON +} UBiDiMirroring; + +/** + * Forward declaration of the UBiDiTransform structure that stores + * information used by the layout transformation engine. + * @draft ICU 58 + */ +typedef struct UBiDiTransform UBiDiTransform; + +/** + * Performs transformation of text from the bidi layout defined by the input + * ordering scheme to the bidi layout defined by the output ordering scheme, + * and applies character mirroring and Arabic shaping operations.

    + * In terms of UBiDi, such a transformation implies: + *

  • calling ubidi_setReorderingMode as needed (when the + * reordering mode is other than normal),
  • + *
  • calling ubidi_setInverse as needed (when text should be + * transformed from a visual to a logical form),
  • + *
  • resolving embedding levels of each character in the input text by + * calling ubidi_setPara,
  • + *
  • reordering the characters based on the computed embedding levels, also + * performing character mirroring as needed, and streaming the result to the + * output, by calling ubidi_writeReordered,
  • + *
  • performing Arabic digit and letter shaping on the output text by calling + * u_shapeArabic.
  • + * + * An "ordering scheme" encompasses the base direction and the order of text, + * and these characteristics must be defined by the caller for both input and + * output explicitly .

    + * There are 36 possible combinations of ordering schemes, + * which are partially supported by UBiDi already. Examples of the + * currently supported combinations: + *

    + * All combinations that involve the Visual RTL scheme are unsupported by + * UBiDi, for instance: + * + *

    Example of usage of the transformation engine:
    + *

    + * \code
    + * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
    + * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
    + * UErrorCode errorCode = U_ZERO_ERROR;
    + * // Run a transformation.
    + * ubiditransform_transform(pBidiTransform,
    + *          text1, -1, text2, -1,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_RTL, UBIDI_LOGICAL,
    + *          UBIDI_MIRRORING_OFF,
    + *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
    + *          &errorCode);
    + * // Do something with text2.
    + *  text2[4] = '2';
    + * // Run a reverse transformation.
    + * ubiditransform_transform(pBidiTransform,
    + *          text2, -1, text1, -1,
    + *          UBIDI_RTL, UBIDI_LOGICAL,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_MIRRORING_OFF,
    + *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
    + *          &errorCode);
    + *\endcode
    + * 
    + *

    + * + * @param pBiDiTransform A pointer to a UBiDiTransform object + * allocated with ubiditransform_open() or + * NULL.

    + * This object serves for one-time setup to amortize initialization + * overheads. Use of this object is not thread-safe. All other threads + * should allocate a new UBiDiTransform object by calling + * ubiditransform_open() before using it. Alternatively, + * a caller can set this parameter to NULL, in which case + * the object will be allocated by the engine on the fly.

    + * @param src A pointer to the text that the Bidi layout transformations will + * be performed on. + *

    Note: the text must be (at least) + * srcLength long.

    + * @param srcLength The length of the text, in number of UChars. If + * srcLength == -1 then the text must be zero-terminated. + * @param dest A pointer to where the processed text is to be copied. + * @param destSize The size of the dest buffer, in number of + * UChars. If the U_SHAPE_LETTERS_UNSHAPE option is set, + * then the destination length could be as large as + * srcLength * 2. Otherwise, the destination length will + * not exceed srcLength. If the caller reserves the last + * position for zero-termination, it should be excluded from + * destSize. + * 

    destSize == -1 is allowed and makes sense when + * dest already holds some meaningful value, e.g. that of + * src. In this case dest must be + * zero-terminated.

    + * @param inParaLevel A base embedding level of the input as defined in + * ubidi_setPara documentation for the + * paraLevel parameter. + * @param inOrder An order of the input, which can be one of the + * UBiDiOrder values. + * @param outParaLevel A base embedding level of the output as defined in + * ubidi_setPara documentation for the + * paraLevel parameter. + * @param outOrder An order of the output, which can be one of the + * UBiDiOrder values. + * @param doMirroring Indicates whether or not to perform character mirroring, + * and can accept one of the UBiDiMirroring values. + * @param shapingOptions Arabic digit and letter shaping options defined in the + * ushape.h documentation. + *

    Note: Direction indicator options are computed by + * the transformation engine based on the effective ordering schemes, so + * user-defined direction indicators will be ignored.

    + * @param pErrorCode A pointer to an error code value. + * + * @return The destination length, i.e. the number of UChars written to + * dest. If the transformation fails, the return value + * will be 0 (and the error code will be written to + * pErrorCode). + * + * @see UBiDiLevel + * @see UBiDiOrder + * @see UBiDiMirroring + * @see ubidi_setPara + * @see u_shapeArabic + * @draft ICU 58 + */ +U_DRAFT uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode); + +/** + * Allocates a UBiDiTransform object. This object can be reused, + * e.g. with different ordering schemes, mirroring or shaping options.

    + * Note: The object can only be reused in the same thread. + * All other threads should allocate a new UBiDiTransform object + * before using it.

    + * Example of usage:

    + *

    + * \code
    + * UErrorCode errorCode = U_ZERO_ERROR;
    + * // Open a new UBiDiTransform.
    + * UBiDiTransform* transform = ubiditransform_open(&errorCode);
    + * // Run a transformation.
    + * ubiditransform_transform(transform,
    + *          text1, -1, text2, -1,
    + *          UBIDI_RTL, UBIDI_LOGICAL,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_MIRRORING_ON,
    + *          U_SHAPE_DIGITS_EN2AN,
    + *          &errorCode);
    + * // Do something with the output text and invoke another transformation using
    + * //   that text as input.
    + * ubiditransform_transform(transform,
    + *          text2, -1, text3, -1,
    + *          UBIDI_LTR, UBIDI_VISUAL,
    + *          UBIDI_RTL, UBIDI_VISUAL,
    + *          UBIDI_MIRRORING_ON,
    + *          0, &errorCode);
    + *\endcode
    + * 
    + *

    + * The UBiDiTransform object must be deallocated by calling + * ubiditransform_close(). + * + * @return An empty UBiDiTransform object. + * @draft ICU 58 + */ +U_DRAFT UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode); + +/** + * Deallocates the given UBiDiTransform object. + * @draft ICU 58 + */ +U_DRAFT void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBidiTransform); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUBiDiTransformPointer + * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 58 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); + +U_NAMESPACE_END + +#endif + +#endif diff --git a/icu4c/source/test/cintltst/cbiditransformtst.c b/icu4c/source/test/cintltst/cbiditransformtst.c index 0e18d1e642a..bd2afe11a7f 100644 --- a/icu4c/source/test/cintltst/cbiditransformtst.c +++ b/icu4c/source/test/cintltst/cbiditransformtst.c @@ -1,430 +1,430 @@ -/******************************************************************** - * Copyright (C) 2016 and later: Unicode, Inc. and others. 
- * License & terms of use: http://www.unicode.org/copyright.html - ********************************************************************/ -/* file name: cbiditransformtst.c - * encoding: US-ASCII - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2016aug21 - * created by: Lina Kemmel -*/ - -#include "cintltst.h" -#include "unicode/ubidi.h" -#include "unicode/ubiditransform.h" -#include "unicode/ushape.h" -#include "unicode/ustring.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define LATN_ZERO 0x0030 -#define ARAB_ZERO 0x0660 -#define MIN_HEB_LETTER 0x05D0 -#define MIN_ARAB_LETTER 0x0630 -#define MIN_SHAPED_LETTER 0xFEAB - -#define STR_CAPACITY 100 - -typedef struct { - UBiDiLevel inLevel; - UBiDiOrder inOr; - UBiDiLevel outLevel; - UBiDiOrder outOr; - const char *pReorderNoMirror; - const char *pReorderAndMirror; - const char *pContextShapes; - const char *pMessage; -} UBidiTestCases; - -UChar src[STR_CAPACITY] = { 0 }; -UChar dest[STR_CAPACITY] = { 0 }; -UChar expected[STR_CAPACITY] = { 0 }; -UChar temp[STR_CAPACITY * 2] = { 0 }; -char pseudo[STR_CAPACITY] = { 0 }; - -void addBidiTransformTest(TestNode** root); - -static void testAutoDirection(void); - -static void testAllTransformOptions(void); - -static char* pseudoScript(const UChar *str); - -static void shapeDigits(UChar *str, uint32_t digits); - -static void logResultsForDir(const UChar *srcText, const UChar *destTxt, - const UChar *expectedTxt, UBiDiLevel inLevel, UBiDiLevel outLevel); - -static void verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt, - const UChar *destTxt, const char *expectedChars, uint32_t digits, - uint32_t letters); - -#if 0 -static void substituteByPseudoChar(const UChar *src, char *dest, - const UChar baseReal, const char basePseudo, const char max); - - -/* TODO: This code assumes the codepage is ASCII based. 
*/ - -/* - * Using the following conventions: - * AL unshaped: A-E - * AL shaped: F-J - * R: K-Z - * EN: 0-4 - * AN: 5-9 -*/ -static void -substituteByPseudoChar(const UChar *src, char *dest, const UChar baseReal, - const char basePseudo, const char max) { - *dest = basePseudo + (*src - baseReal); /* (range math won't work on EBCDIC) */ - if (*dest > max) { - *dest = max; - } -} - -static char* -pseudoScript(const UChar *str) { - char *p; - if (!str) { - return "\0"; - } - for (p = pseudo; *str; str++, p++) { - switch (u_charDirection(*str)) { - case U_RIGHT_TO_LEFT: - substituteByPseudoChar(str, p, MIN_HEB_LETTER, 'K', 'Z'); - break; - case U_RIGHT_TO_LEFT_ARABIC: - if (*str > 0xFE00) { - substituteByPseudoChar(str, p, MIN_SHAPED_LETTER, 'F', 'J'); - } else { - substituteByPseudoChar(str, p, MIN_ARAB_LETTER, 'A', 'E'); - } - break; - case U_ARABIC_NUMBER: - substituteByPseudoChar(str, p, ARAB_ZERO, '5', '9'); - break; - default: - *p = (char)*str; - break; - } - } - *p = '\0'; - return pseudo; -} -#else -static char* -pseudoScript(const UChar *str) { - return aescstrdup(str, -1); -} -#endif - -static void -logResultsForDir(const UChar *srcTxt, const UChar *destTxt, const UChar *expectedTxt, - UBiDiLevel inLevel, UBiDiLevel outLevel) -{ - if (u_strcmp(expectedTxt, destTxt)) { - log_err("Unexpected transform Dest: inLevel: 0x%02x; outLevel: 0x%02x;\ninText: %s; outText: %s; expected: %s\n", - inLevel, outLevel, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expectedTxt)); - } -} - -/** - * Tests various combinations of base directions, with the input either - * UBIDI_DEFAULT_LTR or UBIDI_DEFAULT_RTL, and the - * output either UBIDI_LTR or UBIDI_RTL. Order is - * always UBIDI_LOGICAL for the input and UBIDI_VISUAL - * for the output. 
- */ -static void -testAutoDirection(void) -{ - static const UBiDiLevel inLevels[] = { - UBIDI_DEFAULT_LTR, UBIDI_DEFAULT_RTL - }; - static const UBiDiLevel outLevels[] = { - UBIDI_LTR, UBIDI_RTL - }; - static const char *srcTexts[] = { - "abc \\u05d0\\u05d1\0", - "... abc \\u05d0\\u05d1\0", - "\\u05d0\\u05d1 abc\0", - "... \\u05d0\\u05d1 abc\0", - ".*:" - }; - uint32_t nTexts = sizeof(srcTexts) / sizeof(srcTexts[0]); - uint32_t i, nInLevels = sizeof(inLevels) / sizeof(inLevels[0]); - uint32_t j, nOutLevels = sizeof(outLevels) / sizeof(outLevels[0]); - - UBiDi *pBidi = ubidi_open(); - - UErrorCode errorCode = U_ZERO_ERROR; - UBiDiTransform *pTransform = ubiditransform_open(&errorCode); - - while (nTexts-- > 0) { - uint32_t srcLen; - u_unescape(srcTexts[nTexts], src, STR_CAPACITY); - srcLen = u_strlen(src); - for (i = 0; i < nInLevels; i++) { - for (j = 0; j < nOutLevels; j++) { - ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY - 1, - inLevels[i], UBIDI_LOGICAL, outLevels[j], UBIDI_VISUAL, - UBIDI_MIRRORING_OFF, 0, &errorCode); - /* Use UBiDi as a model we compare to */ - ubidi_setPara(pBidi, src, srcLen, inLevels[i], NULL, &errorCode); - ubidi_writeReordered(pBidi, expected, STR_CAPACITY, UBIDI_REORDER_DEFAULT, &errorCode); - if (outLevels[j] == UBIDI_RTL) { - ubidi_writeReverse(expected, u_strlen(expected), temp, STR_CAPACITY, - UBIDI_OUTPUT_REVERSE, &errorCode); - logResultsForDir(src, dest, temp, inLevels[i], outLevels[j]); - } else { - logResultsForDir(src, dest, expected, inLevels[i], outLevels[j]); - } - } - } - } - ubidi_close(pBidi); - ubiditransform_close(pTransform); -} - -static void -shapeDigits(UChar *str, uint32_t digits) -{ - const UChar srcZero = (digits & U_SHAPE_DIGITS_EN2AN) ? LATN_ZERO : ARAB_ZERO; - const UChar extent = srcZero == ARAB_ZERO ? 
LATN_ZERO - ARAB_ZERO : ARAB_ZERO - LATN_ZERO; - UChar32 c = 0; - uint32_t i = 0, j, length = u_strlen(str); - while (i < length) { - j = i; - U16_NEXT(str, i, length, c); - if (c >= srcZero && c <= srcZero + 9) { - /* length of c here is always a single UChar16 */ - str[j] = c + extent; - } - } -} - -static void -verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt, - const UChar *destTxt, const char *expectedChars, uint32_t digits, uint32_t letters) -{ - switch (digits) { - case U_SHAPE_DIGITS_EN2AN: - case U_SHAPE_DIGITS_AN2EN: - u_unescape(expectedChars, expected, STR_CAPACITY); - shapeDigits(expected, digits); - break; - case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: - u_unescape(pTest->pContextShapes, expected, STR_CAPACITY); - break; - case U_SHAPE_DIGITS_NOOP: - u_unescape(expectedChars, expected, STR_CAPACITY); - break; - } - if (letters & U_SHAPE_LETTERS_SHAPE) { - uint32_t i = 0, j, length = u_strlen(expected); - UChar32 c = 0; - while (i < length) { - j = i; - U16_NEXT(expected, i, length, c); - /* below the length of old and new values is always a single - UChar16, so can just assign a new value to expected[j] */ - if (c == 0x0630) { - expected[j] = 0xfeab; - } else if (c == 0x0631) { - expected[j] = 0xfead; - } else if (c == 0x0632) { - expected[j] = 0xfeaf; - } - } - } - if (u_strcmp(expected, dest)) { - log_err("Unexpected transform Dest: Test: %s; Digits: 0x%08x; Letters: 0x%08x\ninText: %s; outText: %s; expected: %s\n", - pTest->pMessage, digits, letters, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expected)); - } -} - -/** - * This function covers: - *

    - */ -static void -testAllTransformOptions(void) -{ - static const char *inText = - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0"; - - static const UBidiTestCases testCases[] = { - { UBIDI_LTR, UBIDI_LOGICAL, - UBIDI_LTR, UBIDI_LOGICAL, - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", // reordering no mirroring - "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662", // context numeric shaping - "1: Logical LTR ==> Logical LTR" }, - { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", - "2: Logical LTR ==> Visual LTR" }, - { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL, - "\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c", - "\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 a[b]c", - "\\u0632 \\u0661\\u0662 f \\u0631e\\u0664 \\u0630 \\u0662\\u0663\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c", - "3: Logical LTR ==> Logical RTL" }, - { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL, - "\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 c]b[a", - "\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 c]b[a", - "\\u0632 \\u0662\\u0661 f \\u0631\\u0664e \\u0630 \\u0660\\u0663\\u0662 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 
c]b[a", - "4: Logical LTR ==> Visual RTL" }, - { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL, - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", - "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", - "5: Logical RTL ==> Logical RTL" }, - { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL, - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "6: Logical RTL ==> Visual RTL" }, - { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_LOGICAL, - "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c", - "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 a[b]c", - "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c", - "7: Logical RTL ==> Logical LTR" }, - { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, - "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", - "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c", - "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", - "8: Logical RTL ==> Visual LTR" }, - { UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL, - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", - "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring 
- "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662", - "9: Visual LTR ==> Visual LTR" }, - { UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL, - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "10: Visual LTR ==> Logical LTR" }, - { UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL, - "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", - "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a", - "\\u0662\\u0661 \\u0632 f \\u0664\\u0631e \\u0660\\u0663\\u0662 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", - "11: Visual LTR ==> Visual RTL" }, - { UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL, - "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", - "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c", - "\\u0661\\u0662 \\u0632 f \\u0664\\u0631e \\u0662\\u0663\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", - "12: Visual LTR ==> Logical RTL" }, - { UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL, - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", - "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", - "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", - "13: Visual RTL ==> Visual RTL" }, - { UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL, - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 
\\u0662\\u0661", - "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "14: Visual RTL ==> Logical RTL" }, - { UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL, - "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", - "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a", - "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", - "15: Visual RTL ==> Visual LTR" }, - { UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL, - "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a", - "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 c]b[a", - "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a", - "16: Visual RTL ==> Logical LTR" }, - - { UBIDI_DEFAULT_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", - "17: Logical DEFAULT_RTL ==> Visual LTR" }, -#if 0 - { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_LTR, UBIDI_VISUAL, - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "18: Logical RTL ==> Visual DEFAULT_LTR" }, -#endif - { 
UBIDI_DEFAULT_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", - "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", - "19: Logical DEFAULT_LTR ==> Visual LTR" }, -#if 0 - { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_RTL, UBIDI_VISUAL, - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", - "20: Logical RTL ==> Visual DEFAULT_RTL" } -#endif - }; - static const uint32_t digits[] = { - U_SHAPE_DIGITS_NOOP, - U_SHAPE_DIGITS_AN2EN, - U_SHAPE_DIGITS_EN2AN, - U_SHAPE_DIGITS_ALEN2AN_INIT_LR - }; - static const uint32_t letters[] = { - U_SHAPE_LETTERS_UNSHAPE, - U_SHAPE_LETTERS_SHAPE - }; - uint32_t i, nTestCases = sizeof(testCases) / sizeof(testCases[0]); - uint32_t j, nDigits = sizeof(digits) / sizeof(digits[0]); - uint32_t k, nLetters = sizeof(letters) / sizeof(letters[0]); - - UErrorCode errorCode = U_ZERO_ERROR; - UBiDiTransform *pTransform = ubiditransform_open(&errorCode); - - u_unescape(inText, src, STR_CAPACITY); - - // Test various combinations of para level, order, mirroring, digits and letters - for (i = 0; i < nTestCases; i++) { - ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY, - testCases[i].inLevel, testCases[i].inOr, - testCases[i].outLevel, testCases[i].outOr, - UBIDI_MIRRORING_ON, 0, &errorCode); - verifyResultsForAllOpt(&testCases[i], src, dest, - testCases[i].pReorderAndMirror, U_SHAPE_DIGITS_NOOP, - U_SHAPE_LETTERS_NOOP); - - for (j = 0; j < nDigits; j++) { - for (k = 0; k < nLetters; k++) { - /* Use 
here NULL for pTransform */ - ubiditransform_transform(NULL, src, -1, dest, STR_CAPACITY, - testCases[i].inLevel, testCases[i].inOr, - testCases[i].outLevel, testCases[i].outOr, - UBIDI_MIRRORING_OFF, digits[j] | letters[k], - &errorCode); - verifyResultsForAllOpt(&testCases[i], src, dest, - testCases[i].pReorderNoMirror, digits[j], letters[k]); - } - } - } - ubiditransform_close(pTransform); -} - -void -addBidiTransformTest(TestNode** root) -{ - addTest(root, testAutoDirection, "complex/bidi-transform/TestAutoDirection"); - addTest(root, testAllTransformOptions, "complex/bidi-transform/TestAllTransformOptions"); -} - -#ifdef __cplusplus -} -#endif +/******************************************************************** + * Copyright (C) 2016 and later: Unicode, Inc. and others. + * License & terms of use: http://www.unicode.org/copyright.html + ********************************************************************/ +/* file name: cbiditransformtst.c + * encoding: US-ASCII + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2016aug21 + * created by: Lina Kemmel +*/ + +#include "cintltst.h" +#include "unicode/ubidi.h" +#include "unicode/ubiditransform.h" +#include "unicode/ushape.h" +#include "unicode/ustring.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LATN_ZERO 0x0030 +#define ARAB_ZERO 0x0660 +#define MIN_HEB_LETTER 0x05D0 +#define MIN_ARAB_LETTER 0x0630 +#define MIN_SHAPED_LETTER 0xFEAB + +#define STR_CAPACITY 100 + +typedef struct { + UBiDiLevel inLevel; + UBiDiOrder inOr; + UBiDiLevel outLevel; + UBiDiOrder outOr; + const char *pReorderNoMirror; + const char *pReorderAndMirror; + const char *pContextShapes; + const char *pMessage; +} UBidiTestCases; + +UChar src[STR_CAPACITY] = { 0 }; +UChar dest[STR_CAPACITY] = { 0 }; +UChar expected[STR_CAPACITY] = { 0 }; +UChar temp[STR_CAPACITY * 2] = { 0 }; +char pseudo[STR_CAPACITY] = { 0 }; + +void addBidiTransformTest(TestNode** root); + +static void testAutoDirection(void); + +static void 
testAllTransformOptions(void); + +static char* pseudoScript(const UChar *str); + +static void shapeDigits(UChar *str, uint32_t digits); + +static void logResultsForDir(const UChar *srcText, const UChar *destTxt, + const UChar *expectedTxt, UBiDiLevel inLevel, UBiDiLevel outLevel); + +static void verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt, + const UChar *destTxt, const char *expectedChars, uint32_t digits, + uint32_t letters); + +#if 0 +static void substituteByPseudoChar(const UChar *src, char *dest, + const UChar baseReal, const char basePseudo, const char max); + + +/* TODO: This code assumes the codepage is ASCII based. */ + +/* + * Using the following conventions: + * AL unshaped: A-E + * AL shaped: F-J + * R: K-Z + * EN: 0-4 + * AN: 5-9 +*/ +static void +substituteByPseudoChar(const UChar *src, char *dest, const UChar baseReal, + const char basePseudo, const char max) { + *dest = basePseudo + (*src - baseReal); /* (range math won't work on EBCDIC) */ + if (*dest > max) { + *dest = max; + } +} + +static char* +pseudoScript(const UChar *str) { + char *p; + if (!str) { + return "\0"; + } + for (p = pseudo; *str; str++, p++) { + switch (u_charDirection(*str)) { + case U_RIGHT_TO_LEFT: + substituteByPseudoChar(str, p, MIN_HEB_LETTER, 'K', 'Z'); + break; + case U_RIGHT_TO_LEFT_ARABIC: + if (*str > 0xFE00) { + substituteByPseudoChar(str, p, MIN_SHAPED_LETTER, 'F', 'J'); + } else { + substituteByPseudoChar(str, p, MIN_ARAB_LETTER, 'A', 'E'); + } + break; + case U_ARABIC_NUMBER: + substituteByPseudoChar(str, p, ARAB_ZERO, '5', '9'); + break; + default: + *p = (char)*str; + break; + } + } + *p = '\0'; + return pseudo; +} +#else +static char* +pseudoScript(const UChar *str) { + return aescstrdup(str, -1); +} +#endif + +static void +logResultsForDir(const UChar *srcTxt, const UChar *destTxt, const UChar *expectedTxt, + UBiDiLevel inLevel, UBiDiLevel outLevel) +{ + if (u_strcmp(expectedTxt, destTxt)) { + log_err("Unexpected transform Dest: 
inLevel: 0x%02x; outLevel: 0x%02x;\ninText: %s; outText: %s; expected: %s\n", + inLevel, outLevel, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expectedTxt)); + } +} + +/** + * Tests various combinations of base directions, with the input either + * UBIDI_DEFAULT_LTR or UBIDI_DEFAULT_RTL, and the + * output either UBIDI_LTR or UBIDI_RTL. Order is + * always UBIDI_LOGICAL for the input and UBIDI_VISUAL + * for the output. + */ +static void +testAutoDirection(void) +{ + static const UBiDiLevel inLevels[] = { + UBIDI_DEFAULT_LTR, UBIDI_DEFAULT_RTL + }; + static const UBiDiLevel outLevels[] = { + UBIDI_LTR, UBIDI_RTL + }; + static const char *srcTexts[] = { + "abc \\u05d0\\u05d1\0", + "... abc \\u05d0\\u05d1\0", + "\\u05d0\\u05d1 abc\0", + "... \\u05d0\\u05d1 abc\0", + ".*:" + }; + uint32_t nTexts = sizeof(srcTexts) / sizeof(srcTexts[0]); + uint32_t i, nInLevels = sizeof(inLevels) / sizeof(inLevels[0]); + uint32_t j, nOutLevels = sizeof(outLevels) / sizeof(outLevels[0]); + + UBiDi *pBidi = ubidi_open(); + + UErrorCode errorCode = U_ZERO_ERROR; + UBiDiTransform *pTransform = ubiditransform_open(&errorCode); + + while (nTexts-- > 0) { + uint32_t srcLen; + u_unescape(srcTexts[nTexts], src, STR_CAPACITY); + srcLen = u_strlen(src); + for (i = 0; i < nInLevels; i++) { + for (j = 0; j < nOutLevels; j++) { + ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY - 1, + inLevels[i], UBIDI_LOGICAL, outLevels[j], UBIDI_VISUAL, + UBIDI_MIRRORING_OFF, 0, &errorCode); + /* Use UBiDi as a model we compare to */ + ubidi_setPara(pBidi, src, srcLen, inLevels[i], NULL, &errorCode); + ubidi_writeReordered(pBidi, expected, STR_CAPACITY, UBIDI_REORDER_DEFAULT, &errorCode); + if (outLevels[j] == UBIDI_RTL) { + ubidi_writeReverse(expected, u_strlen(expected), temp, STR_CAPACITY, + UBIDI_OUTPUT_REVERSE, &errorCode); + logResultsForDir(src, dest, temp, inLevels[i], outLevels[j]); + } else { + logResultsForDir(src, dest, expected, inLevels[i], outLevels[j]); + } + } + } + 
} + ubidi_close(pBidi); + ubiditransform_close(pTransform); +} + +static void +shapeDigits(UChar *str, uint32_t digits) +{ + const UChar srcZero = (digits & U_SHAPE_DIGITS_EN2AN) ? LATN_ZERO : ARAB_ZERO; + const UChar extent = srcZero == ARAB_ZERO ? LATN_ZERO - ARAB_ZERO : ARAB_ZERO - LATN_ZERO; + UChar32 c = 0; + uint32_t i = 0, j, length = u_strlen(str); + while (i < length) { + j = i; + U16_NEXT(str, i, length, c); + if (c >= srcZero && c <= srcZero + 9) { + /* length of c here is always a single UChar16 */ + str[j] = c + extent; + } + } +} + +static void +verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt, + const UChar *destTxt, const char *expectedChars, uint32_t digits, uint32_t letters) +{ + switch (digits) { + case U_SHAPE_DIGITS_EN2AN: + case U_SHAPE_DIGITS_AN2EN: + u_unescape(expectedChars, expected, STR_CAPACITY); + shapeDigits(expected, digits); + break; + case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: + u_unescape(pTest->pContextShapes, expected, STR_CAPACITY); + break; + case U_SHAPE_DIGITS_NOOP: + u_unescape(expectedChars, expected, STR_CAPACITY); + break; + } + if (letters & U_SHAPE_LETTERS_SHAPE) { + uint32_t i = 0, j, length = u_strlen(expected); + UChar32 c = 0; + while (i < length) { + j = i; + U16_NEXT(expected, i, length, c); + /* below the length of old and new values is always a single + UChar16, so can just assign a new value to expected[j] */ + if (c == 0x0630) { + expected[j] = 0xfeab; + } else if (c == 0x0631) { + expected[j] = 0xfead; + } else if (c == 0x0632) { + expected[j] = 0xfeaf; + } + } + } + if (u_strcmp(expected, dest)) { + log_err("Unexpected transform Dest: Test: %s; Digits: 0x%08x; Letters: 0x%08x\ninText: %s; outText: %s; expected: %s\n", + pTest->pMessage, digits, letters, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expected)); + } +} + +/** + * This function covers: + * + */ +static void +testAllTransformOptions(void) +{ + static const char *inText = + "a[b]c \\u05d0(\\u05d1\\u05d2 
\\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0"; + + static const UBidiTestCases testCases[] = { + { UBIDI_LTR, UBIDI_LOGICAL, + UBIDI_LTR, UBIDI_LOGICAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", // reordering no mirroring + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662", // context numeric shaping + "1: Logical LTR ==> Logical LTR" }, + { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", + "2: Logical LTR ==> Visual LTR" }, + { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL, + "\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c", + "\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 a[b]c", + "\\u0632 \\u0661\\u0662 f \\u0631e\\u0664 \\u0630 \\u0662\\u0663\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c", + "3: Logical LTR ==> Logical RTL" }, + { UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL, + "\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 c]b[a", + "\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 c]b[a", + "\\u0632 \\u0662\\u0661 f \\u0631\\u0664e \\u0630 \\u0660\\u0663\\u0662 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 c]b[a", + "4: Logical LTR ==> Visual RTL" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL, + "a[b]c 
\\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", + "5: Logical RTL ==> Logical RTL" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "6: Logical RTL ==> Visual RTL" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_LOGICAL, + "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c", + "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 a[b]c", + "\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c", + "7: Logical RTL ==> Logical LTR" }, + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "8: Logical RTL ==> Visual LTR" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", // mirroring + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 
\\u0661\\u0662", + "9: Visual LTR ==> Visual LTR" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "10: Visual LTR ==> Logical LTR" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL, + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f \\u0664\\u0631e \\u0660\\u0663\\u0662 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "11: Visual LTR ==> Visual RTL" }, + { UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL, + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c", + "\\u0661\\u0662 \\u0632 f \\u0664\\u0631e \\u0662\\u0663\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c", + "12: Visual LTR ==> Logical RTL" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL, + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", + "a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", + "a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", + "13: Visual RTL ==> Visual RTL" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 
\\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "14: Visual RTL ==> Logical RTL" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL, + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a", + "\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a", + "15: Visual RTL ==> Visual LTR" }, + { UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL, + "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a", + "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 c]b[a", + "\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a", + "16: Visual RTL ==> Logical LTR" }, + + { UBIDI_DEFAULT_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", + "17: Logical DEFAULT_RTL ==> Visual LTR" }, +#if 0 + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_LTR, UBIDI_VISUAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "18: Logical RTL ==> Visual DEFAULT_LTR" }, +#endif + { UBIDI_DEFAULT_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL, + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 
23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632", + "a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632", + "19: Logical DEFAULT_LTR ==> Visual LTR" }, +#if 0 + { UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_RTL, UBIDI_VISUAL, + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661", + "20: Logical RTL ==> Visual DEFAULT_RTL" } +#endif + }; + static const uint32_t digits[] = { + U_SHAPE_DIGITS_NOOP, + U_SHAPE_DIGITS_AN2EN, + U_SHAPE_DIGITS_EN2AN, + U_SHAPE_DIGITS_ALEN2AN_INIT_LR + }; + static const uint32_t letters[] = { + U_SHAPE_LETTERS_UNSHAPE, + U_SHAPE_LETTERS_SHAPE + }; + uint32_t i, nTestCases = sizeof(testCases) / sizeof(testCases[0]); + uint32_t j, nDigits = sizeof(digits) / sizeof(digits[0]); + uint32_t k, nLetters = sizeof(letters) / sizeof(letters[0]); + + UErrorCode errorCode = U_ZERO_ERROR; + UBiDiTransform *pTransform = ubiditransform_open(&errorCode); + + u_unescape(inText, src, STR_CAPACITY); + + // Test various combinations of para level, order, mirroring, digits and letters + for (i = 0; i < nTestCases; i++) { + ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY, + testCases[i].inLevel, testCases[i].inOr, + testCases[i].outLevel, testCases[i].outOr, + UBIDI_MIRRORING_ON, 0, &errorCode); + verifyResultsForAllOpt(&testCases[i], src, dest, + testCases[i].pReorderAndMirror, U_SHAPE_DIGITS_NOOP, + U_SHAPE_LETTERS_NOOP); + + for (j = 0; j < nDigits; j++) { + for (k = 0; k < nLetters; k++) { + /* Use here NULL for pTransform */ + ubiditransform_transform(NULL, src, -1, dest, STR_CAPACITY, + 
testCases[i].inLevel, testCases[i].inOr, + testCases[i].outLevel, testCases[i].outOr, + UBIDI_MIRRORING_OFF, digits[j] | letters[k], + &errorCode); + verifyResultsForAllOpt(&testCases[i], src, dest, + testCases[i].pReorderNoMirror, digits[j], letters[k]); + } + } + } + ubiditransform_close(pTransform); +} + +void +addBidiTransformTest(TestNode** root) +{ + addTest(root, testAutoDirection, "complex/bidi-transform/TestAutoDirection"); + addTest(root, testAllTransformOptions, "complex/bidi-transform/TestAllTransformOptions"); +} + +#ifdef __cplusplus +} +#endif