ICU-11679 merge C BiDi Transform from branch

* fixed from branch: inadvertently removed Unicode attribution from Makefiles
* fixed from branch: Unicode attribution
* bonus: moved icuplug into the 'registration' filter on windows

X-SVN-Rev: 39170
This commit is contained in:
Steven R. Loomis 2016-09-09 16:41:15 +00:00
parent d77bd4cec6
commit 6ce57afd03
12 changed files with 1298 additions and 3 deletions

3
.gitattributes vendored
View file

@ -51,6 +51,8 @@ README text !eol
icu4c/icu4c.css -text
icu4c/source/aclocal.m4 -text
icu4c/source/allinone/icucheck.bat -text
icu4c/source/common/ubiditransform.c -text
icu4c/source/common/unicode/ubiditransform.h -text
icu4c/source/config/m4/icu-conditional.m4 -text
icu4c/source/data/curr/pool.res -text
icu4c/source/data/in/coll/ucadata-implicithan.icu -text
@ -132,6 +134,7 @@ icu4c/source/samples/ugrep/ugrep.vcxproj -text
icu4c/source/samples/uresb/resources.vcxproj -text
icu4c/source/samples/uresb/uresb.vcxproj -text
icu4c/source/samples/ustring/ustring.vcxproj -text
icu4c/source/test/cintltst/cbiditransformtst.c -text
icu4c/source/test/depstest/icu-dependencies-mode.el -text
icu4c/source/test/iotest/iotest.vcxproj -text
icu4c/source/test/letest/cletest.vcxproj -text

4
.gitignore vendored
View file

@ -6,11 +6,15 @@ icu4c/lib64
icu4c/source/Doxyfile
icu4c/source/Makefile
icu4c/source/README
icu4c/source/allinone/*.db
icu4c/source/allinone/*.ncb
icu4c/source/allinone/*.opendb
icu4c/source/allinone/*.opensdf
icu4c/source/allinone/*.opt
icu4c/source/allinone/*.sdf
icu4c/source/allinone/*.suo
icu4c/source/allinone/.vs
icu4c/source/allinone/Debug
icu4c/source/allinone/ipch
icu4c/source/autom4te.cache
icu4c/source/bin

View file

@ -109,6 +109,7 @@ uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \
sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
ubiditransform.o \
pluralmap.o
## Header files to install

View file

@ -238,6 +238,7 @@
<ItemGroup>
<ClCompile Include="filteredbrk.cpp" />
<ClCompile Include="ubidi.c" />
<ClCompile Include="ubiditransform.c" />
<ClCompile Include="ubidi_props.c" />
<ClCompile Include="ubidiln.c" />
<ClCompile Include="ubidiwrt.c" />
@ -586,6 +587,7 @@
<ClInclude Include="uchar_props_data.h" />
<ClInclude Include="ucol_data.h" />
<ClInclude Include="ucol_swp.h" />
<ClInclude Include="unicode\ubiditransform.h" />
<ClInclude Include="unistrappender.h" />
<ClInclude Include="hash.h" />
<ClInclude Include="propsvec.h" />

View file

@ -586,13 +586,18 @@
<ClCompile Include="stringtriebuilder.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="icuplug.cpp" />
<ClCompile Include="uloc_keytype.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="filteredbrk.cpp">
<Filter>break iteration</Filter>
</ClCompile>
<ClCompile Include="icuplug.cpp">
<Filter>registration</Filter>
</ClCompile>
<ClCompile Include="ubiditransform.c">
<Filter>bidi</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ubidi_props.h">
@ -910,6 +915,9 @@
<ClInclude Include="unicode\filteredbrk.h">
<Filter>break iteration</Filter>
</ClInclude>
<ClInclude Include="unicode\ubiditransform.h">
<Filter>bidi</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc">
@ -1173,4 +1181,4 @@
<Filter>collections</Filter>
</CustomBuild>
</ItemGroup>
</Project>
</Project>

View file

@ -0,0 +1,528 @@
/*
******************************************************************************
*
* Copyright (C) 2016 and later: Unicode, Inc. and others.
* License & terms of use: http://www.unicode.org/copyright.html
*
******************************************************************************
* file name: ubiditransform.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2016jul24
* created by: Lina Kemmel
*
*/
#include "cmemory.h"
#include "unicode/ubidi.h"
#include "unicode/ustring.h"
#include "unicode/ushape.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "unicode/ubiditransform.h"
/* Some convenience defines */
#define LTR                         UBIDI_LTR
#define RTL                         UBIDI_RTL
#define LOGICAL                     UBIDI_LOGICAL
#define VISUAL                      UBIDI_VISUAL
#define SHAPE_LOGICAL               U_SHAPE_TEXT_DIRECTION_LOGICAL
#define SHAPE_VISUAL                U_SHAPE_TEXT_DIRECTION_VISUAL_LTR

/**
 * Validates and normalizes a length argument of the enclosing function.
 * <ul>
 * <li>LEN == 0: the enclosing function returns 0 without setting an error,</li>
 * <li>LEN < -1: *ERROR is set to U_ILLEGAL_ARGUMENT_ERROR and the enclosing
 * function returns 0,</li>
 * <li>LEN == -1: LEN is replaced by u_strlen(STR), so STR must be
 * zero-terminated.</li>
 * </ul>
 * LEN must be a modifiable lvalue. The macro contains return statements, so
 * it may only be used in a function returning an integer type and before any
 * resource that needs cleanup has been acquired.
 * Wrapped in do { } while (0) so that it behaves as a single statement (for
 * instance in an unbraced if/else), and every argument is parenthesized.
 */
#define CHECK_LEN(STR, LEN, ERROR) do { \
        if ((LEN) == 0) return 0; \
        if ((LEN) < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \
        if ((LEN) == -1) (LEN) = u_strlen(STR); \
    } while (0)

/* Capacity of ReorderingScheme.actions, including the terminating NULL. */
#define MAX_ACTIONS     7
/**
* Typedef for a pointer to a function, which performs one step of a
* transformation (such as reordering, setting "inverse" mode, character
* mirroring, etc.) on the given <code>UBiDiTransform</code>.
*
* The return value indicates whether the step wrote new text to the
* destination buffer (TRUE) or left the text untouched (FALSE). It does not
* report success: failures are reported only through the
* <code>UErrorCode</code> argument, which the caller must check separately.
*/
typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *);
/**
* Structure that holds a predefined reordering scheme, including the following
* information:
* <ul>
* <li>an input base direction (stored as a paragraph level, LTR or RTL),</li>
* <li>an input order,</li>
* <li>an output base direction (stored as a paragraph level, LTR or RTL),</li>
* <li>an output order,</li>
* <li>a digit shaping direction,</li>
* <li>a letter shaping direction,</li>
* <li>a base direction that should be applied when the reordering engine is
* invoked (which can not always be derived from the caller-defined
* options),</li>
* <li>an array of pointers to functions that accomplish the bidi layout
* transformation.</li>
* </ul>
* The first four fields form the lookup key used by findMatchingScheme().
* The actions array is processed in order up to the first NULL entry, so every
* scheme must list at most MAX_ACTIONS - 1 actions.
*/
typedef struct {
UBiDiLevel inLevel; /* input level (lookup key) */
UBiDiOrder inOrder; /* input order (lookup key) */
UBiDiLevel outLevel; /* output level (lookup key) */
UBiDiOrder outOrder; /* output order (lookup key) */
uint32_t digitsDir; /* digit shaping direction: a U_SHAPE_TEXT_DIRECTION_* flag */
uint32_t lettersDir; /* letter shaping direction: a U_SHAPE_TEXT_DIRECTION_* flag */
UBiDiLevel baseLevel; /* paragraph level to be used with setPara */
const UBiDiAction actions[MAX_ACTIONS]; /* NULL-terminated array of pointers to functions carrying out the transformation */
} ReorderingScheme;
/**
* State of the transformation engine. The object is zero-initialized by
* ubiditransform_open() and released by ubiditransform_close(), which also
* closes pBidi and frees src. The dest and pDestLength members are only
* meaningful while ubiditransform_transform() is running.
*/
struct UBiDiTransform {
UBiDi *pBidi; /* pointer to a UBiDi object; created lazily, closed in ubiditransform_close() */
const ReorderingScheme *pActiveScheme; /* effective reordering scheme */
UChar *src; /* input text: a private working copy owned by this object */
UChar *dest; /* output text: the caller's buffer, not owned */
uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */
uint32_t srcSize; /* input text capacity excluding the trailing zero, as recorded by updateSrc() */
uint32_t destSize; /* output text capacity */
uint32_t *pDestLength; /* number of UChars written to dest */
uint32_t reorderingOptions; /* reordering options - currently only support DO_MIRRORING */
uint32_t digits; /* digit option for ArabicShaping */
uint32_t letters; /* letter option for ArabicShaping */
};
/*
 * Allocates a zero-initialized UBiDiTransform. Returns NULL without touching
 * the error code when *pErrorCode already indicates a failure, and NULL with
 * U_MEMORY_ALLOCATION_ERROR when the allocation itself fails.
 */
U_DRAFT UBiDiTransform* U_EXPORT2
ubiditransform_open(UErrorCode *pErrorCode)
{
    UBiDiTransform *pNewTransform;

    if (U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    pNewTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform));
    if (pNewTransform == NULL) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    }
    return pNewTransform;
}
/*
 * Releases a UBiDiTransform together with the resources it owns: the UBiDi
 * object and the private copy of the source text. A NULL argument is a no-op.
 */
U_DRAFT void U_EXPORT2
ubiditransform_close(UBiDiTransform *pBiDiTransform)
{
    if (pBiDiTransform == NULL) {
        return;
    }
    if (pBiDiTransform->pBidi != NULL) {
        ubidi_close(pBiDiTransform->pBidi);
    }
    if (pBiDiTransform->src != NULL) {
        uprv_free(pBiDiTransform->src);
    }
    uprv_free(pBiDiTransform);
}
/**
 * Runs the Bidi resolution step: hands the current working text to
 * <code>ubidi_setPara</code>, using the paragraph level prescribed by the
 * active reordering scheme and no explicit embedding levels.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value.
 *
 * @return Always FALSE, because this step never writes to the destination
 *      text. The caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    const UBiDiLevel paraLevel = pTransform->pActiveScheme->baseLevel;

    ubidi_setPara(pTransform->pBidi,
            pTransform->src, pTransform->srcLength,
            paraLevel, NULL, pErrorCode);
    return FALSE;
}
/**
 * Runs the basic reordering step (Logical -> Visual LTR): writes the
 * reordered text to the destination buffer with
 * <code>ubidi_writeReordered</code>, applying the pending reordering options.
 * The pending options are then reset to <code>UBIDI_REORDER_DEFAULT</code> so
 * that they are not applied a second time by a later step.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value.
 *
 * @return Always TRUE (the destination text is reported as modified), even
 *      when the call fails; the caller should also check
 *      <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    UChar *out = pTransform->dest;
    const uint32_t outCapacity = pTransform->destSize;

    ubidi_writeReordered(pTransform->pBidi, out, outCapacity,
            pTransform->reorderingOptions, pErrorCode);

    /* The reported length is that of the input text. */
    *pTransform->pDestLength = pTransform->srcLength;
    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
    return TRUE;
}
/**
 * Switches the <code>UBiDi</code> object to "inverse" mode and selects the
 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> reordering mode.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value (not used by this step).
 *
 * @return Always FALSE, because this step never writes to the destination
 *      text.
 */
static UBool
action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    UBiDi *bidi = pTransform->pBidi;

    (void)pErrorCode;   /* present only to match the UBiDiAction signature */
    ubidi_setInverse(bidi, TRUE);
    ubidi_setReorderingMode(bidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT);
    return FALSE;
}
/**
 * Selects the <code>UBIDI_REORDER_RUNS_ONLY</code> reordering mode, which
 * indicates a Logical LTR <-> Logical RTL transformation.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value (not used by this step).
 *
 * @return Always FALSE, because this step never writes to the destination
 *      text.
 */
static UBool
action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    UBiDi *bidi = pTransform->pBidi;

    (void)pErrorCode;   /* present only to match the UBiDiAction signature */
    ubidi_setReorderingMode(bidi, UBIDI_REORDER_RUNS_ONLY);
    return FALSE;
}
/**
 * Runs the string reverse step: writes the reversed working text to the
 * destination buffer with <code>ubidi_writeReverse</code> and default options.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value.
 *
 * @return Always TRUE (the destination text is reported as modified), even
 *      when the call fails; the caller should also check
 *      <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    const UChar *in = pTransform->src;
    const uint32_t inLength = pTransform->srcLength;

    ubidi_writeReverse(in, inLength,
            pTransform->dest, pTransform->destSize,
            UBIDI_REORDER_DEFAULT, pErrorCode);

    /* The reported length is that of the input text. */
    *pTransform->pDestLength = inLength;
    return TRUE;
}
/**
 * Applies a new value to the text that serves as input at the current
 * processing step. This value is identical to the original one when we begin
 * the processing, but usually changes as the transformation progresses.
 *
 * The text is copied into the private <code>src</code> buffer of the
 * transform, which is reallocated (with some slack) when the requested
 * capacity exceeds the current one. The buffer always has room for a
 * terminating zero in addition to <code>srcSize</code> UChars, so the stored
 * text is always zero-terminated.
 *
 * Nothing is done when <code>*pErrorCode</code> already indicates a failure:
 * in that case <code>newSrc</code> and <code>newLength</code> may stem from a
 * failed step and must not be trusted.
 *
 * If the reallocation fails, <code>U_MEMORY_ALLOCATION_ERROR</code> is set and
 * the transform is left with an empty source (<code>src == NULL</code>,
 * <code>srcSize == 0</code>, <code>srcLength == 0</code>), so that a later
 * call on the same object allocates again instead of writing through NULL.
 *
 * @param pTransform A pointer to the <code>UBiDiTransform</code> structure.
 * @param newSrc A pointer whose value is to be used as input text.
 * @param newLength A length of the new text in <code>UChar</code>s.
 * @param newSize A new source capacity in <code>UChar</code>s.
 * @param pErrorCode Pointer to the error code value. Set to
 *      <code>U_BUFFER_OVERFLOW_ERROR</code> if <code>newSize</code> is
 *      smaller than <code>newLength</code>.
 */
static void
updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength,
        uint32_t newSize, UErrorCode *pErrorCode)
{
    if (U_FAILURE(*pErrorCode)) {
        return;
    }
    if (newSize < newLength) {
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        return;
    }
    if (newSize > pTransform->srcSize) {
        newSize += 50; /* allocate slightly more than needed right now */
        if (pTransform->src != NULL) {
            uprv_free(pTransform->src);
            pTransform->src = NULL;
        }
        /* Keep the bookkeeping consistent with src == NULL until the new
           buffer is known to exist. */
        pTransform->srcSize = 0;
        pTransform->srcLength = 0;
        /* One extra UChar for the terminating zero, which srcSize excludes. */
        pTransform->src = (UChar *)uprv_malloc(((size_t)newSize + 1) * sizeof(UChar));
        if (pTransform->src == NULL) {
            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        pTransform->srcSize = newSize;
    }
    u_strncpy(pTransform->src, newSrc, newLength);
    pTransform->srcLength = u_terminateUChars(pTransform->src,
            pTransform->srcSize + 1, newLength, pErrorCode);
}
/**
 * Thin wrapper around <code>u_shapeArabic</code>: shapes the current working
 * text into the destination buffer and records the length reported by the
 * shaping function as the destination length.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param options Shaping options.
 * @param pErrorCode Pointer to the error code value.
 */
static void
doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
{
    uint32_t *pShapedLength = pTransform->pDestLength;

    *pShapedLength = u_shapeArabic(
            pTransform->src, pTransform->srcLength,
            pTransform->dest, pTransform->destSize,
            options, pErrorCode);
}
/**
 * Runs the digit and letter shaping step.
 *
 * When the active scheme uses the same text direction for digits and letters,
 * both are shaped in a single pass. Otherwise digits are shaped first, the
 * result is fed back as the new working text, and letters are shaped in a
 * second pass with their own direction.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value.
 *
 * @return FALSE if no shaping option is set at all (the text is not touched),
 *      TRUE otherwise. The caller should also check
 *      <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    const ReorderingScheme *pScheme = pTransform->pActiveScheme;

    if (pTransform->letters == 0 && pTransform->digits == 0) {
        return FALSE;
    }
    if (pScheme->lettersDir != pScheme->digitsDir) {
        /* Pass 1: digits only. */
        doShape(pTransform, pTransform->digits | pScheme->digitsDir, pErrorCode);
        if (U_SUCCESS(*pErrorCode)) {
            /* Pass 2: letters, applied to the output of pass 1. */
            updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength,
                    *pTransform->pDestLength, pErrorCode);
            doShape(pTransform, pTransform->letters | pScheme->lettersDir,
                    pErrorCode);
        }
    } else {
        /* Same direction for both: one combined pass. */
        doShape(pTransform,
                pTransform->letters | pTransform->digits | pScheme->lettersDir,
                pErrorCode);
    }
    return TRUE;
}
/**
 * Performs character mirroring: copies the working text to the destination
 * buffer, replacing every code point that sits on an odd (RTL) embedding
 * level with its <code>u_charMirror</code> counterpart. The embedding levels
 * must have been resolved beforehand (see <code>action_resolve</code>).
 *
 * The pending <code>UBIDI_DO_MIRRORING</code> option is consumed: the
 * reordering options are reset to <code>UBIDI_REORDER_DEFAULT</code> so that
 * mirroring is not applied again by a later step.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value. Set to
 *      <code>U_BUFFER_OVERFLOW_ERROR</code> if the destination buffer is
 *      smaller than the working text.
 *
 * @return FALSE if mirroring is not requested or the destination buffer is too
 *      small, TRUE if the destination text was written. Besides the return
 *      value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    UChar32 c;
    uint32_t i = 0, j = 0;
    if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
        return FALSE;
    }
    if (pTransform->destSize < pTransform->srcLength) {
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }
    /* A while loop (rather than do-while) so that empty text writes nothing. */
    while (i < pTransform->srcLength) {
        /* The level must be read before U16_NEXT advances i. */
        UBool isOdd = (UBool)(ubidi_getLevelAt(pTransform->pBidi, i) & 1);
        U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
        /* Mirror into the local first: U16_APPEND_UNSAFE evaluates its code
           point argument several times, so it must not contain a call. */
        if (isOdd) {
            c = u_charMirror(c);
        }
        U16_APPEND_UNSAFE(pTransform->dest, j, c);
    }
    *pTransform->pDestLength = j;
    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
    return TRUE;
}
/**
* All possible reordering schemes.
*
* There is one entry for every combination of input and output base direction
* (LTR, RTL) and order (LOGICAL, VISUAL), 16 in total. The members of each
* entry are, in this order: input level, input order, output level, output
* order, digit shaping direction, letter shaping direction, paragraph level
* passed to ubidi_setPara, and the NULL-terminated list of actions that are
* executed in sequence to carry out the transformation.
*/
static const ReorderingScheme Schemes[] =
{
/* 0: Logical LTR => Visual LTR */
{LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_shapeArabic, action_resolve, action_reorder, NULL}},
/* 1: Logical RTL => Visual LTR */
{RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_resolve, action_reorder, action_shapeArabic, NULL}},
/* 2: Logical LTR => Visual RTL */
{LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}},
/* 3: Logical RTL => Visual RTL */
{RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}},
/* 4: Visual LTR => Logical RTL */
{LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
/* 5: Visual RTL => Logical RTL */
{RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}},
/* 6: Visual LTR => Logical LTR */
{LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
/* 7: Visual RTL => Logical LTR */
{RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}},
/* 8: Logical LTR => Logical RTL */
/* With six actions plus the terminating NULL, this entry and the next one
use all MAX_ACTIONS slots. */
{LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}},
/* 9: Logical RTL => Logical LTR */
{RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL,
{action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}},
/* 10: Visual LTR => Visual RTL */
{LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}},
/* 11: Visual RTL => Visual LTR */
{RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}},
/* 12: Logical LTR => Logical LTR */
{LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_resolve, action_mirror, action_shapeArabic, NULL}},
/* 13: Logical RTL => Logical RTL */
/* NOTE(review): this is the only entry with digitsDir == SHAPE_VISUAL and
lettersDir == SHAPE_LOGICAL; the other mixed entries have them the other
way round. Confirm that this is intended. */
{RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL,
{action_resolve, action_mirror, action_shapeArabic, NULL}},
/* 14: Visual LTR => Visual LTR */
{LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_resolve, action_mirror, action_shapeArabic, NULL}},
/* 15: Visual RTL => Visual RTL */
{RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}}
};
/* Number of entries in the Schemes table. */
static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes);
/**
 * Reduces the caller-supplied input and output paragraph levels to plain
 * LTR/RTL values.
 *
 * For the input level, <code>UBIDI_DEFAULT_LTR</code> and
 * <code>UBIDI_DEFAULT_RTL</code> are resolved according to the base direction
 * reported by <code>ubidi_getBaseDirection</code> for the text; if that is
 * neutral, the fallback implied by the option itself is used. Any other value
 * is reduced to its lowest bit.
 *
 * For the output level, either default option means "same as the resolved
 * input level"; any other value is reduced to its lowest bit.
 *
 * @param text The input text.
 * @param length Length of the input text in <code>UChar</code>s.
 * @param pInLevel In/out: the input paragraph level.
 * @param pOutLevel In/out: the output paragraph level.
 */
static void
resolveBaseDirection(const UChar *text, uint32_t length,
        UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel)
{
    const UBiDiLevel requestedIn = *pInLevel;

    if (requestedIn == UBIDI_DEFAULT_LTR || requestedIn == UBIDI_DEFAULT_RTL) {
        UBiDiLevel detected = ubidi_getBaseDirection(text, length);
        if (detected != UBIDI_NEUTRAL) {
            *pInLevel = detected;
        } else if (requestedIn == UBIDI_DEFAULT_RTL) {
            *pInLevel = RTL;
        } else {
            *pInLevel = LTR;
        }
    } else {
        *pInLevel &= 1;
    }

    if (*pOutLevel == UBIDI_DEFAULT_LTR || *pOutLevel == UBIDI_DEFAULT_RTL) {
        *pOutLevel = *pInLevel;
    } else {
        *pOutLevel &= 1;
    }
}
/**
 * Looks up the predefined <code>ReorderingScheme</code> whose input/output
 * levels and orders are all equal to the requested ones.
 *
 * @param inLevel Input paragraph level.
 * @param outLevel Output paragraph level.
 * @param inOrder Input text order.
 * @param outOrder Output text order.
 *
 * @return The first matching entry of the <code>Schemes</code> table, or NULL
 *      if there is none.
 */
static const ReorderingScheme*
findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel,
        UBiDiOrder inOrder, UBiDiOrder outOrder)
{
    const ReorderingScheme *pCandidate = Schemes;
    const ReorderingScheme *const pEnd = Schemes + nSchemes;

    for (; pCandidate != pEnd; ++pCandidate) {
        if (pCandidate->inLevel != inLevel || pCandidate->outLevel != outLevel) {
            continue;
        }
        if (pCandidate->inOrder != inOrder || pCandidate->outOrder != outOrder) {
            continue;
        }
        return pCandidate;
    }
    return NULL;
}
/*
 * Transforms src from the <inParaLevel, inOrder> layout to the
 * <outParaLevel, outOrder> layout, optionally mirroring characters and
 * applying Arabic shaping, and writes the result to dest.
 *
 * Processing outline:
 *  1. validate the arguments; a zero srcLength or destSize returns 0 with no
 *     error, -1 means "zero-terminated",
 *  2. use the caller's UBiDiTransform, or open a temporary one when
 *     pBiDiTransform is NULL (it is closed again before returning),
 *  3. reduce the paragraph levels to LTR/RTL and select the matching
 *     reordering scheme; U_ILLEGAL_ARGUMENT_ERROR is set if there is none
 *     (e.g. an out-of-range UBiDiOrder value),
 *  4. copy src into the transform's working buffer and run the scheme's
 *     actions in order, stopping at the first failure. After an action that
 *     produced new text in dest, that text becomes the working text of the
 *     next action,
 *  5. if no action produced any text, copy src to dest unchanged.
 *
 * Returns the number of UChars written to dest, or 0 on failure.
 */
U_DRAFT uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform,
            const UChar *src, int32_t srcLength,
            UChar *dest, int32_t destSize,
            UBiDiLevel inParaLevel, UBiDiOrder inOrder,
            UBiDiLevel outParaLevel, UBiDiOrder outOrder,
            UBiDiMirroring doMirroring, uint32_t shapingOptions,
            UErrorCode *pErrorCode)
{
    uint32_t destLength = 0;
    UBool textChanged = FALSE;
    const UBiDiTransform *pOrigTransform = pBiDiTransform;
    const UBiDiAction *action = NULL;

    if (U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if (src == NULL || dest == NULL) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    /* These may return early; nothing has been allocated yet. */
    CHECK_LEN(src, srcLength, pErrorCode);
    CHECK_LEN(dest, destSize, pErrorCode);

    if (pBiDiTransform == NULL) {
        pBiDiTransform = ubiditransform_open(pErrorCode);
        if (U_FAILURE(*pErrorCode)) {
            return 0;
        }
    }
    /* Current limitation: in multiple paragraphs will be resolved according
       to the 1st paragraph */
    resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel);

    pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel,
            inOrder, outOrder);
    if (pBiDiTransform->pActiveScheme == NULL) {
        /* Report the unsupported combination instead of silently returning 0. */
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        goto cleanup;
    }
    pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING
            : UBIDI_REORDER_DEFAULT;

    /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
       scheme at the time shaping is invoked. */
    shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK;
    pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK;
    pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK;

    /* The working buffer must be able to hold both the input and any
       intermediate result that fits into dest. */
    updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode);
    if (U_FAILURE(*pErrorCode)) {
        goto cleanup;
    }
    if (pBiDiTransform->pBidi == NULL) {
        pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode);
        if (U_FAILURE(*pErrorCode)) {
            goto cleanup;
        }
    }
    pBiDiTransform->dest = dest;
    pBiDiTransform->destSize = destSize;
    pBiDiTransform->pDestLength = &destLength;

    /* Checking for U_SUCCESS() within the loop to bail out on first failure. */
    for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) {
        if ((*action)(pBiDiTransform, pErrorCode)) {
            /* Feed the output back as input of the next action. This is
               skipped after the last action (nothing would read the copy), and
               after a failed one, whose reported length may exceed what dest
               actually holds. The actions array is NULL-terminated, so
               action[1] is always a valid element here. */
            if (U_SUCCESS(*pErrorCode) && *(action + 1) != NULL) {
                updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength,
                        *pBiDiTransform->pDestLength, pErrorCode);
            }
            textChanged = TRUE;
        }
    }
    /* Undo any mode change made by action_setInverse so that a reused object
       starts the next transformation in the normal mode. */
    ubidi_setInverse(pBiDiTransform->pBidi, FALSE);

    if (!textChanged && U_SUCCESS(*pErrorCode)) {
        /* Text was not changed - just copy src to dest */
        if (destSize < srcLength) {
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_strncpy(dest, src, srcLength);
            destLength = srcLength;
        }
    }
cleanup:
    if (pOrigTransform != pBiDiTransform) {
        /* The object was opened here on the caller's behalf. */
        ubiditransform_close(pBiDiTransform);
    } else {
        /* Do not keep pointers to the caller's buffer or to our locals. */
        pBiDiTransform->dest = NULL;
        pBiDiTransform->pDestLength = NULL;
        pBiDiTransform->srcLength = 0;
        pBiDiTransform->destSize = 0;
    }
    return U_FAILURE(*pErrorCode) ? 0 : destLength;
}

View file

@ -0,0 +1,312 @@
/*
******************************************************************************
*
* Copyright (C) 2016 and later: Unicode, Inc. and others.
* License & terms of use: http://www.unicode.org/copyright.html
*
******************************************************************************
* file name: ubiditransform.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2016jul24
* created by: Lina Kemmel
*
*/
#ifndef UBIDITRANSFORM_H
#define UBIDITRANSFORM_H
#include "unicode/uchar.h"
#include "unicode/localpointer.h"
/**
* <code>UBiDiOrder</code> indicates the order of text.<p>
* This bidi transformation engine supports all possible combinations (4 in
* total) of input and output text order:
* <ul>
* <li><logical input, visual output>: unless the output direction is RTL, this
* corresponds to a normal operation of the Bidi algorithm as described in the
* Unicode Technical Report and implemented by <code>UBiDi</code> when the
* reordering mode is set to <code>UBIDI_REORDER_DEFAULT</code>. Visual RTL
* mode is not supported by <code>UBiDi</code> and is accomplished through
* reversing a visual LTR string,</li>
* <li><visual input, logical output>: unless the input direction is RTL, this
* corresponds to an "inverse bidi algorithm" in <code>UBiDi</code> with the
* reordering mode set to <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>.
* Visual RTL mode is not supported by <code>UBiDi</code> and is
* accomplished through reversing a visual LTR string,</li>
* <li><logical input, logical output>: if the input and output base directions
* mismatch, this corresponds to the <code>UBiDi</code> implementation with the
* reordering mode set to <code>UBIDI_REORDER_RUNS_ONLY</code>; and if the
* input and output base directions are identical, the transformation engine
* will only handle character mirroring and Arabic shaping operations without
* reordering,</li>
* <li><visual input, visual output>: this reordering mode is not supported by
* the <code>UBiDi</code> engine; it implies character mirroring, Arabic
* shaping, and - if the input/output base directions mismatch - string
* reverse operations.</li>
* </ul>
* @see ubidi_setInverse
* @see ubidi_setReorderingMode
* @see UBIDI_REORDER_DEFAULT
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_RUNS_ONLY
* @draft ICU 58
*/
typedef enum {
/** 0: Constant indicating a logical order.
* This is the default for input text.
* @draft ICU 58
*/
UBIDI_LOGICAL = 0,
/** 1: Constant indicating a visual order.
* This is the default for output text.
* @draft ICU 58
*/
UBIDI_VISUAL
} UBiDiOrder;
/**
* <code>UBiDiMirroring</code> indicates whether or not characters with the
* "mirrored" property in RTL runs should be replaced with their mirror-image
* counterparts. It is the type of the <code>doMirroring</code> parameter of
* <code>ubiditransform_transform</code>.
* @see UBIDI_DO_MIRRORING
* @see ubidi_setReorderingOptions
* @see ubidi_writeReordered
* @see ubidi_writeReverse
* @draft ICU 58
*/
typedef enum {
/** 0: Constant indicating that character mirroring should not be
* performed.
* This is the default.
* @draft ICU 58
*/
UBIDI_MIRRORING_OFF = 0,
/** 1: Constant indicating that character mirroring should be performed.
* This corresponds to calling <code>ubidi_writeReordered</code> or
* <code>ubidi_writeReverse</code> with the
* <code>UBIDI_DO_MIRRORING</code> option bit set.
* @draft ICU 58
*/
UBIDI_MIRRORING_ON
} UBiDiMirroring;
/**
* Forward declaration of the <code>UBiDiTransform</code> structure that stores
* information used by the layout transformation engine.
* @draft ICU 58
*/
typedef struct UBiDiTransform UBiDiTransform;
/**
* Performs transformation of text from the bidi layout defined by the input
* ordering scheme to the bidi layout defined by the output ordering scheme,
* and applies character mirroring and Arabic shaping operations.<p>
* In terms of <code>UBiDi</code>, such a transformation implies:
* <ul>
* <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
* reordering mode is other than normal),</li>
* <li>calling <code>ubidi_setInverse</code> as needed (when text should be
* transformed from a visual to a logical form),</li>
* <li>resolving embedding levels of each character in the input text by
* calling <code>ubidi_setPara</code>,</li>
* <li>reordering the characters based on the computed embedding levels, also
* performing character mirroring as needed, and streaming the result to the
* output, by calling <code>ubidi_writeReordered</code>,</li>
* <li>performing Arabic digit and letter shaping on the output text by calling
* <code>u_shapeArabic</code>.</li>
* </ul>
* An "ordering scheme" encompasses the base direction and the order of text,
* and these characteristics must be defined by the caller for both input and
* output explicitly.<p>
* There are 36 possible combinations of <input, output> ordering schemes,
* which are partially supported by <code>UBiDi</code> already. Examples of the
* currently supported combinations:
* <ul>
* <li><Logical LTR, Visual LTR>: this is equivalent to calling
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
* <li><Logical RTL, Visual LTR>: this is equivalent to calling
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
* <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
* calling <code>ubidi_setPara</code> with
* <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
* <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
* calling <code>ubidi_setPara</code> with
* <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
* <li><Visual LTR, Logical LTR>: this is equivalent to
* calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
* <li><Visual LTR, Logical RTL>: this is equivalent to
* calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
* </ul>
* All combinations that involve the Visual RTL scheme are unsupported by
* <code>UBiDi</code>, for instance:
* <ul>
* <li><Logical LTR, Visual RTL>,</li>
* <li><Visual RTL, Logical RTL>.</li>
* </ul>
* <p>Example of usage of the transformation engine:<br>
* <pre>
* \code
* UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
* UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
* UErrorCode errorCode = U_ZERO_ERROR;
* // Run a transformation.
* ubiditransform_transform(pBidiTransform,
* text1, -1, text2, -1,
* UBIDI_LTR, UBIDI_VISUAL,
* UBIDI_RTL, UBIDI_LOGICAL,
* UBIDI_MIRRORING_OFF,
* U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
* &errorCode);
* // Do something with text2.
* text2[4] = '2';
* // Run a reverse transformation.
* ubiditransform_transform(pBidiTransform,
* text2, -1, text1, -1,
* UBIDI_RTL, UBIDI_LOGICAL,
* UBIDI_LTR, UBIDI_VISUAL,
* UBIDI_MIRRORING_OFF,
* U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
* &errorCode);
*\endcode
* </pre>
* </p>
*
* @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
* allocated with <code>ubiditransform_open()</code> or
* <code>NULL</code>.<p>
* This object serves for one-time setup to amortize initialization
* overheads. Use of this object is not thread-safe. All other threads
* should allocate a new <code>UBiDiTransform</code> object by calling
* <code>ubiditransform_open()</code> before using it. Alternatively,
* a caller can set this parameter to <code>NULL</code>, in which case
* the object will be allocated by the engine on the fly.</p>
* @param src A pointer to the text that the Bidi layout transformations will
* be performed on.
* <p><strong>Note:</strong> the text must be (at least)
* <code>srcLength</code> long.</p>
* @param srcLength The length of the text, in number of UChars. If
* <code>srcLength == -1</code> then the text must be zero-terminated.
* @param dest A pointer to where the processed text is to be copied.
* @param destSize The size of the <code>dest</code> buffer, in number of
* UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
* then the destination length could be as large as
* <code>srcLength * 2</code>. Otherwise, the destination length will
* not exceed <code>srcLength</code>. If the caller reserves the last
* position for zero-termination, it should be excluded from
* <code>destSize</code>.
* <p><code>destSize == -1</code> is allowed and makes sense when
* <code>dest</code> already holds some meaningful value, e.g. that of
* <code>src</code>. In this case <code>dest</code> must be
* zero-terminated.</p>
* @param inParaLevel A base embedding level of the input as defined in
* <code>ubidi_setPara</code> documentation for the
* <code>paraLevel</code> parameter.
* @param inOrder An order of the input, which can be one of the
* <code>UBiDiOrder</code> values.
* @param outParaLevel A base embedding level of the output as defined in
* <code>ubidi_setPara</code> documentation for the
* <code>paraLevel</code> parameter.
* @param outOrder An order of the output, which can be one of the
* <code>UBiDiOrder</code> values.
* @param doMirroring Indicates whether or not to perform character mirroring,
* and can accept one of the <code>UBiDiMirroring</code> values.
* @param shapingOptions Arabic digit and letter shaping options defined in the
* ushape.h documentation.
* <p><strong>Note:</strong> Direction indicator options are computed by
* the transformation engine based on the effective ordering schemes, so
* user-defined direction indicators will be ignored.</p>
* @param pErrorCode A pointer to an error code value.
*
* @return The destination length, i.e. the number of UChars written to
* <code>dest</code>. If the transformation fails, the return value
* will be 0 (and the error code will be written to
* <code>pErrorCode</code>).
*
* @see UBiDiLevel
* @see UBiDiOrder
* @see UBiDiMirroring
* @see ubidi_setPara
* @see u_shapeArabic
* @draft ICU 58
*/
U_DRAFT uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destSize,
UBiDiLevel inParaLevel, UBiDiOrder inOrder,
UBiDiLevel outParaLevel, UBiDiOrder outOrder,
UBiDiMirroring doMirroring, uint32_t shapingOptions,
UErrorCode *pErrorCode);
/**
* Allocates a <code>UBiDiTransform</code> object. This object can be reused,
* e.g. with different ordering schemes, mirroring or shaping options.<p>
* <strong>Note:</strong> The object can only be reused in the same thread.
* All other threads should allocate a new <code>UBiDiTransform</code> object
* before using it.<p>
* Example of usage:<p>
* <pre>
* \code
* UErrorCode errorCode = U_ZERO_ERROR;
* // Open a new UBiDiTransform.
* UBiDiTransform* transform = ubiditransform_open(&errorCode);
* // Run a transformation.
* ubiditransform_transform(transform,
* text1, -1, text2, -1,
* UBIDI_RTL, UBIDI_LOGICAL,
* UBIDI_LTR, UBIDI_VISUAL,
* UBIDI_MIRRORING_ON,
* U_SHAPE_DIGITS_EN2AN,
* &errorCode);
* // Do something with the output text and invoke another transformation using
* // that text as input.
* ubiditransform_transform(transform,
* text2, -1, text3, -1,
* UBIDI_LTR, UBIDI_VISUAL,
* UBIDI_RTL, UBIDI_VISUAL,
* UBIDI_MIRRORING_ON,
* 0, &errorCode);
* // Release the object once it is no longer needed.
* ubiditransform_close(transform);
*\endcode
* </pre>
* <p>
* The <code>UBiDiTransform</code> object must be deallocated by calling
* <code>ubiditransform_close()</code>.
*
* @param pErrorCode A pointer to an error code value.
*
* @return An empty <code>UBiDiTransform</code> object.
* @draft ICU 58
*/
U_DRAFT UBiDiTransform* U_EXPORT2
ubiditransform_open(UErrorCode *pErrorCode);
/**
* Deallocates the given <code>UBiDiTransform</code> object.
*
* @param pBidiTransform The <code>UBiDiTransform</code> object to deallocate,
* as obtained from <code>ubiditransform_open()</code>.
* @draft ICU 58
*/
U_DRAFT void U_EXPORT2
ubiditransform_close(UBiDiTransform *pBidiTransform);
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* \class LocalUBiDiTransformPointer
* "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
* For most methods see the LocalPointerBase base class.
*
* @see LocalPointerBase
* @see LocalPointer
* @draft ICU 58
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif

View file

@ -53,6 +53,7 @@ uenumtst.o utmstest.o currtest.o \
idnatest.o nfsprep.o spreptst.o sprpdata.o \
hpmufn.o tracetst.o reapits.o uregiontest.o ulistfmttest.o\
utexttst.o ucsdetst.o spooftest.o \
cbiditransformtst.o \
cgendtst.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -25,6 +25,7 @@ void addFormatTest(TestNode** root);
void addConvert(TestNode** root);
void addCollTest(TestNode** root);
void addComplexTest(TestNode** root);
void addBidiTransformTest(TestNode** root);
void addUDataTest(TestNode** root);
void addUTF16Test(TestNode** root);
void addUTF8Test(TestNode** root);
@ -60,6 +61,7 @@ void addAllTests(TestNode** root)
addStandardNamesTest(root);
addUCsdetTest(root);
addComplexTest(root);
addBidiTransformTest(root);
addUSetTest(root);
#if !UCONFIG_NO_IDNA
addUStringPrepTest(root);

View file

@ -0,0 +1,430 @@
/********************************************************************
* Copyright (C) 2016 and later: Unicode, Inc. and others.
* License & terms of use: http://www.unicode.org/copyright.html
********************************************************************/
/* file name: cbiditransformtst.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2016aug21
* created by: Lina Kemmel
*/
#include "cintltst.h"
#include "unicode/ubidi.h"
#include "unicode/ubiditransform.h"
#include "unicode/ushape.h"
#include "unicode/ustring.h"
#ifdef __cplusplus
extern "C" {
#endif
/* U+0030 DIGIT ZERO: first of the European (Latin) digits. */
#define LATN_ZERO 0x0030
/* U+0660 ARABIC-INDIC DIGIT ZERO: first of the Arabic-Indic digits. */
#define ARAB_ZERO 0x0660
/* U+05D0 HEBREW LETTER ALEF: lowest Hebrew letter used in the test data. */
#define MIN_HEB_LETTER 0x05D0
/* U+0630 ARABIC LETTER THAL: lowest Arabic letter used in the test data. */
#define MIN_ARAB_LETTER 0x0630
/* U+FEAB: presentation form that the tests expect U+0630 to be shaped into. */
#define MIN_SHAPED_LETTER 0xFEAB
/* Capacity, in code units, of the scratch buffers used by the tests. */
#define STR_CAPACITY 100
/*
 * One row of the transform test table: an input/output configuration plus
 * the expected results (as u_unescape()-style escaped strings) for it.
 */
typedef struct {
UBiDiLevel inLevel; /* paragraph level passed for the input text */
UBiDiOrder inOr; /* ordering scheme of the input text */
UBiDiLevel outLevel; /* paragraph level requested for the output text */
UBiDiOrder outOr; /* ordering scheme requested for the output text */
const char *pReorderNoMirror; /* expected output when run with UBIDI_MIRRORING_OFF */
const char *pReorderAndMirror; /* expected output when run with UBIDI_MIRRORING_ON */
const char *pContextShapes; /* expected output for U_SHAPE_DIGITS_ALEN2AN_INIT_LR */
const char *pMessage; /* label printed in failure messages */
} UBidiTestCases;
/*
 * Scratch buffers shared by the tests in this file. They are file-local
 * (static) so that their very generic names cannot collide with symbols
 * defined by the other test sources linked into the same executable.
 */
static UChar src[STR_CAPACITY] = { 0 };      /* unescaped input text */
static UChar dest[STR_CAPACITY] = { 0 };     /* output of ubiditransform_transform() */
static UChar expected[STR_CAPACITY] = { 0 }; /* expected output text */
static UChar temp[STR_CAPACITY * 2] = { 0 }; /* intermediate buffer for reversed text */
/*
 * Only referenced by the disabled (#if 0) pseudoScript() variant; kept with
 * external linkage so that it does not trigger an unused-variable warning.
 */
char pseudo[STR_CAPACITY] = { 0 };
/* Entry point called by the test driver to register the tests of this file. */
void addBidiTransformTest(TestNode** root);
/* The test functions registered by addBidiTransformTest(). */
static void testAutoDirection(void);
static void testAllTransformOptions(void);
/* Helpers: printable form of a UChar string, digit substitution, result checks. */
static char* pseudoScript(const UChar *str);
static void shapeDigits(UChar *str, uint32_t digits);
static void logResultsForDir(const UChar *srcText, const UChar *destTxt,
const UChar *expectedTxt, UBiDiLevel inLevel, UBiDiLevel outLevel);
static void verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt,
const UChar *destTxt, const char *expectedChars, uint32_t digits,
uint32_t letters);
#if 0
static void substituteByPseudoChar(const UChar *src, char *dest,
const UChar baseReal, const char basePseudo, const char max);
/* TODO: This code assumes the codepage is ASCII based. */
/*
* Using the following conventions:
* AL unshaped: A-E
* AL shaped: F-J
* R: K-Z
* EN: 0-4
* AN: 5-9
*/
/*
 * (Disabled code.) Writes to *dest the pseudo character for *src:
 * basePseudo plus the distance of *src from baseReal, clamped to max.
 * Only the upper bound is clamped; values below basePseudo are not handled.
 */
static void
substituteByPseudoChar(const UChar *src, char *dest, const UChar baseReal,
const char basePseudo, const char max) {
*dest = basePseudo + (*src - baseReal); /* (range math won't work on EBCDIC) */
if (*dest > max) {
*dest = max;
}
}
/*
 * (Disabled code.) Converts str into a readable "pseudo script" string,
 * replacing each character according to its bidi class as returned by
 * u_charDirection(). The result is written to the file-scope buffer
 * `pseudo` and a pointer to that buffer is returned, so each call
 * overwrites the result of the previous one.
 * NOTE(review): no check that str fits into `pseudo` (STR_CAPACITY chars).
 */
static char*
pseudoScript(const UChar *str) {
char *p;
if (!str) {
return "\0";
}
for (p = pseudo; *str; str++, p++) {
switch (u_charDirection(*str)) {
case U_RIGHT_TO_LEFT:
/* R characters map to 'K'..'Z' */
substituteByPseudoChar(str, p, MIN_HEB_LETTER, 'K', 'Z');
break;
case U_RIGHT_TO_LEFT_ARABIC:
/* AL characters: above 0xFE00 map to 'F'..'J', others to 'A'..'E' */
if (*str > 0xFE00) {
substituteByPseudoChar(str, p, MIN_SHAPED_LETTER, 'F', 'J');
} else {
substituteByPseudoChar(str, p, MIN_ARAB_LETTER, 'A', 'E');
}
break;
case U_ARABIC_NUMBER:
/* AN characters map to '5'..'9' */
substituteByPseudoChar(str, p, ARAB_ZERO, '5', '9');
break;
default:
/* everything else is copied, truncated to a char */
*p = (char)*str;
break;
}
}
*p = '\0';
return pseudo;
}
#else
/*
 * Returns a char* rendering of str for use in log messages, obtained from
 * aescstrdup(str, -1).
 * NOTE(review): presumably an escaped copy whose storage is managed by the
 * test framework -- confirm against the aescstrdup() declaration.
 */
static char*
pseudoScript(const UChar *str) {
return aescstrdup(str, -1);
}
#endif
/*
 * Compares the transform output (destTxt) with the expected text and logs
 * an error, including the source text and both paragraph levels, when the
 * two differ. Nothing is logged when they are equal.
 */
static void
logResultsForDir(const UChar *srcTxt, const UChar *destTxt, const UChar *expectedTxt,
        UBiDiLevel inLevel, UBiDiLevel outLevel)
{
    if (u_strcmp(expectedTxt, destTxt) == 0) {
        return; /* output matches the model */
    }
    log_err("Unexpected transform Dest: inLevel: 0x%02x; outLevel: 0x%02x;\ninText: %s; outText: %s; expected: %s\n",
            inLevel, outLevel, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expectedTxt));
}
/**
* Tests various combinations of base directions, with the input either
* <code>UBIDI_DEFAULT_LTR</code> or <code>UBIDI_DEFAULT_RTL</code>, and the
* output either <code>UBIDI_LTR</code> or <code>UBIDI_RTL</code>. Order is
* always <code>UBIDI_LOGICAL</code> for the input and <code>UBIDI_VISUAL</code>
* for the output.
*/
static void
testAutoDirection(void)
{
static const UBiDiLevel inLevels[] = {
UBIDI_DEFAULT_LTR, UBIDI_DEFAULT_RTL
};
static const UBiDiLevel outLevels[] = {
UBIDI_LTR, UBIDI_RTL
};
static const char *srcTexts[] = {
"abc \\u05d0\\u05d1\0",
"... abc \\u05d0\\u05d1\0",
"\\u05d0\\u05d1 abc\0",
"... \\u05d0\\u05d1 abc\0",
".*:"
};
uint32_t nTexts = sizeof(srcTexts) / sizeof(srcTexts[0]);
uint32_t i, nInLevels = sizeof(inLevels) / sizeof(inLevels[0]);
uint32_t j, nOutLevels = sizeof(outLevels) / sizeof(outLevels[0]);
UBiDi *pBidi = ubidi_open();
UErrorCode errorCode = U_ZERO_ERROR;
UBiDiTransform *pTransform = ubiditransform_open(&errorCode);
while (nTexts-- > 0) {
uint32_t srcLen;
u_unescape(srcTexts[nTexts], src, STR_CAPACITY);
srcLen = u_strlen(src);
for (i = 0; i < nInLevels; i++) {
for (j = 0; j < nOutLevels; j++) {
ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY - 1,
inLevels[i], UBIDI_LOGICAL, outLevels[j], UBIDI_VISUAL,
UBIDI_MIRRORING_OFF, 0, &errorCode);
/* Use UBiDi as a model we compare to */
ubidi_setPara(pBidi, src, srcLen, inLevels[i], NULL, &errorCode);
ubidi_writeReordered(pBidi, expected, STR_CAPACITY, UBIDI_REORDER_DEFAULT, &errorCode);
if (outLevels[j] == UBIDI_RTL) {
ubidi_writeReverse(expected, u_strlen(expected), temp, STR_CAPACITY,
UBIDI_OUTPUT_REVERSE, &errorCode);
logResultsForDir(src, dest, temp, inLevels[i], outLevels[j]);
} else {
logResultsForDir(src, dest, expected, inLevels[i], outLevels[j]);
}
}
}
}
ubidi_close(pBidi);
ubiditransform_close(pTransform);
}
/*
 * Substitutes digits in str in place, to build the expected result of a
 * digit shaping option. If the U_SHAPE_DIGITS_EN2AN bit is set in
 * <code>digits</code>, the ten digits starting at LATN_ZERO are replaced by
 * the corresponding digits starting at ARAB_ZERO; otherwise the replacement
 * goes in the opposite direction. All other characters are left untouched.
 */
static void
shapeDigits(UChar *str, uint32_t digits)
{
    UChar fromZero, toZero;
    UChar32 cp = 0;
    uint32_t pos = 0, start, len = u_strlen(str);

    if (digits & U_SHAPE_DIGITS_EN2AN) {
        fromZero = LATN_ZERO;
        toZero = ARAB_ZERO;
    } else {
        fromZero = ARAB_ZERO;
        toZero = LATN_ZERO;
    }
    while (pos < len) {
        start = pos;
        U16_NEXT(str, pos, len, cp);
        if (cp < fromZero || cp > fromZero + 9) {
            continue; /* not a digit of the source range */
        }
        /* Source and target digits are BMP characters: one code unit each. */
        str[start] = (UChar)(toZero + (cp - fromZero));
    }
}
/*
 * Builds the expected text for one combination of digit and letter shaping
 * options in the file-scope buffer <code>expected</code> and compares it
 * with the transform output <code>destTxt</code>, logging an error on
 * mismatch.
 *
 * pTest         the test case; supplies the context-shaping expectation
 *               and the label used in the failure message
 * srcTxt        the input text (used for logging only)
 * destTxt       the text produced by the transform
 * expectedChars escaped expected text before digit/letter substitution
 * digits        the digit shaping option that was applied
 * letters       the letter shaping option that was applied
 */
static void
verifyResultsForAllOpt(const UBidiTestCases *pTest, const UChar *srcTxt,
        const UChar *destTxt, const char *expectedChars, uint32_t digits, uint32_t letters)
{
    switch (digits) {
        case U_SHAPE_DIGITS_EN2AN:
        case U_SHAPE_DIGITS_AN2EN:
            u_unescape(expectedChars, expected, STR_CAPACITY);
            shapeDigits(expected, digits);
            break;
        case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
            /* Context-dependent shaping has its own expected string. */
            u_unescape(pTest->pContextShapes, expected, STR_CAPACITY);
            break;
        case U_SHAPE_DIGITS_NOOP:
        default:
            /* Unknown options are treated like "no digit shaping" so that
               'expected' never keeps the content of a previous call. */
            u_unescape(expectedChars, expected, STR_CAPACITY);
            break;
    }
    if (letters & U_SHAPE_LETTERS_SHAPE) {
        uint32_t i = 0, j, length = u_strlen(expected);
        UChar32 c = 0;
        while (i < length) {
            j = i;
            U16_NEXT(expected, i, length, c);
            /* below the length of old and new values is always a single
               UChar, so can just assign a new value to expected[j] */
            if (c == 0x0630) {
                expected[j] = 0xfeab;
            } else if (c == 0x0631) {
                expected[j] = 0xfead;
            } else if (c == 0x0632) {
                expected[j] = 0xfeaf;
            }
        }
    }
    /* Compare against the text handed in by the caller, not a global. */
    if (u_strcmp(expected, destTxt)) {
        log_err("Unexpected transform Dest: Test: %s; Digits: 0x%08x; Letters: 0x%08x\ninText: %s; outText: %s; expected: %s\n",
                pTest->pMessage, digits, letters, pseudoScript(srcTxt), pseudoScript(destTxt), pseudoScript(expected));
    }
}
/**
* This function covers:
* <ul>
* <li>all possible combinations of ordering schemes and <strong>explicit</strong>
* base directions, applied to both input and output,</li>
* <li>selected tests for auto direction (systematically, auto direction is
* covered in a dedicated test) applied on both input and output,</li>
* <li>all possible combinations of mirroring, digits and letters applied
* to output only.</li>
* </ul>
* Every test case is first run once with mirroring on and no shaping, then
* with mirroring off for every combination of the <code>digits</code> and
* <code>letters</code> options; each result is checked with
* <code>verifyResultsForAllOpt()</code>.
* <p>
* NOTE(review): <code>errorCode</code> is never inspected in this function,
* so a failing open or transform call only shows up indirectly as a text
* mismatch.
*/
static void
testAllTransformOptions(void)
{
/* The single input text that every test case transforms. */
static const char *inText =
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0";
/* Per case: in level/order, out level/order, then the three expected strings
   (reordering without mirroring, with mirroring, context digit shaping)
   and the label used in failure messages. */
static const UBidiTestCases testCases[] = {
{ UBIDI_LTR, UBIDI_LOGICAL,
UBIDI_LTR, UBIDI_LOGICAL,
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0", /* reordering, no mirroring */
"a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", /* mirroring */
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662", /* context numeric shaping */
"1: Logical LTR ==> Logical LTR" },
{ UBIDI_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL,
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632",
"2: Logical LTR ==> Visual LTR" },
{ UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL,
"\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c",
"\\u0632 \\u0661\\u0662 f \\u0631e4 \\u0630 23\\u0660 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 a[b]c",
"\\u0632 \\u0661\\u0662 f \\u0631e\\u0664 \\u0630 \\u0662\\u0663\\u0660 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 a[b]c",
"3: Logical LTR ==> Logical RTL" },
{ UBIDI_LTR, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL,
"\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 c]b[a",
"\\u0632 \\u0662\\u0661 f \\u06314e \\u0630 \\u066032 d \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 c]b[a",
"\\u0632 \\u0662\\u0661 f \\u0631\\u0664e \\u0630 \\u0660\\u0663\\u0662 d \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 c]b[a",
"4: Logical LTR ==> Visual RTL" },
{ UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_LOGICAL,
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0",
"a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", /* mirroring */
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662",
"5: Logical RTL ==> Logical RTL" },
{ UBIDI_RTL, UBIDI_LOGICAL, UBIDI_RTL, UBIDI_VISUAL,
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"6: Logical RTL ==> Visual RTL" },
{ UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_LOGICAL,
"\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c",
"\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 a[b]c",
"\\u0632 \\u0661\\u0662 f 4\\u0631e 23\\u0630 \\u0660 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 a[b]c",
"7: Logical RTL ==> Logical LTR" },
{ UBIDI_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL,
"\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c",
"\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c",
"\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c",
"8: Logical RTL ==> Visual LTR" },
{ UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL,
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0",
"a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662", /* mirroring */
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u0662\\u0663\\u0660 e\\u0631\\u0664 f \\u0632 \\u0661\\u0662",
"9: Visual LTR ==> Visual LTR" },
{ UBIDI_LTR, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL,
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"10: Visual LTR ==> Logical LTR" },
{ UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL,
"\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a",
"\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a",
"\\u0662\\u0661 \\u0632 f \\u0664\\u0631e \\u0660\\u0663\\u0662 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a",
"11: Visual LTR ==> Visual RTL" },
{ UBIDI_LTR, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL,
"\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c",
"\\u0661\\u0662 \\u0632 f 4\\u0631e 23\\u0660 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 a[b]c",
"\\u0661\\u0662 \\u0632 f \\u0664\\u0631e \\u0662\\u0663\\u0660 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 a[b]c",
"12: Visual LTR ==> Logical RTL" },
{ UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_VISUAL,
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662\0",
"a[b]c \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662",
"a[b]c \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 23\\u0660 e\\u06314 f \\u0632 \\u0661\\u0662",
"13: Visual RTL ==> Visual RTL" },
{ UBIDI_RTL, UBIDI_VISUAL, UBIDI_RTL, UBIDI_LOGICAL,
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"14: Visual RTL ==> Logical RTL" },
{ UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_VISUAL,
"\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a",
"\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 c]b[a",
"\\u0662\\u0661 \\u0632 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 c]b[a",
"15: Visual RTL ==> Visual LTR" },
{ UBIDI_RTL, UBIDI_VISUAL, UBIDI_LTR, UBIDI_LOGICAL,
"\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a",
"\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 c]b[a",
"\\u0632 \\u0662\\u0661 f 4\\u0631e \\u066032 \\u0630 d 1 \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 c]b[a",
"16: Visual RTL ==> Logical LTR" },
{ UBIDI_DEFAULT_RTL, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL,
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632",
"17: Logical DEFAULT_RTL ==> Visual LTR" },
#if 0
{ UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_LTR, UBIDI_VISUAL,
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"18: Logical RTL ==> Visual DEFAULT_LTR" },
#endif
{ UBIDI_DEFAULT_LTR, UBIDI_LOGICAL, UBIDI_LTR, UBIDI_VISUAL,
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4(\\u05d3 \\u05d2\\u05d1)\\u05d0 d 23\\u0660 \\u0630 e4\\u0631 f \\u0661\\u0662 \\u0632",
"a[b]c 1 \\u05d4)\\u05d3 \\u05d2\\u05d1(\\u05d0 d \\u0662\\u0663\\u0660 \\u0630 e\\u0664\\u0631 f \\u0661\\u0662 \\u0632",
"19: Logical DEFAULT_LTR ==> Visual LTR" },
#if 0
{ UBIDI_RTL, UBIDI_LOGICAL, UBIDI_DEFAULT_RTL, UBIDI_VISUAL,
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0)\\u05d1\\u05d2 \\u05d3(\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"c]b[a \\u05d0(\\u05d1\\u05d2 \\u05d3)\\u05d4 1 d \\u0630 \\u066032 e\\u06314 f \\u0632 \\u0662\\u0661",
"20: Logical RTL ==> Visual DEFAULT_RTL" }
#endif
};
/* Digit shaping options exercised for every test case. */
static const uint32_t digits[] = {
U_SHAPE_DIGITS_NOOP,
U_SHAPE_DIGITS_AN2EN,
U_SHAPE_DIGITS_EN2AN,
U_SHAPE_DIGITS_ALEN2AN_INIT_LR
};
/* Letter shaping options exercised for every test case. */
static const uint32_t letters[] = {
U_SHAPE_LETTERS_UNSHAPE,
U_SHAPE_LETTERS_SHAPE
};
uint32_t i, nTestCases = sizeof(testCases) / sizeof(testCases[0]);
uint32_t j, nDigits = sizeof(digits) / sizeof(digits[0]);
uint32_t k, nLetters = sizeof(letters) / sizeof(letters[0]);
UErrorCode errorCode = U_ZERO_ERROR;
UBiDiTransform *pTransform = ubiditransform_open(&errorCode);
u_unescape(inText, src, STR_CAPACITY);
/* Test various combinations of para level, order, mirroring, digits and letters */
for (i = 0; i < nTestCases; i++) {
/* First pass: mirroring on, no digit or letter shaping. */
ubiditransform_transform(pTransform, src, -1, dest, STR_CAPACITY,
testCases[i].inLevel, testCases[i].inOr,
testCases[i].outLevel, testCases[i].outOr,
UBIDI_MIRRORING_ON, 0, &errorCode);
verifyResultsForAllOpt(&testCases[i], src, dest,
testCases[i].pReorderAndMirror, U_SHAPE_DIGITS_NOOP,
U_SHAPE_LETTERS_NOOP);
/* Then: mirroring off, every digits x letters combination. */
for (j = 0; j < nDigits; j++) {
for (k = 0; k < nLetters; k++) {
/* Use here NULL for pTransform */
/* NOTE(review): presumably exercises the API's handling of a NULL
   transform object -- confirm against ubiditransform.c */
ubiditransform_transform(NULL, src, -1, dest, STR_CAPACITY,
testCases[i].inLevel, testCases[i].inOr,
testCases[i].outLevel, testCases[i].outOr,
UBIDI_MIRRORING_OFF, digits[j] | letters[k],
&errorCode);
verifyResultsForAllOpt(&testCases[i], src, dest,
testCases[i].pReorderNoMirror, digits[j], letters[k]);
}
}
}
ubiditransform_close(pTransform);
}
/*
 * Registers the BiDi transform tests of this file with the test tree
 * rooted at <code>root</code>, in the order listed in the table below.
 */
void
addBidiTransformTest(TestNode** root)
{
    static const struct {
        void (*fn)(void);
        const char *path;
    } entries[] = {
        { testAutoDirection, "complex/bidi-transform/TestAutoDirection" },
        { testAllTransformOptions, "complex/bidi-transform/TestAllTransformOptions" }
    };
    uint32_t idx;

    for (idx = 0; idx < sizeof(entries) / sizeof(entries[0]); idx++) {
        addTest(root, entries[idx].fn, entries[idx].path);
    }
}
#ifdef __cplusplus
}
#endif

View file

@ -231,6 +231,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="cbididat.c" />
<ClCompile Include="cbiditransformtst.c" />
<ClCompile Include="cbiditst.c" />
<ClCompile Include="cbiapts.c" />
<ClCompile Include="cbkittst.c" />

View file

@ -303,6 +303,9 @@
<ClCompile Include="spooftest.c">
<Filter>spoof</Filter>
</ClCompile>
<ClCompile Include="cbiditransformtst.c">
<Filter>bidi</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="cbiditst.h">
@ -411,4 +414,4 @@
<Filter>sprep &amp; idna</Filter>
</ClInclude>
</ItemGroup>
</Project>
</Project>