ICU-10142 implement Unicode 6.3 bidi algorithm additions: merge icu/branches/mati/uba63 into trunk, merge new test for BidiCharacterTest.txt into bidiconf.cpp

X-SVN-Rev: 34147
This commit is contained in:
Markus Scherer 2013-08-30 16:32:45 +00:00
parent b5da651126
commit 90b538ae3f
10 changed files with 101499 additions and 4376 deletions

File diff suppressed because it is too large Load diff

View file

@ -72,30 +72,27 @@ enum {
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
#define MASK_STRONG (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))
#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
/* explicit isolate codes */
#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)
/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)
#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
/*
* These types may be changed to "e",
@ -110,22 +107,33 @@ enum {
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
/*
* The following bit is ORed to the property of characters in paragraphs
* with contextual RTL direction when paraLevel is contextual.
* The following bit is ORed to the property of directional control
* characters which are ignored: unmatched PDF or PDI; LRx, RLx or FSI
* which would exceed the maximum explicit bidi level.
*/
#define CONTEXT_RTL 0x80
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
#define IGNORE_CC 0x40
#define PURE_DIRPROP(prop) ((prop)&~IGNORE_CC)
/*
* The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
* The following bit is used for the directional isolate status.
* Stack entries corresponding to isolate sequences are greater than ISOLATE.
*/
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))
#define ISOLATE 0x0100
U_CFUNC UBiDiLevel
ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
#define GET_PARALEVEL(ubidi, index) \
(UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
: (ubidi)->paraLevel)
((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
(ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
/* Paragraph type for multiple paragraph support ---------------------------- */
typedef int32_t Para;
/* number of paras entries allocated initially without malloc */
#define SIMPLE_PARAS_SIZE 10
/* number of isolate entries allocated initially without malloc */
#define SIMPLE_ISOLATES_SIZE 5
/* number of opening-bracket entries for paired brackets allocated initially without malloc */
#define SIMPLE_OPENINGS_SIZE 20
#define CR 0x000D
#define LF 0x000A
@ -138,6 +146,50 @@ enum {
RLM_AFTER=8
};
/*
 * Per-paragraph record; the UBiDi object keeps an array of these in its
 * paras field (simpleParas is the built-in storage for short text).
 */
struct Para {
    /* paragraph limit; GET_PARALEVEL() compares a text index against paras[0].limit */
    int32_t limit;
    /* presumably the resolved paragraph level for this paragraph -- confirm in getDirProps() */
    int32_t level;
};
typedef struct Para Para;
/*
 * Bit values that may be set in Opening.flags.
 * Each constant is the DIRPROP_FLAG() bit of the named bidi class.
 */
enum { /* flags for Opening.flags */
FOUND_L=DIRPROP_FLAG(L),
FOUND_R=DIRPROP_FLAG(R)
};
/*
 * Record kept for one opening bracket; BracketData.openings points to an
 * array of these entries.
 */
struct Opening {
    /* position of opening bracket */
    int32_t position;
    /* matching char, or -position of the closing bracket */
    int32_t match;
    /* position of the last strong char found before the opening bracket */
    int32_t lastStrongPos;
    /* bidi class of the last strong char before the opening bracket */
    DirProp lastStrong;
    /* bits for L or R/AL found within the pair (FOUND_L, FOUND_R) */
    uint16_t flags;
};
typedef struct Opening Opening;
/*
 * Record for one isolate run; BracketData.isoRuns is an array of these.
 * start and limit delimit the run's slice of the opening entries.
 */
struct IsoRun {
    /* position of last strong char found in this run */
    int32_t lastStrongPos;
    /* index of first opening entry for this run */
    uint16_t start;
    /* index after last opening entry for this run */
    uint16_t limit;
    /* level of this run */
    UBiDiLevel level;
    /* bidi class of last strong char found in this run */
    DirProp lastStrong;
};
typedef struct IsoRun IsoRun;
/*
 * Bookkeeping that ties the opening-bracket entries and the isolate-run
 * entries to the UBiDi object they belong to.
 */
struct BracketData {
    UBiDi *pBiDi;
    /* built-in array of opening entries which should be enough in most cases; no malloc() */
    Opening simpleOpenings[SIMPLE_OPENINGS_SIZE];
    /* pointer to current array of entries */
    Opening *openings;
    /* number of allocated entries */
    int32_t openingsSize;
    /* index of last used entry */
    int32_t isoRunLast;
    /* array of nested isolated sequence entries; can never exceed UBIDI_MAX_EXPLICIT_LEVEL
       + 1 for index 0, + 1 for before the first isolated sequence */
    IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
};
typedef struct BracketData BracketData;
/*
 * Entry of the isolates stack held by the UBiDi object (its isolates field;
 * simpleIsolates is the built-in storage for simple text).
 */
struct Isolate {
    /* NOTE(review): looks like a saved text position for the isolate -- confirm in resolveImplicitLevels() */
    int32_t start1;
    /* NOTE(review): presumably saved state values of the implicit-level processing -- confirm */
    int16_t stateImp;
    int16_t state;
};
typedef struct Isolate Isolate;
typedef struct Run {
int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */
visualLimit, /* last visual position of the run +1 */
@ -170,10 +222,14 @@ enum {
RLE_CHAR,
PDF_CHAR,
LRO_CHAR,
RLO_CHAR
RLO_CHAR,
LRI_CHAR=0x2066,
RLI_CHAR,
FSI_CHAR,
PDI_CHAR
};
#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)
#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
/* InsertPoints structure for noting where to put BiDi marks ---------------- */
@ -222,19 +278,21 @@ struct UBiDi {
int32_t resultLength;
/* memory sizes in bytes */
int32_t dirPropsSize, levelsSize, parasSize, runsSize;
int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
/* allocated memory */
DirProp *dirPropsMemory;
UBiDiLevel *levelsMemory;
Opening *openingsMemory;
Para *parasMemory;
Run *runsMemory;
Isolate *isolatesMemory;
/* indicators for whether memory may be allocated after ubidi_open() */
UBool mayAllocateText, mayAllocateRuns;
/* arrays with one value per text-character */
const DirProp *dirProps;
DirProp *dirProps;
UBiDiLevel *levels;
/* are we performing an approximation of the "inverse BiDi" algorithm? */
@ -285,11 +343,11 @@ struct UBiDi {
/* fields for paragraph handling */
int32_t paraCount; /* set in getDirProps() */
Para *paras; /* limits of paragraphs, filled in
ResolveExplicitLevels() or CheckExplicitLevels() */
/* filled in getDirProps() */
Para *paras;
/* for single paragraph text, we only need a tiny array of paras (no malloc()) */
Para simpleParas[1];
/* for relatively short text, we only need a tiny array of paras (no malloc()) */
Para simpleParas[SIMPLE_PARAS_SIZE];
/* fields for line reordering */
int32_t runCount; /* ==-1: runs not set up yet */
@ -298,6 +356,17 @@ struct UBiDi {
/* for non-mixed text, we only need a tiny array of runs (no malloc()) */
Run simpleRuns[1];
/* maximum or current nesting depth of isolate sequences */
/* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
nesting encountered.
Within resolveImplicitLevels(), this is the index of the current isolates
stack entry. */
int32_t isolateCount;
Isolate *isolates;
/* for simple text, have a small stack (no malloc()) */
Isolate simpleIsolates[SIMPLE_ISOLATES_SIZE];
/* for inverse Bidi with insertion of directional marks */
InsertPoints insertPoints;
@ -315,8 +384,10 @@ struct UBiDi {
/*
 * Union of the pointer types of the UBiDi "...Memory" fields.
 * The getInitial...Memory() macros cast the address of one of those fields
 * to BidiMemoryForAllocation * when they call ubidi_getMemory().
 */
typedef union {
DirProp *dirPropsMemory;
UBiDiLevel *levelsMemory;
Opening *openingsMemory;
Para *parasMemory;
Run *runsMemory;
Isolate *isolatesMemory;
} BidiMemoryForAllocation;
/* Macros for initial checks at function entry */
@ -382,6 +453,10 @@ ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAlloc
ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
TRUE, (length))
/*
 * Calls ubidi_getMemory() on the openingsMemory/openingsSize pair of pBiDi,
 * asking for (length)*sizeof(Opening) bytes. TRUE is passed as the third
 * argument (presumably the "may allocate" flag -- confirm against the
 * ubidi_getMemory() prototype).
 */
#define getInitialOpeningsMemory(pBiDi, length) \
ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
TRUE, (length)*sizeof(Opening))
/*
 * Calls ubidi_getMemory() on the parasMemory/parasSize pair of pBiDi,
 * asking for (length)*sizeof(Para) bytes. TRUE is passed as the third
 * argument (presumably the "may allocate" flag -- confirm against the
 * ubidi_getMemory() prototype).
 */
#define getInitialParasMemory(pBiDi, length) \
ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
TRUE, (length)*sizeof(Para))
@ -390,6 +465,10 @@ ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAlloc
ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
TRUE, (length)*sizeof(Run))
/*
 * Calls ubidi_getMemory() on the isolatesMemory/isolatesSize pair of pBiDi,
 * asking for (length)*sizeof(Isolate) bytes. TRUE is passed as the third
 * argument (presumably the "may allocate" flag -- confirm against the
 * ubidi_getMemory() prototype).
 */
#define getInitialIsolatesMemory(pBiDi, length) \
ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
TRUE, (length)*sizeof(Isolate))
#endif
#endif

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2011, International Business Machines
* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -32,9 +32,9 @@
* These functions deal with the aspects of potentially mixed-directional
* text in a single paragraph or in a line of a single paragraph
* which has already been processed according to
* the Unicode 3.0 BiDi algorithm as defined in
* http://www.unicode.org/unicode/reports/tr9/ , version 13,
* also described in The Unicode Standard, Version 4.0.1 .
* the Unicode 6.3 BiDi algorithm as defined in
* http://www.unicode.org/unicode/reports/tr9/ , version 28,
* also described in The Unicode Standard, Version 6.3.0 .
*
* This means that there is a UBiDi object with a levels
* and a dirProps array.
@ -105,12 +105,12 @@ setTrailingWSStart(UBiDi *pBiDi) {
level of B chars from 0 to paraLevel in ubidi_getLevels when
orderParagraphsLTR==TRUE.
*/
if(NO_CONTEXT_RTL(dirProps[start-1])==B) {
if(dirProps[start-1]==B) {
pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
return;
}
/* go backwards across all WS, BN, explicit codes */
while(start>0 && DIRPROP_FLAG_NC(dirProps[start-1])&MASK_WS) {
while(start>0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps[start-1]))&MASK_WS) {
--start;
}

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2012, International Business Machines
* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -387,7 +387,7 @@ typedef uint8_t UBiDiLevel;
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
* @stable ICU 2.0
*/
#define UBIDI_MAX_EXPLICIT_LEVEL 61
#define UBIDI_MAX_EXPLICIT_LEVEL 125
/** Bit flag for level input.
* Overrides directional properties.

View file

@ -1,5 +1,5 @@
/********************************************************************
* COPYRIGHT:
* COPYRIGHT:
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -157,62 +157,73 @@ testVisualMap8[]={
static const uint8_t
testText9[]={
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE,
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE,
AN, RLO, NSM, LRE, PDF, RLE, ES, EN, ON
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, /* 15 entries */
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, /* 15 entries */
AN, RLO, NSM, LRE, PDF, RLE, ES, EN, ON /* 9 entries */
};
static const UBiDiLevel
testLevels9[]={
62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61
126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, /* 15 entries */
126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, /* 15 entries */
126, 125, 125, 125, 125, 125, 125, 125, 125 /* 9 entries */
};
static const uint8_t
testVisualMap9[]={
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 7, 6, 5, 4, 3, 2, 1, 0
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, /* 15 entries */
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, /* 15 entries */
38, 7, 6, 5, 4, 3, 2, 1, 0 /* 9 entries */
};
static const uint8_t
testText10[]={
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE,
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE,
LRE, BN, CS, RLO, S, PDF, EN, LRO, AN, ES
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, /* 15 entries */
LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, LRE, /* 15 entries */
LRE, BN, CS, RLO, S, PDF, EN, LRO, AN, ES /* 10 entries */
};
static const UBiDiLevel
testLevels10[]={
60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 0, 0, 62, 62, 62, 62, 60
124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, /* 15 entries */
124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, /* 15 entries */
124, 124, 124, 64, 64, 124, 124, 126, 126, 124 /* 10 entries */
};
static const uint8_t
testVisualMap10[]={
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 15 entries */
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, /* 15 entries */
30, 31, 32, 33, 34, 35, 36, 37, 38, 39 /* 10 entries */
};
static const uint8_t
testText11[]={
S, WS, NSM, RLE, WS, L, L, L, WS, LRO, WS, R, R, R, WS, RLO, WS, L, L,
L, WS, LRE, WS, R, R, R, WS, PDF, WS, L, L, L, WS, PDF, WS,
AL, AL, AL, WS, PDF, WS, L, L, L, WS, PDF, WS, L, L, L, WS, PDF,
ON, PDF, BN, BN, ON, PDF
S, WS, NSM, RLE, WS, L, L, L, WS, LRO, WS, R, R, R, WS, RLO, WS, L, L, L, /* 20 entries */
WS, LRE, WS, R, R, R, WS, PDF, WS, L, L, L, WS, PDF, WS, AL, AL, AL, WS, PDF, /* 20 entries */
WS, L, L, L, WS, PDF, WS, L, L, L, WS, PDF, ON, PDF, BN, BN, ON, PDF /* 18 entries */
};
static const UBiDiLevel
testLevels11[]={
0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, /* 20 entries */
3, 4, 4, 5, 5, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, /* 20 entries */
2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* 18 entries */
};
static const uint8_t
testVisualMap11[]={
0, 1, 2, 44, 43, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 31, 30, 29, 28, 27, 26, 20, 21, 24, 23, 22, 25, 19, 18, 17, 16, 15, 14, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 3, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57
0, 1, 2, 44, 43, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 31, 30, 29, 28, 27, /* 20 entries */
26, 20, 21, 24, 23, 22, 25, 19, 18, 17, 16, 15, 14, 32, 33, 34, 35, 36, 37, 38, /* 20 entries */
39, 40, 41, 42, 3, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 /* 18 entries */
};
static const uint8_t
testText12[]={
NSM, WS, L, L, L, L, L, L, L, WS, L, L, L, L, WS,
R, R, R, R, R, WS, L, L, L, L, L, L, L, WS, WS, AL,
AL, AL, AL, WS, EN, EN, ES, EN, EN, CS, S, EN, EN, CS, WS,
EN, EN, WS, AL, AL, AL, AL, AL, B, L, L, L, L, L, L,
NSM, WS, L, L, L, L, L, L, L, WS, L, L, L, L, WS,
R, R, R, R, R, WS, L, L, L, L, L, L, L, WS, WS, AL,
AL, AL, AL, WS, EN, EN, ES, EN, EN, CS, S, EN, EN, CS, WS,
EN, EN, WS, AL, AL, AL, AL, AL, B, L, L, L, L, L, L,
L, L, WS, AN, AN, CS, AN, AN, WS
};
@ -387,11 +398,11 @@ tests[]={
{testText8, ARRAY_LENGTH(testText8), UBIDI_DEFAULT_LTR, -1, -1,
UBIDI_RTL, 1,
testLevels8, testVisualMap8},
{testText9, ARRAY_LENGTH(testText9), UBIDI_DEFAULT_LTR, -1, -1,
UBIDI_MIXED, 0,
{testText9, ARRAY_LENGTH(testText9), 64, -1, -1,
UBIDI_MIXED, 64,
testLevels9, testVisualMap9},
{testText10, ARRAY_LENGTH(testText10), UBIDI_DEFAULT_LTR, -1, -1,
UBIDI_MIXED, 0,
{testText10, ARRAY_LENGTH(testText10), 64, -1, -1,
UBIDI_MIXED, 64,
testLevels10, testVisualMap10},
{testText11, ARRAY_LENGTH(testText11), UBIDI_DEFAULT_LTR, -1, -1,
UBIDI_MIXED, 0,
@ -429,7 +440,7 @@ tests[]={
{testText17, ARRAY_LENGTH(testText17), UBIDI_LTR, 0, 8,
UBIDI_MIXED, 0,
testLevels22, testVisualMap21},
{testTextXX, ARRAY_LENGTH(testTextXX), UBIDI_RTL, -1, -1,
{testTextXX, ARRAY_LENGTH(testTextXX), UBIDI_RTL, -1, -1,
UBIDI_MIXED, 1, testLevelsXX, testVisualMapXX}
};

View file

@ -1,9 +1,9 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2012, International Business Machines Corporation and
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/* file name: cbiditst.cpp
/* file name: cbiditst.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -650,8 +650,8 @@ testReorder(void) {
static const char* const visualOrder[]={
"del(CK)add(&.C.K)",
"del(TVDQ) add(LDVB)",
"del(QP)add(&.U(T(.S.R",
"del(VL)add(&.V.L (.V.L",
"del(QP)add(S.R.)&.U(T", /* updated for Unicode 6.3 matching brackets */
"del(VL)add(V.L.) &.V.L", /* updated for Unicode 6.3 matching brackets */
"day 0 RVRHDPD R dayabbr",
"day 1 ADHDPHPD H dayabbr",
"day 2 ADNELBPD L dayabbr",
@ -664,8 +664,8 @@ testReorder(void) {
static const char* const visualOrder1[]={
")K.C.&(dda)KC(led",
")BVDL(dda )QDVT(led",
"R.S.(T(U.&(dda)PQ(led",
"L.V.( L.V.&(dda)LV(led",
"T(U.&).R.S(dda)PQ(led", /* updated for Unicode 6.3 matching brackets */
"L.V.& ).L.V(dda)LV(led", /* updated for Unicode 6.3 matching brackets */
"rbbayad R DPDHRVR 0 yad",
"rbbayad H DPHPDHDA 1 yad",
"rbbayad L DPBLENDA 2 yad",
@ -898,86 +898,86 @@ static void
testReorderArabicMathSymbols(void) {
static const UChar logicalOrder[][MAXLEN]={
/* Arabic mathematical Symbols 0x1EE00 - 0x1EE1B */
{0xD83B, 0xDE00, 0xD83B, 0xDE01, 0xD83B, 0xDE02, 0xD83B, 0xDE03, 0x20,
0xD83B, 0xDE24, 0xD83B, 0xDE05, 0xD83B, 0xDE06, 0x20,
0xD83B, 0xDE07, 0xD83B, 0xDE08, 0xD83B, 0xDE09, 0x20,
0xD83B, 0xDE0A, 0xD83B, 0xDE0B, 0xD83B, 0xDE0C, 0xD83B, 0xDE0D, 0x20,
0xD83B, 0xDE0E, 0xD83B, 0xDE0F, 0xD83B, 0xDE10, 0xD83B, 0xDE11, 0x20,
0xD83B, 0xDE12, 0xD83B, 0xDE13, 0xD83B, 0xDE14, 0xD83B, 0xDE15, 0x20,
0xD83B, 0xDE16, 0xD83B, 0xDE17, 0xD83B, 0xDE18, 0x20,
{0xD83B, 0xDE00, 0xD83B, 0xDE01, 0xD83B, 0xDE02, 0xD83B, 0xDE03, 0x20,
0xD83B, 0xDE24, 0xD83B, 0xDE05, 0xD83B, 0xDE06, 0x20,
0xD83B, 0xDE07, 0xD83B, 0xDE08, 0xD83B, 0xDE09, 0x20,
0xD83B, 0xDE0A, 0xD83B, 0xDE0B, 0xD83B, 0xDE0C, 0xD83B, 0xDE0D, 0x20,
0xD83B, 0xDE0E, 0xD83B, 0xDE0F, 0xD83B, 0xDE10, 0xD83B, 0xDE11, 0x20,
0xD83B, 0xDE12, 0xD83B, 0xDE13, 0xD83B, 0xDE14, 0xD83B, 0xDE15, 0x20,
0xD83B, 0xDE16, 0xD83B, 0xDE17, 0xD83B, 0xDE18, 0x20,
0xD83B, 0xDE19, 0xD83B, 0xDE1A, 0xD83B, 0xDE1B},
/* Arabic mathematical Symbols - Looped Symbols, 0x1EE80 - 0x1EE9B */
{0xD83B, 0xDE80, 0xD83B, 0xDE81, 0xD83B, 0xDE82, 0xD83B, 0xDE83, 0x20,
0xD83B, 0xDE84, 0xD83B, 0xDE85, 0xD83B, 0xDE86, 0x20,
0xD83B, 0xDE87, 0xD83B, 0xDE88, 0xD83B, 0xDE89, 0x20,
0xD83B, 0xDE8B, 0xD83B, 0xDE8C, 0xD83B, 0xDE8D, 0x20,
0xD83B, 0xDE8E, 0xD83B, 0xDE8F, 0xD83B, 0xDE90, 0xD83B, 0xDE91, 0x20,
0xD83B, 0xDE92, 0xD83B, 0xDE93, 0xD83B, 0xDE94, 0xD83B, 0xDE95, 0x20,
0xD83B, 0xDE96, 0xD83B, 0xDE97, 0xD83B, 0xDE98, 0x20,
{0xD83B, 0xDE80, 0xD83B, 0xDE81, 0xD83B, 0xDE82, 0xD83B, 0xDE83, 0x20,
0xD83B, 0xDE84, 0xD83B, 0xDE85, 0xD83B, 0xDE86, 0x20,
0xD83B, 0xDE87, 0xD83B, 0xDE88, 0xD83B, 0xDE89, 0x20,
0xD83B, 0xDE8B, 0xD83B, 0xDE8C, 0xD83B, 0xDE8D, 0x20,
0xD83B, 0xDE8E, 0xD83B, 0xDE8F, 0xD83B, 0xDE90, 0xD83B, 0xDE91, 0x20,
0xD83B, 0xDE92, 0xD83B, 0xDE93, 0xD83B, 0xDE94, 0xD83B, 0xDE95, 0x20,
0xD83B, 0xDE96, 0xD83B, 0xDE97, 0xD83B, 0xDE98, 0x20,
0xD83B, 0xDE99, 0xD83B, 0xDE9A, 0xD83B, 0xDE9B},
/* Arabic mathematical Symbols - Double-struck Symbols, 0x1EEA1 - 0x1EEBB */
{0xD83B, 0xDEA1, 0xD83B, 0xDEA2, 0xD83B, 0xDEA3, 0x20,
0xD83B, 0xDEA5, 0xD83B, 0xDEA6, 0x20,
0xD83B, 0xDEA7, 0xD83B, 0xDEA8, 0xD83B, 0xDEA9, 0x20,
0xD83B, 0xDEAB, 0xD83B, 0xDEAC, 0xD83B, 0xDEAD, 0x20,
0xD83B, 0xDEAE, 0xD83B, 0xDEAF, 0xD83B, 0xDEB0, 0xD83B, 0xDEB1, 0x20,
0xD83B, 0xDEB2, 0xD83B, 0xDEB3, 0xD83B, 0xDEB4, 0xD83B, 0xDEB5, 0x20,
0xD83B, 0xDEB6, 0xD83B, 0xDEB7, 0xD83B, 0xDEB8, 0x20,
{0xD83B, 0xDEA1, 0xD83B, 0xDEA2, 0xD83B, 0xDEA3, 0x20,
0xD83B, 0xDEA5, 0xD83B, 0xDEA6, 0x20,
0xD83B, 0xDEA7, 0xD83B, 0xDEA8, 0xD83B, 0xDEA9, 0x20,
0xD83B, 0xDEAB, 0xD83B, 0xDEAC, 0xD83B, 0xDEAD, 0x20,
0xD83B, 0xDEAE, 0xD83B, 0xDEAF, 0xD83B, 0xDEB0, 0xD83B, 0xDEB1, 0x20,
0xD83B, 0xDEB2, 0xD83B, 0xDEB3, 0xD83B, 0xDEB4, 0xD83B, 0xDEB5, 0x20,
0xD83B, 0xDEB6, 0xD83B, 0xDEB7, 0xD83B, 0xDEB8, 0x20,
0xD83B, 0xDEB9, 0xD83B, 0xDEBA, 0xD83B, 0xDEBB},
/* Arabic mathematical Symbols - Initial Symbols, 0x1EE21 - 0x1EE3B */
{0xD83B, 0xDE21, 0xD83B, 0xDE22, 0x20,
0xD83B, 0xDE27, 0xD83B, 0xDE29, 0x20,
0xD83B, 0xDE2A, 0xD83B, 0xDE2B, 0xD83B, 0xDE2C, 0xD83B, 0xDE2D, 0x20,
0xD83B, 0xDE2E, 0xD83B, 0xDE2F, 0xD83B, 0xDE30, 0xD83B, 0xDE31, 0x20,
0xD83B, 0xDE32, 0xD83B, 0xDE34, 0xD83B, 0xDE35, 0x20,
0xD83B, 0xDE36, 0xD83B, 0xDE37, 0x20,
{0xD83B, 0xDE21, 0xD83B, 0xDE22, 0x20,
0xD83B, 0xDE27, 0xD83B, 0xDE29, 0x20,
0xD83B, 0xDE2A, 0xD83B, 0xDE2B, 0xD83B, 0xDE2C, 0xD83B, 0xDE2D, 0x20,
0xD83B, 0xDE2E, 0xD83B, 0xDE2F, 0xD83B, 0xDE30, 0xD83B, 0xDE31, 0x20,
0xD83B, 0xDE32, 0xD83B, 0xDE34, 0xD83B, 0xDE35, 0x20,
0xD83B, 0xDE36, 0xD83B, 0xDE37, 0x20,
0xD83B, 0xDE39, 0xD83B, 0xDE3B},
/* Arabic mathematical Symbols - Tailed Symbols */
{0xD83B, 0xDE42, 0xD83B, 0xDE47, 0xD83B, 0xDE49, 0xD83B, 0xDE4B, 0x20,
0xD83B, 0xDE4D, 0xD83B, 0xDE4E, 0xD83B, 0xDE4F, 0x20,
0xD83B, 0xDE51, 0xD83B, 0xDE52, 0xD83B, 0xDE54, 0xD83B, 0xDE57, 0x20,
{0xD83B, 0xDE42, 0xD83B, 0xDE47, 0xD83B, 0xDE49, 0xD83B, 0xDE4B, 0x20,
0xD83B, 0xDE4D, 0xD83B, 0xDE4E, 0xD83B, 0xDE4F, 0x20,
0xD83B, 0xDE51, 0xD83B, 0xDE52, 0xD83B, 0xDE54, 0xD83B, 0xDE57, 0x20,
0xD83B, 0xDE59, 0xD83B, 0xDE5B, 0xD83B, 0xDE5D, 0xD83B, 0xDE5F}
};
static const UChar visualOrder[][MAXLEN]={
/* Arabic mathematical Symbols 0x1EE00 - 0x1EE1B */
{0xD83B, 0xDE1B, 0xD83B, 0xDE1A, 0xD83B, 0xDE19, 0x20,
0xD83B, 0xDE18, 0xD83B, 0xDE17, 0xD83B, 0xDE16, 0x20,
{0xD83B, 0xDE1B, 0xD83B, 0xDE1A, 0xD83B, 0xDE19, 0x20,
0xD83B, 0xDE18, 0xD83B, 0xDE17, 0xD83B, 0xDE16, 0x20,
0xD83B, 0xDE15, 0xD83B, 0xDE14, 0xD83B, 0xDE13, 0xD83B, 0xDE12, 0x20,
0xD83B, 0xDE11, 0xD83B, 0xDE10, 0xD83B, 0xDE0F, 0xD83B, 0xDE0E, 0x20,
0xD83B, 0xDE0D, 0xD83B, 0xDE0C, 0xD83B, 0xDE0B, 0xD83B, 0xDE0A, 0x20,
0xD83B, 0xDE09, 0xD83B, 0xDE08, 0xD83B, 0xDE07, 0x20,
0xD83B, 0xDE06, 0xD83B, 0xDE05, 0xD83B, 0xDE24, 0x20,
0xD83B, 0xDE09, 0xD83B, 0xDE08, 0xD83B, 0xDE07, 0x20,
0xD83B, 0xDE06, 0xD83B, 0xDE05, 0xD83B, 0xDE24, 0x20,
0xD83B, 0xDE03, 0xD83B, 0xDE02, 0xD83B, 0xDE01, 0xD83B, 0xDE00},
/* Arabic mathematical Symbols - Looped Symbols, 0x1EE80 - 0x1EE9B */
{0xD83B, 0xDE9B, 0xD83B, 0xDE9A, 0xD83B, 0xDE99, 0x20,
0xD83B, 0xDE98, 0xD83B, 0xDE97, 0xD83B, 0xDE96, 0x20,
{0xD83B, 0xDE9B, 0xD83B, 0xDE9A, 0xD83B, 0xDE99, 0x20,
0xD83B, 0xDE98, 0xD83B, 0xDE97, 0xD83B, 0xDE96, 0x20,
0xD83B, 0xDE95, 0xD83B, 0xDE94, 0xD83B, 0xDE93, 0xD83B, 0xDE92, 0x20,
0xD83B, 0xDE91, 0xD83B, 0xDE90, 0xD83B, 0xDE8F, 0xD83B, 0xDE8E, 0x20,
0xD83B, 0xDE8D, 0xD83B, 0xDE8C, 0xD83B, 0xDE8B, 0x20,
0xD83B, 0xDE89, 0xD83B, 0xDE88, 0xD83B, 0xDE87, 0x20,
0xD83B, 0xDE86, 0xD83B, 0xDE85, 0xD83B, 0xDE84, 0x20,
0xD83B, 0xDE8D, 0xD83B, 0xDE8C, 0xD83B, 0xDE8B, 0x20,
0xD83B, 0xDE89, 0xD83B, 0xDE88, 0xD83B, 0xDE87, 0x20,
0xD83B, 0xDE86, 0xD83B, 0xDE85, 0xD83B, 0xDE84, 0x20,
0xD83B, 0xDE83, 0xD83B, 0xDE82, 0xD83B, 0xDE81, 0xD83B, 0xDE80},
/* Arabic mathematical Symbols - Double-struck Symbols, 0x1EEA1 - 0x1EEBB */
{0xD83B, 0xDEBB, 0xD83B, 0xDEBA, 0xD83B, 0xDEB9, 0x20,
0xD83B, 0xDEB8, 0xD83B, 0xDEB7, 0xD83B, 0xDEB6, 0x20,
{0xD83B, 0xDEBB, 0xD83B, 0xDEBA, 0xD83B, 0xDEB9, 0x20,
0xD83B, 0xDEB8, 0xD83B, 0xDEB7, 0xD83B, 0xDEB6, 0x20,
0xD83B, 0xDEB5, 0xD83B, 0xDEB4, 0xD83B, 0xDEB3, 0xD83B, 0xDEB2, 0x20,
0xD83B, 0xDEB1, 0xD83B, 0xDEB0, 0xD83B, 0xDEAF, 0xD83B, 0xDEAE, 0x20,
0xD83B, 0xDEAD, 0xD83B, 0xDEAC, 0xD83B, 0xDEAB, 0x20,
0xD83B, 0xDEA9, 0xD83B, 0xDEA8, 0xD83B, 0xDEA7, 0x20,
0xD83B, 0xDEA6, 0xD83B, 0xDEA5, 0x20,
0xD83B, 0xDEAD, 0xD83B, 0xDEAC, 0xD83B, 0xDEAB, 0x20,
0xD83B, 0xDEA9, 0xD83B, 0xDEA8, 0xD83B, 0xDEA7, 0x20,
0xD83B, 0xDEA6, 0xD83B, 0xDEA5, 0x20,
0xD83B, 0xDEA3, 0xD83B, 0xDEA2, 0xD83B, 0xDEA1},
/* Arabic mathematical Symbols - Initial Symbols, 0x1EE21 - 0x1EE3B */
{0xD83B, 0xDE3B, 0xD83B, 0xDE39, 0x20,
0xD83B, 0xDE37, 0xD83B, 0xDE36, 0x20,
0xD83B, 0xDE35, 0xD83B, 0xDE34, 0xD83B, 0xDE32, 0x20,
{0xD83B, 0xDE3B, 0xD83B, 0xDE39, 0x20,
0xD83B, 0xDE37, 0xD83B, 0xDE36, 0x20,
0xD83B, 0xDE35, 0xD83B, 0xDE34, 0xD83B, 0xDE32, 0x20,
0xD83B, 0xDE31, 0xD83B, 0xDE30, 0xD83B, 0xDE2F, 0xD83B, 0xDE2E, 0x20,
0xD83B, 0xDE2D, 0xD83B, 0xDE2C, 0xD83B, 0xDE2B, 0xD83B, 0xDE2A, 0x20,
0xD83B, 0xDE29, 0xD83B, 0xDE27, 0x20,
0xD83B, 0xDE29, 0xD83B, 0xDE27, 0x20,
0xD83B, 0xDE22, 0xD83B, 0xDE21},
/* Arabic mathematical Symbols - Tailed Symbols */
{0xD83B, 0xDE5F, 0xD83B, 0xDE5D, 0xD83B, 0xDE5B, 0xD83B, 0xDE59, 0x20,
0xD83B, 0xDE57, 0xD83B, 0xDE54, 0xD83B, 0xDE52, 0xD83B, 0xDE51, 0x20,
0xD83B, 0xDE4F, 0xD83B, 0xDE4E, 0xD83B, 0xDE4D, 0x20,
0xD83B, 0xDE4F, 0xD83B, 0xDE4E, 0xD83B, 0xDE4D, 0x20,
0xD83B, 0xDE4B, 0xD83B, 0xDE49, 0xD83B, 0xDE47, 0xD83B, 0xDE42}
};
char formatChars[MAXLEN];
@ -1666,8 +1666,8 @@ static void doMisc(void) {
srcLen = u_unescape("A\\u202a\\u05d0\\u202aC\\u202c\\u05d1\\u202cE", src, MAXLEN);
ubidi_setPara(bidi, src, srcLen, UBIDI_MAX_EXPLICIT_LEVEL - 1, NULL, &errorCode);
level = ubidi_getLevelAt(bidi, 2);
if (level != 61) {
log_err("\nWrong level at index 2\n, should be 61, got %d\n", level);
if (level != UBIDI_MAX_EXPLICIT_LEVEL) {
log_err("\nWrong level at index 2\n, should be %d, got %d\n", UBIDI_MAX_EXPLICIT_LEVEL, level);
}
RETURN_IF_BAD_ERRCODE("#24#");
@ -2910,7 +2910,7 @@ doTailTest(void) {
UChar dst[3] = { 0x0000, 0x0000,0 };
int32_t length;
UErrorCode status;
log_verbose("SRC: U+%04X U+%04X\n", src[0],src[1]);
log_verbose("Trying old tail\n");
@ -2918,7 +2918,7 @@ doTailTest(void) {
length = u_shapeArabic(src, -1, dst, LENGTHOF(dst),
U_SHAPE_LETTERS_SHAPE|U_SHAPE_SEEN_TWOCELL_NEAR, &status);
if(U_FAILURE(status)) {
log_err("Fail: status %s\n", u_errorName(status));
log_err("Fail: status %s\n", u_errorName(status));
} else if(length!=2) {
log_err("Fail: len %d expected 3\n", length);
} else if(u_strncmp(dst,dst_old,LENGTHOF(dst))) {
@ -2935,7 +2935,7 @@ doTailTest(void) {
length = u_shapeArabic(src, -1, dst, LENGTHOF(dst),
U_SHAPE_LETTERS_SHAPE|U_SHAPE_SEEN_TWOCELL_NEAR|U_SHAPE_TAIL_NEW_UNICODE, &status);
if(U_FAILURE(status)) {
log_err("Fail: status %s\n", u_errorName(status));
log_err("Fail: status %s\n", u_errorName(status));
} else if(length!=2) {
log_err("Fail: len %d expected 3\n", length);
} else if(u_strncmp(dst,dst_new,LENGTHOF(dst))) {
@ -3028,21 +3028,21 @@ doArabicShapingTestForBug8703(void) {
letters_source1[]={
0x0634,0x0651,0x0645,0x0652,0x0633
}, letters_source2[]={
0x0634,0x0651,0x0645,0x0652,0x0633
0x0634,0x0651,0x0645,0x0652,0x0633
}, letters_source3[]={
0x0634,0x0651,0x0645,0x0652,0x0633
}, letters_source4[]={
0x0634,0x0651,0x0645,0x0652,0x0633
0x0634,0x0651,0x0645,0x0652,0x0633
}, letters_source5[]={
0x0633,0x0652,0x0645,0x0651,0x0634
}, letters_source6[]={
0x0633,0x0652,0x0645,0x0651,0x0634
0x0633,0x0652,0x0645,0x0651,0x0634
}, letters_source7[]={
0x0633,0x0652,0x0645,0x0651,0x0634
}, letters_source8[]={
0x0633,0x0652,0x0645,0x0651,0x0634
}, letters_dest1[]={
0x0020,0xFEB7,0xFE7D,0xFEE4,0xFEB2
0x0020,0xFEB7,0xFE7D,0xFEE4,0xFEB2
}, letters_dest2[]={
0xFEB7,0xFE7D,0xFEE4,0xFEB2,0x0020
}, letters_dest3[]={
@ -3050,7 +3050,7 @@ doArabicShapingTestForBug8703(void) {
}, letters_dest4[]={
0xFEB7,0xFE7D,0xFEE4,0x0640,0xFEB2
}, letters_dest5[]={
0x0020,0xFEB2,0xFEE4,0xFE7D,0xFEB7
0x0020,0xFEB2,0xFEE4,0xFE7D,0xFEB7
}, letters_dest6[]={
0xFEB2,0xFEE4,0xFE7D,0xFEB7,0x0020
}, letters_dest7[]={
@ -3156,7 +3156,7 @@ static void
doArabicShapingTestForBug9024(void) {
static const UChar
letters_source1[]={ /* Arabic mathematical Symbols 0x1EE00 - 0x1EE1B */
0xD83B, 0xDE00, 0xD83B, 0xDE01, 0xD83B, 0xDE02, 0xD83B, 0xDE03, 0x20,
0xD83B, 0xDE00, 0xD83B, 0xDE01, 0xD83B, 0xDE02, 0xD83B, 0xDE03, 0x20,
0xD83B, 0xDE24, 0xD83B, 0xDE05, 0xD83B, 0xDE06, 0x20,
0xD83B, 0xDE07, 0xD83B, 0xDE08, 0xD83B, 0xDE09, 0x20,
0xD83B, 0xDE0A, 0xD83B, 0xDE0B, 0xD83B, 0xDE0C, 0xD83B, 0xDE0D, 0x20,
@ -3165,7 +3165,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDE16, 0xD83B, 0xDE17, 0xD83B, 0xDE18, 0x20,
0xD83B, 0xDE19, 0xD83B, 0xDE1A, 0xD83B, 0xDE1B
}, letters_source2[]={/* Arabic mathematical Symbols - Looped Symbols, 0x1EE80 - 0x1EE9B */
0xD83B, 0xDE80, 0xD83B, 0xDE81, 0xD83B, 0xDE82, 0xD83B, 0xDE83, 0x20,
0xD83B, 0xDE80, 0xD83B, 0xDE81, 0xD83B, 0xDE82, 0xD83B, 0xDE83, 0x20,
0xD83B, 0xDE84, 0xD83B, 0xDE85, 0xD83B, 0xDE86, 0x20,
0xD83B, 0xDE87, 0xD83B, 0xDE88, 0xD83B, 0xDE89, 0x20,
0xD83B, 0xDE8B, 0xD83B, 0xDE8C, 0xD83B, 0xDE8D, 0x20,
@ -3174,7 +3174,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDE96, 0xD83B, 0xDE97, 0xD83B, 0xDE98, 0x20,
0xD83B, 0xDE99, 0xD83B, 0xDE9A, 0xD83B, 0xDE9B
}, letters_source3[]={/* Arabic mathematical Symbols - Double-struck Symbols, 0x1EEA1 - 0x1EEBB */
0xD83B, 0xDEA1, 0xD83B, 0xDEA2, 0xD83B, 0xDEA3, 0x20,
0xD83B, 0xDEA1, 0xD83B, 0xDEA2, 0xD83B, 0xDEA3, 0x20,
0xD83B, 0xDEA5, 0xD83B, 0xDEA6, 0x20,
0xD83B, 0xDEA7, 0xD83B, 0xDEA8, 0xD83B, 0xDEA9, 0x20,
0xD83B, 0xDEAB, 0xD83B, 0xDEAC, 0xD83B, 0xDEAD, 0x20,
@ -3183,7 +3183,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDEB6, 0xD83B, 0xDEB7, 0xD83B, 0xDEB8, 0x20,
0xD83B, 0xDEB9, 0xD83B, 0xDEBA, 0xD83B, 0xDEBB
}, letters_source4[]={/* Arabic mathematical Symbols - Initial Symbols, 0x1EE21 - 0x1EE3B */
0xD83B, 0xDE21, 0xD83B, 0xDE22, 0x20,
0xD83B, 0xDE21, 0xD83B, 0xDE22, 0x20,
0xD83B, 0xDE27, 0xD83B, 0xDE29, 0x20,
0xD83B, 0xDE2A, 0xD83B, 0xDE2B, 0xD83B, 0xDE2C, 0xD83B, 0xDE2D, 0x20,
0xD83B, 0xDE2E, 0xD83B, 0xDE2F, 0xD83B, 0xDE30, 0xD83B, 0xDE31, 0x20,
@ -3191,14 +3191,14 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDE36, 0xD83B, 0xDE37, 0x20,
0xD83B, 0xDE39, 0xD83B, 0xDE3B
}, letters_source5[]={/* Arabic mathematical Symbols - Tailed Symbols */
0xD83B, 0xDE42, 0xD83B, 0xDE47, 0xD83B, 0xDE49, 0xD83B, 0xDE4B, 0x20,
0xD83B, 0xDE42, 0xD83B, 0xDE47, 0xD83B, 0xDE49, 0xD83B, 0xDE4B, 0x20,
0xD83B, 0xDE4D, 0xD83B, 0xDE4E, 0xD83B, 0xDE4F, 0x20,
0xD83B, 0xDE51, 0xD83B, 0xDE52, 0xD83B, 0xDE54, 0xD83B, 0xDE57, 0x20,
0xD83B, 0xDE59, 0xD83B, 0xDE5B, 0xD83B, 0xDE5D, 0xD83B, 0xDE5F
}, letters_source6[]={/* Arabic mathematical Symbols - Stretched Symbols with 06 range */
0xD83B, 0xDE21, 0x0633, 0xD83B, 0xDE62, 0x0647
}, letters_dest1[]={
0xD83B, 0xDE00, 0xD83B, 0xDE01, 0xD83B, 0xDE02, 0xD83B, 0xDE03, 0x20,
0xD83B, 0xDE00, 0xD83B, 0xDE01, 0xD83B, 0xDE02, 0xD83B, 0xDE03, 0x20,
0xD83B, 0xDE24, 0xD83B, 0xDE05, 0xD83B, 0xDE06, 0x20,
0xD83B, 0xDE07, 0xD83B, 0xDE08, 0xD83B, 0xDE09, 0x20,
0xD83B, 0xDE0A, 0xD83B, 0xDE0B, 0xD83B, 0xDE0C, 0xD83B, 0xDE0D, 0x20,
@ -3207,7 +3207,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDE16, 0xD83B, 0xDE17, 0xD83B, 0xDE18, 0x20,
0xD83B, 0xDE19, 0xD83B, 0xDE1A, 0xD83B, 0xDE1B
}, letters_dest2[]={
0xD83B, 0xDE80, 0xD83B, 0xDE81, 0xD83B, 0xDE82, 0xD83B, 0xDE83, 0x20,
0xD83B, 0xDE80, 0xD83B, 0xDE81, 0xD83B, 0xDE82, 0xD83B, 0xDE83, 0x20,
0xD83B, 0xDE84, 0xD83B, 0xDE85, 0xD83B, 0xDE86, 0x20,
0xD83B, 0xDE87, 0xD83B, 0xDE88, 0xD83B, 0xDE89, 0x20,
0xD83B, 0xDE8B, 0xD83B, 0xDE8C, 0xD83B, 0xDE8D, 0x20,
@ -3216,7 +3216,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDE96, 0xD83B, 0xDE97, 0xD83B, 0xDE98, 0x20,
0xD83B, 0xDE99, 0xD83B, 0xDE9A, 0xD83B, 0xDE9B
}, letters_dest3[]={
0xD83B, 0xDEA1, 0xD83B, 0xDEA2, 0xD83B, 0xDEA3, 0x20,
0xD83B, 0xDEA1, 0xD83B, 0xDEA2, 0xD83B, 0xDEA3, 0x20,
0xD83B, 0xDEA5, 0xD83B, 0xDEA6, 0x20,
0xD83B, 0xDEA7, 0xD83B, 0xDEA8, 0xD83B, 0xDEA9, 0x20,
0xD83B, 0xDEAB, 0xD83B, 0xDEAC, 0xD83B, 0xDEAD, 0x20,
@ -3225,7 +3225,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDEB6, 0xD83B, 0xDEB7, 0xD83B, 0xDEB8, 0x20,
0xD83B, 0xDEB9, 0xD83B, 0xDEBA, 0xD83B, 0xDEBB
}, letters_dest4[]={
0xD83B, 0xDE21, 0xD83B, 0xDE22, 0x20,
0xD83B, 0xDE21, 0xD83B, 0xDE22, 0x20,
0xD83B, 0xDE27, 0xD83B, 0xDE29, 0x20,
0xD83B, 0xDE2A, 0xD83B, 0xDE2B, 0xD83B, 0xDE2C, 0xD83B, 0xDE2D, 0x20,
0xD83B, 0xDE2E, 0xD83B, 0xDE2F, 0xD83B, 0xDE30, 0xD83B, 0xDE31, 0x20,
@ -3233,7 +3233,7 @@ doArabicShapingTestForBug9024(void) {
0xD83B, 0xDE36, 0xD83B, 0xDE37, 0x20,
0xD83B, 0xDE39, 0xD83B, 0xDE3B
}, letters_dest5[]={
0xD83B, 0xDE42, 0xD83B, 0xDE47, 0xD83B, 0xDE49, 0xD83B, 0xDE4B, 0x20,
0xD83B, 0xDE42, 0xD83B, 0xDE47, 0xD83B, 0xDE49, 0xD83B, 0xDE4B, 0x20,
0xD83B, 0xDE4D, 0xD83B, 0xDE4E, 0xD83B, 0xDE4F, 0x20,
0xD83B, 0xDE51, 0xD83B, 0xDE52, 0xD83B, 0xDE54, 0xD83B, 0xDE57, 0x20,
0xD83B, 0xDE59, 0xD83B, 0xDE5B, 0xD83B, 0xDE5D, 0xD83B, 0xDE5F
@ -4169,20 +4169,20 @@ testStreaming(void) {
"\\u000D"
"02468\\u000D"
"ghi",
6, { 6, 6 }, {{ 6, 4, 6, 1, 6, 3}, { 4, 6, 6, 1, 6, 3 }},
{"6, 4, 6, 1, 6, 3", "4, 6, 6, 1, 6, 3"}
6, { 6, 6 }, {{ 4, 6, 6, 1, 6, 3}, { 4, 6, 6, 1, 6, 3 }},
{"4, 6, 6, 1, 6, 3", "4, 6, 6, 1, 6, 3"}
},
{ "abcd\\u000Afgh\\u000D12345\\u000A456",
6, { 4, 4 }, {{ 6, 3, 6, 3 }, { 5, 4, 6, 3 }},
{"6, 3, 6, 3", "5, 4, 6, 3"}
6, { 4, 4 }, {{ 5, 4, 6, 3 }, { 5, 4, 6, 3 }},
{"5, 4, 6, 3", "5, 4, 6, 3"}
},
{ "abcd\\u000Afgh\\u000D12345\\u000A45\\u000D",
6, { 4, 4 }, {{ 6, 3, 6, 3 }, { 5, 4, 6, 3 }},
{"6, 3, 6, 3", "5, 4, 6, 3"}
6, { 4, 4 }, {{ 5, 4, 6, 3 }, { 5, 4, 6, 3 }},
{"5, 4, 6, 3", "5, 4, 6, 3"}
},
{ "abcde\\u000Afghi",
10, { 1, 2 }, {{ 10 }, { 6, 4 }},
{"10", "6, 4"}
10, { 2, 2 }, {{ 6, 4 }, { 6, 4 }},
{"6, 4", "6, 4"}
}
};
UChar src[MAXLEN];
@ -4194,7 +4194,7 @@ testStreaming(void) {
UBiDiLevel level;
int nTests = LENGTHOF(testData), nLevels = LENGTHOF(paraLevels);
UBool mismatch, testOK = TRUE;
char processedLenStr[MAXPORTIONS * 5];
char processedLenStr[MAXPORTIONS * 5];
log_verbose("\nEntering TestStreaming\n\n");
@ -4208,7 +4208,7 @@ testStreaming(void) {
chunk = testData[i].chunk;
nPortions = testData[i].nPortions[levelIndex];
level = paraLevels[levelIndex];
*processedLenStr = NULL_CHAR;
processedLenStr[0] = NULL_CHAR;
log_verbose("Testing level %d, case %d\n", level, i);
mismatch = FALSE;
@ -4230,7 +4230,7 @@ testStreaming(void) {
}
ubidi_setReorderingOptions(pBiDi, UBIDI_OPTION_STREAMING);
mismatch = (UBool)(j >= nPortions ||
mismatch |= (UBool)(j >= nPortions ||
processedLen != testData[i].portionLens[levelIndex][j]);
sprintf(processedLenStr + j * 4, "%4d", processedLen);

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2011, International Business Machines Corporation and
* COPYRIGHT:
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/* file name: cbiditst.h
@ -51,6 +51,10 @@ extern "C" {
#define PDF U_POP_DIRECTIONAL_FORMAT
#define NSM U_DIR_NON_SPACING_MARK
#define BN U_BOUNDARY_NEUTRAL
#define FSI U_FIRST_STRONG_ISOLATE
#define LRI U_LEFT_TO_RIGHT_ISOLATE
#define RLI U_RIGHT_TO_LEFT_ISOLATE
#define PDI U_POP_DIRECTIONAL_ISOLATE
extern const char * const
dirPropNames[U_CHAR_DIRECTION_COUNT];

View file

@ -13,7 +13,7 @@
* created on: 2009oct16
* created by: Markus W. Scherer
*
* BiDi conformance test, using the Unicode BidiTest.txt file.
* BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
*/
#include <stdio.h>
@ -37,18 +37,18 @@ public:
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
void TestBidiTest();
void TestBidiCharacterTest();
private:
char *getUnidataPath(char path[]);
UBool parseLevels(const char *start);
UBool parseLevels(const char *&start);
UBool parseOrdering(const char *start);
UBool parseInputStringFromBiDiClasses(const char *&start);
UBool parseInputStringFromBiDiClasses(const char *&start, UBool parseChars);
UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
const char *paraLevelName);
UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
UBool checkOrdering(UBiDi *ubidi);
void printErrorLine(const char *paraLevelName);
void printErrorLine();
char line[10000];
UBiDiLevel levels[1000];
@ -59,6 +59,8 @@ private:
int32_t orderingCount;
int32_t errorCount;
UnicodeString inputString;
const char *paraLevelName;
char levelNameString[12];
};
extern IntlTest *createBiDiConformanceTest() {
@ -69,12 +71,10 @@ void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *
if(exec) {
logln("TestSuite BiDiConformanceTest: ");
}
switch (index) {
TESTCASE(0, TestBidiTest);
default:
name="";
break; // needed to end the loop
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(TestBidiTest);
TESTCASE_AUTO(TestBidiCharacterTest);
TESTCASE_AUTO_END;
}
// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
@ -115,18 +115,20 @@ char *BiDiConformanceTest::getUnidataPath(char path[]) {
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
UBool BiDiConformanceTest::parseLevels(const char *start) {
UBool BiDiConformanceTest::parseLevels(const char *&start) {
directionBits=0;
levelsCount=0;
while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
if(*start=='x') {
levels[levelsCount++]=UBIDI_DEFAULT_LTR;
++start;
} else {
char *end;
uint32_t value=(uint32_t)strtoul(start, &end, 10);
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
errln("@Levels: parse error at %s", start);
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
|| value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
printErrorLine();
return FALSE;
}
levels[levelsCount++]=(UBiDiLevel)value;
@ -139,11 +141,12 @@ UBool BiDiConformanceTest::parseLevels(const char *start) {
UBool BiDiConformanceTest::parseOrdering(const char *start) {
orderingCount=0;
while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
char *end;
uint32_t value=(uint32_t)strtoul(start, &end, 10);
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
errln("@Reorder: parse error at %s", start);
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
printErrorLine();
return FALSE;
}
ordering[orderingCount++]=(int32_t)value;
@ -152,7 +155,7 @@ UBool BiDiConformanceTest::parseOrdering(const char *start) {
return TRUE;
}
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
static const UChar pseudoCharFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x6c, // 'l' for L
0x52, // 'R' for R
0x33, // '3' for EN
@ -179,12 +182,38 @@ static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x2e // '.' for PDI
};
// Real Unicode characters standing in for each BiDi class, indexed by the
// UCharDirection value of the class (same order as pseudoCharFromBiDiClass).
// parseInputStringFromBiDiClasses() appends these to the input string when it
// is called with parseChars=TRUE, so that the resulting text can be processed
// without a custom class callback.
static const UChar realCharFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x006c, // 'l' for L
0x05d0, // Hebrew Letter Alef for R
0x0033, // '3' for EN
0x002d, // '-' for ES
0x0025, // '%' for ET
0x0669, // Arabic-Indic '9' for AN
0x002c, // ',' for CS
0x000d, // CR for B
0x0009, // Tab for S
0x0020, // ' ' for WS
0x003d, // '=' for ON
0x202a, // LRE
0x202d, // LRO
0x0630, // Arabic Letter Thal for AL
0x202b, // RLE
0x202e, // RLO
0x202c, // PDF
0x05b9, // Hebrew Point Holam for NSM
0x00ad, // Soft Hyphen for BN
0x2068, // FSI
0x2066, // LRI
0x2067, // RLI
0x2069 // PDI
};
U_CDECL_BEGIN
static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
if(c==charFromBiDiClass[i]) {
if(c==pseudoCharFromBiDiClass[i]) {
return (UCharDirection)i;
}
}
@ -195,11 +224,21 @@ biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
U_CDECL_END
// Maps one hexadecimal digit character to its numeric value.
// Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for any other character.
static int32_t hexdigit(char c) {
    int32_t value=-1;
    if('0'<=c && c<='9') {
        value=c-'0';
    } else if('A'<=c && c<='F') {
        value=10+(c-'A');
    } else if('a'<=c && c<='f') {
        value=10+(c-'a');
    }
    return value;
}
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
};
UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start, UBool parseChars) {
inputString.remove();
/*
* Lengthy but fast BiDi class parser.
@ -208,6 +247,24 @@ UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
* but that makes this test take significantly more time.
*/
while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
int32_t d1, d2, hexnum;
// First look for an hexa value of at least 2 digits
if(parseChars && (d1=hexdigit(start[0]))>=0 && (d2=hexdigit(start[1]))>=0) {
const char *saveStart=start;
hexnum=(d1<<4) + d2;
start+=2;
while((d1=hexdigit(start[0]))>=0) {
hexnum=(hexnum<<4) + d1;
start++;
}
if(hexnum<=0 || hexnum>0xffff ||
(!U_IS_INV_WHITESPACE(start[0]) && start[0]!=';' && start[0]!=0)) {
errln("\nError on line %d: Invalid hexa number at %s", (int)lineNumber, saveStart);
return FALSE;
}
inputString.append(hexnum);
continue;
}
UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
// Compare each character once until we have a match on
// a complete, short BiDi class name.
@ -278,22 +335,37 @@ UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
// and not just the start of a longer word.
int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
char c=start[biDiClassNameLength];
if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
errln("BiDi class string not recognized at %s", start);
return FALSE;
if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
if(parseChars) {
inputString.append(realCharFromBiDiClass[biDiClass]);
} else {
inputString.append(pseudoCharFromBiDiClass[biDiClass]);
}
start+=biDiClassNameLength;
continue;
}
inputString.append(charFromBiDiClass[biDiClass]);
start+=biDiClassNameLength;
#if 0
// Accept any single character
// Not currently supported:
// This parser reads the .txt file as is, with the default charset.
// We could at most support "invariant" characters,
// and would have to convert them to Unicode using invariant-character functions.
// If we need to support Unicode characters, then we would have to
// rewrite the code for reading and parsing to read UTF-8.
if(parseChars && (U_IS_INV_WHITESPACE(start[1]) || start[1]==';' || start[1]==0)) {
inputString.append(start[0]);
start++;
continue;
}
#endif
errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
printErrorLine();
return FALSE;
}
return TRUE;
}
void BiDiConformanceTest::TestBidiTest() {
if(isICUVersionBefore(52, 1)) {
// TODO: Update the ICU BiDi code to implement the additions in the Unicode 6.3 BiDi Algorithm,
// and reenable the BiDi conformance test.
return;
}
IcuTestErrorCode errorCode(*this, "TestBidiTest");
const char *sourceTestDataPath=getSourceTestData(errorCode);
if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
@ -333,7 +405,8 @@ void BiDiConformanceTest::TestBidiTest() {
if(*start=='@') {
++start;
if(0==strncmp(start, "Levels:", 7)) {
if(!parseLevels(start+7)) {
start+=7;
if(!parseLevels(start)) {
return;
}
} else if(0==strncmp(start, "Reorder:", 8)) {
@ -343,7 +416,7 @@ void BiDiConformanceTest::TestBidiTest() {
}
// Skip unknown @Xyz: ...
} else {
if(!parseInputStringFromBiDiClasses(start)) {
if(!parseInputStringFromBiDiClasses(start, FALSE)) {
return;
}
start=u_skipWhitespace(start);
@ -370,13 +443,13 @@ void BiDiConformanceTest::TestBidiTest() {
errln("Input line %d: %s", (int)lineNumber, line);
return;
}
if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
paraLevelNames[i])) {
paraLevelName=paraLevelNames[i];
if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
// continue outerLoop; does not exist in C++
// so just break out of the inner loop.
break;
}
if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
if(!checkOrdering(ubidi.getAlias())) {
// continue outerLoop; does not exist in C++
// so just break out of the inner loop.
break;
@ -387,6 +460,228 @@ void BiDiConformanceTest::TestBidiTest() {
}
}
/*
*******************************************************************************
*
* created on: 2013jul01
* created by: Matitiahu Allouche
This function performs a conformance test for implementations of the
Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
Each test case is represented in a single line which is read from a file
named BidiCharacterTest.txt. Empty, blank and comment lines may also appear
in this file.
The format of the test data is specified below. Note that each test
case constitutes a single line of text; reordering is applied within a
single line and independently of a rendering engine, and rules L3 and L4
are out of scope.
The number sign '#' is the comment character: everything is ignored from
the occurrence of '#' until the end of the line.
Empty lines and lines containing only spaces and/or comments are ignored.
Lines which represent test cases consist of 4 or 5 fields separated by a
semicolon. Each field consists of tokens separated by whitespace (space
or Tab). Whitespace before and after semicolons is optional.
Field 0: A sequence of tokens where each token may be one of the following:
- a hexadecimal number of at least 2 digits representing a code point
- a bidi property value, which must be one of (case sensitive)
L (translated to 'l'),
R (translated to Hebrew Letter Alef),
EN (translated to '3'),
ES (translated to '-'),
ET (translated to '%'),
AN (translated to Arabic-Indic '9'),
CS (translated to ','),
B (translated to CR),
S (translated to Tab),
WS (translated to space),
ON (translated to '='),
LRE, LRO,
AL (translated to Arabic Letter Thal),
RLE, RLO, PDF,
NSM (translated to Hebrew Point Holam),
BN (translated to Soft Hyphen),
FSI, LRI, RLI, PDI
- a single character which represents itself (this form is currently disabled in the parser)
Field 1: A value representing the paragraph direction, as follows:
- 0 represents left-to-right
- 1 represents right-to-left
- 2 represents auto-LTR according to rules P2 and P3 of the algorithm
- 3 represents auto-RTL according to rules P2 and P3 of the algorithm
- a negative number whose absolute value is taken as paragraph level;
this may be useful to test cases where the embedding level approaches
or exceeds the maximum embedding level.
Field 2: The resolved paragraph embedding level. If the input (field 0)
includes more than one paragraph, this field represents the
resolved level of the first paragraph.
Field 3: An ordered list of resulting levels for each token in field 0
(each token represents one source character).
The UBA does not assign levels to certain characters (e.g. LRO);
characters removed in rule X9 are indicated with an 'x'.
Field 4: An ordered list of indices showing the resulting visual ordering
from left to right; characters with a resolved level of 'x' are
skipped. The numbers are zero-based. Each index corresponds to
a character in the reordered (visual) string. It represents the
index of the source character in the input (field 0).
This field is optional. When it is absent, the visual ordering
is not verified.
Examples:
# This is a comment line.
L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
L L ON R;0;0;0 0 0 1;0 1 2 3
# Note: in the next line, 'B' represents a block separator, not the letter 'B'.
LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
# Note: in the next line, 'b' represents the letter 'b', not a block separator.
a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
a R R x ; 1 ; 1 ; 2 1 1 2
L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
*
*******************************************************************************
*/
void BiDiConformanceTest::TestBidiCharacterTest() {
IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
const char *sourceTestDataPath=getSourceTestData(errorCode);
if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
"folder (getSourceTestData())")) {
return;
}
char bidiTestPath[400];
strcpy(bidiTestPath, sourceTestDataPath);
strcat(bidiTestPath, "BidiCharacterTest.txt");
LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
if(bidiTestFile.isNull()) {
errln("unable to open %s", bidiTestPath);
return;
}
LocalUBiDiPointer ubidi(ubidi_open());
lineNumber=0;
levelsCount=0;
orderingCount=0;
errorCount=0;
while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
++lineNumber;
paraLevelName="N/A";
inputString="N/A";
// Remove trailing comments and whitespace.
char *commentStart=strchr(line, '#');
if(commentStart!=NULL) {
*commentStart=0;
}
u_rtrim(line);
const char *start=u_skipWhitespace(line);
if(*start==0) {
continue; // Skip empty and comment-only lines.
}
if(!parseInputStringFromBiDiClasses(start, TRUE)) {
continue;
}
start=u_skipWhitespace(start);
if(*start!=';') {
errorCount++;
errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
continue;
}
start=u_skipWhitespace(start+1);
char *end;
int32_t paraDirection=(int32_t)strtol(start, &end, 10);
UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
if(paraDirection==0) {
paraLevel=0;
paraLevelName="LTR";
}
else if(paraDirection==1) {
paraLevel=1;
paraLevelName="RTL";
}
else if(paraDirection==2) {
paraLevel=UBIDI_DEFAULT_LTR;
paraLevelName="Auto/LTR";
}
else if(paraDirection==3) {
paraLevel=UBIDI_DEFAULT_RTL;
paraLevelName="Auto/RTL";
}
else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
paraLevel=(UBiDiLevel)(-paraDirection);
sprintf(levelNameString, "%d", (int)paraLevel);
paraLevelName=levelNameString;
}
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
printErrorLine();
continue;
}
start=u_skipWhitespace(end);
if(*start!=';') {
errorCount++;
errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
continue;
}
start++;
uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
resolvedParaLevel>1) {
errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
printErrorLine();
continue;
}
start=u_skipWhitespace(end);
if(*start!=';') {
errorCount++;
errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
return;
}
start++;
if(!parseLevels(start)) {
continue;
}
start=u_skipWhitespace(start);
if(*start==';') {
if(!parseOrdering(start+1)) {
continue;
}
}
else
orderingCount=-1;
ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
paraLevel, NULL, errorCode);
const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
errln("Input line %d: %s", (int)lineNumber, line);
continue;
}
UBiDiLevel actualLevel;
if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
printErrorLine();
errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
(int)lineNumber, resolvedParaLevel, actualLevel);
continue;
}
if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
continue;
}
if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
continue;
}
}
}
static UChar printLevel(UBiDiLevel level) {
if(level<UBIDI_DEFAULT_LTR) {
return 0x30+level;
@ -403,12 +698,11 @@ static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actual
return actualDirectionBits;
}
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
const char *paraLevelName) {
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
UBool isOk=TRUE;
if(levelsCount!=actualCount) {
errln("Wrong number of level values; expected %d actual %d",
(int)levelsCount, (int)actualCount);
errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
(int)lineNumber, (int)levelsCount, (int)actualCount);
isOk=FALSE;
} else {
for(int32_t i=0; i<actualCount; ++i) {
@ -420,8 +714,8 @@ UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t
// The reordering result is the same, so this is fine.
break;
} else {
errln("Wrong level value at index %d; expected %d actual %d",
(int)i, levels[i], actualLevels[i]);
errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
(int)lineNumber, (int)i, levels[i], actualLevels[i]);
isOk=FALSE;
break;
}
@ -429,7 +723,7 @@ UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t
}
}
if(!isOk) {
printErrorLine(paraLevelName);
printErrorLine();
UnicodeString els("Expected levels: ");
int32_t i;
for(i=0; i<levelsCount; ++i) {
@ -450,9 +744,9 @@ UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t
// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
// Therefore we just skip the indexes for BiDi controls while comparing
// with the expected ordering that has them omitted.
UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
UBool isOk=TRUE;
IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
IcuTestErrorCode errorCode(*this, "checkOrdering()");
int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
int32_t i, visualIndex;
// Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
@ -467,8 +761,8 @@ UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName
continue; // BiDi control, omitted from expected ordering.
}
if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
errln("Wrong ordering value at visual index %d; expected %d actual %d",
(int)visualIndex, ordering[visualIndex], logicalIndex);
errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
(int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
isOk=FALSE;
break;
}
@ -477,12 +771,12 @@ UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName
// visualIndex is now the visual length minus the BiDi controls,
// which should match the length of the BidiTest.txt ordering.
if(isOk && orderingCount!=visualIndex) {
errln("Wrong number of ordering values; expected %d actual %d",
(int)orderingCount, (int)visualIndex);
errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
(int)lineNumber, (int)orderingCount, (int)visualIndex);
isOk=FALSE;
}
if(!isOk) {
printErrorLine(paraLevelName);
printErrorLine();
UnicodeString eord("Expected ordering: ");
for(i=0; i<orderingCount; ++i) {
eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
@ -500,7 +794,7 @@ UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName
return isOk;
}
void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
void BiDiConformanceTest::printErrorLine() {
++errorCount;
errln("Input line %5d: %s", (int)lineNumber, line);
errln(UnicodeString("Input string: ")+inputString);

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff