From f3ffdb5afc833c5f271bf0962096ebb2ae857b5b Mon Sep 17 00:00:00 2001 From: Matitiahu Allouche Date: Tue, 7 Aug 2007 19:54:10 +0000 Subject: [PATCH] ICU-5732 a few fixes to match those in ICU4J X-SVN-Rev: 22304 --- icu4c/source/common/ubidi.c | 180 ++++++---- icu4c/source/common/ubidiimp.h | 49 ++- icu4c/source/common/ubidiln.c | 602 ++++++++++++++++----------------- icu4c/source/common/ubidiwrt.c | 8 +- 4 files changed, 453 insertions(+), 386 deletions(-) diff --git a/icu4c/source/common/ubidi.c b/icu4c/source/common/ubidi.c index 14370a560cd..8fd86ab6115 100644 --- a/icu4c/source/common/ubidi.c +++ b/icu4c/source/common/ubidi.c @@ -1,18 +1,18 @@ /* - ****************************************************************************** - * - * Copyright (C) 1999-2007, International Business Machines - * Corporation and others. All Rights Reserved. - * - ****************************************************************************** - * file name: ubidi.c - * encoding: US-ASCII - * tab size: 8 (not used) - * indentation:4 - * - * created on: 1999jul27 - * created by: Markus W. Scherer - */ +****************************************************************************** +* +* Copyright (C) 1999-2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidi.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999jul27 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ #include "cmemory.h" #include "unicode/utypes.h" @@ -114,6 +114,14 @@ static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; /* UBiDi object management -------------------------------------------------- */ +static void +crash() +{ + char ** pc; + pc = NULL; + *pc = "make it crash!"; +} + U_CAPI UBiDi * U_EXPORT2 ubidi_open(void) { @@ -281,7 +289,7 @@ ubidi_isInverse(UBiDi *pBiDi) { * concept of RUNS_ONLY which is a double operation. * It could be advantageous to divide this into 3 concepts: * a) Operation: direct / inverse / RUNS_ONLY - * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L + * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL * This would allow combinations not possible today like RUNS_ONLY with * NUMBERS_SPECIAL. @@ -293,7 +301,7 @@ ubidi_isInverse(UBiDi *pBiDi) { */ U_CAPI void U_EXPORT2 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { - if ((pBiDi != NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) + if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) && (reorderingMode < UBIDI_REORDER_COUNT)) { pBiDi->reorderingMode = reorderingMode; pBiDi->isInverse = reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L; @@ -302,7 +310,7 @@ ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { U_CAPI UBiDiReorderingMode U_EXPORT2 ubidi_getReorderingMode(UBiDi *pBiDi) { - if (pBiDi != NULL) { + if (pBiDi!=NULL) { return pBiDi->reorderingMode; } else { return UBIDI_REORDER_DEFAULT; @@ -314,14 +322,14 @@ ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; } - if (pBiDi != NULL) { - pBiDi->reorderingOptions = reorderingOptions; + if (pBiDi!=NULL) { + pBiDi->reorderingOptions=reorderingOptions; } } U_CAPI uint32_t U_EXPORT2 ubidi_getReorderingOptions(UBiDi *pBiDi) { - if (pBiDi != NULL) { + if (pBiDi!=NULL) { return pBiDi->reorderingOptions; } else { return 0; @@ -1372,8 +1380,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, break; default: /* we should never get here */ - start=start0+25; - start/=(start-start0-25); /* force program crash */ + crash(); break; } } @@ -1487,8 +1494,7 @@ resolveImplicitLevels(UBiDi *pBiDi, start2=i; break; default: /* we should never get here */ - start=start1+25; - start/=(start-start1-25); /* force program crash */ + crash(); break; } } @@ -1552,8 +1558,11 @@ setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, void *runsOnlyMemory; int32_t *visualMap; UChar *visualText; + int32_t saveLength, saveTrailingWSStart; const UBiDiLevel *levels; UBiDiLevel *saveLevels; + UBiDiDirection saveDirection; + UBool saveMayAllocateText; Run *runs; int32_t visualLength, i, j, visualStart, logicalStart, runCount, runLength, addedRuns, insertRemove, @@ -1580,8 +1589,17 @@ setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; } + paraLevel&=1; /* accept only 0 or 1 */ ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup3; + } + /* we cannot access directly pBiDi->levels since it is not yet set if + * direction is not MIXED + */ levels=ubidi_getLevels(pBiDi, pErrorCode); + uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); + saveTrailingWSStart=pBiDi->trailingWSStart; /* FOOD FOR THOUGHT: instead of writing the visual text, we could use * the visual map and the dirProps array to drive the second call @@ -1591,20 +1609,31 @@ setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, */ visualLength=ubidi_writeReordered(pBiDi, visualText, length, UBIDI_DO_MIRRORING, pErrorCode); - pBiDi->reorderingOptions=saveOptions; ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); if(U_FAILURE(*pErrorCode)) { goto cleanup2; } - uprv_memcpy(saveLevels, levels, length*sizeof(UBiDiLevel)); + pBiDi->reorderingOptions=saveOptions; + saveLength=pBiDi->length; + saveDirection=pBiDi->direction; pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; - paraLevel=pBiDi->paraLevel^1; + paraLevel^=1; + /* Because what we did with reorderingOptions, visualText may be shorter + * than the original text. But we don't want the levels memory to be + * reallocated shorter than the original length, since we need to restore + * the levels as after the first call to ubidi_setpara() before returning. + * We will force mayAllocateText to FALSE before the second call to + * ubidi_setpara(), and will restore it afterwards. + */ + saveMayAllocateText=pBiDi->mayAllocateText; + pBiDi->mayAllocateText=FALSE; ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); + pBiDi->mayAllocateText=saveMayAllocateText; + ubidi_getRuns(pBiDi, pErrorCode); if(U_FAILURE(*pErrorCode)) { goto cleanup1; } - ubidi_getRuns(pBiDi, pErrorCode); /* check if some runs must be split, count how many splits */ addedRuns=0; runCount=pBiDi->runCount; @@ -1691,13 +1720,22 @@ setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, cleanup2: /* restore real text */ pBiDi->text=text; + pBiDi->length=saveLength; + pBiDi->originalLength=length; + pBiDi->direction=saveDirection; + /* the saved levels should never excess levelsSize, but we check anyway */ + if(saveLength>pBiDi->levelsSize) { + saveLength=pBiDi->levelsSize; + } + uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); + pBiDi->trailingWSStart=saveTrailingWSStart; /* free memory for mapping table and visual text */ uprv_free(runsOnlyMemory); - cleanup3: - pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; if(pBiDi->runCount>1) { pBiDi->direction=UBIDI_MIXED; } + cleanup3: + pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; } /* ubidi_setPara ------------------------------------------------------------ */ @@ -1709,12 +1747,11 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, UBiDiDirection direction; /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } else if(pBiDi==NULL || text==NULL || - ((UBIDI_MAX_EXPLICIT_LEVELrunCount=0; + pBiDi->paraCount=0; pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ return; } @@ -1862,8 +1900,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, break; case UBIDI_REORDER_RUNS_ONLY: /* we should never get here */ - pBiDi=NULL; - pBiDi->text=NULL; /* make the program crash! */ + crash(); break; case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; @@ -1882,9 +1919,6 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; } break; - default: - pBiDi->pImpTabPair=&impTab_DEFAULT; - break; } /* * If there are no external levels specified and there @@ -1969,6 +2003,39 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, adjustWSLevels(pBiDi); break; } + /* add RLM for inverse Bidi with contextual orientation resolving + * to RTL which would not round-trip otherwise + */ + if((pBiDi->defaultParaLevel>0) && + (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && + ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { + int32_t i, j, start, last; + DirProp dirProp; + for(i=0; iparaCount; i++) { + last=pBiDi->paras[i]-1; + if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { + continue; /* LTR paragraph */ + } + start= i==0 ? 0 : pBiDi->paras[i - 1]; + for(j=last; j>=start; j--) { + dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); + if(dirProp==L) { + if(jdirProps[last])==B) { + last--; + } + } + addPoint(pBiDi, last, RLM_BEFORE); + break; + } + if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { + break; + } + } + } + } + if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { pBiDi->resultLength -= pBiDi->controlCount; } else { @@ -2065,13 +2132,10 @@ ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, int32_t paraStart; /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } else if( !IS_VALID_PARA_OR_LINE(pBiDi) || /* no valid setPara/setLine */ - paraIndex<0 || paraIndex>=pBiDi->paraCount ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, ); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, ); + RETURN_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode, ); + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ if(paraIndex) { paraStart=pBiDi->paras[paraIndex-1]; @@ -2098,15 +2162,11 @@ ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, /* check the argument values */ /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ - if( !IS_VALID_PARA_OR_LINE(pBiDi)) {/* no valid setPara/setLine */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ - if( charIndex<0 || charIndex>=pBiDi->length ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } + RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); + for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); return paraIndex; @@ -2117,9 +2177,8 @@ ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, const void *newContext, UBiDiClassCallback **oldFn, const void **oldContext, UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } else if(pBiDi==NULL) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, ); + if(pBiDi==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; } @@ -2138,6 +2197,9 @@ ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, U_CAPI void U_EXPORT2 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) { + if(pBiDi==NULL) { + return; + } if( fn ) { *fn = pBiDi->fnClassCallback; diff --git a/icu4c/source/common/ubidiimp.h b/icu4c/source/common/ubidiimp.h index 5282155de96..609480ef3f6 100644 --- a/icu4c/source/common/ubidiimp.h +++ b/icu4c/source/common/ubidiimp.h @@ -1,18 +1,18 @@ /* - ****************************************************************************** - * - * Copyright (C) 1999-2007, International Business Machines - * Corporation and others. All Rights Reserved. - * - ****************************************************************************** - * file name: ubidiimp.h - * encoding: US-ASCII - * tab size: 8 (not used) - * indentation:4 - * - * created on: 1999aug06 - * created by: Markus W. Scherer - */ +****************************************************************************** +* +* Copyright (C) 1999-2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidiimp.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ #ifndef UBIDIIMP_H #define UBIDIIMP_H @@ -70,6 +70,7 @@ enum { /* are there any characters that are LTR or RTL? */ #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) +#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)) /* explicit embedding codes */ #define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) @@ -299,7 +300,6 @@ struct UBiDi { }; #define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x))) -#define IS_VALID_LINE(x) ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)) #define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))) typedef union { @@ -309,6 +309,25 @@ typedef union { Run *runsMemory; } BidiMemoryForAllocation; +/* Macros for initial checks at function entry */ +#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \ + if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue +#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \ + if(!IS_VALID_PARA(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return retvalue; \ + } +#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \ + if(!IS_VALID_PARA_OR_LINE(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return retvalue; \ + } +#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \ + if((arg)<(start) || (arg)>=(limit)) { \ + (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ + return retvalue; \ + } + /* helper function to (re)allocate memory if allowed */ U_CFUNC UBool ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); diff --git a/icu4c/source/common/ubidiln.c b/icu4c/source/common/ubidiln.c index e66662e4b3c..3590c1945d2 100644 --- a/icu4c/source/common/ubidiln.c +++ b/icu4c/source/common/ubidiln.c @@ -1,18 +1,18 @@ /* - ****************************************************************************** - * - * Copyright (C) 1999-2007, International Business Machines - * Corporation and others. All Rights Reserved. - * - ****************************************************************************** - * file name: ubidiln.c - * encoding: US-ASCII - * tab size: 8 (not used) - * indentation:4 - * - * created on: 1999aug06 - * created by: Markus W. Scherer - */ +****************************************************************************** +* +* Copyright (C) 1999-2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidiln.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ /* set import/export definitions */ #ifndef U_COMMON_IMPLEMENTATION @@ -133,16 +133,16 @@ ubidi_setLine(const UBiDi *pParaBiDi, int32_t length; /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } else if(!IS_VALID_PARA(pParaBiDi) || pLineBiDi==NULL) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, ); + RETURN_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode, ); + RETURN_IF_BAD_RANGE(start, 0, limit, *pErrorCode, ); + RETURN_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode, ); + if(pLineBiDi==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; - } else if(start<0 || start>limit || limit>pParaBiDi->length) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } else if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != - ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { + } + if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != + ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { /* the line crosses a paragraph boundary */ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; @@ -167,92 +167,84 @@ ubidi_setLine(const UBiDi *pParaBiDi, pLineBiDi->controlCount++; } } + pLineBiDi->resultLength-=pLineBiDi->controlCount; } - if(length>0) { - pLineBiDi->dirProps=pParaBiDi->dirProps+start; - pLineBiDi->levels=pParaBiDi->levels+start; - pLineBiDi->runCount=-1; + pLineBiDi->dirProps=pParaBiDi->dirProps+start; + pLineBiDi->levels=pParaBiDi->levels+start; + pLineBiDi->runCount=-1; - if(pParaBiDi->direction!=UBIDI_MIXED) { - /* the parent is already trivial */ - pLineBiDi->direction=pParaBiDi->direction; + if(pParaBiDi->direction!=UBIDI_MIXED) { + /* the parent is already trivial */ + pLineBiDi->direction=pParaBiDi->direction; - /* - * The parent's levels are all either - * implicitly or explicitly ==paraLevel; - * do the same here. - */ - if(pParaBiDi->trailingWSStart<=start) { - pLineBiDi->trailingWSStart=0; - } else if(pParaBiDi->trailingWSStarttrailingWSStart=pParaBiDi->trailingWSStart-start; - } else { - pLineBiDi->trailingWSStart=length; - } + /* + * The parent's levels are all either + * implicitly or explicitly ==paraLevel; + * do the same here. + */ + if(pParaBiDi->trailingWSStart<=start) { + pLineBiDi->trailingWSStart=0; + } else if(pParaBiDi->trailingWSStarttrailingWSStart=pParaBiDi->trailingWSStart-start; } else { - const UBiDiLevel *levels=pLineBiDi->levels; - int32_t i, trailingWSStart; - UBiDiLevel level; - - setTrailingWSStart(pLineBiDi); - trailingWSStart=pLineBiDi->trailingWSStart; - - /* recalculate pLineBiDi->direction */ - if(trailingWSStart==0) { - /* all levels are at paraLevel */ - pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); - } else { - /* get the level of the first character */ - level=(UBiDiLevel)(levels[0]&1); - - /* if there is anything of a different level, then the line is mixed */ - if(trailingWSStartparaLevel&1)!=level) { - /* the trailing WS is at paraLevel, which differs from levels[0] */ - pLineBiDi->direction=UBIDI_MIXED; - } else { - /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ - i=1; - for(;;) { - if(i==trailingWSStart) { - /* the direction values match those in level */ - pLineBiDi->direction=(UBiDiDirection)level; - break; - } else if((levels[i]&1)!=level) { - pLineBiDi->direction=UBIDI_MIXED; - break; - } - ++i; - } - } - } - - switch(pLineBiDi->direction) { - case UBIDI_LTR: - /* make sure paraLevel is even */ - pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pLineBiDi->trailingWSStart=0; - break; - case UBIDI_RTL: - /* make sure paraLevel is odd */ - pLineBiDi->paraLevel|=1; - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pLineBiDi->trailingWSStart=0; - break; - default: - break; - } + pLineBiDi->trailingWSStart=length; } } else { - /* create an object for a zero-length line */ - pLineBiDi->direction=pLineBiDi->paraLevel&1 ? UBIDI_RTL : UBIDI_LTR; - pLineBiDi->trailingWSStart=pLineBiDi->runCount=0; + const UBiDiLevel *levels=pLineBiDi->levels; + int32_t i, trailingWSStart; + UBiDiLevel level; - pLineBiDi->dirProps=NULL; - pLineBiDi->levels=NULL; + setTrailingWSStart(pLineBiDi); + trailingWSStart=pLineBiDi->trailingWSStart; + + /* recalculate pLineBiDi->direction */ + if(trailingWSStart==0) { + /* all levels are at paraLevel */ + pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); + } else { + /* get the level of the first character */ + level=(UBiDiLevel)(levels[0]&1); + + /* if there is anything of a different level, then the line is mixed */ + if(trailingWSStartparaLevel&1)!=level) { + /* the trailing WS is at paraLevel, which differs from levels[0] */ + pLineBiDi->direction=UBIDI_MIXED; + } else { + /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ + i=1; + for(;;) { + if(i==trailingWSStart) { + /* the direction values match those in level */ + pLineBiDi->direction=(UBiDiDirection)level; + break; + } else if((levels[i]&1)!=level) { + pLineBiDi->direction=UBIDI_MIXED; + break; + } + ++i; + } + } + } + + switch(pLineBiDi->direction) { + case UBIDI_LTR: + /* make sure paraLevel is even */ + pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); + + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pLineBiDi->trailingWSStart=0; + break; + case UBIDI_RTL: + /* make sure paraLevel is odd */ + pLineBiDi->paraLevel|=1; + + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pLineBiDi->trailingWSStart=0; + break; + default: + break; + } } pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ return; @@ -274,13 +266,12 @@ U_CAPI const UBiDiLevel * U_EXPORT2 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { int32_t start, length; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(!IS_VALID_PARA_OR_LINE(pBiDi) || (length=pBiDi->length)<=0) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); + if((length=pBiDi->length)<=0) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } - if((start=pBiDi->trailingWSStart)==length) { /* the current levels array reflects the WS run */ return pBiDi->levels; @@ -293,7 +284,6 @@ ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { * This must be a UBiDi object for a line, and * we need to create a new levels array. */ - if(getLevelsMemory(pBiDi, length)) { UBiDiLevel *levels=pBiDi->levelsMemory; @@ -321,18 +311,16 @@ ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, int32_t length, runCount, visualStart, logicalLimit, logicalFirst, i; Run iRun; - if(!IS_VALID_PARA_OR_LINE(pBiDi) || logicalStart<0 || - (length=pBiDi->length)<=logicalStart) { + errorCode=U_ZERO_ERROR; + RETURN_IF_BAD_RANGE(logicalStart, 0, pBiDi->length, errorCode, ); + /* ubidi_countRuns will check VALID_PARA_OR_LINE */ + runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); + if(U_FAILURE(errorCode)) { return; } /* this is done based on runs rather than on levels since levels have a special interpretation when UBIDI_REORDER_RUNS_ONLY */ - errorCode=U_ZERO_ERROR; - runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); - if(U_FAILURE(errorCode)) { - return; - } visualStart=logicalLimit=0; for(i=0; idirection!=UBIDI_MIXED || logicalStart>=pBiDi->trailingWSStart) { + if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { + *pLevel=GET_ODD_BIT(iRun.logicalStart); + } + else if(pBiDi->direction!=UBIDI_MIXED || logicalStart>=pBiDi->trailingWSStart) { *pLevel=GET_PARALEVEL(pBiDi, logicalStart); } else { *pLevel=pBiDi->levels[logicalStart]; @@ -361,42 +352,41 @@ ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, U_CAPI int32_t U_EXPORT2 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + ubidi_getRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { return -1; - } else if(!IS_VALID_PARA_OR_LINE(pBiDi) || - (pBiDi->runCount<0 && !ubidi_getRuns(pBiDi, pErrorCode))) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return -1; - } else { - return pBiDi->runCount; } + return pBiDi->runCount; } U_CAPI UBiDiDirection U_EXPORT2 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, int32_t *pLogicalStart, int32_t *pLength) { - UErrorCode status = U_ZERO_ERROR; - if( !IS_VALID_PARA_OR_LINE(pBiDi) || runIndex<0 || - (pBiDi->runCount==-1 && !ubidi_getRuns(pBiDi, &status)) || - runIndex>=pBiDi->runCount - ) { + int32_t start; + UErrorCode errorCode = U_ZERO_ERROR; + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); + ubidi_getRuns(pBiDi, &errorCode); + if(U_FAILURE(errorCode)) { return UBIDI_LTR; - } else { - int32_t start=pBiDi->runs[runIndex].logicalStart; - if(pLogicalStart!=NULL) { - *pLogicalStart=GET_INDEX(start); - } - if(pLength!=NULL) { - if(runIndex>0) { - *pLength=pBiDi->runs[runIndex].visualLimit- - pBiDi->runs[runIndex-1].visualLimit; - } else { - *pLength=pBiDi->runs[0].visualLimit; - } - } - return (UBiDiDirection)GET_ODD_BIT(start); } + RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); + + start=pBiDi->runs[runIndex].logicalStart; + if(pLogicalStart!=NULL) { + *pLogicalStart=GET_INDEX(start); + } + if(pLength!=NULL) { + if(runIndex>0) { + *pLength=pBiDi->runs[runIndex].visualLimit- + pBiDi->runs[runIndex-1].visualLimit; + } else { + *pLength=pBiDi->runs[0].visualLimit; + } + } + return (UBiDiDirection)GET_ODD_BIT(start); } /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ @@ -529,7 +519,7 @@ reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { /* compute the runs array --------------------------------------------------- */ -static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *status) { +static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { Run *runs=pBiDi->runs; int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; @@ -543,7 +533,7 @@ static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UError } /* we should never get here */ U_ASSERT(FALSE); - *status = U_INDEX_OUTOFBOUNDS_ERROR; + *pErrorCode = U_INVALID_STATE_ERROR; return 0; } @@ -560,6 +550,14 @@ static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UError */ U_CFUNC UBool ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { + /* + * This method returns immediately if the runs are already set. This + * includes the case of length==0 (handled in setPara).. + */ + if (pBiDi->runCount>=0) { + return TRUE; + } + if(pBiDi->direction!=UBIDI_MIXED) { /* simple, single-run case - this covers length==0 */ /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ @@ -567,7 +565,9 @@ ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { } else /* UBIDI_MIXED, length>0 */ { /* mixed directionality */ int32_t length=pBiDi->length, limit; - + UBiDiLevel *levels=pBiDi->levels; + int32_t i, runCount; + UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ /* * If there are WS characters at the end of the line * and the run preceding them has a level different from @@ -580,114 +580,105 @@ ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { * levels[]!=paraLevel but we have to treat it like it were so. */ limit=pBiDi->trailingWSStart; - if(limit==0) { - /* there is only WS on this line */ - getSingleRun(pBiDi, GET_PARALEVEL(pBiDi, 0)); - } else { - UBiDiLevel *levels=pBiDi->levels; - int32_t i, runCount; - UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ + /* count the runs, there is at least one non-WS run, and limit>0 */ + runCount=0; + for(i=0; i0 */ - runCount=0; - for(i=0; i1 || limit1 */ + if(getRunsMemory(pBiDi, runCount)) { + runs=pBiDi->runsMemory; + } else { + return FALSE; + } + + /* set the runs */ + /* FOOD FOR THOUGHT: this could be optimized, e.g.: + * 464->444, 484->444, 575->555, 595->555 + * However, that would take longer. Check also how it would + * interact with BiDi control removal and inserting Marks. + */ + runIndex=0; + + /* search for the run limits and initialize visualLimit values with the run lengths */ + i=0; + do { + /* prepare this run */ + start=i; + level=levels[i]; + if(levelmaxLevel) { + maxLevel=level; + } + + /* look for the run limit */ + while(++iparaLevel is ok even + if contextual multiple paragraphs. */ + if(pBiDi->paraLevelparaLevel; } } - /* - * We don't need to see if the last run can be merged with a trailing - * WS run because setTrailingWSStart() would have done that. - */ - if(runCount==1 && limit==length) { - /* There is only one non-WS run and no trailing WS-run. */ - getSingleRun(pBiDi, levels[0]); - } else /* runCount>1 || limitruns=runs; + pBiDi->runCount=runCount; - /* now, count a (non-mergeable) WS run */ - if(limit1 */ - if(getRunsMemory(pBiDi, runCount)) { - runs=pBiDi->runsMemory; - } else { - return FALSE; - } + /* now add the direction flags and adjust the visualLimit's to be just that */ + /* this loop will also handle the trailing WS run */ + limit=0; + for(i=0; i444, 484->444, 575->555, 595->555 - * However, that would take longer. Check also how it would - * interact with BiDi control removal and inserting Marks. - */ - runIndex=0; + /* Set the "odd" bit for the trailing WS run. */ + /* For a RTL paragraph, it will be the *first* run in visual order. */ + /* For the trailing WS run, pBiDi->paraLevel is ok even if + contextual multiple paragraphs. */ + if(runIndexparaLevel & 1) != 0)? 0 : runIndex; - /* search for the run limits and initialize visualLimit values with the run lengths */ - i=0; - do { - /* prepare this run */ - start=i; - level=levels[i]; - if(levelmaxLevel) { - maxLevel=level; - } - - /* look for the run limit */ - while(++iparaLevel is ok even - if contextual multiple paragraphs. */ - if(pBiDi->paraLevelparaLevel; - } - } - - /* set the object fields */ - pBiDi->runs=runs; - pBiDi->runCount=runCount; - - reorderLine(pBiDi, minLevel, maxLevel); - - /* now add the direction flags and adjust the visualLimit's to be just that */ - /* this loop will also handle the trailing WS run */ - limit=0; - for(i=0; iparaLevel is ok even if - contextual multiple paragraphs. */ - if(runIndexparaLevel & 1) != 0)? 0 : runIndex; - - ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); - } + ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); } } } @@ -885,51 +876,45 @@ ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) U_CAPI int32_t U_EXPORT2 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { - int32_t visualIndex; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } else if(!IS_VALID_PARA_OR_LINE(pBiDi)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } else if(logicalIndex<0 || pBiDi->length<=logicalIndex) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } else { - /* we can do the trivial cases without the runs array */ - switch(pBiDi->direction) { - case UBIDI_LTR: - visualIndex=logicalIndex; - break; - case UBIDI_RTL: - visualIndex=pBiDi->length-logicalIndex-1; - break; - default: - if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi, pErrorCode)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } else { - Run *runs=pBiDi->runs; - int32_t i, visualStart=0, offset, length; + int32_t visualIndex=UBIDI_MAP_NOWHERE; + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); - /* linear search for the run, search on the visual runs */ - for(i=0; irunCount; ++i) { - length=runs[i].visualLimit-visualStart; - offset=logicalIndex-GET_INDEX(runs[i].logicalStart); - if(offset>=0 && offsetdirection) { + case UBIDI_LTR: + visualIndex=logicalIndex; + break; + case UBIDI_RTL: + visualIndex=pBiDi->length-logicalIndex-1; + break; + default: + if(!ubidi_getRuns(pBiDi, pErrorCode)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return -1; + } else { + Run *runs=pBiDi->runs; + int32_t i, visualStart=0, offset, length; + + /* linear search for the run, search on the visual runs */ + for(i=0; irunCount; ++i) { + length=runs[i].visualLimit-visualStart; + offset=logicalIndex-GET_INDEX(runs[i].logicalStart); + if(offset>=0 && offset=pBiDi->runCount) { - return UBIDI_MAP_NOWHERE; + break; /* exit for loop */ } + visualStart+=length; + } + if(i>=pBiDi->runCount) { + return UBIDI_MAP_NOWHERE; } } } @@ -984,7 +969,7 @@ ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) } else { /* RTL: check from logical index to run end */ start=logicalIndex+1; - limit=runs[i].logicalStart+length; + limit=GET_INDEX(runs[i].logicalStart)+length; } for(j=start; jtext[j]; @@ -1003,15 +988,9 @@ U_CAPI int32_t U_EXPORT2 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { Run *runs; int32_t i, runCount, start; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } else if(!IS_VALID_PARA_OR_LINE(pBiDi)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } else if(visualIndex<0 || pBiDi->resultLength<=visualIndex) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); /* we can do the trivial cases without the runs array */ if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { if(pBiDi->direction==UBIDI_LTR) { @@ -1021,9 +1000,9 @@ ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) return pBiDi->length-visualIndex-1; } } - if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi, pErrorCode)) { + if(!ubidi_getRuns(pBiDi, pErrorCode)) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; + return -1; } runs=pBiDi->runs; @@ -1132,7 +1111,8 @@ ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) U_CAPI void U_EXPORT2 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { - /* ubidi_countRuns() checks all of its and our arguments */ + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, ); + /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ ubidi_countRuns(pBiDi, pErrorCode); if(U_FAILURE(*pErrorCode)) { /* no op */ @@ -1141,10 +1121,13 @@ ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { } else { /* fill a logical-to-visual index map using the runs[] */ int32_t visualStart, visualLimit, i, j, k; - int32_t logicalStart, logicalEnd; + int32_t logicalStart, logicalLimit; Run *runs=pBiDi->runs; + if (pBiDi->length<=0) { + return; + } if (pBiDi->length>pBiDi->resultLength) { - uprv_memset(indexMap, 0xFF, pBiDi->resultLength*sizeof(int32_t)); + uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); } visualStart=0; @@ -1176,10 +1159,10 @@ ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { markFound++; } if(markFound>0) { - int32_t limit; + int32_t logicalLimit; logicalStart=GET_INDEX(runs[i].logicalStart); - limit=logicalStart+length; - for(j=logicalStart; jtext[k]; if(IS_BIDI_CONTROL_CHAR(uchar)) { controlFound++; @@ -1230,8 +1213,10 @@ ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { U_CAPI void U_EXPORT2 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { - /* ubidi_countRuns() checks all of its and our arguments */ - if(ubidi_countRuns(pBiDi, pErrorCode)<=0) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, ); + /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ + ubidi_countRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { /* no op */ } else if(indexMap==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; @@ -1240,6 +1225,9 @@ ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; + if (pBiDi->resultLength<=0) { + return; + } visualStart=0; for(; runslogicalStart; diff --git a/icu4c/source/common/ubidiwrt.c b/icu4c/source/common/ubidiwrt.c index acbc4bb47ab..34b13711d2c 100644 --- a/icu4c/source/common/ubidiwrt.c +++ b/icu4c/source/common/ubidiwrt.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2000-2006, International Business Machines +* Copyright (C) 2000-2007, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -11,7 +11,7 @@ * indentation:4 * * created on: 1999aug06 -* created by: Markus W. Scherer +* created by: Markus W. Scherer, updated by Matitiahu Allouche * * This file contains implementations for BiDi functions that use * the core algorithm and core API to write reordered text. @@ -348,8 +348,6 @@ ubidi_writeReverse(const UChar *src, int32_t srcLength, return u_terminateUChars(dest, destSize, destLength, pErrorCode); } -#define MASK_R_AL (1UL<runs[run].insertRemove; - if(markFlag<0) { /* insert count */ + if(markFlag<0) { /* BiDi controls count */ markFlag=0; }