diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index bc50c0a0f35..e69a3c8f2c9 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -43,6 +43,10 @@ /* -------------------------------------------------------------------------- */ +enum { + _STACK_BUFFER_CAPACITY=100 +}; + /* Korean Hangul and Jamo constants */ enum { JAMO_L_BASE=0x1100, /* "lead" jamo */ @@ -102,6 +106,20 @@ isJamoVTNorm32JamoV(uint32_t norm32) { return norm32<_NORM_JAMO_V_TOP; } +static const UChar * +_findPreviousStarter(const UChar *start, const UChar *src, + uint32_t ccOrQCMask, uint32_t decompQCMask, UChar minNoMaybe); + +static const UChar * +_findNextStarter(const UChar *src, const UChar *limit, + uint32_t qcMask, uint32_t decompQCMask, UChar minNoMaybe); + +static const UChar * +_composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_t &length, + const UChar *prevStarter, const UChar *src, + uint32_t qcMask, uint8_t &prevCC, + UErrorCode *pErrorCode); + /* load unorm.dat ----------------------------------------------------------- */ #define DATA_NAME "unorm" @@ -932,13 +950,18 @@ unorm_checkFCD(const UChar *src, int32_t srcLength) { } } -U_CAPI UNormalizationCheckResult U_EXPORT2 -unorm_quickCheck(const UChar *src, - int32_t srcLength, - UNormalizationMode mode, - UErrorCode *pErrorCode) { - const UChar *limit; - uint32_t norm32, ccOrQCMask, qcMask; +static UNormalizationCheckResult +_quickCheck(const UChar *src, + int32_t srcLength, + UNormalizationMode mode, + UBool allowMaybe, + UErrorCode *pErrorCode) { + UChar stackBuffer[_STACK_BUFFER_CAPACITY]; + UChar *buffer; + int32_t bufferCapacity; + + const UChar *start, *limit; + uint32_t norm32, qcNorm32, ccOrQCMask, qcMask; UChar c, c2, minNoMaybe; uint8_t cc, prevCC; UNormalizationCheckResult result; @@ -983,10 +1006,14 @@ unorm_quickCheck(const UChar *src, } /* initialize */ + buffer=stackBuffer; + bufferCapacity=_STACK_BUFFER_CAPACITY; + ccOrQCMask=_NORM_CC_MASK|qcMask; result=UNORM_YES; prevCC=0; + start=src; if(srcLength>=0) { /* string with length */ limit=src+srcLength; @@ -1004,7 +1031,7 @@ unorm_quickCheck(const UChar *src, c=*src++; if(c=minNoMaybe && ((norm32=_getNorm32(c))&ccOrQCMask)!=0) { break; } @@ -1036,18 +1063,82 @@ unorm_quickCheck(const UChar *src, /* check the combining order */ cc=(uint8_t)(norm32>>_NORM_CC_SHIFT); if(cc!=0 && cc=2) { @@ -2368,6 +2443,7 @@ _compose(UChar *dest, int32_t destCapacity, cc=(uint8_t)(norm32>>_NORM_CC_SHIFT); } else { const UChar *p; + uint32_t decompQCMask; /* * find appropriate boundaries around this character, @@ -2382,13 +2458,29 @@ _compose(UChar *dest, int32_t destCapacity, * for source text that passed the quick check but needed to * take part in the recomposition */ - p=_composePart(stackBuffer, buffer, bufferCapacity, length, - prevStarter, /* in/out, will be set to the following true starter */ - prevSrc, src, limit, - norm32, + decompQCMask=(qcMask<<2)&0xf; /* decomposition quick check mask */ + + /* + * find the last true starter in [prevStarter..src[ + * it is either the decomposition of the current character (at prevSrc), + * or prevStarter + */ + if(_isTrueStarter(norm32, ccOrQCMask, decompQCMask)) { + prevStarter=prevSrc; + } else { + /* adjust destIndex: back out what had been copied with qc "yes" */ + destIndex-=(int32_t)(prevSrc-prevStarter); + } + + /* find the next true starter in [src..limit[ - modifies src to point to the next starter */ + src=_findNextStarter(src, limit, qcMask, decompQCMask, minNoMaybe); + + /* compose [prevStarter..src[ */ + p=_composePart(stackBuffer, buffer, bufferCapacity, + length, /* output */ + prevStarter, src, qcMask, prevCC, /* output */ - destIndex, /* will be adjusted */ pErrorCode); if(p==NULL) { @@ -2408,7 +2500,9 @@ _compose(UChar *dest, int32_t destCapacity, destIndex+=length; } - src=prevStarter; + /* set the next starter */ + prevStarter=src; + continue; } }