From 6c1e41e0f20efe32cf19f4dba0d93c610627db9d Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 7 Jun 2017 07:13:34 +0000 Subject: [PATCH] ICU-13197 catch one more edge case where a mapping has no compose boundary after it X-SVN-Rev: 40156 --- icu4c/source/data/in/nfkc.nrm | Bin 54160 -> 54160 bytes icu4c/source/data/in/nfkc_cf.nrm | Bin 51864 -> 51864 bytes icu4c/source/data/in/uts46.nrm | Bin 61060 -> 61060 bytes icu4c/source/tools/gennorm2/n2builder.cpp | 28 +++++++++++++--------- icu4c/source/tools/gennorm2/n2builder.h | 2 -- icu4c/source/tools/gennorm2/norms.cpp | 2 +- icu4c/source/tools/gennorm2/norms.h | 4 +++- 7 files changed, 21 insertions(+), 15 deletions(-) diff --git a/icu4c/source/data/in/nfkc.nrm b/icu4c/source/data/in/nfkc.nrm index 8f6802b9c81066a91d1f72f8e342b4eba2f7dfbd..c4b7e7228448588f615cff3e7f19b6f7e973b575 100644 GIT binary patch delta 16 YcmbQRoO!}><_#D2F)D4oxNpA*071$J>i_@% delta 16 YcmbQRoO!}><_#D2F*0qwxNpA*06_`|%K!iX diff --git a/icu4c/source/data/in/nfkc_cf.nrm b/icu4c/source/data/in/nfkc_cf.nrm index 29abf1330f44d7c266b8b82996e75e1b5a061d88..9d91ad32ab40ca5b42b683da21a458e8064b6eb2 100644 GIT binary patch delta 16 YcmbO+m3hWg<_&o}8I?BY@6;3o06J_2Z~y=R delta 16 YcmbO+m3hWg<_&o}8JRZc@6;3o06DA%Pyhe` diff --git a/icu4c/source/data/in/uts46.nrm b/icu4c/source/data/in/uts46.nrm index e3f0dca90c93d45e8a78b84ceccf0858a6b3f7aa..7270a67b41d09430c64d307e1a10207eb484a4f8 100644 GIT binary patch delta 16 XcmZp<%iMC8c|+A*My1WwcR2+CL8u29 delta 16 XcmZp<%iMC8c|+A*MyAcxcR2+CKcompositions==NULL) { - return FALSE; // the last starter does not combine forward + const Norm *starterNorm=norms.getNorm(starter); + if(starterNorm==nullptr || starterNorm->compositions==nullptr) { + return FALSE; // The last starter does not combine forward. } // Compose as far as possible, and see if further compositions are possible. uint8_t prevCC=0; for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndexcombine(buffer.charAt(combMarkIndex)))>=0) { - starterNorm=&norms.getNormRef(starter); - if(starterNorm->compositions==NULL) { - return FALSE; // the composite does not combine further + // The starter combines with this mark into a composite replacement starter. + starterNorm=norms.getNorm(starter); + if(starterNorm==nullptr || starterNorm->compositions==nullptr) { + return FALSE; // The composite does not combine further. } // Keep prevCC because we "removed" the combining mark. } else { prevCC=cc; } } - // TRUE if the final, forward-combining starter is at the end. - return prevCC==0; - // TODO?! prevCC==0 || norms.combinesWithCCBetween(*starterNorm, prevCC, int32_t! 0x100) - // TODO?! actually, should check if it combines with any cc not seen here + if(prevCC==0) { + return TRUE; // forward-combining starter at the very end + } + if(norms.combinesWithCCBetween(*starterNorm, prevCC, 256)) { + // The starter combines with another mark. + return TRUE; + } + return FALSE; } void Normalizer2DataBuilder::postProcess(Norm &norm) { diff --git a/icu4c/source/tools/gennorm2/n2builder.h b/icu4c/source/tools/gennorm2/n2builder.h index 130a0bd6e6e..1fca0b70571 100644 --- a/icu4c/source/tools/gennorm2/n2builder.h +++ b/icu4c/source/tools/gennorm2/n2builder.h @@ -77,8 +77,6 @@ private: * (especially for a "YesNo" which has a round-trip mapping). * This flag is used in Normalizer2Impl::hasCompBoundaryAfter(). * - * Modifies the buffer (partially composes it). - * * A starter character with a mapping does not have a composition boundary after it * if the character itself combines-forward (which is tested by the caller of this function), * or it is deleted (mapped to the empty string), diff --git a/icu4c/source/tools/gennorm2/norms.cpp b/icu4c/source/tools/gennorm2/norms.cpp index 7ea93778d89..8dba9c81558 100644 --- a/icu4c/source/tools/gennorm2/norms.cpp +++ b/icu4c/source/tools/gennorm2/norms.cpp @@ -131,7 +131,7 @@ void Norms::reorder(UnicodeString &mapping, BuilderReorderingBuffer &buffer) con } } -UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const { +UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t highCC) const { if((highCC-lowCC)>=2) { int32_t length; const CompositionPair *pairs=norm.getCompositionPairs(length); diff --git a/icu4c/source/tools/gennorm2/norms.h b/icu4c/source/tools/gennorm2/norms.h index a7ee4974b0d..052298ed7ed 100644 --- a/icu4c/source/tools/gennorm2/norms.h +++ b/icu4c/source/tools/gennorm2/norms.h @@ -156,7 +156,9 @@ public: uint8_t getCC(UChar32 c) const { return getNormRef(c).cc; } void reorder(UnicodeString &mapping, BuilderReorderingBuffer &buffer) const; - UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const; + + // int32_t highCC not uint8_t so that we can pass in 256 as the upper limit. + UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t highCC) const; class Enumerator { public: