diff --git a/icu4c/source/data/in/nfkc.nrm b/icu4c/source/data/in/nfkc.nrm index 8f6802b9c81..c4b7e722844 100644 Binary files a/icu4c/source/data/in/nfkc.nrm and b/icu4c/source/data/in/nfkc.nrm differ diff --git a/icu4c/source/data/in/nfkc_cf.nrm b/icu4c/source/data/in/nfkc_cf.nrm index 29abf1330f4..9d91ad32ab4 100644 Binary files a/icu4c/source/data/in/nfkc_cf.nrm and b/icu4c/source/data/in/nfkc_cf.nrm differ diff --git a/icu4c/source/data/in/uts46.nrm b/icu4c/source/data/in/uts46.nrm index e3f0dca90c9..7270a67b41d 100644 Binary files a/icu4c/source/data/in/uts46.nrm and b/icu4c/source/data/in/uts46.nrm differ diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index 587f0dc9f46..374e91c4884 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -208,7 +208,7 @@ void Normalizer2DataBuilder::removeMapping(UChar32 c) { UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(const BuilderReorderingBuffer &buffer) { if(buffer.isEmpty()) { - return TRUE; // maps-to-empty-string is no boundary of any kind + return TRUE; // Maps-to-empty-string is no boundary of any kind. } int32_t lastStarterIndex=buffer.lastStarterIndex(); if(lastStarterIndex<0) { @@ -223,31 +223,37 @@ UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(const BuilderReorderingBuff return lastStarterIndex==buffer.length()-1; } // Note: There can be no Hangul syllable in the fully decomposed mapping. - const Norm *starterNorm=&norms.getNormRef(starter); - if(starterNorm->compositions==NULL) { - return FALSE; // the last starter does not combine forward + const Norm *starterNorm=norms.getNorm(starter); + if(starterNorm==nullptr || starterNorm->compositions==nullptr) { + return FALSE; // The last starter does not combine forward. } // Compose as far as possible, and see if further compositions are possible. uint8_t prevCC=0; for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndexcombine(buffer.charAt(combMarkIndex)))>=0) { - starterNorm=&norms.getNormRef(starter); - if(starterNorm->compositions==NULL) { - return FALSE; // the composite does not combine further + // The starter combines with this mark into a composite replacement starter. + starterNorm=norms.getNorm(starter); + if(starterNorm==nullptr || starterNorm->compositions==nullptr) { + return FALSE; // The composite does not combine further. } // Keep prevCC because we "removed" the combining mark. } else { prevCC=cc; } } - // TRUE if the final, forward-combining starter is at the end. - return prevCC==0; - // TODO?! prevCC==0 || norms.combinesWithCCBetween(*starterNorm, prevCC, int32_t! 0x100) - // TODO?! actually, should check if it combines with any cc not seen here + if(prevCC==0) { + return TRUE; // forward-combining starter at the very end + } + if(norms.combinesWithCCBetween(*starterNorm, prevCC, 256)) { + // The starter combines with another mark. + return TRUE; + } + return FALSE; } void Normalizer2DataBuilder::postProcess(Norm &norm) { diff --git a/icu4c/source/tools/gennorm2/n2builder.h b/icu4c/source/tools/gennorm2/n2builder.h index 130a0bd6e6e..1fca0b70571 100644 --- a/icu4c/source/tools/gennorm2/n2builder.h +++ b/icu4c/source/tools/gennorm2/n2builder.h @@ -77,8 +77,6 @@ private: * (especially for a "YesNo" which has a round-trip mapping). * This flag is used in Normalizer2Impl::hasCompBoundaryAfter(). * - * Modifies the buffer (partially composes it). - * * A starter character with a mapping does not have a composition boundary after it * if the character itself combines-forward (which is tested by the caller of this function), * or it is deleted (mapped to the empty string), diff --git a/icu4c/source/tools/gennorm2/norms.cpp b/icu4c/source/tools/gennorm2/norms.cpp index 7ea93778d89..8dba9c81558 100644 --- a/icu4c/source/tools/gennorm2/norms.cpp +++ b/icu4c/source/tools/gennorm2/norms.cpp @@ -131,7 +131,7 @@ void Norms::reorder(UnicodeString &mapping, BuilderReorderingBuffer &buffer) con } } -UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const { +UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t highCC) const { if((highCC-lowCC)>=2) { int32_t length; const CompositionPair *pairs=norm.getCompositionPairs(length); diff --git a/icu4c/source/tools/gennorm2/norms.h b/icu4c/source/tools/gennorm2/norms.h index a7ee4974b0d..052298ed7ed 100644 --- a/icu4c/source/tools/gennorm2/norms.h +++ b/icu4c/source/tools/gennorm2/norms.h @@ -156,7 +156,9 @@ public: uint8_t getCC(UChar32 c) const { return getNormRef(c).cc; } void reorder(UnicodeString &mapping, BuilderReorderingBuffer &buffer) const; - UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const; + + // int32_t highCC not uint8_t so that we can pass in 256 as the upper limit. + UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t highCC) const; class Enumerator { public: