mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-13197 catch one more edge case where a mapping has no compose boundary after it
X-SVN-Rev: 40156
This commit is contained in:
parent
19d53e7641
commit
6c1e41e0f2
7 changed files with 21 additions and 15 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -208,7 +208,7 @@ void Normalizer2DataBuilder::removeMapping(UChar32 c) {
|
|||
|
||||
UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(const BuilderReorderingBuffer &buffer) {
|
||||
if(buffer.isEmpty()) {
|
||||
return TRUE; // maps-to-empty-string is no boundary of any kind
|
||||
return TRUE; // Maps-to-empty-string is no boundary of any kind.
|
||||
}
|
||||
int32_t lastStarterIndex=buffer.lastStarterIndex();
|
||||
if(lastStarterIndex<0) {
|
||||
|
@ -223,31 +223,37 @@ UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(const BuilderReorderingBuff
|
|||
return lastStarterIndex==buffer.length()-1;
|
||||
}
|
||||
// Note: There can be no Hangul syllable in the fully decomposed mapping.
|
||||
const Norm *starterNorm=&norms.getNormRef(starter);
|
||||
if(starterNorm->compositions==NULL) {
|
||||
return FALSE; // the last starter does not combine forward
|
||||
const Norm *starterNorm=norms.getNorm(starter);
|
||||
if(starterNorm==nullptr || starterNorm->compositions==nullptr) {
|
||||
return FALSE; // The last starter does not combine forward.
|
||||
}
|
||||
// Compose as far as possible, and see if further compositions are possible.
|
||||
uint8_t prevCC=0;
|
||||
for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndex<buffer.length(); ++combMarkIndex) {
|
||||
uint8_t cc=buffer.ccAt(combMarkIndex); // !=0 because after last starter
|
||||
if(norms.combinesWithCCBetween(*starterNorm, prevCC, cc)) {
|
||||
// The starter combines with a mark that reorders before the current one.
|
||||
return TRUE;
|
||||
}
|
||||
if(prevCC<cc && (starter=starterNorm->combine(buffer.charAt(combMarkIndex)))>=0) {
|
||||
starterNorm=&norms.getNormRef(starter);
|
||||
if(starterNorm->compositions==NULL) {
|
||||
return FALSE; // the composite does not combine further
|
||||
// The starter combines with this mark into a composite replacement starter.
|
||||
starterNorm=norms.getNorm(starter);
|
||||
if(starterNorm==nullptr || starterNorm->compositions==nullptr) {
|
||||
return FALSE; // The composite does not combine further.
|
||||
}
|
||||
// Keep prevCC because we "removed" the combining mark.
|
||||
} else {
|
||||
prevCC=cc;
|
||||
}
|
||||
}
|
||||
// TRUE if the final, forward-combining starter is at the end.
|
||||
return prevCC==0;
|
||||
// TODO?! prevCC==0 || norms.combinesWithCCBetween(*starterNorm, prevCC, int32_t! 0x100)
|
||||
// TODO?! actually, should check if it combines with any cc not seen here
|
||||
if(prevCC==0) {
|
||||
return TRUE; // forward-combining starter at the very end
|
||||
}
|
||||
if(norms.combinesWithCCBetween(*starterNorm, prevCC, 256)) {
|
||||
// The starter combines with another mark.
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
void Normalizer2DataBuilder::postProcess(Norm &norm) {
|
||||
|
|
|
@ -77,8 +77,6 @@ private:
|
|||
* (especially for a "YesNo" which has a round-trip mapping).
|
||||
* This flag is used in Normalizer2Impl::hasCompBoundaryAfter().
|
||||
*
|
||||
* Modifies the buffer (partially composes it).
|
||||
*
|
||||
* A starter character with a mapping does not have a composition boundary after it
|
||||
* if the character itself combines-forward (which is tested by the caller of this function),
|
||||
* or it is deleted (mapped to the empty string),
|
||||
|
|
|
@ -131,7 +131,7 @@ void Norms::reorder(UnicodeString &mapping, BuilderReorderingBuffer &buffer) con
|
|||
}
|
||||
}
|
||||
|
||||
UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const {
|
||||
UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t highCC) const {
|
||||
if((highCC-lowCC)>=2) {
|
||||
int32_t length;
|
||||
const CompositionPair *pairs=norm.getCompositionPairs(length);
|
||||
|
|
|
@ -156,7 +156,9 @@ public:
|
|||
uint8_t getCC(UChar32 c) const { return getNormRef(c).cc; }
|
||||
|
||||
void reorder(UnicodeString &mapping, BuilderReorderingBuffer &buffer) const;
|
||||
UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const;
|
||||
|
||||
// int32_t highCC not uint8_t so that we can pass in 256 as the upper limit.
|
||||
UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t highCC) const;
|
||||
|
||||
class Enumerator {
|
||||
public:
|
||||
|
|
Loading…
Add table
Reference in a new issue