ICU-22115 Merge passthrough and canonical combining class data into the NFD trie for ICU4X

This commit is contained in:
Henri Sivonen 2022-08-09 14:36:16 +03:00 committed by Elango
parent ed2b3a335b
commit 01c194a366

View file

@ -380,7 +380,7 @@ void writeDecompositionTables(const char* basename, const uint16_t* ptr16, size_
fclose(f);
}
void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t baseSize32, uint32_t supplementSize16, USet* uset, USet* reference, const std::vector<PendingDescriptor>& pendingTrieInsertions) {
void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t baseSize32, uint32_t supplementSize16, USet* uset, USet* reference, const std::vector<PendingDescriptor>& pendingTrieInsertions, char16_t passthroughCap) {
IcuToolErrorCode status("icuexportdata: writeDecompositionData");
FILE* f = prepareOutputFile(basename);
@ -392,7 +392,7 @@ void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t
for (int32_t i = pendingTrieInsertions.size() - 1; i >= 0; --i) {
const PendingDescriptor& pending = pendingTrieInsertions[i];
uint32_t additional = 0;
if (!(pending.descriptor & 0xFFFF0000)) {
if (!(pending.descriptor & 0xFFFE0000)) {
uint32_t offset = pending.descriptor & 0xFFF;
if (!pending.supplementary) {
if (offset >= baseSize16) {
@ -419,7 +419,15 @@ void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t
handleError(status, basename);
}
}
umutablecptrie_set(builder.getAlias(), pending.scalar, pending.descriptor + additional, status);
// Swapping the two 16-bit halves, relative to the layout the rest of
// this code builds, puts the special marker values close to zero, so
// that an important marker value becomes 1 and a "1 or 0" comparison
// is efficient. Going through all the code to swap things at each
// construction site would be too error-prone, so the swap is done
// here, in one place.
uint32_t oldTrieValue = pending.descriptor + additional;
uint32_t swappedTrieValue = (oldTrieValue >> 16) | (oldTrieValue << 16);
umutablecptrie_set(builder.getAlias(), pending.scalar, swappedTrieValue, status);
}
LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
builder.getAlias(),
@ -460,9 +468,7 @@ void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t
USet* iotaCheck = uset_cloneAsThawed(reference);
uset_removeAll(iotaCheck, uset);
if (uset_equals(iotaCheck, iotaSubscript)) {
flags |= (1 << 1);
} else if (!uset_isEmpty(iotaCheck)) {
if (!(uset_equals(iotaCheck, iotaSubscript)) && !uset_isEmpty(iotaCheck)) {
// The result was neither empty nor contained exactly
// the iota subscript. The ICU4X normalizer doesn't
// know how to deal with this case.
@ -475,6 +481,7 @@ void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t
uset_close(halfWidthVoicing);
fprintf(f, "flags = 0x%X\n", flags);
fprintf(f, "cap = 0x%X\n", passthroughCap);
}
fprintf(f, "[trie]\n");
usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
@ -482,110 +489,64 @@ void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t
handleError(status, basename);
}
void writeNopCompositionPassThrough(const char* basename) {
IcuToolErrorCode status("icuexportdata: writeNopCompositionPassThrough");
FILE* f = prepareOutputFile(basename);
// Special marker for the NFKD form of U+FDFA
const int32_t FDFA_MARKER = 3;
fprintf(f, "first = 0x0\n");
// Special marker for characters whose decomposition starts with a non-starter
// and the decomposition isn't the character itself.
const int32_t SPECIAL_NON_STARTER_DECOMPOSITION_MARKER = 2;
LocalUMutableCPTriePointer builder(umutablecptrie_open(0xFF, 0xFF, status));
// Special marker for starters that decompose to themselves but that may
// combine backwards under canonical composition
const int32_t BACKWARD_COMBINING_STARTER_MARKER = 1;
LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
builder.getAlias(),
trieType,
UCPTRIE_VALUE_BITS_8,
status));
handleError(status, basename);
/// Marker that a complex decomposition isn't round-trippable
/// under re-composition.
const uint32_t NON_ROUND_TRIP_MARKER = 1;
fprintf(f, "[trie]\n");
usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
fclose(f);
handleError(status, basename);
}
void writePotentialCompositionPassThrough(const char* basename, const Normalizer2* norm, const USet* decompositionStartsWithNonStarter, const USet* decompositionStartsWithBackwardCombiningStarter, USet* potentialPassthroughAndNotBackwardCombining) {
IcuToolErrorCode status("icuexportdata: writePotentialCompositionPassThrough");
FILE* f = prepareOutputFile(basename);
const Normalizer2* nfc = nullptr;
if (!norm) {
// UTS 46 case
norm = Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, status);
nfc = Normalizer2::getNFCInstance(status);
UBool permissibleBmpPair(UBool knownToRoundTrip, UChar32 c, UChar32 second) {
if (knownToRoundTrip) {
return TRUE;
}
for (UChar32 c = 0; c <= 0x10FFFF; ++c) {
if (c >= 0xD800 && c < 0xE000) {
// Surrogate
continue;
}
if (uset_contains(decompositionStartsWithNonStarter, c) || uset_contains(decompositionStartsWithBackwardCombiningStarter, c)) {
continue;
}
UnicodeString src;
UnicodeString dst;
src.append(c);
norm->normalize(src, dst, status);
if (nfc && (dst.isEmpty() || (dst == u"\uFFFD" && c != 0xFFFD))) {
// UTS 46 ignored and disallowed fall back to NFC for data
// overlap.
dst.truncate(0);
nfc->normalize(src, dst, status);
}
if (src == dst) {
uset_add(potentialPassthroughAndNotBackwardCombining, c);
}
// Nuktas, Hebrew presentation forms and polytonic Greek with oxia
// are special-cased in ICU4X.
if (c >= 0xFB1D && c <= 0xFB4E) {
// Hebrew presentation forms
return TRUE;
}
// There are fancier ways to do this, but let's keep things
// very simple: Deliberately not working this into the above
// loop and not extracting this from the inversion list
// directly.
for (UChar32 c = 0; c <= 0x10FFFF; ++c) {
if (!uset_contains(potentialPassthroughAndNotBackwardCombining, c)) {
fprintf(f, "first = 0x%X\n", c);
break;
}
if (c >= 0x1F71 && c <= 0x1FFB) {
// Polytonic Greek with oxia
return TRUE;
}
// 8 bits per trie value. Default is 0, which means pass-through.
// That is, the lookup key isn't actually a UChar32 but a UChar32
// divided by 8, but that's still in range, so things work despite
// the data structure not being meant to be used like this.
LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
for (int32_t i = 0; i < ((0x10FFFF + 1)/8); ++i) {
uint32_t trieVal = 0;
for (int32_t j = 0; j < 8; ++j) {
UChar32 c = i*8 + j;
if (!uset_contains(potentialPassthroughAndNotBackwardCombining, c)) {
trieVal |= (1 << j);
}
}
if (trieVal) {
umutablecptrie_set(builder.getAlias(), UChar32(i), trieVal, status);
}
if ((second & 0x7F) == 0x3C && second >= 0x0900 && second <= 0x0BFF) {
// Nukta
return TRUE;
}
LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
builder.getAlias(),
trieType,
UCPTRIE_VALUE_BITS_8,
status));
handleError(status, basename);
fprintf(f, "[trie]\n");
usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
fclose(f);
handleError(status, basename);
// To avoid more branchiness, 4 characters that decompose to
// a BMP starter followed by a BMP non-starter are excluded
// from being encoded directly into the trie value and are
// handled as complex decompositions instead. These are:
// U+0F76 TIBETAN VOWEL SIGN VOCALIC R
// U+0F78 TIBETAN VOWEL SIGN VOCALIC L
// U+212B ANGSTROM SIGN
// U+2ADC FORKING
return FALSE;
}
// Computes data for canonical decompositions
void computeDecompositions(const char* basename, const USet* backwardCombiningStarters, std::vector<uint16_t>& storage16, std::vector<uint32_t>& storage32, USet* decompositionStartsWithNonStarter, USet* decompositionStartsWithBackwardCombiningStarter, std::vector<PendingDescriptor>& pendingTrieInsertions) {
void computeDecompositions(const char* basename,
const USet* backwardCombiningStarters,
std::vector<uint16_t>& storage16,
std::vector<uint32_t>& storage32,
USet* decompositionStartsWithNonStarter,
USet* decompositionStartsWithBackwardCombiningStarter,
std::vector<PendingDescriptor>& pendingTrieInsertions,
UChar32& decompositionPassthroughBound,
UChar32& compositionPassthroughBound) {
IcuToolErrorCode status("icuexportdata: computeDecompositions");
const Normalizer2* mainNormalizer;
const Normalizer2* nfdNormalizer = Normalizer2::getNFDInstance(status);
const Normalizer2* nfcNormalizer = Normalizer2::getNFCInstance(status);
FILE* f = NULL;
std::vector<uint32_t> nonRecursive32;
LocalUMutableCPTriePointer nonRecursiveBuilder(umutablecptrie_open(0, 0, status));
@ -637,6 +598,10 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
}
UnicodeString src;
UnicodeString dst;
// True if we're building a non-NFD form, or we're building NFD and
// `c` round-trips (the NFC of its NFD equals `c` itself).
// False if we're building NFD and `c` does not round-trip.
UBool nonNfdOrRoundTrips = TRUE;
src.append(c);
if (mainNormalizer != nfdNormalizer) {
UnicodeString inter;
@ -644,6 +609,9 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
nfdNormalizer->normalize(inter, dst, status);
} else {
nfdNormalizer->normalize(src, dst, status);
UnicodeString nfc;
nfcNormalizer->normalize(dst, nfc, status);
nonNfdOrRoundTrips = (src == nfc);
}
int32_t len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
@ -670,19 +638,36 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
bool startsWithNonStarter = u_getCombiningClass(utf32[0]);
if (startsWithNonStarter) {
uint8_t firstCombiningClass = u_getCombiningClass(utf32[0]);
bool specialNonStarterDecomposition = false;
bool startsWithBackwardCombiningStarter = false;
if (firstCombiningClass) {
decompositionPassthroughBound = c;
compositionPassthroughBound = c;
uset_add(decompositionStartsWithNonStarter, c);
if (src != dst && !(c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344 || c == 0x0F73 || c == 0x0F75 || c == 0x0F81 || c == 0xFF9E || c == 0xFF9F)) {
// A character whose decomposition starts with a non-starter and isn't the same as the character itself and isn't already hard-coded into ICU4X.
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
if (src != dst) {
if (c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344 || c == 0x0F73 || c == 0x0F75 || c == 0x0F81 || c == 0xFF9E || c == 0xFF9F) {
specialNonStarterDecomposition = true;
} else {
// A character whose decomposition starts with a non-starter and isn't the same as the character itself and isn't already hard-coded into ICU4X.
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
}
} else if (uset_contains(backwardCombiningStarters, c)) {
} else if (uset_contains(backwardCombiningStarters, utf32[0])) {
compositionPassthroughBound = c;
startsWithBackwardCombiningStarter = true;
uset_add(decompositionStartsWithBackwardCombiningStarter, c);
}
if (c != 2 && len == 1 && utf32[0] == 2) {
// 2 is reserved as a marker for decomposition starts with non-starter.
if (c != BACKWARD_COMBINING_STARTER_MARKER && len == 1 && utf32[0] == BACKWARD_COMBINING_STARTER_MARKER) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
if (c != SPECIAL_NON_STARTER_DECOMPOSITION_MARKER && len == 1 && utf32[0] == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
if (c != FDFA_MARKER && len == 1 && utf32[0] == FDFA_MARKER) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
@ -692,14 +677,24 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
if (dst == nfd) {
continue;
}
} else if (startsWithNonStarter) {
// Insert a special marker
decompositionPassthroughBound = c;
compositionPassthroughBound = c;
} else if (firstCombiningClass) {
len = 1;
utf32[0] = 2; // magic value (1 is reserved for U+FDFA)
if (specialNonStarterDecomposition) {
utf32[0] = SPECIAL_NON_STARTER_DECOMPOSITION_MARKER; // magic value
} else {
// Use the surrogate range to store the canonical combining class
utf32[0] = 0xD800 | UChar32(firstCombiningClass);
}
} else {
if (src == dst) {
if (startsWithBackwardCombiningStarter) {
pendingTrieInsertions.push_back({c, BACKWARD_COMBINING_STARTER_MARKER << 16, FALSE});
}
continue;
}
decompositionPassthroughBound = c;
// ICU4X hard-codes ANGSTROM SIGN
if (c != 0x212B) {
UnicodeString raw;
@ -725,14 +720,14 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
uint32_t shifted = uint32_t(rawUtf32[0]) << 16;
umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, shifted, status);
umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, uint32_t(rawUtf32[0]), status);
} else if (rawUtf32[0] <= 0xFFFF && rawUtf32[1] <= 0xFFFF) {
if (!rawUtf32[0] || !rawUtf32[1]) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
uint32_t bmpPair = uint32_t(rawUtf32[0]) << 16 | uint32_t(rawUtf32[1]);
// Swapped for consistency with the primary trie
uint32_t bmpPair = uint32_t(rawUtf32[1]) << 16 | uint32_t(rawUtf32[0]);
umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, bmpPair, status);
} else {
// Let's add 1 to index to make it always non-zero to distinguish
@ -744,33 +739,53 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, index, status);
umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, index << 16, status);
}
}
}
}
if (!nonNfdOrRoundTrips) {
compositionPassthroughBound = c;
}
if (len == 1 && utf32[0] <= 0xFFFF) {
if (utf32[0] == 1) {
// 1 is reserved as a marker for the expansion of U+FDFA.
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
// U+0345 is hard-coded in ICU4X
if (!(c == 0x0345 && utf32[0] == 0x03B9)) {
pendingTrieInsertions.push_back({c, uint32_t(utf32[0]) << 16, FALSE});
}
} else if (len == 2 && utf32[0] <= 0xFFFF && utf32[1] <= 0xFFFF && !u_getCombiningClass(utf32[0]) && u_getCombiningClass(utf32[1])) {
for (int32_t i = 0; i < len; ++i) {
if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
// Assert that iota subscript and half-width voicing marks never occur in these
// expansions in the normalization forms where they are special.
printf("HER c: %X\n", c);
if (startsWithBackwardCombiningStarter) {
if (mainNormalizer == nfdNormalizer) {
// Not supposed to happen in NFD
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
} else if (!((utf32[0] >= 0x1161 && utf32[0] <= 0x1175) || (utf32[0] >= 0x11A8 && utf32[0] <= 0x11C2))) {
// Backward-combining starters other than conjoining jamo
// vowels and trails are unsupported for non-NFD forms.
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
}
pendingTrieInsertions.push_back({c, uint32_t(utf32[0]) << 16, FALSE});
} else if (len == 2 &&
utf32[0] <= 0xFFFF &&
utf32[1] <= 0xFFFF &&
!u_getCombiningClass(utf32[0]) &&
u_getCombiningClass(utf32[1]) &&
permissibleBmpPair(nonNfdOrRoundTrips, c, utf32[1])) {
for (int32_t i = 0; i < len; ++i) {
if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
// Assert that iota subscript and half-width voicing marks never occur in these
// expansions in the normalization forms where they are special.
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
}
if (startsWithBackwardCombiningStarter) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
pendingTrieInsertions.push_back({c, (uint32_t(utf32[0]) << 16) | uint32_t(utf32[1]), FALSE});
} else {
if (startsWithBackwardCombiningStarter) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
UBool supplementary = FALSE;
UBool nonInitialStarter = FALSE;
for (int32_t i = 0; i < len; ++i) {
@ -797,7 +812,7 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
if (len == 18 && c == 0xFDFA) {
// Special marker for the one character whose decomposition
// is too long.
pendingTrieInsertions.push_back({c, 1 << 16, supplementary});
pendingTrieInsertions.push_back({c, FDFA_MARKER << 16, supplementary});
continue;
} else {
status.set(U_INTERNAL_PROGRAM_ERROR);
@ -900,7 +915,12 @@ void computeDecompositions(const char* basename, const USet* backwardCombiningSt
}
}
}
pendingTrieInsertions.push_back({c, descriptor, supplementary});
uint32_t nonRoundTripMarker = 0;
if (!nonNfdOrRoundTrips) {
nonRoundTripMarker = (NON_ROUND_TRIP_MARKER << 16);
}
pendingTrieInsertions.push_back({c, descriptor | nonRoundTripMarker, supplementary});
}
}
if (storage16.size() + storage32.size() > 0xFFF) {
@ -1187,7 +1207,22 @@ int exportNorm() {
USet* nfdDecompositionStartsWithNonStarter = uset_openEmpty();
USet* nfdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
std::vector<PendingDescriptor> nfdPendingTrieInsertions;
computeDecompositions("nfd", backwardCombiningStarters, storage16, storage32, nfdDecompositionStartsWithNonStarter, nfdDecompositionStartsWithBackwardCombiningStarter, nfdPendingTrieInsertions);
UChar32 nfdBound = 0x10FFFF;
UChar32 nfcBound = 0x10FFFF;
computeDecompositions("nfd",
backwardCombiningStarters,
storage16,
storage32,
nfdDecompositionStartsWithNonStarter,
nfdDecompositionStartsWithBackwardCombiningStarter,
nfdPendingTrieInsertions,
nfdBound,
nfcBound);
if (!(nfdBound == 0xC0 && nfcBound == 0x300)) {
// Unexpected bounds for NFD/NFC.
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
uint32_t baseSize16 = storage16.size();
uint32_t baseSize32 = storage32.size();
@ -1195,47 +1230,73 @@ int exportNorm() {
USet* nfkdDecompositionStartsWithNonStarter = uset_openEmpty();
USet* nfkdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
std::vector<PendingDescriptor> nfkdPendingTrieInsertions;
computeDecompositions("nfkd", backwardCombiningStarters, storage16, storage32, nfkdDecompositionStartsWithNonStarter, nfkdDecompositionStartsWithBackwardCombiningStarter, nfkdPendingTrieInsertions);
UChar32 nfkdBound = 0x10FFFF;
UChar32 nfkcBound = 0x10FFFF;
computeDecompositions("nfkd",
backwardCombiningStarters,
storage16,
storage32,
nfkdDecompositionStartsWithNonStarter,
nfkdDecompositionStartsWithBackwardCombiningStarter,
nfkdPendingTrieInsertions,
nfkdBound,
nfkcBound);
if (!(nfkdBound <= 0xC0 && nfkcBound <= 0x300)) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
if (nfkcBound > 0xC0) {
if (nfkdBound != 0xC0) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
} else {
if (nfkdBound != nfkcBound) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
}
USet* uts46DecompositionStartsWithNonStarter = uset_openEmpty();
USet* uts46DecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
std::vector<PendingDescriptor> uts46PendingTrieInsertions;
computeDecompositions("uts46d", backwardCombiningStarters, storage16, storage32, uts46DecompositionStartsWithNonStarter, uts46DecompositionStartsWithBackwardCombiningStarter, uts46PendingTrieInsertions);
UChar32 uts46dBound = 0x10FFFF;
UChar32 uts46Bound = 0x10FFFF;
computeDecompositions("uts46d",
backwardCombiningStarters,
storage16,
storage32,
uts46DecompositionStartsWithNonStarter,
uts46DecompositionStartsWithBackwardCombiningStarter,
uts46PendingTrieInsertions,
uts46dBound,
uts46Bound);
if (!(uts46dBound <= 0xC0 && uts46Bound <= 0x300)) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
if (uts46Bound > 0xC0) {
if (uts46dBound != 0xC0) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
} else {
if (uts46dBound != uts46Bound) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
}
uint32_t supplementSize16 = storage16.size() - baseSize16;
uint32_t supplementSize32 = storage32.size() - baseSize32;
writeDecompositionData("nfd", baseSize16, baseSize32, supplementSize16, nfdDecompositionStartsWithNonStarter, nullptr, nfdPendingTrieInsertions);
writeDecompositionData("nfkd", baseSize16, baseSize32, supplementSize16, nfkdDecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, nfkdPendingTrieInsertions);
writeDecompositionData("uts46d", baseSize16, baseSize32, supplementSize16, uts46DecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, uts46PendingTrieInsertions);
writeDecompositionData("nfd", baseSize16, baseSize32, supplementSize16, nfdDecompositionStartsWithNonStarter, nullptr, nfdPendingTrieInsertions, char16_t(nfcBound));
writeDecompositionData("nfkd", baseSize16, baseSize32, supplementSize16, nfkdDecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, nfkdPendingTrieInsertions, char16_t(nfkcBound));
writeDecompositionData("uts46d", baseSize16, baseSize32, supplementSize16, uts46DecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, uts46PendingTrieInsertions, char16_t(uts46Bound));
writeDecompositionTables("nfdex", storage16.data(), baseSize16, storage32.data(), baseSize32);
writeDecompositionTables("nfkdex", storage16.data() + baseSize16, supplementSize16, storage32.data() + baseSize32, supplementSize32);
USet* nfcPotentialPassthroughAndNotBackwardCombining = uset_openEmpty();
const Normalizer2* nfc = Normalizer2::getNFCInstance(status);
writePotentialCompositionPassThrough("nfc", nfc, nfdDecompositionStartsWithNonStarter, nfdDecompositionStartsWithBackwardCombiningStarter, nfcPotentialPassthroughAndNotBackwardCombining);
USet* nfkcPotentialPassthroughAndNotBackwardCombining = uset_openEmpty();
const Normalizer2* nfkc = Normalizer2::getNFKCInstance(status);
writePotentialCompositionPassThrough("nfkc", nfkc, nfkdDecompositionStartsWithNonStarter, nfkdDecompositionStartsWithBackwardCombiningStarter, nfkcPotentialPassthroughAndNotBackwardCombining);
USet* uts46PotentialPassthroughAndNotBackwardCombining = uset_openEmpty();
writePotentialCompositionPassThrough("uts46", nullptr, uts46DecompositionStartsWithNonStarter, uts46DecompositionStartsWithBackwardCombiningStarter, uts46PotentialPassthroughAndNotBackwardCombining);
writeNopCompositionPassThrough("passthroughnop");
// Check that NFKC set has no characters that NFC doesn't also have.
uset_removeAll(nfkcPotentialPassthroughAndNotBackwardCombining, nfcPotentialPassthroughAndNotBackwardCombining);
if (!uset_isEmpty(nfkcPotentialPassthroughAndNotBackwardCombining)) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, "exportNorm");
}
uset_close(nfcPotentialPassthroughAndNotBackwardCombining);
uset_close(nfkcPotentialPassthroughAndNotBackwardCombining);
uset_close(uts46PotentialPassthroughAndNotBackwardCombining);
uset_close(nfdDecompositionStartsWithNonStarter);
uset_close(nfkdDecompositionStartsWithNonStarter);
uset_close(uts46DecompositionStartsWithNonStarter);