mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-7273 add gennorm2 --fast option; write fully decomposed regular mappings rather than delta mappings
X-SVN-Rev: 27650
This commit is contained in:
parent
0763686c6c
commit
f555e87a2d
3 changed files with 27 additions and 8 deletions
|
@ -59,7 +59,8 @@ enum {
|
|||
COPYRIGHT,
|
||||
SOURCEDIR,
|
||||
OUTPUT_FILENAME,
|
||||
UNICODE_VERSION
|
||||
UNICODE_VERSION,
|
||||
OPT_FAST
|
||||
};
|
||||
|
||||
static UOption options[]={
|
||||
|
@ -69,7 +70,8 @@ static UOption options[]={
|
|||
UOPTION_COPYRIGHT,
|
||||
UOPTION_SOURCEDIR,
|
||||
UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
|
||||
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG)
|
||||
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
|
||||
UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
|
||||
};
|
||||
|
||||
extern "C" int
|
||||
|
@ -113,6 +115,12 @@ main(int argc, char* argv[]) {
|
|||
fprintf(stderr,
|
||||
"\t-s or --sourcedir source directory, followed by the path\n"
|
||||
"\t-o or --output output filename\n");
|
||||
fprintf(stderr,
|
||||
"\t --fast optimize the .nrm file for fast normalization,\n"
|
||||
"\t which might increase its size (Writes fully decomposed\n"
|
||||
"\t regular mappings instead of delta mappings.\n"
|
||||
"\t You should measure the runtime speed to make sure that\n"
|
||||
"\t this is a good trade-off.)\n");
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
|
@ -137,6 +145,10 @@ main(int argc, char* argv[]) {
|
|||
|
||||
builder->setUnicodeVersion(options[UNICODE_VERSION].value);
|
||||
|
||||
if(options[OPT_FAST].doesOccur) {
|
||||
builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
|
||||
}
|
||||
|
||||
// prepare the filename beginning with the source dir
|
||||
U_STD_NSQ string filename(options[SOURCEDIR].value);
|
||||
int32_t pathLength=filename.length();
|
||||
|
|
|
@ -150,7 +150,7 @@ enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value
|
|||
U_CDECL_END
|
||||
|
||||
Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
|
||||
phase(0), overrideHandling(OVERRIDE_PREVIOUS) {
|
||||
phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
|
||||
memset(unicodeVersion, 0, sizeof(unicodeVersion));
|
||||
normTrie=utrie2_open(0, 0, &errorCode);
|
||||
normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
|
||||
|
@ -778,9 +778,9 @@ void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraData
|
|||
(long)c);
|
||||
exit(U_INVALID_FORMAT_ERROR);
|
||||
}
|
||||
if(p->cc==0) {
|
||||
if(p->cc==0 && optimization!=OPTIMIZE_FAST) {
|
||||
// Try a compact, algorithmic encoding.
|
||||
// Only for ccc=0.
|
||||
// Only for ccc=0, because we can't store additional information.
|
||||
if(p->mappingCP>=0) {
|
||||
int32_t delta=p->mappingCP-c;
|
||||
if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) {
|
||||
|
@ -791,9 +791,7 @@ void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraData
|
|||
if(p->offset==0) {
|
||||
int32_t oldNoNoLength=writer.noNoMappings.length();
|
||||
writeMapping(c, p, writer.noNoMappings);
|
||||
UnicodeString newMapping(FALSE,
|
||||
writer.noNoMappings.getBuffer()+oldNoNoLength,
|
||||
writer.noNoMappings.length()-oldNoNoLength);
|
||||
UnicodeString newMapping=writer.noNoMappings.tempSubString(oldNoNoLength);
|
||||
int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping);
|
||||
if(previousOffset!=0) {
|
||||
// Duplicate, remove the new units and point to the old ones.
|
||||
|
|
|
@ -56,6 +56,13 @@ public:
|
|||
|
||||
void setOverrideHandling(OverrideHandling oh);
|
||||
|
||||
enum Optimization {
|
||||
OPTIMIZE_NORMAL,
|
||||
OPTIMIZE_FAST
|
||||
};
|
||||
|
||||
void setOptimization(Optimization opt) { optimization=opt; }
|
||||
|
||||
void setCC(UChar32 c, uint8_t cc);
|
||||
void setOneWayMapping(UChar32 c, const UnicodeString &m);
|
||||
void setRoundTripMapping(UChar32 c, const UnicodeString &m);
|
||||
|
@ -106,6 +113,8 @@ private:
|
|||
int32_t phase;
|
||||
OverrideHandling overrideHandling;
|
||||
|
||||
Optimization optimization;
|
||||
|
||||
int32_t indexes[Normalizer2Impl::IX_COUNT];
|
||||
UTrie2 *norm16Trie;
|
||||
UnicodeString extraData;
|
||||
|
|
Loading…
Add table
Reference in a new issue