ICU-7273 add gennorm2 --fast option; write fully decomposed regular mappings rather than delta mappings

X-SVN-Rev: 27650
This commit is contained in:
Markus Scherer 2010-02-24 17:54:10 +00:00
parent 0763686c6c
commit f555e87a2d
3 changed files with 27 additions and 8 deletions

View file

@ -59,7 +59,8 @@ enum {
COPYRIGHT,
SOURCEDIR,
OUTPUT_FILENAME,
UNICODE_VERSION
UNICODE_VERSION,
OPT_FAST
};
static UOption options[]={
@ -69,7 +70,8 @@ static UOption options[]={
UOPTION_COPYRIGHT,
UOPTION_SOURCEDIR,
UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG)
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
};
extern "C" int
@ -113,6 +115,12 @@ main(int argc, char* argv[]) {
fprintf(stderr,
"\t-s or --sourcedir source directory, followed by the path\n"
"\t-o or --output output filename\n");
fprintf(stderr,
"\t --fast optimize the .nrm file for fast normalization,\n"
"\t which might increase its size (Writes fully decomposed\n"
"\t regular mappings instead of delta mappings.\n"
"\t You should measure the runtime speed to make sure that\n"
"\t this is a good trade-off.)\n");
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
@ -137,6 +145,10 @@ main(int argc, char* argv[]) {
builder->setUnicodeVersion(options[UNICODE_VERSION].value);
if(options[OPT_FAST].doesOccur) {
builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
}
// prepare the filename beginning with the source dir
U_STD_NSQ string filename(options[SOURCEDIR].value);
int32_t pathLength=filename.length();

View file

@ -150,7 +150,7 @@ enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value
U_CDECL_END
Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
phase(0), overrideHandling(OVERRIDE_PREVIOUS) {
phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
memset(unicodeVersion, 0, sizeof(unicodeVersion));
normTrie=utrie2_open(0, 0, &errorCode);
normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
@ -778,9 +778,9 @@ void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraData
(long)c);
exit(U_INVALID_FORMAT_ERROR);
}
if(p->cc==0) {
if(p->cc==0 && optimization!=OPTIMIZE_FAST) {
// Try a compact, algorithmic encoding.
// Only for ccc=0.
// Only for ccc=0, because we can't store additional information.
if(p->mappingCP>=0) {
int32_t delta=p->mappingCP-c;
if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) {
@ -791,9 +791,7 @@ void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraData
if(p->offset==0) {
int32_t oldNoNoLength=writer.noNoMappings.length();
writeMapping(c, p, writer.noNoMappings);
UnicodeString newMapping(FALSE,
writer.noNoMappings.getBuffer()+oldNoNoLength,
writer.noNoMappings.length()-oldNoNoLength);
UnicodeString newMapping=writer.noNoMappings.tempSubString(oldNoNoLength);
int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping);
if(previousOffset!=0) {
// Duplicate, remove the new units and point to the old ones.

View file

@ -56,6 +56,13 @@ public:
void setOverrideHandling(OverrideHandling oh);
// Selects how writeExtraData() encodes mappings in the generated data.
enum Optimization {
// Default, set by the constructor: ccc=0 mappings may use the compact,
// algorithmic (delta) encoding when the delta is within MAX_DELTA.
OPTIMIZE_NORMAL,
// Skips the compact delta encoding so that regular mappings are written
// instead. Per the gennorm2 --fast help text this might increase the size
// of the .nrm file.
OPTIMIZE_FAST
};
// Stores the optimization mode. gennorm2's main() passes OPTIMIZE_FAST here
// when the --fast command-line option occurs.
void setOptimization(Optimization opt) { optimization=opt; }
void setCC(UChar32 c, uint8_t cc);
void setOneWayMapping(UChar32 c, const UnicodeString &m);
void setRoundTripMapping(UChar32 c, const UnicodeString &m);
@ -106,6 +113,8 @@ private:
int32_t phase;
OverrideHandling overrideHandling;
Optimization optimization;
int32_t indexes[Normalizer2Impl::IX_COUNT];
UTrie2 *norm16Trie;
UnicodeString extraData;