From 90d982f891eb43c408b99692b32bd6a51a58c64a Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 15 Dec 1999 19:04:11 +0000 Subject: [PATCH] ICU-130 cleaner code & write data file X-SVN-Rev: 418 --- icu4c/source/tools/genprops/genprops.c | 1 + icu4c/source/tools/genprops/store.c | 312 +++++++++++++------------ 2 files changed, 169 insertions(+), 144 deletions(-) diff --git a/icu4c/source/tools/genprops/genprops.c b/icu4c/source/tools/genprops/genprops.c index 8a386647abc..f4c175324db 100644 --- a/icu4c/source/tools/genprops/genprops.c +++ b/icu4c/source/tools/genprops/genprops.c @@ -118,6 +118,7 @@ main(int argc, char *argv[]) { } init(); + initStore(); parseDB(in); repeatProps(); compactProps(); diff --git a/icu4c/source/tools/genprops/store.c b/icu4c/source/tools/genprops/store.c index 5fb6f08ece6..9abaa3679f2 100644 --- a/icu4c/source/tools/genprops/store.c +++ b/icu4c/source/tools/genprops/store.c @@ -58,21 +58,25 @@ another pointer variable for this: Formally, the file contains the following structures: - A const uint16_t exceptionsIndex; -- 32-bit index - B const uint16_t ucharsIndex; -- 32-bit index - C const uint16_t reservedIndex; - D const uint16_t reservedIndex; + A0 const uint16_t exceptionsIndex; -- 32-bit index + A1 const uint16_t ucharsIndex; -- 32-bit index + A2 const uint16_t reservedIndex; + A3 const uint16_t reservedIndex; + A4 const uint16_t reservedIndex; + A5 const uint16_t reservedIndex; + A6 const uint16_t reservedIndex; + A7 const uint16_t reservedIndex; - E const uint16_t stage1[0x440]; -- 0x440=0x110000>>10 - F const uint16_t stage2[variable]; - G const uint16_t stage3[variable]; - (possible 1*uint16_t for padding to 4-alignment) + S1 const uint16_t stage1[0x440]; -- 0x440=0x110000>>10 + S2 const uint16_t stage2[variable]; + S3 const uint16_t stage3[variable]; + (possible 1*uint16_t for padding to 4-alignment) - H const uint32_t props32[variable]; - I const uint16_t exceptions[variable]; - (possible 1*uint16_t for padding to 4-alignment) + P const uint32_t props32[variable]; + E const uint16_t exceptions[variable]; + (possible 1*uint16_t for padding to 4-alignment) - J const UChar uchars[variable]; + U const UChar uchars[variable]; 3-stage lookup and properties: @@ -114,7 +118,7 @@ array of Unicode strings, especially for non-1:1 case mappings. The first stage consumes the 11 most significant bits of the 21-bit code point and results in an index into the second stage: - uint16_t i2=p16[4+c>>10]; + uint16_t i2=p16[8+c>>10]; The second stage consumes bits 9 to 4 of c and results in an index into the third stage: @@ -139,15 +143,15 @@ For some characters, this contains an index into the exceptions array: The exception values are a variable number of uint16_t starting at - const uint16_t *pe=p16+2*p16[0]+e; + const uint16_t *pe=p16+2*exceptionsIndex+e; The first uint16_t there contains flags about what values actually follow it. Some of those may be indexes for case mappings or similar and point to strings (zero-terminated) in the uchars[] array: ... - uint16_t u=pe[depends on pe[0]]; - const UChar *pu=(const UChar *)(p32+p16[1])+u; + uint16_t u=pe[index depends on pe[0]]; + const UChar *pu=(const UChar *)(p32+ucharsIndex)+u; 32-bit properties sets: @@ -237,6 +241,13 @@ static uint16_t exceptionsCount=0; static uint16_t repeatFromStage2(uint16_t i2, uint16_t i2Limit, uint16_t i3Repeat, uint32_t x); +static void +repeatFromStage3(uint16_t i2, uint16_t j3, uint32_t x); + +static uint16_t +compactStage(uint16_t *stage, uint16_t stageTop, uint16_t blockSize, + uint16_t *parent, uint16_t parentTop); + static int compareProps(const void *l, const void *r); @@ -266,6 +277,7 @@ initStore() { icu_memset(stage2, 0, sizeof(stage2)); icu_memset(stage3, 0, sizeof(stage3)); icu_memset(map, 0, sizeof(map)); + icu_memset(props, 0, sizeof(props)); icu_memset(props32, 0, sizeof(props32)); } @@ -490,6 +502,8 @@ repeatProps() { x=getProps(start, &i1, &i2, &i3); /* i1, i2, and i3 are set for the start code point */ + i1Limit=(uint16_t)(limit>>STAGE_1_SHIFT); + /* assume that i3 is the beginning of a stage 3 block (see assumptions above) */ /* is this stage 3 block suitable for setting it everywhere? (set i3Repeat) */ @@ -532,7 +546,16 @@ repeatProps() { */ /* fill stages 2 & 3 */ - i2=repeatFromStage2(i2, (uint16_t)((i2+STAGE_2_BLOCK)&~(STAGE_2_BLOCK-1)), i3Repeat, x); + if(i1>STAGE_2_SHIFT)&(STAGE_2_BLOCK-1))), i3Repeat, x); + + /* does this area end in an incomplete stage 3 block? */ + repeatFromStage3(i2, (uint16_t)(limit&(STAGE_3_BLOCK-1)), x); + return; + } /* this stage 2 block will not be suitable for repetition */ i2Repeat=0; @@ -560,7 +583,6 @@ repeatProps() { } } - i1Limit=(uint16_t)(limit>>STAGE_1_SHIFT); if(i1>STAGE_2_SHIFT)&(STAGE_2_BLOCK-1))), i3Repeat, x); /* does this area end in an incomplete stage 3 block? */ - j3=(uint16_t)(limit&(STAGE_3_BLOCK-1)); - if(j3!=0) { - /* fill in properties in a last, incomplete stage 3 block */ - i3=stage2[i2]; - if(i3==0) { - stage2[i2]=i3=allocProps(); - } - - /* some properties are set in this stage 3 block */ - do { - if(props[i3]==0) { - props[i3]=x; - } - ++i3; - } while(--j3>0); - } + repeatFromStage3(i2, (uint16_t)(limit&(STAGE_3_BLOCK-1)), x); } } } @@ -639,65 +646,37 @@ repeatFromStage2(uint16_t i2, uint16_t i2Limit, uint16_t i3Repeat, uint32_t x) { return i2; } +static void +repeatFromStage3(uint16_t i2, uint16_t j3, uint32_t x) { + if(j3!=0) { + /* fill in properties in a last, incomplete stage 3 block */ + uint16_t i3=stage2[i2]; + if(i3==0) { + stage2[i2]=i3=allocProps(); + } + + /* some properties may be set in this stage 3 block */ + do { + if(props[i3]==0) { + props[i3]=x; + } + ++i3; + } while(--j3>0); + } +} + /* compacting --------------------------------------------------------------- */ extern void compactStage2() { - /* - * At this point, there are stage2Top indexes in stage2[]. - * stage2Top is a multiple of 64, and there are always 64 stage2[] entries - * per stage 1 entry which do not overlap. - * The first 64 stage2[] are always the empty ones. - * We make them overlap appropriately here and fill every 64th entry in - * map[] with the mapping from old to new properties indexes - * in order to adjust the stage 1 tables. - * This simple algorithm does not find arbitrary overlaps, but only those - * where the last i indexes of the previous group and the first i of the - * current one all have the same value. - * This seems reasonable and yields linear performance. - */ - uint16_t i, start, prevEnd, newStart, x; - - map[0]=0; - newStart=STAGE_2_BLOCK; - for(start=newStart; start0; --i) { - stage2[newStart++]=stage2[start++]; - } - } else if(newStart0; --i) { - stage2[newStart++]=stage2[start++]; - } - } else /* no overlap && newStart==start */ { - map[start]=start; - newStart+=STAGE_2_BLOCK; - start=newStart; - } - } + uint16_t newTop=compactStage(stage2, stage2Top, STAGE_2_BLOCK, stage1, STAGE_1_BLOCK); /* we saved some space */ if(beVerbose) { - printf("compactStage2() reduced stage2Top from %u to %u\n", stage2Top, stage2Top-(start-newStart)); + printf("compactStage2() reduced stage2Top from %u to %u\n", stage2Top, newTop); } - stage2Top-=(start-newStart); + stage2Top=newTop; - /* now adjust the stage 1 table */ - for(start=0; start0; --i) { - stage3[newStart++]=stage3[start++]; - } - } else if(newStart0; --i) { - stage3[newStart++]=stage3[start++]; - } - } else /* no overlap && newStart==start */ { - map[start]=start; - newStart+=STAGE_3_BLOCK; - start=newStart; - } - } + uint16_t newTop=compactStage(stage3, stage3Top, STAGE_3_BLOCK, stage2, stage2Top); /* we saved some space */ if(beVerbose) { - printf("compactStage3() reduced stage3Top from %u to %u\n", stage3Top, stage3Top-(start-newStart)); + printf("compactStage3() reduced stage3Top from %u to %u\n", stage3Top, newTop); } - stage3Top-=(start-newStart); + stage3Top=newTop; - /* now adjust the stage 2 tables */ - for(start=0; start0; --i) { + stage[newStart++]=stage[start++]; + } + } else if(newStart0; --i) { + stage[newStart++]=stage[start++]; + } + } else /* no overlap && newStart==start */ { + map[start]=start; + newStart+=blockSize; + start=newStart; + } + } + + /* now adjust the parent stage table */ + for(i=0; i