ICU-7264 switch ubidi.icu and ucase.icu from UTrie to UTrie2

X-SVN-Rev: 28396
This commit is contained in:
Markus Scherer 2010-07-31 19:16:48 +00:00
parent ce3a205cc9
commit 4473e0a758
4 changed files with 59 additions and 133 deletions

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -23,7 +23,6 @@
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "utrie.h"
#include "utrie2.h"
#include "uarrsort.h"
#include "unicode/udata.h"
@ -33,7 +32,7 @@
#include "ubidi_props.h"
#include "genbidi.h"
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/* Unicode bidi/shaping properties file format ---------------------------------
@ -45,7 +44,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform properties values and the
file format version.
The following is a description of format version 1.0 .
The following is a description of format version 2.0 .
The file contains the following structures:
@ -66,7 +65,7 @@ The file contains the following structures:
bits 23..16 contain the max value for Joining_Group,
otherwise the bits are used like enum fields in the trie word
Serialized trie, see utrie.h;
Serialized trie, see utrie2.h;
const uint32_t mirrors[mirrorLength];
@ -122,6 +121,10 @@ containing the Joining_Group value.
All code points outside of this range have No_Joining_Group (0).
--- Changes in format version 2 ---
Change from UTrie to UTrie2.
----------------------------------------------------------------------------- */
/* UDataInfo cf. udata.h */
@ -136,8 +139,8 @@ static UDataInfo dataInfo={
/* dataFormat="BiDi" */
{ UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
{ 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
{ 4, 0, 1, 0 } /* dataVersion */
{ 2, 0, 0, 0 }, /* formatVersion */
{ 6, 0, 0, 0 } /* dataVersion */
};
/* exceptions values */
@ -297,25 +300,26 @@ generateData(const char *dataDir, UBool csource) {
int32_t i;
UNewDataMemory *pData;
UNewTrie *pTrie;
UTrie2 *pTrie;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t trieSize;
long dataLength;
makeMirror();
pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
if(pTrie==NULL) {
fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
exit(U_MEMORY_ALLOCATION_ERROR);
pTrie=utrie2_open(0, 0, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genbidi error: utrie2_open() failed - %s\n", u_errorName(errorCode));
exit(errorCode);
}
prev=jgStart=0;
for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP; ++i) {
/* store most values from vector column 0 in the trie */
if(!utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
exit(U_BUFFER_OVERFLOW_ERROR);
utrie2_setRange32(pTrie, start, end, *row, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genbidi error: utrie2_setRange32() failed - %s\n", u_errorName(errorCode));
exit(errorCode);
}
/* store Joining_Group values from vector column 1 in a simple byte array */
@ -350,9 +354,11 @@ generateData(const char *dataDir, UBool csource) {
indexes[UBIDI_IX_JG_START]=jgStart;
indexes[UBIDI_IX_JG_LIMIT]=prev;
trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
fprintf(stderr, "genbidi error: utrie2_freeze()+utrie2_serialize() failed: %s (length %ld)\n",
u_errorName(errorCode), (long)trieSize);
exit(errorCode);
}
@ -378,49 +384,7 @@ generateData(const char *dataDir, UBool csource) {
if(csource) {
/* write .c file for hardcoded data */
UTrie trie={ NULL };
UTrie2 *trie2;
FILE *f;
utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
stderr,
"genbidi error: failed to utrie_unserialize(ubidi.icu trie) - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
/* use UTrie2 */
dataInfo.formatVersion[0]=2;
dataInfo.formatVersion[2]=0;
dataInfo.formatVersion[3]=0;
trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
stderr,
"genbidi error: utrie2_fromUTrie() failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
{
/* delete lead surrogate code unit values */
UChar lead;
trie2=utrie2_cloneAsThawed(trie2, &errorCode);
for(lead=0xd800; lead<0xdc00; ++lead) {
utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
}
utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
stderr,
"genbidi error: deleting lead surrogate code unit values failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
}
f=usrc_create(dataDir, "ubidi_props_data.c");
FILE *f=usrc_create(dataDir, "ubidi_props_data.c");
if(f!=NULL) {
usrc_writeArray(f,
"static const UVersionInfo ubidi_props_dataVersion={",
@ -432,7 +396,7 @@ generateData(const char *dataDir, UBool csource) {
"};\n\n");
usrc_writeUTrie2Arrays(f,
"static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
trie2,
pTrie,
"\n};\n\n");
usrc_writeArray(f,
"static const uint32_t ubidi_props_mirrors[%ld]={\n",
@ -451,17 +415,16 @@ generateData(const char *dataDir, UBool csource) {
f);
usrc_writeUTrie2Struct(f,
" {\n",
trie2, "ubidi_props_trieIndex", NULL,
pTrie, "ubidi_props_trieIndex", NULL,
" },\n");
usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
fputs("};\n", f);
fclose(f);
}
utrie2_close(trie2);
} else {
/* write the data */
pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
exit(errorCode);
@ -486,7 +449,7 @@ generateData(const char *dataDir, UBool csource) {
}
}
utrie_close(pTrie);
utrie2_close(pTrie);
upvec_close(pv);
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2009, International Business Machines
* Copyright (C) 2004-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -25,7 +25,6 @@
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "utrie.h"
#include "utrie2.h"
#include "uarrsort.h"
#include "unicode/udata.h"
@ -34,7 +33,7 @@
#include "writesrc.h"
#include "gencase.h"
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/* Unicode case mapping properties file format ---------------------------------
@ -46,13 +45,15 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform properties values and the
file format version.
The following is a description of format version 1.2 .
The following is a description of format version 2.0 .
Format version 1.1 adds data for case closure.
Format version 1.2 adds an exception bit for case-ignorable. Needed because
the Cased and Case_Ignorable properties are not disjoint.
Format version 2.0 changes from UTrie to UTrie2.
The file contains the following structures:
const int32_t indexes[i0] with values i0, i1, ...:
@ -69,7 +70,7 @@ The file contains the following structures:
i15 maxFullLength; -- maximum length of a full case mapping/folding string
Serialized trie, see utrie.h;
Serialized trie, see utrie2.h;
const uint16_t exceptions[exceptionsLength];
@ -198,8 +199,8 @@ static UDataInfo dataInfo={
/* dataFormat="cAsE" */
{ UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
{ 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
{ 4, 0, 1, 0 } /* dataVersion */
{ 2, 0, 0, 0 }, /* formatVersion */
{ 6, 0, 0, 0 } /* dataVersion */
};
enum {
@ -1072,27 +1073,33 @@ generateData(const char *dataDir, UBool csource) {
int32_t i;
UNewDataMemory *pData;
UNewTrie *pTrie;
UTrie2 *pTrie;
UErrorCode errorCode=U_ZERO_ERROR;
int32_t trieSize;
long dataLength;
pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
if(pTrie==NULL) {
fprintf(stderr, "gencase error: unable to create a UNewTrie\n");
exit(U_MEMORY_ALLOCATION_ERROR);
pTrie=utrie2_open(0, 0, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "gencase error: utrie2_open() failed - %s\n", u_errorName(errorCode));
exit(errorCode);
}
for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL; ++i) {
if(start<UPVEC_FIRST_SPECIAL_CP && !utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
fprintf(stderr, "gencase error: unable to set trie value (overflow)\n");
exit(U_BUFFER_OVERFLOW_ERROR);
if(start<UPVEC_FIRST_SPECIAL_CP) {
utrie2_setRange32(pTrie, start, end, *row, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "gencase error: utrie2_setRange32() failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
}
}
trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
fprintf(stderr, "gencase error: utrie2_freeze()+utrie2_serialize() failed: %s (length %ld)\n",
u_errorName(errorCode), (long)trieSize);
exit(errorCode);
}
@ -1113,49 +1120,7 @@ generateData(const char *dataDir, UBool csource) {
if(csource) {
/* write .c file for hardcoded data */
UTrie trie={ NULL };
UTrie2 *trie2;
FILE *f;
utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
stderr,
"gencase error: failed to utrie_unserialize(ucase.icu trie) - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
/* use UTrie2 */
dataInfo.formatVersion[0]=2;
dataInfo.formatVersion[2]=0;
dataInfo.formatVersion[3]=0;
trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
stderr,
"gencase error: utrie2_fromUTrie() failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
{
/* delete lead surrogate code unit values */
UChar lead;
trie2=utrie2_cloneAsThawed(trie2, &errorCode);
for(lead=0xd800; lead<0xdc00; ++lead) {
utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
}
utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
stderr,
"gencase error: deleting lead surrogate code unit values failed - %s\n",
u_errorName(errorCode));
exit(errorCode);
}
}
f=usrc_create(dataDir, "ucase_props_data.c");
FILE *f=usrc_create(dataDir, "ucase_props_data.c");
if(f!=NULL) {
usrc_writeArray(f,
"static const UVersionInfo ucase_props_dataVersion={",
@ -1167,7 +1132,7 @@ generateData(const char *dataDir, UBool csource) {
"};\n\n");
usrc_writeUTrie2Arrays(f,
"static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL,
trie2,
pTrie,
"\n};\n\n");
usrc_writeArray(f,
"static const uint16_t ucase_props_exceptions[%ld]={\n",
@ -1186,13 +1151,12 @@ generateData(const char *dataDir, UBool csource) {
f);
usrc_writeUTrie2Struct(f,
" {\n",
trie2, "ucase_props_trieIndex", NULL,
pTrie, "ucase_props_trieIndex", NULL,
" },\n");
usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
fputs("};\n", f);
fclose(f);
}
utrie2_close(trie2);
} else {
/* write the data */
pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
@ -1221,7 +1185,7 @@ generateData(const char *dataDir, UBool csource) {
}
}
utrie_close(pTrie);
utrie2_close(pTrie);
}
/*

View file

@ -839,7 +839,6 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP
pvCount=pvRows*UPROPS_VECTOR_WORDS;
errorCode=U_ZERO_ERROR;
utrie2_freeze(newTrie, UTRIE2_16_VALUE_BITS, &errorCode);
length=utrie2_serialize(newTrie, p, capacity, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr,

View file

@ -83,7 +83,7 @@ Formally, the file contains the following structures:
i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (new in format version 3.2)
i12..i15 reservedIndexes; -- reserved values; 0 for now
PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
PT serialized properties trie, see utrie2.h (byte size: 4*(i0-16))
P, E, and U are not used (empty) in format versions 4 and above
@ -243,8 +243,8 @@ static UDataInfo dataInfo={
0,
{ 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
{ 6, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
{ 5, 1, 0, 0 } /* dataVersion */
{ 7, 0, 0, 0 }, /* formatVersion */
{ 6, 0, 0, 0 } /* dataVersion */
};
static UTrie2 *pTrie=NULL;