From 4473e0a758cfbdd3418c17a223b74b410044532a Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 31 Jul 2010 19:16:48 +0000 Subject: [PATCH] ICU-7264 switch ubidi.icu and ucase.icu from UTrie to UTrie2 X-SVN-Rev: 28396 --- tools/unicode/c/genbidi/store.c | 93 +++++++++-------------------- tools/unicode/c/gencase/store.c | 92 +++++++++------------------- tools/unicode/c/genprops/props2.cpp | 1 - tools/unicode/c/genprops/store.c | 6 +- 4 files changed, 59 insertions(+), 133 deletions(-) diff --git a/tools/unicode/c/genbidi/store.c b/tools/unicode/c/genbidi/store.c index dbbf5e5b4e5..f22534aa534 100644 --- a/tools/unicode/c/genbidi/store.c +++ b/tools/unicode/c/genbidi/store.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2004-2008, International Business Machines +* Copyright (C) 2004-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -23,7 +23,6 @@ #include "unicode/uchar.h" #include "cmemory.h" #include "cstring.h" -#include "utrie.h" #include "utrie2.h" #include "uarrsort.h" #include "unicode/udata.h" @@ -33,7 +32,7 @@ #include "ubidi_props.h" #include "genbidi.h" -#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* Unicode bidi/shaping properties file format --------------------------------- @@ -45,7 +44,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure precedes the actual data. It contains platform properties values and the file format version. -The following is a description of format version 1.0 . +The following is a description of format version 2.0 . The file contains the following structures: @@ -66,7 +65,7 @@ The file contains the following structures: bits 23..16 contain the max value for Joining_Group, otherwise the bits are used like enum fields in the trie word - Serialized trie, see utrie.h; + Serialized trie, see utrie2.h; const uint32_t mirrors[mirrorLength]; @@ -122,6 +121,10 @@ containing the Joining_Group value. All code points outside of this range have No_Joining_Group (0). +--- Changes in format version 2 --- + +Change from UTrie to UTrie2. + ----------------------------------------------------------------------------- */ /* UDataInfo cf. udata.h */ @@ -136,8 +139,8 @@ static UDataInfo dataInfo={ /* dataFormat="BiDi" */ { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, - { 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ - { 4, 0, 1, 0 } /* dataVersion */ + { 2, 0, 0, 0 }, /* formatVersion */ + { 6, 0, 0, 0 } /* dataVersion */ }; /* exceptions values */ @@ -297,25 +300,26 @@ generateData(const char *dataDir, UBool csource) { int32_t i; UNewDataMemory *pData; - UNewTrie *pTrie; + UTrie2 *pTrie; UErrorCode errorCode=U_ZERO_ERROR; int32_t trieSize; long dataLength; makeMirror(); - pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE); - if(pTrie==NULL) { - fprintf(stderr, "genbidi error: unable to create a UNewTrie\n"); - exit(U_MEMORY_ALLOCATION_ERROR); + pTrie=utrie2_open(0, 0, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "genbidi error: utrie2_open() failed - %s\n", u_errorName(errorCode)); + exit(errorCode); } prev=jgStart=0; for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && startinitialValue, &errorCode); - } - utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf( - stderr, - "genbidi error: deleting lead surrogate code unit values failed - %s\n", - u_errorName(errorCode)); - exit(errorCode); - } - } - - f=usrc_create(dataDir, "ubidi_props_data.c"); + FILE *f=usrc_create(dataDir, "ubidi_props_data.c"); if(f!=NULL) { usrc_writeArray(f, "static const UVersionInfo ubidi_props_dataVersion={", @@ -432,7 +396,7 @@ generateData(const char *dataDir, UBool csource) { "};\n\n"); usrc_writeUTrie2Arrays(f, "static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL, - trie2, + pTrie, "\n};\n\n"); usrc_writeArray(f, "static const uint32_t ubidi_props_mirrors[%ld]={\n", @@ -451,17 +415,16 @@ generateData(const char *dataDir, UBool csource) { f); usrc_writeUTrie2Struct(f, " {\n", - trie2, "ubidi_props_trieIndex", NULL, + pTrie, "ubidi_props_trieIndex", NULL, " },\n"); usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n"); fputs("};\n", f); fclose(f); } - utrie2_close(trie2); } else { /* write the data */ pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo, - haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); + haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode)); exit(errorCode); @@ -486,7 +449,7 @@ generateData(const char *dataDir, UBool csource) { } } - utrie_close(pTrie); + utrie2_close(pTrie); upvec_close(pv); } diff --git a/tools/unicode/c/gencase/store.c b/tools/unicode/c/gencase/store.c index 5265c424d0e..25390424cf4 100644 --- a/tools/unicode/c/gencase/store.c +++ b/tools/unicode/c/gencase/store.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2004-2009, International Business Machines +* Copyright (C) 2004-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -25,7 +25,6 @@ #include "cmemory.h" #include "cstring.h" #include "filestrm.h" -#include "utrie.h" #include "utrie2.h" #include "uarrsort.h" #include "unicode/udata.h" @@ -34,7 +33,7 @@ #include "writesrc.h" #include "gencase.h" -#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* Unicode case mapping properties file format --------------------------------- @@ -46,13 +45,15 @@ the udata API for loading ICU data. Especially, a UDataInfo structure precedes the actual data. It contains platform properties values and the file format version. -The following is a description of format version 1.2 . +The following is a description of format version 2.0 . Format version 1.1 adds data for case closure. Format version 1.2 adds an exception bit for case-ignorable. Needed because the Cased and Case_Ignorable properties are not disjoint. +Format version 2.0 changes from UTrie to UTrie2. + The file contains the following structures: const int32_t indexes[i0] with values i0, i1, ...: @@ -69,7 +70,7 @@ The file contains the following structures: i15 maxFullLength; -- maximum length of a full case mapping/folding string - Serialized trie, see utrie.h; + Serialized trie, see utrie2.h; const uint16_t exceptions[exceptionsLength]; @@ -198,8 +199,8 @@ static UDataInfo dataInfo={ /* dataFormat="cAsE" */ { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, - { 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ - { 4, 0, 1, 0 } /* dataVersion */ + { 2, 0, 0, 0 }, /* formatVersion */ + { 6, 0, 0, 0 } /* dataVersion */ }; enum { @@ -1072,27 +1073,33 @@ generateData(const char *dataDir, UBool csource) { int32_t i; UNewDataMemory *pData; - UNewTrie *pTrie; + UTrie2 *pTrie; UErrorCode errorCode=U_ZERO_ERROR; int32_t trieSize; long dataLength; - pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE); - if(pTrie==NULL) { - fprintf(stderr, "gencase error: unable to create a UNewTrie\n"); - exit(U_MEMORY_ALLOCATION_ERROR); + pTrie=utrie2_open(0, 0, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "gencase error: utrie2_open() failed - %s\n", u_errorName(errorCode)); + exit(errorCode); } for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL; ++i) { - if(startinitialValue, &errorCode); - } - utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf( - stderr, - "gencase error: deleting lead surrogate code unit values failed - %s\n", - u_errorName(errorCode)); - exit(errorCode); - } - } - - f=usrc_create(dataDir, "ucase_props_data.c"); + FILE *f=usrc_create(dataDir, "ucase_props_data.c"); if(f!=NULL) { usrc_writeArray(f, "static const UVersionInfo ucase_props_dataVersion={", @@ -1167,7 +1132,7 @@ generateData(const char *dataDir, UBool csource) { "};\n\n"); usrc_writeUTrie2Arrays(f, "static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL, - trie2, + pTrie, "\n};\n\n"); usrc_writeArray(f, "static const uint16_t ucase_props_exceptions[%ld]={\n", @@ -1186,13 +1151,12 @@ generateData(const char *dataDir, UBool csource) { f); usrc_writeUTrie2Struct(f, " {\n", - trie2, "ucase_props_trieIndex", NULL, + pTrie, "ucase_props_trieIndex", NULL, " },\n"); usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n"); fputs("};\n", f); fclose(f); } - utrie2_close(trie2); } else { /* write the data */ pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo, @@ -1221,7 +1185,7 @@ generateData(const char *dataDir, UBool csource) { } } - utrie_close(pTrie); + utrie2_close(pTrie); } /* diff --git a/tools/unicode/c/genprops/props2.cpp b/tools/unicode/c/genprops/props2.cpp index 7114f394f6d..d777c0040ef 100644 --- a/tools/unicode/c/genprops/props2.cpp +++ b/tools/unicode/c/genprops/props2.cpp @@ -839,7 +839,6 @@ writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROP pvCount=pvRows*UPROPS_VECTOR_WORDS; errorCode=U_ZERO_ERROR; - utrie2_freeze(newTrie, UTRIE2_16_VALUE_BITS, &errorCode); length=utrie2_serialize(newTrie, p, capacity, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, diff --git a/tools/unicode/c/genprops/store.c b/tools/unicode/c/genprops/store.c index e5b53eee22e..f25ccacb4fd 100644 --- a/tools/unicode/c/genprops/store.c +++ b/tools/unicode/c/genprops/store.c @@ -83,7 +83,7 @@ Formally, the file contains the following structures: i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (new in format version 3.2) i12..i15 reservedIndexes; -- reserved values; 0 for now - PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) + PT serialized properties trie, see utrie2.h (byte size: 4*(i0-16)) P, E, and U are not used (empty) in format versions 4 and above @@ -243,8 +243,8 @@ static UDataInfo dataInfo={ 0, { 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */ - { 6, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ - { 5, 1, 0, 0 } /* dataVersion */ + { 7, 0, 0, 0 }, /* formatVersion */ + { 6, 0, 0, 0 } /* dataVersion */ }; static UTrie2 *pTrie=NULL;