diff --git a/icu4c/source/common/propsvec.c b/icu4c/source/common/propsvec.c index 2f027b5f9c7..05384eb5cec 100644 --- a/icu4c/source/common/propsvec.c +++ b/icu4c/source/common/propsvec.c @@ -13,7 +13,7 @@ * created on: 2002feb22 * created by: Markus W. Scherer * -* Store additional Unicode character properties in bit set vectors. +* Store bits (Unicode character properties) in bit set vectors. */ #include @@ -24,22 +24,81 @@ #include "uarrsort.h" #include "propsvec.h" +struct UPropsVectors { + uint32_t *v; + int32_t columns; /* number of columns, plus two for start & limit values */ + int32_t maxRows; + int32_t rows; + int32_t prevRow; /* search optimization: remember last row seen */ + UBool isCompacted; +}; + +#define UPVEC_INITIAL_ROWS (1<<14) +#define UPVEC_MEDIUM_ROWS ((int32_t)1<<17) +#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) + +U_CAPI UPropsVectors * U_EXPORT2 +upvec_open(int32_t columns, UErrorCode *pErrorCode) { + UPropsVectors *pv; + uint32_t *v, *row; + uint32_t cp; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(columns<1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); + v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); + if(pv==NULL || v==NULL) { + uprv_free(pv); + uprv_free(v); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(pv, 0, sizeof(UPropsVectors)); + pv->v=v; + pv->columns=columns+=2; /* count range start and limit columns */ + pv->maxRows=UPVEC_INITIAL_ROWS; + pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); + + /* set the all-Unicode row and the special-value rows */ + row=pv->v; + uprv_memset(row, 0, pv->rows*columns*4); + row[0]=0; + row[1]=0x110000; + row+=columns; + for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { + row[0]=cp; + row[1]=cp+1; + row+=columns; + } + return pv; +} + +U_CAPI void U_EXPORT2 +upvec_close(UPropsVectors *pv) { + if(pv!=NULL) { + uprv_free(pv->v); + uprv_free(pv); + } +} + static uint32_t * -_findRow(uint32_t *pv, UChar32 rangeStart) { +_findRow(UPropsVectors *pv, UChar32 rangeStart) { uint32_t *row; - int32_t *hdr; int32_t columns, i, start, limit, prevRow, rows; - hdr=(int32_t *)pv; - columns=hdr[UPVEC_COLUMNS]; - limit=hdr[UPVEC_ROWS]; - prevRow=hdr[UPVEC_PREV_ROW]; - rows=hdr[UPVEC_ROWS]; - pv+=UPVEC_HEADER_LENGTH; + columns=pv->columns; + rows=limit=pv->rows; + prevRow=pv->prevRow; /* check the vicinity of the last-seen row */ if(prevRowv+prevRow*columns; if(rangeStart>=(UChar32)row[0]) { if(rangeStart<(UChar32)row[1]) { /* same row as last seen */ @@ -49,7 +108,7 @@ _findRow(uint32_t *pv, UChar32 rangeStart) { rangeStart>=(UChar32)(row+=columns)[0] && rangeStart<(UChar32)row[1] ) { /* next row after the last one */ - hdr[UPVEC_PREV_ROW]=prevRow; + pv->prevRow=prevRow; return row; } } @@ -59,11 +118,11 @@ _findRow(uint32_t *pv, UChar32 rangeStart) { start=0; while(startv+i*columns; if(rangeStart<(UChar32)row[0]) { limit=i; } else if(rangeStart<(UChar32)row[1]) { - hdr[UPVEC_PREV_ROW]=i; + pv->prevRow=i; return row; } else { start=i; @@ -71,54 +130,12 @@ _findRow(uint32_t *pv, UChar32 rangeStart) { } /* must be found because all ranges together always cover all of Unicode */ - hdr[UPVEC_PREV_ROW]=start; - return pv+start*columns; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_open(int32_t columns, int32_t maxRows) { - uint32_t *pv, *row; - uint32_t cp; - int32_t length; - - if(columns<1 || maxRows<1) { - return NULL; - } - - columns+=2; /* count range start and limit columns */ - length=UPVEC_HEADER_LENGTH+maxRows*columns; - pv=(uint32_t *)uprv_malloc(length*4); - if(pv!=NULL) { - /* set header */ - pv[UPVEC_COLUMNS]=(uint32_t)columns; - pv[UPVEC_MAXROWS]=(uint32_t)maxRows; - pv[UPVEC_ROWS]=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); - pv[UPVEC_PREV_ROW]=0; - - /* set the all-Unicode row and the special-value rows */ - row=pv+UPVEC_HEADER_LENGTH; - uprv_memset(row, 0, pv[UPVEC_ROWS]*columns*4); - row[0]=0; - row[1]=0x110000; - row+=columns; - for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { - row[0]=cp; - row[1]=cp+1; - row+=columns; - } - } - return pv; + pv->prevRow=start; + return pv->v+start*columns; } U_CAPI void U_EXPORT2 -upvec_close(uint32_t *pv) { - if(pv!=NULL) { - uprv_free(pv); - } -} - -U_CAPI UBool U_EXPORT2 -upvec_setValue(uint32_t *pv, +upvec_setValue(UPropsVectors *pv, UChar32 start, UChar32 end, int32_t column, uint32_t value, uint32_t mask, @@ -129,21 +146,24 @@ upvec_setValue(uint32_t *pv, UBool splitFirstRow, splitLastRow; /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return FALSE; + if(U_FAILURE(*pErrorCode)) { + return; } - if( pv==NULL || start<0 || start>end || end>UPVEC_MAX_CP || - column<0 || (uint32_t)(column+1)>=pv[UPVEC_COLUMNS] + column<0 || column>=(pv->columns-2) ) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; + return; + } + if(pv->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; } limit=end+1; /* initialize */ - columns=(int32_t)pv[UPVEC_COLUMNS]; + columns=pv->columns; column+=2; /* skip range start and limit columns */ value&=mask; @@ -187,21 +207,39 @@ upvec_setValue(uint32_t *pv, if(splitFirstRow || splitLastRow) { int32_t count, rows; - rows=(int32_t)pv[UPVEC_ROWS]; - if((rows+splitFirstRow+splitLastRow)>(int32_t)pv[UPVEC_MAXROWS]) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return FALSE; + rows=pv->rows; + if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { + uint32_t *newVectors; + int32_t newMaxRows; + + if(pv->maxRowsmaxRowsv); + pv->v=newVectors; + pv->maxRows=newMaxRows; } /* count the number of row cells to move after the last row, and move them */ - count = (int32_t)((pv+UPVEC_HEADER_LENGTH+rows*columns)-(lastRow+columns)); + count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); if(count>0) { uprv_memmove( lastRow+(1+splitFirstRow+splitLastRow)*columns, lastRow+columns, count*4); } - pv[UPVEC_ROWS]=rows+splitFirstRow+splitLastRow; + pv->rows=rows+splitFirstRow+splitLastRow; /* split the first row, and move the firstRow pointer to the second part */ if(splitFirstRow) { @@ -226,7 +264,7 @@ upvec_setValue(uint32_t *pv, } /* set the "row last seen" to the last row for the range */ - pv[UPVEC_PREV_ROW]=(uint32_t)((lastRow-(pv+UPVEC_HEADER_LENGTH))/columns); + pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); /* set the input value in all remaining rows */ firstRow+=column; @@ -239,37 +277,36 @@ upvec_setValue(uint32_t *pv, } firstRow+=columns; } - return TRUE; } U_CAPI uint32_t U_EXPORT2 -upvec_getValue(uint32_t *pv, UChar32 c, int32_t column) { +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { uint32_t *row; - if(pv==NULL || c<0 || c>UPVEC_MAX_CP) { + if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { return 0; } - row=_findRow(pv, c); + row=_findRow((UPropsVectors *)pv, c); return row[2+column]; } U_CAPI uint32_t * U_EXPORT2 -upvec_getRow(uint32_t *pv, int32_t rowIndex, +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, UChar32 *pRangeStart, UChar32 *pRangeEnd) { uint32_t *row; int32_t columns; - if(pv==NULL || rowIndex<0 || rowIndex>=(int32_t)pv[UPVEC_ROWS]) { + if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { return NULL; } - columns=(int32_t)pv[UPVEC_COLUMNS]; - row=pv+UPVEC_HEADER_LENGTH+rowIndex*columns; + columns=pv->columns; + row=pv->v+rowIndex*columns; if(pRangeStart!=NULL) { - *pRangeStart=row[0]; + *pRangeStart=(UChar32)row[0]; } if(pRangeEnd!=NULL) { - *pRangeEnd=row[1]-1; + *pRangeEnd=(UChar32)row[1]-1; } return row+2; } @@ -277,10 +314,10 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex, static int32_t U_CALLCONV upvec_compareRows(const void *context, const void *l, const void *r) { const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; - const uint32_t *pv=(const uint32_t *)context; + const UPropsVectors *pv=(const UPropsVectors *)context; int32_t i, count, columns; - count=columns=(int32_t)pv[UPVEC_COLUMNS]; /* includes start/limit columns */ + count=columns=pv->columns; /* includes start/limit columns */ /* start comparing after start/limit but wrap around to them */ i=2; @@ -296,38 +333,38 @@ upvec_compareRows(const void *context, const void *l, const void *r) { return 0; } -U_CAPI int32_t U_EXPORT2 -upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { +U_CAPI void U_EXPORT2 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { uint32_t *row; int32_t i, columns, valueColumns, rows, count; UChar32 start, limit; /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; + if(U_FAILURE(*pErrorCode)) { + return; } - - if(pv==NULL || handler==NULL) { + if(handler==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; + return; + } + if(pv->isCompacted) { + return; } - rows=(int32_t)pv[UPVEC_ROWS]; - if(rows==0) { - return 0; - } + /* Set the flag now: Sorting and compacting destroys the builder data structure. */ + pv->isCompacted=TRUE; - row=pv+UPVEC_HEADER_LENGTH; - columns=(int32_t)pv[UPVEC_COLUMNS]; + rows=pv->rows; + columns=pv->columns; valueColumns=columns-2; /* not counting start & limit */ /* sort the properties vectors to find unique vector values */ if(rows>1) { - uprv_sortArray(row, rows, columns*4, + uprv_sortArray(pv->v, rows, columns*4, upvec_compareRows, pv, FALSE, pErrorCode); } if(U_FAILURE(*pErrorCode)) { - return 0; + return; } /* @@ -335,6 +372,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC * This has to do almost the same work as the compaction below, * to find the indexes where the special-value rows will move. */ + row=pv->v; count=-valueColumns; for(i=0; i=UPVEC_FIRST_SPECIAL_CP) { handler(context, start, start, count, row+2, valueColumns, pErrorCode); if(U_FAILURE(*pErrorCode)) { - return 0; + return; } } @@ -361,7 +399,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, count, row-valueColumns, valueColumns, pErrorCode); if(U_FAILURE(*pErrorCode)) { - return 0; + return; } /* @@ -371,7 +409,7 @@ upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorC * This destroys the Properties Vector structure and replaces it * with an array of just vector values. */ - row=pv+UPVEC_HEADER_LENGTH; + row=pv->v; count=-valueColumns; for(i=0; iv+count, valueColumns*4)) { count+=valueColumns; - uprv_memmove(pv+count, row+2, valueColumns*4); + uprv_memmove(pv->v+count, row+2, valueColumns*4); } if(startv+count, valueColumns, pErrorCode); if(U_FAILURE(*pErrorCode)) { - return 0; + return; } } row+=columns; } - /* count is at the beginning of the last vector, add valueColumns to include that last vector */ - return count+valueColumns; + /* count is at the beginning of the last vector, add one to include that last vector */ + pv->rows=count/valueColumns+1; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { + if(!pv->isCompacted) { + return NULL; + } + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return pv->v; +} + +U_CAPI UTrie2 * U_EXPORT2 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { + UPVecToUTrie2Context toUTrie2={ NULL }; + upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); + utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + utrie2_close(toUTrie2.trie); + toUTrie2.trie=NULL; + } + return toUTrie2.trie; } /* - * TODO(markus): Add upvec_compactToUTrie2WithRowIndexes() function that returns - * a UTrie2 and does not require the caller to pass in a callback function. - * - * Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts + * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts * some 16-bit field and builds and returns a UTrie2. */ diff --git a/icu4c/source/common/propsvec.h b/icu4c/source/common/propsvec.h index 8ffd3dd4334..26d200d6c8f 100644 --- a/icu4c/source/common/propsvec.h +++ b/icu4c/source/common/propsvec.h @@ -13,7 +13,7 @@ * created on: 2002feb22 * created by: Markus W. Scherer * -* Store additional Unicode character properties in bit set vectors. +* Store bits (Unicode character properties) in bit set vectors. */ #ifndef __UPROPSVEC_H__ @@ -25,11 +25,10 @@ U_CDECL_BEGIN -/* +/** * Unicode Properties Vectors associated with code point ranges. - * Stored in an array of uint32_t. * - * The array starts with a header, then rows of integers store + * Rows of uint32_t integers in a contiguous array store * the range limits and the properties vectors. * * In each row, row[0] contains the start code point and @@ -41,15 +40,8 @@ U_CDECL_BEGIN * It would be possible to store only one range boundary per row, * but self-contained rows allow to later sort them by contents. */ -enum { - /* stores number of columns, plus two for start & limit values */ - UPVEC_COLUMNS, - UPVEC_MAXROWS, - UPVEC_ROWS, - /* search optimization: remember last row seen */ - UPVEC_PREV_ROW, - UPVEC_HEADER_LENGTH -}; +struct UPropsVectors; +typedef struct UPropsVectors UPropsVectors; /* * Special pseudo code points for storing the initialValue and the errorValue, @@ -67,28 +59,39 @@ enum { */ #define UPVEC_START_REAL_VALUES_CP 0x200000 -U_CAPI uint32_t * U_EXPORT2 -upvec_open(int32_t columns, int32_t maxRows); +U_CAPI UPropsVectors * U_EXPORT2 +upvec_open(int32_t columns, UErrorCode *pErrorCode); U_CAPI void U_EXPORT2 -upvec_close(uint32_t *pv); +upvec_close(UPropsVectors *pv); -U_CAPI UBool U_EXPORT2 -upvec_setValue(uint32_t *pv, +/* + * In rows for code points [start..end], select the column, + * reset the mask bits and set the value bits (ANDed with the mask). + * + * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). + */ +U_CAPI void U_EXPORT2 +upvec_setValue(UPropsVectors *pv, UChar32 start, UChar32 end, int32_t column, uint32_t value, uint32_t mask, UErrorCode *pErrorCode); +/* + * Logically const but must not be used on the same pv concurrently! + * Always returns 0 if called after upvec_compact(). + */ U_CAPI uint32_t U_EXPORT2 -upvec_getValue(uint32_t *pv, UChar32 c, int32_t column); +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); /* * pRangeStart and pRangeEnd can be NULL. - * @return NULL if rowIndex out of range and for illegal arguments + * @return NULL if rowIndex out of range and for illegal arguments, + * or if called after upvec_compact() */ U_CAPI uint32_t * U_EXPORT2 -upvec_getRow(uint32_t *pv, int32_t rowIndex, +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, UChar32 *pRangeStart, UChar32 *pRangeEnd); /* @@ -98,7 +101,7 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex, * - store them contiguously from the beginning of the memory * - for each (non-unique) row, call the handler function * - * The handler's rowIndex is the uint32_t index of the row in the compacted + * The handler's rowIndex is the index of the row in the compacted * memory block. * (Therefore, it starts at 0 increases in increments of the columns value.) * @@ -109,19 +112,28 @@ upvec_getRow(uint32_t *pv, int32_t rowIndex, * and the row is arbitrary (but not NULL). * Then, in the second phase, the handler is called for each row of real values. */ - -U_CDECL_BEGIN - typedef void U_CALLCONV UPVecCompactHandler(void *context, UChar32 start, UChar32 end, int32_t rowIndex, uint32_t *row, int32_t columns, UErrorCode *pErrorCode); -U_CDECL_END +U_CAPI void U_EXPORT2 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); -U_CAPI int32_t U_EXPORT2 -upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); +/* + * Get the vectors array after calling upvec_compact(). + * Returns NULL if called before upvec_compact(). + */ +U_CAPI uint32_t * U_EXPORT2 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); + +/* + * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted + * vectors array, and freeze the trie. + */ +U_CAPI UTrie2 * U_EXPORT2 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); struct UPVecToUTrieContext { UNewTrie *newTrie; diff --git a/icu4c/source/common/ucnvsel.cpp b/icu4c/source/common/ucnvsel.cpp index 1889216a7d7..82501f4f485 100644 --- a/icu4c/source/common/ucnvsel.cpp +++ b/icu4c/source/common/ucnvsel.cpp @@ -63,6 +63,7 @@ struct UConverterSelector { /* internal function */ static void generateSelectorData(UConverterSelector* result, + UPropsVectors *upvec, const USet* excludedCodePoints, const UConverterUnicodeSet whichSet, UErrorCode* status); @@ -203,7 +204,9 @@ U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList, } newSelector->encodingsCount = converterListSize; - generateSelectorData(newSelector, excludedCodePoints, whichSet, status); + UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status); + generateSelectorData(newSelector, upvec, excludedCodePoints, whichSet, status); + upvec_close(upvec); if (U_FAILURE(*status)) { // at this point, we know pv and encodings have been allocated. No harm in @@ -223,7 +226,7 @@ U_CAPI void ucnvsel_close(UConverterSelector *sel) { } uprv_free(sel->encodings[0]); uprv_free(sel->encodings); - upvec_close(sel->pv); + uprv_free(sel->pv); utrie2_close(sel->trie); uprv_free(sel); } @@ -480,21 +483,19 @@ U_CAPI int32_t ucnvsel_serialize(const UConverterSelector* sel, /* internal function! */ static void generateSelectorData(UConverterSelector* result, + UPropsVectors *upvec, const USet* excludedCodePoints, const UConverterUnicodeSet whichSet, UErrorCode* status) { + if (U_FAILURE(*status)) { + return; + } + int32_t columns = (result->encodingsCount+31)/32; - // 66000 as suggested by Markus [I suggest something like 66000 which - // exceeds the number of BMP code points. There will be fewer ranges of - // combinations of encodings. (I believe there are no encodings that have - // interesting mappings for supplementary code points. All encodings either - // support all of them or none of them.)] - result->pv = upvec_open(columns, 66000); // create for all - // unicode codepoints, and have space for all those bits needed! // set errorValue to all-ones for (int32_t col = 0 ; col < columns; col++) { - upvec_setValue(result->pv, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, + upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, col, ~0, ~0, status); } @@ -505,7 +506,6 @@ static void generateSelectorData(UConverterSelector* result, int32_t j; UConverter* test_converter = ucnv_open(result->encodings[i], status); if (U_FAILURE(*status)) { - // status will propagate back to user return; } USet* unicode_point_set; @@ -513,6 +513,10 @@ static void generateSelectorData(UConverterSelector* result, ucnv_getUnicodeSet(test_converter, unicode_point_set, whichSet, status); + if (U_FAILURE(*status)) { + ucnv_close(test_converter); + return; + } column = i / 32; mask = 1 << (i%32); @@ -529,18 +533,17 @@ static void generateSelectorData(UConverterSelector* result, // this will be reached for the converters that fill the set with // strings. Those should be ignored by our system } else { - upvec_setValue(result->pv, start_char, end_char, column, ~0, mask, + upvec_setValue(upvec, start_char, end_char, column, ~0, mask, status); - if (U_FAILURE(*status)) { - return; - } } } ucnv_close(test_converter); uset_close(unicode_point_set); + if (U_FAILURE(*status)) { + return; + } } - // handle excluded encodings! Simply set their values to all 1's in the upvec if (excludedCodePoints) { int32_t item_count = uset_getItemCount(excludedCodePoints); @@ -550,30 +553,29 @@ static void generateSelectorData(UConverterSelector* result, uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0, status); - if (U_FAILURE(*status)) { - return; - } else { - for (int32_t col = 0 ; col < columns; col++) { - upvec_setValue(result->pv, start_char, end_char, col, ~0, ~0, - status); - } + for (int32_t col = 0 ; col < columns; col++) { + upvec_setValue(upvec, start_char, end_char, col, ~0, ~0, + status); } } } // alright. Now, let's put things in the same exact form you'd get when you // unserialize things. - UPVecToUTrie2Context toUTrie2={ NULL }; - result->pvCount = upvec_compact(result->pv, upvec_compactToUTrie2Handler, - &toUTrie2, status); + result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status); if (U_SUCCESS(*status)) { - result->trie = toUTrie2.trie; - utrie2_freeze(result->trie, UTRIE2_16_VALUE_BITS, status); + uint32_t *memory = upvec_getArray(upvec, &result->pvCount, NULL); + result->pvCount *= columns; + result->pv = (uint32_t *)uprv_malloc(result->pvCount * 4); + if (result->pv == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(result->pv, memory, result->pvCount * 4); } } - // a bunch of functions for the enumeration thingie! Nothing fancy here. Just // iterate over the selected encodings struct Enumerator { diff --git a/icu4c/source/tools/genbidi/genbidi.c b/icu4c/source/tools/genbidi/genbidi.c index c05d6176310..f773b1ccb9b 100644 --- a/icu4c/source/tools/genbidi/genbidi.c +++ b/icu4c/source/tools/genbidi/genbidi.c @@ -39,7 +39,7 @@ /* data --------------------------------------------------------------------- */ -uint32_t *pv; +UPropsVectors *pv; UBool beVerbose=FALSE, haveCopyright=TRUE; @@ -169,7 +169,8 @@ singleEnumLineFn(void *context, exit(U_INTERNAL_PROGRAM_ERROR); } - if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) { + upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genbidi error: unable to set %s code: %s\n", sen->propName, u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -260,7 +261,8 @@ binariesLineFn(void *context, exit(U_INTERNAL_PROGRAM_ERROR); } - if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) { + upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genbidi error: unable to set %s, code: %s\n", bin->binaries[i].propName, u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -394,7 +396,7 @@ main(int argc, char* argv[]) { } /* initialize */ - pv=upvec_open(2, 10000); + pv=upvec_open(2, &errorCode); /* process BidiMirroring.txt */ writeUCDFilename(basename, "BidiMirroring", suffix); @@ -522,7 +524,8 @@ unicodeDataLineFn(void *context, /* get Mirrored flag, field 9 */ if(*fields[9][0]=='Y') { - if(!upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) { + upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n", (long)c, u_errorName(errorCode)); exit(errorCode); @@ -576,7 +579,8 @@ parseDB(const char *filename, UErrorCode *pErrorCode) { for(i=0; ibinaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) { + upvec_setValue(pv, start, end, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "gencase error: unable to set %s, code: %s\n", bin->binaries[i].propName, u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -290,7 +291,7 @@ main(int argc, char* argv[]) { } /* initialize */ - pv=upvec_open(2, 10000); + pv=upvec_open(2, &errorCode); caseSensitive=uset_open(1, 0); /* empty set (start>end) */ /* process SpecialCasing.txt */ diff --git a/icu4c/source/tools/gencase/gencase.h b/icu4c/source/tools/gencase/gencase.h index a0f5e277e00..57d03e81f1b 100644 --- a/icu4c/source/tools/gencase/gencase.h +++ b/icu4c/source/tools/gencase/gencase.h @@ -19,6 +19,7 @@ #include "unicode/utypes.h" #include "utrie.h" +#include "propsvec.h" #include "ucase.h" U_CDECL_BEGIN @@ -92,7 +93,7 @@ typedef struct { extern UBool beVerbose, haveCopyright; /* properties vectors in gencase.c */ -extern uint32_t *pv; +extern UPropsVectors *pv; /* prototypes */ U_CFUNC void diff --git a/icu4c/source/tools/gencase/store.c b/icu4c/source/tools/gencase/store.c index 57c2abf028c..eaa924fe7f4 100644 --- a/icu4c/source/tools/gencase/store.c +++ b/icu4c/source/tools/gencase/store.c @@ -408,12 +408,13 @@ setProps(Props *p) { } errorCode=U_ZERO_ERROR; - if( value!=oldValue && - !upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode) - ) { - fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n", - u_errorName(errorCode)); - exit(errorCode); + if(value!=oldValue) { + upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n", + u_errorName(errorCode)); + exit(errorCode); + } } /* add the multi-character case folding to the "unfold" data */ @@ -428,7 +429,8 @@ setProps(Props *p) { extern void addCaseSensitive(UChar32 first, UChar32 last) { UErrorCode errorCode=U_ZERO_ERROR; - if(!upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) { + upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode); + if(U_FAILURE(errorCode)) { fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n", u_errorName(errorCode)); exit(errorCode); @@ -573,7 +575,8 @@ addClosureMapping(UChar32 src, UChar32 dest) { } errorCode=U_ZERO_ERROR; - if(!upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode)) { + upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode); + if(U_FAILURE(errorCode)) { fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n", u_errorName(errorCode)); exit(errorCode); diff --git a/icu4c/source/tools/genprops/genprops.c b/icu4c/source/tools/genprops/genprops.c index 616f0687f47..119ed006d06 100644 --- a/icu4c/source/tools/genprops/genprops.c +++ b/icu4c/source/tools/genprops/genprops.c @@ -339,7 +339,8 @@ unicodeDataLineFn(void *context, exit(U_PARSE_ERROR); } } - if(!upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode)) { + upvec_setValue(pv, p.code, p.code, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(*pErrorCode)); exit(*pErrorCode); } @@ -544,7 +545,8 @@ repeatAreaProps() { /* Hangul have canonical decompositions */ errorCode=U_ZERO_ERROR; - if(!upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode)) { + upvec_setValue(pv, 0xac00, 0xd7a3, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode); + if(U_FAILURE(errorCode)) { fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(errorCode)); exit(errorCode); } diff --git a/icu4c/source/tools/genprops/genprops.h b/icu4c/source/tools/genprops/genprops.h index 40915235b6f..b50a1037625 100644 --- a/icu4c/source/tools/genprops/genprops.h +++ b/icu4c/source/tools/genprops/genprops.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2005, International Business Machines +* Copyright (C) 1999-2008, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -19,6 +19,7 @@ #include "unicode/utypes.h" #include "utrie.h" +#include "propsvec.h" /* file definitions */ #define DATA_NAME "uprops" @@ -39,7 +40,7 @@ extern const char *const genCategoryNames[]; /* properties vectors in props2.c */ -extern uint32_t *pv; +extern UPropsVectors *pv; /* prototypes */ U_CFUNC void diff --git a/icu4c/source/tools/genprops/props2.c b/icu4c/source/tools/genprops/props2.c index 75273eafb64..0385a4405f2 100644 --- a/icu4c/source/tools/genprops/props2.c +++ b/icu4c/source/tools/genprops/props2.c @@ -35,8 +35,7 @@ /* data --------------------------------------------------------------------- */ static UNewTrie *newTrie; -uint32_t *pv; -static int32_t pvCount; +UPropsVectors *pv; /* miscellaneous ------------------------------------------------------------ */ @@ -208,7 +207,8 @@ singleEnumLineFn(void *context, /* Also set bits for initialValue and errorValue. */ end=UPVEC_MAX_CP; } - if(!upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode)) { + upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops error: unable to set %s code: %s\n", sen->propName, u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -370,7 +370,8 @@ binariesLineFn(void *context, /* Also set bits for initialValue and errorValue. */ end=UPVEC_MAX_CP; } - if(!upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode)) { + upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode); + if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops error: unable to set %s code: %s\n", bin->binaries[i].propName, u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -408,7 +409,12 @@ parseBinariesFile(char *filename, char *basename, const char *suffix, U_CFUNC void initAdditionalProperties() { - pv=upvec_open(UPROPS_VECTOR_WORDS, 20000); + UErrorCode errorCode=U_ZERO_ERROR; + pv=upvec_open(UPROPS_VECTOR_WORDS, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error: upvec_open() failed - %s\n", u_errorName(errorCode)); + exit(errorCode); + } } U_CFUNC void @@ -484,11 +490,11 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr * W for plane 2 */ *pErrorCode=U_ZERO_ERROR; - if( !upvec_setValue(pv, 0xe000, 0xf8ff, 0, (uint32_t)(U_EA_AMBIGUOUS<indexLength, (long)pTrie->dataLength, (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, (long)pTrie->initialValue, (long)pTrie->errorValue,