mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-6453 fix/improve upvec_ serialization, require 32-bit-aligned serialization buffers, and clean up some code
X-SVN-Rev: 24870
This commit is contained in:
parent
7364736f03
commit
6b0e32fd7c
5 changed files with 577 additions and 647 deletions
|
@ -436,7 +436,7 @@ upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE
|
|||
pv->rows=count/valueColumns+1;
|
||||
}
|
||||
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
U_CAPI const uint32_t * U_EXPORT2
|
||||
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
|
||||
if(!pv->isCompacted) {
|
||||
return NULL;
|
||||
|
@ -450,6 +450,35 @@ upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
|
|||
return pv->v;
|
||||
}
|
||||
|
||||
/*
 * Return a heap-allocated copy of the compacted vectors array of pv.
 *
 * The copy is allocated with uprv_malloc() and holds
 * pv->rows * (pv->columns-2) 32-bit words copied from pv->v.
 *
 * @param pv         the properties vectors object; must already be compacted
 * @param pRows      if not NULL, receives pv->rows
 * @param pColumns   if not NULL, receives pv->columns-2
 * @param pErrorCode in/out error code; if it already indicates a failure
 *                   on entry, nothing is done.
 *                   Set to U_ILLEGAL_ARGUMENT_ERROR if pv is not compacted,
 *                   or to U_MEMORY_ALLOCATION_ERROR if the allocation fails.
 * @return the cloned array, or NULL on any failure
 */
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
    int32_t valueColumns;
    int32_t numBytes;
    uint32_t *copy;

    /* Do nothing if an earlier call already reported a failure. */
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    /* Only the compacted form can be cloned. */
    if(!pv->isCompacted) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* The cloned array has two columns fewer than pv->columns. */
    valueColumns=pv->columns-2;
    /* 4 bytes per uint32_t array element */
    numBytes=pv->rows*valueColumns*4;

    copy=(uint32_t *)uprv_malloc(numBytes);
    if(copy==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(copy, pv->v, numBytes);

    /* Both output parameters are optional. */
    if(pRows!=NULL) {
        *pRows=pv->rows;
    }
    if(pColumns!=NULL) {
        *pColumns=valueColumns;
    }
    return copy;
}
|
||||
|
||||
U_CAPI UTrie2 * U_EXPORT2
|
||||
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
|
||||
UPVecToUTrie2Context toUTrie2={ NULL };
|
||||
|
|
|
@ -123,10 +123,20 @@ upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE
|
|||
|
||||
/*
|
||||
* Get the vectors array after calling upvec_compact().
|
||||
* The caller must not modify nor release the returned array.
|
||||
* Returns NULL if called before upvec_compact().
|
||||
*/
|
||||
U_CAPI const uint32_t * U_EXPORT2
|
||||
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
|
||||
|
||||
/*
|
||||
* Get a clone of the vectors array after calling upvec_compact().
|
||||
* The caller owns the returned array and must uprv_free() it.
|
||||
* Returns NULL and sets U_ILLEGAL_ARGUMENT_ERROR if called before upvec_compact().
|
||||
*/
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
|
||||
upvec_cloneArray(const UPropsVectors *pv,
|
||||
int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
|
||||
|
||||
/*
|
||||
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -25,17 +25,17 @@
|
|||
#include "unicode/uenum.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* This file contains the declarations for the encoding selector.
|
||||
* The goal is, given a unicode string, find the encodings
|
||||
* this string can be mapped to.
|
||||
* A converter selector is built with a set of encoding/charset names
|
||||
* and given an input string returns the set of names of the
|
||||
* corresponding converters which can convert the string.
|
||||
*
|
||||
* A converter selector can be serialized into a buffer and reopened
|
||||
* from the serialized form.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @{
|
||||
* The selector data structure
|
||||
|
@ -45,38 +45,37 @@ typedef struct UConverterSelector UConverterSelector;
|
|||
/** @} */
|
||||
|
||||
/**
|
||||
* open a selector. If converterList is NULL, build for all converters. If excludedCodePoints
|
||||
* is NULL, don't exclude any codepoints
|
||||
*
|
||||
* Open a selector.
|
||||
* If converterListSize is 0, build for all available converters.
|
||||
* If excludedCodePoints is NULL, don't exclude any code points.
|
||||
*
|
||||
* @param converterList a pointer to the names of the encodings to be included.
|
||||
* NULL means build a selector for all possible converters
|
||||
* @param converterListSize number of encodings in above list.
|
||||
* Setting converterListSize to 0, builds a selector for all
|
||||
* converters. ucnvsel_open() does not transfer ownership to this
|
||||
* array. Once uncvsel_open() returns, the caller is free to reuse/destroy
|
||||
* the array.
|
||||
* @param excludedCodePoints a set of codepoints to be excluded from
|
||||
* consideration. set to NULL to exclude nothing
|
||||
* @param whichSet what converter set to use? use this to determine whether
|
||||
* to construct selector for fallback or for roundtrip only mappings
|
||||
* Can be NULL if converterListSize==0.
|
||||
* The list and the names will be cloned, and the caller
|
||||
* retains ownership of the original.
|
||||
* @param converterListSize number of encodings in above list.
|
||||
* If 0, builds a selector for all available converters.
|
||||
* @param excludedCodePoints a set of code points to be excluded from consideration.
|
||||
* That is, excluded code points in a string do not change
|
||||
* the selection result. (They might be handled by a callback.)
|
||||
* Use NULL to exclude nothing.
|
||||
* @param whichSet what converter set to use? Use this to determine whether
|
||||
* to consider only roundtrip mappings or also fallbacks.
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return a pointer to the created selector
|
||||
* @return the new selector
|
||||
*
|
||||
* @draft ICU 4.2
|
||||
*/
|
||||
U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList,
|
||||
int32_t converterListSize,
|
||||
const USet* excludedCodePoints,
|
||||
const UConverterUnicodeSet whichSet,
|
||||
UErrorCode* status);
|
||||
U_CAPI UConverterSelector* U_EXPORT2
|
||||
ucnvsel_open(const char* const* converterList, int32_t converterListSize,
|
||||
const USet* excludedCodePoints,
|
||||
const UConverterUnicodeSet whichSet, UErrorCode* status);
|
||||
|
||||
/* close opened selector */
|
||||
/**
|
||||
* closes a selector. and releases allocated memory
|
||||
* if any Enumerations were returned by ucnv_select*, they become invalid.
|
||||
* Closes a selector.
|
||||
* If any Enumerations were returned by ucnvsel_select*, they become invalid.
|
||||
* They can be closed before or after calling ucnvsel_close,
|
||||
* but should never be used after selector is closed
|
||||
* but should never be used after the selector is closed.
|
||||
*
|
||||
* @see ucnvsel_selectForString
|
||||
* @see ucnvsel_selectForUTF8
|
||||
|
@ -85,88 +84,79 @@ U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList,
|
|||
*
|
||||
* @draft ICU 4.2
|
||||
*/
|
||||
U_CAPI void ucnvsel_close(UConverterSelector *sel);
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnvsel_close(UConverterSelector *sel);
|
||||
|
||||
/**
|
||||
* unserialize a selector from a linear buffer. No alignment necessary.
|
||||
* the function does NOT take ownership of the given buffer. Caller is free
|
||||
* to reuse/destroy buffer immediately after calling this function
|
||||
* Unserializing a selector is much faster than creating it from scratch
|
||||
* and is nicer on the heap (not as many allocations and frees)
|
||||
* ucnvsel_open() is expensive. Therefore, it is desirable to unserialize the data structre
|
||||
* rather than building it from scratch.
|
||||
* Open a selector from its serialized form.
|
||||
* The buffer must remain valid and unchanged for the lifetime of the selector.
|
||||
* This is much faster than creating a selector from scratch.
|
||||
* Using a serialized form from a different machine (endianness/charset) is supported.
|
||||
*
|
||||
* @param buffer pointer to a linear buffer containing serialized data
|
||||
* @param buffer pointer to the serialized form of a converter selector;
|
||||
* must be 32-bit-aligned
|
||||
* @param length the capacity of this buffer (can be equal to or larger than
|
||||
the actual data length)
|
||||
* the actual data length)
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return a pointer to the created selector
|
||||
* @return the new selector
|
||||
*
|
||||
* @draft ICU 4.2
|
||||
*/
|
||||
U_CAPI UConverterSelector* ucnvsel_unserialize(const char* buffer,
|
||||
int32_t length,
|
||||
UErrorCode* status);
|
||||
U_CAPI UConverterSelector* U_EXPORT2
|
||||
ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* serialize a selector into a linear buffer. No alignment necessary
|
||||
* The current serialized form is portable to different Endianness, and can
|
||||
* travel between ASCII and EBCDIC systems
|
||||
* Serialize a selector into a linear buffer.
|
||||
* The serialized form is portable to different machines.
|
||||
*
|
||||
* @param sel selector to consider
|
||||
* @param buffer pointer to a linear buffer to receive data
|
||||
* @param buffer pointer to 32-bit-aligned memory to be filled with the
|
||||
* serialized form of this converter selector
|
||||
* @param bufferCapacity the capacity of this buffer
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return the required buffer capacity to hold the serialized data (even if the call fails
|
||||
with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
|
||||
* with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
|
||||
*
|
||||
* @draft ICU 4.2
|
||||
*/
|
||||
U_CAPI int32_t ucnvsel_serialize(const UConverterSelector* sel,
|
||||
char* buffer,
|
||||
int32_t bufferCapacity,
|
||||
UErrorCode* status);
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucnvsel_serialize(const UConverterSelector* sel,
|
||||
void* buffer, int32_t bufferCapacity, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* check a UTF16 string using the selector. Find out what encodings it can be mapped to
|
||||
* Select converters that can map all characters in a UTF-16 string,
|
||||
* ignoring the excluded code points.
|
||||
*
|
||||
*
|
||||
* @param sel built selector
|
||||
* @param s pointer to UTF16 string
|
||||
* @param length length of UTF16 string in UChars, or -1 if NULL terminated
|
||||
* @param sel a selector
|
||||
* @param s UTF-16 string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return an enumeration containing encoding names. Returned encoding names
|
||||
* will be the same as supplied to ucnv_openSelector, or will be the
|
||||
* canonical names if selector was built for all encodings.
|
||||
* The order of encodings will be the same as supplied by the call to
|
||||
* ucnv_openSelector (if encodings were supplied)
|
||||
* @return an enumeration containing encoding names.
|
||||
* The returned encoding names and their order will be the same as
|
||||
* supplied when building the selector.
|
||||
*
|
||||
* @draft ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration *ucnvsel_selectForString(const UConverterSelector* sel, const UChar *s,
|
||||
int32_t length, UErrorCode *status);
|
||||
U_CAPI UEnumeration * U_EXPORT2
|
||||
ucnvsel_selectForString(const UConverterSelector* sel,
|
||||
const UChar *s, int32_t length, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* check a UTF8 string using the selector. Find out what encodings it can be
|
||||
* mapped to illegal codepoints will be ignored by this function! Only legal
|
||||
* codepoints will be considered for conversion
|
||||
* Select converters that can map all characters in a UTF-8 string,
|
||||
* ignoring the excluded code points.
|
||||
*
|
||||
* @param sel built selector
|
||||
* @param s pointer to UTF8 string
|
||||
* @param length length of UTF8 string (in chars), or -1 if NULL terminated
|
||||
* @param sel a selector
|
||||
* @param s UTF-8 string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return an enumeration containing encoding names. Returned encoding names
|
||||
* will be the same as supplied to ucnv_openSelector, or will be the canonical
|
||||
* names if selector was built for all encodings.
|
||||
* The order of encodings will be the same as supplied by the call to
|
||||
* ucnv_openSelector (if encodings were supplied)
|
||||
* @return an enumeration containing encoding names.
|
||||
* The returned encoding names and their order will be the same as
|
||||
* supplied when building the selector.
|
||||
*
|
||||
* @draft ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration *ucnvsel_selectForUTF8(const UConverterSelector* sel,
|
||||
const char *s,
|
||||
int32_t length,
|
||||
UErrorCode *status);
|
||||
|
||||
U_CAPI UEnumeration * U_EXPORT2
|
||||
ucnvsel_selectForUTF8(const UConverterSelector* sel,
|
||||
const char *s, int32_t length, UErrorCode *status);
|
||||
|
||||
#endif /* __ICU_UCNV_SEL_H__ */
|
||||
|
|
|
@ -26,9 +26,6 @@
|
|||
#define FILENAME_BUFFER 1024
|
||||
|
||||
#define TDSRCPATH ".."U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING
|
||||
|
||||
|
||||
|
||||
|
||||
static FILE *fopenOrError(const char *filename) {
|
||||
int32_t needLen;
|
||||
|
@ -56,14 +53,11 @@ static FILE *fopenOrError(const char *filename) {
|
|||
|
||||
void addCnvSelTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &TestConversionUTF16, "ucnv/ucnvseltst/TestConversionUTF16");
|
||||
addTest(root, &TestConversionUTF8, "ucnv/ucnvseltst/TestConversionUTF8");
|
||||
addTest(root, &TestSerializationAndUnserialization, "ucnv/ucnvseltst/TestSerializationAndUnserialization");
|
||||
addTest(root, &TestConversionUTF16, "tsconv/ucnvseltst/TestConversionUTF16");
|
||||
addTest(root, &TestConversionUTF8, "tsconv/ucnvseltst/TestConversionUTF8");
|
||||
addTest(root, &TestSerializationAndUnserialization, "tsconv/ucnvseltst/TestSerializationAndUnserialization");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* there doesn't seem to be a fn in ucnv to get the index of a converter
|
||||
* given one of its aliases!
|
||||
|
@ -792,9 +786,9 @@ static void TestSerializationAndUnserialization()
|
|||
uprv_free(buffer);
|
||||
return;
|
||||
}
|
||||
sel = ucnvsel_unserialize( buffer, ser_len,&status);
|
||||
sel = ucnvsel_openFromSerialized( buffer, ser_len,&status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("ucnvsel_unserialize(test case %d) failed: %s\n", curCase, u_errorName(status));
|
||||
log_err("ucnvsel_openFromSerialized(test case %d) failed: %s\n", curCase, u_errorName(status));
|
||||
uprv_free(encodings);
|
||||
uprv_free(names);
|
||||
uprv_free(buffer);
|
||||
|
@ -917,7 +911,7 @@ static void TestSerializationAndUnserialization()
|
|||
ucnvsel_serialize(sel, buffer, ser_len, &status);
|
||||
|
||||
ucnvsel_close(sel);
|
||||
sel = ucnvsel_unserialize( buffer, ser_len,&status);
|
||||
sel = ucnvsel_openFromSerialized( buffer, ser_len,&status);
|
||||
|
||||
/* count how many bytes (Is there a portable function that is more efficient than this?) */
|
||||
f1 = fopenOrError("ConverterSelectorTestUTF16.txt");
|
||||
|
|
Loading…
Add table
Reference in a new issue