ICU-6453 fix/improve upvec_ serialization, require 32-bit-aligned serialization buffers, and clean up some code

X-SVN-Rev: 24870
This commit is contained in:
Markus Scherer 2008-10-24 04:35:50 +00:00
parent 7364736f03
commit 6b0e32fd7c
5 changed files with 577 additions and 647 deletions

View file

@ -436,7 +436,7 @@ upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE
pv->rows=count/valueColumns+1;
}
U_CAPI uint32_t * U_EXPORT2
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
if(!pv->isCompacted) {
return NULL;
@ -450,6 +450,35 @@ upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
return pv->v;
}
/*
 * Return a freshly allocated copy of the compacted vectors array.
 *
 * The copy holds pv->rows rows of (pv->columns-2) 32-bit values each and is
 * allocated with uprv_malloc(); the caller owns it and must uprv_free() it.
 *
 * pRows and pColumns are optional outputs (each may be NULL) that receive
 * the row count and the per-row value count of the copy.
 *
 * Returns NULL and leaves *pErrorCode untouched if it already indicates a
 * failure; sets U_ILLEGAL_ARGUMENT_ERROR if pv has not been compacted, and
 * U_MEMORY_ALLOCATION_ERROR if the allocation fails.
 */
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
    int32_t valueColumns;
    int32_t size;
    uint32_t *copy;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(!pv->isCompacted) {
        /* only the compacted form can be cloned */
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    valueColumns=pv->columns-2;
    /* total size in bytes: 4 bytes per 32-bit value */
    size=pv->rows*valueColumns*4;

    copy=(uint32_t *)uprv_malloc(size);
    if(copy==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(copy, pv->v, size);

    if(pRows!=NULL) {
        *pRows=pv->rows;
    }
    if(pColumns!=NULL) {
        *pColumns=valueColumns;
    }
    return copy;
}
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
UPVecToUTrie2Context toUTrie2={ NULL };

View file

@ -123,10 +123,20 @@ upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE
/*
* Get the vectors array after calling upvec_compact().
* The caller must not modify nor release the returned array.
* Returns NULL if called before upvec_compact().
*/
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
/*
* Get a clone of the vectors array after calling upvec_compact().
* The caller owns the returned array and must uprv_free() it.
* Returns NULL if called before upvec_compact().
*/
U_CAPI uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
upvec_cloneArray(const UPropsVectors *pv,
int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
/*
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted

File diff suppressed because it is too large Load diff

View file

@ -25,17 +25,17 @@
#include "unicode/uenum.h"
#include "unicode/ucnv.h"
/**
* \file
*
* This is the declarations for the encoding selector.
* The goal is, given a unicode string, find the encodings
* this string can be mapped to.
* A converter selector is built with a set of encoding/charset names
* and given an input string returns the set of names of the
* corresponding converters which can convert the string.
*
* A converter selector can be serialized into a buffer and reopened
* from the serialized form.
*/
/**
* @{
* The selector data structure
@ -45,38 +45,37 @@ typedef struct UConverterSelector UConverterSelector;
/** @} */
/**
* open a selector. If converterList is NULL, build for all converters. If excludedCodePoints
* is NULL, don't exclude any codepoints
*
* Open a selector.
* If converterListSize is 0, build for all available converters.
* If excludedCodePoints is NULL, don't exclude any code points.
*
* @param converterList a pointer to encoding names needed to be involved.
* NULL means build a selector for all possible converters
* @param converterListSize number of encodings in above list.
* Setting converterListSize to 0, builds a selector for all
* converters. ucnvsel_open() does not transfer ownership to this
* array. Once uncvsel_open() returns, the caller is free to reuse/destroy
* the array.
* @param excludedCodePoints a set of codepoints to be excluded from
* consideration. set to NULL to exclude nothing
* @param whichSet what converter set to use? use this to determine whether
* to construct selector for fallback or for roundtrip only mappings
* Can be NULL if converterListSize==0.
* The list and the names will be cloned, and the caller
* retains ownership of the original.
* @param converterListSize number of encodings in above list.
* If 0, builds a selector for all available converters.
* @param excludedCodePoints a set of code points to be excluded from consideration.
* That is, excluded code points in a string do not change
* the selection result. (They might be handled by a callback.)
* Use NULL to exclude nothing.
* @param whichSet what converter set to use? Use this to determine whether
* to consider only roundtrip mappings or also fallbacks.
* @param status an in/out ICU UErrorCode
* @return a pointer to the created selector
* @return the new selector
*
* @draft ICU 4.2
*/
U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList,
int32_t converterListSize,
const USet* excludedCodePoints,
const UConverterUnicodeSet whichSet,
UErrorCode* status);
U_CAPI UConverterSelector* U_EXPORT2
ucnvsel_open(const char* const* converterList, int32_t converterListSize,
const USet* excludedCodePoints,
const UConverterUnicodeSet whichSet, UErrorCode* status);
/* close opened selector */
/**
* closes a selector. and releases allocated memory
* if any Enumerations were returned by ucnv_select*, they become invalid.
* Closes a selector.
* If any Enumerations were returned by ucnvsel_select*, they become invalid.
* They can be closed before or after calling ucnvsel_close(),
* but should never be used after selector is closed
* but should never be used after the selector is closed.
*
* @see ucnvsel_selectForString
* @see ucnvsel_selectForUTF8
@ -85,88 +84,79 @@ U_CAPI UConverterSelector* ucnvsel_open(const char* const* converterList,
*
* @draft ICU 4.2
*/
U_CAPI void ucnvsel_close(UConverterSelector *sel);
U_CAPI void U_EXPORT2
ucnvsel_close(UConverterSelector *sel);
/**
* unserialize a selector from a linear buffer. No alignment necessary.
* the function does NOT take ownership of the given buffer. Caller is free
* to reuse/destroy buffer immediately after calling this function
* Unserializing a selector is much faster than creating it from scratch
* and is nicer on the heap (not as many allocations and frees)
* ucnvsel_open() is expensive. Therefore, it is desirable to unserialize the data structre
* rather than building it from scratch.
* Open a selector from its serialized form.
* The buffer must remain valid and unchanged for the lifetime of the selector.
* This is much faster than creating a selector from scratch.
* Using a serialized form from a different machine (endianness/charset) is supported.
*
* @param buffer pointer to a linear buffer containing serialized data
* @param buffer pointer to the serialized form of a converter selector;
* must be 32-bit-aligned
* @param length the capacity of this buffer (can be equal to or larger than
the actual data length)
* the actual data length)
* @param status an in/out ICU UErrorCode
* @return a pointer to the created selector
* @return the new selector
*
* @draft ICU 4.2
*/
U_CAPI UConverterSelector* ucnvsel_unserialize(const char* buffer,
int32_t length,
UErrorCode* status);
U_CAPI UConverterSelector* U_EXPORT2
ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
/**
* serialize a selector into a linear buffer. No alignment necessary
* The current serialized form is portable to different Endianness, and can
* travel between ASCII and EBCDIC systems
* Serialize a selector into a linear buffer.
* The serialized form is portable to different machines.
*
* @param sel selector to consider
* @param buffer pointer to a linear buffer to receive data
* @param buffer pointer to 32-bit-aligned memory to be filled with the
* serialized form of this converter selector
* @param bufferCapacity the capacity of this buffer
* @param status an in/out ICU UErrorCode
* @return the required buffer capacity to hold the serialized data (even if the call fails
with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
* with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
*
* @draft ICU 4.2
*/
U_CAPI int32_t ucnvsel_serialize(const UConverterSelector* sel,
char* buffer,
int32_t bufferCapacity,
UErrorCode* status);
U_CAPI int32_t U_EXPORT2
ucnvsel_serialize(const UConverterSelector* sel,
void* buffer, int32_t bufferCapacity, UErrorCode* status);
/**
* check a UTF16 string using the selector. Find out what encodings it can be mapped to
* Select converters that can map all characters in a UTF-16 string,
* ignoring the excluded code points.
*
*
* @param sel built selector
* @param s pointer to UTF16 string
* @param length length of UTF16 string in UChars, or -1 if NULL terminated
* @param sel a selector
* @param s UTF-16 string
* @param length length of the string, or -1 if NUL-terminated
* @param status an in/out ICU UErrorCode
* @return an enumeration containing encoding names. Returned encoding names
* will be the same as supplied to ucnv_openSelector, or will be the
* canonical names if selector was built for all encodings.
* The order of encodings will be the same as supplied by the call to
* ucnv_openSelector (if encodings were supplied)
* @return an enumeration containing encoding names.
* The returned encoding names and their order will be the same as
* supplied when building the selector.
*
* @draft ICU 4.2
*/
U_CAPI UEnumeration *ucnvsel_selectForString(const UConverterSelector* sel, const UChar *s,
int32_t length, UErrorCode *status);
U_CAPI UEnumeration * U_EXPORT2
ucnvsel_selectForString(const UConverterSelector* sel,
const UChar *s, int32_t length, UErrorCode *status);
/**
* check a UTF8 string using the selector. Find out what encodings it can be
* mapped to illegal codepoints will be ignored by this function! Only legal
* codepoints will be considered for conversion
* Select converters that can map all characters in a UTF-8 string,
* ignoring the excluded code points.
*
* @param sel built selector
* @param s pointer to UTF8 string
* @param length length of UTF8 string (in chars), or -1 if NULL terminated
* @param sel a selector
* @param s UTF-8 string
* @param length length of the string, or -1 if NUL-terminated
* @param status an in/out ICU UErrorCode
* @return an enumeration containing encoding names. Returned encoding names
* will be the same as supplied to ucnv_openSelector, or will be the canonical
* names if selector was built for all encodings.
* The order of encodings will be the same as supplied by the call to
* ucnv_openSelector (if encodings were supplied)
* @return an enumeration containing encoding names.
* The returned encoding names and their order will be the same as
* supplied when building the selector.
*
* @draft ICU 4.2
*/
U_CAPI UEnumeration *ucnvsel_selectForUTF8(const UConverterSelector* sel,
const char *s,
int32_t length,
UErrorCode *status);
U_CAPI UEnumeration * U_EXPORT2
ucnvsel_selectForUTF8(const UConverterSelector* sel,
const char *s, int32_t length, UErrorCode *status);
#endif /* __ICU_UCNV_SEL_H__ */

View file

@ -26,9 +26,6 @@
#define FILENAME_BUFFER 1024
#define TDSRCPATH ".."U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING
static FILE *fopenOrError(const char *filename) {
int32_t needLen;
@ -56,14 +53,11 @@ static FILE *fopenOrError(const char *filename) {
void addCnvSelTest(TestNode** root)
{
addTest(root, &TestConversionUTF16, "ucnv/ucnvseltst/TestConversionUTF16");
addTest(root, &TestConversionUTF8, "ucnv/ucnvseltst/TestConversionUTF8");
addTest(root, &TestSerializationAndUnserialization, "ucnv/ucnvseltst/TestSerializationAndUnserialization");
addTest(root, &TestConversionUTF16, "tsconv/ucnvseltst/TestConversionUTF16");
addTest(root, &TestConversionUTF8, "tsconv/ucnvseltst/TestConversionUTF8");
addTest(root, &TestSerializationAndUnserialization, "tsconv/ucnvseltst/TestSerializationAndUnserialization");
}
/*
* there doesn't seem to be a fn in ucnv to get the index of a converter
* given one of its aliases!
@ -792,9 +786,9 @@ static void TestSerializationAndUnserialization()
uprv_free(buffer);
return;
}
sel = ucnvsel_unserialize( buffer, ser_len,&status);
sel = ucnvsel_openFromSerialized( buffer, ser_len,&status);
if (U_FAILURE(status)) {
log_err("ucnvsel_unserialize(test case %d) failed: %s\n", curCase, u_errorName(status));
log_err("ucnvsel_openFromSerialized(test case %d) failed: %s\n", curCase, u_errorName(status));
uprv_free(encodings);
uprv_free(names);
uprv_free(buffer);
@ -917,7 +911,7 @@ static void TestSerializationAndUnserialization()
ucnvsel_serialize(sel, buffer, ser_len, &status);
ucnvsel_close(sel);
sel = ucnvsel_unserialize( buffer, ser_len,&status);
sel = ucnvsel_openFromSerialized( buffer, ser_len,&status);
/* count how many bytes (Is there a portable function that is more efficient than this?) */
f1 = fopenOrError("ConverterSelectorTestUTF16.txt");