mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 00:43:32 +00:00
ICU-2235 add swapping of pnames.icu
X-SVN-Rev: 13175
This commit is contained in:
parent
c192f04f76
commit
e9d1c49ba6
3 changed files with 439 additions and 2 deletions
icu4c/source
|
@ -12,6 +12,9 @@
|
|||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uarrsort.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -196,4 +199,379 @@ u_getPropertyValueEnum(UProperty property,
|
|||
: UCHAR_INVALID_CODE;
|
||||
}
|
||||
|
||||
/* data swapping ------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* Sub-structure-swappers use the temp array (which is as large as the
|
||||
* actual data) for intermediate storage,
|
||||
* as well as to indicate if a particular structure has been swapped already.
|
||||
* The temp array is initially reset to all 0.
|
||||
* pos is the byte offset of the sub-structure in the inBytes/outBytes/temp arrays.
|
||||
*/
|
||||
|
||||
/*
 * Swap one EnumToOffset map that starts at byte offset pos.
 *
 * ds         swapper that describes the input/output byte order
 * inBytes    start of the input PropertyAliases data (after the udata header)
 * length     number of available bytes, or negative to only compute the size
 * outBytes   start of the output data; only written when length>=0
 * temp       zero-initialized scratch array, parallel to inBytes/outBytes;
 *            receives the native-endian enumStart/enumLimit, which also
 *            marks this map as already swapped
 * pos        byte offset of the map in inBytes/outBytes/temp
 * pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the map does not fit
 *
 * Returns the size of the map as computed by getSize(), or 0 on error.
 */
int32_t
EnumToOffset::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const EnumToOffset *inMap;
    EnumToOffset *outMap, *tempMap;
    int32_t size;

    tempMap=(EnumToOffset *)(temp+pos);
    if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) {
        /* this map was swapped already */
        return tempMap->getSize();
    }

    inMap=(const EnumToOffset *)(inBytes+pos);
    outMap=(EnumToOffset *)(outBytes+pos);

    /* remember the native-endian range in temp; it doubles as the "done" marker */
    tempMap->enumStart=udata_readInt32(ds, inMap->enumStart);
    tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * The map must fit into the available bytes.
         * This test must not be gated on length<sizeof(PropertyAliases):
         * upname_swap() rejects such lengths before we get here, so that
         * extra condition would make this error unreachable.
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n"
                                 " for pnames.icu EnumToOffset{%d..%d} at %d\n",
                             length, tempMap->enumStart, tempMap->enumLimit, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap enumStart and enumLimit */
        ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode);

        /* swap _offsetArray[] */
        ds->swapArray16(ds, inMap->getOffsetArray(),
                        (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset),
                        outMap->getOffsetArray(), pErrorCode);
    }

    return size;
}
|
||||
|
||||
/*
 * Swap one NonContiguousEnumToOffset map that starts at byte offset pos.
 *
 * ds         swapper that describes the input/output byte order
 * inBytes    start of the input PropertyAliases data (after the udata header)
 * length     number of available bytes, or negative to only compute the size
 * outBytes   start of the output data; only written when length>=0
 * temp       zero-initialized scratch array, parallel to inBytes/outBytes;
 *            receives the native-endian count, which also marks this map
 *            as already swapped
 * pos        byte offset of the map in inBytes/outBytes/temp
 * pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the map does not fit
 *
 * Returns the size of the map as computed by getSize(), or 0 on error.
 */
int32_t
NonContiguousEnumToOffset::swap(const UDataSwapper *ds,
                                const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                                uint8_t *temp, int32_t pos,
                                UErrorCode *pErrorCode) {
    const NonContiguousEnumToOffset *inMap;
    NonContiguousEnumToOffset *outMap, *tempMap;
    int32_t size, enumBytes;

    tempMap=(NonContiguousEnumToOffset *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        return tempMap->getSize();
    }

    inMap=(const NonContiguousEnumToOffset *)(inBytes+pos);
    outMap=(NonContiguousEnumToOffset *)(outBytes+pos);

    /* remember the native-endian count in temp; it doubles as the "done" marker */
    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * The map must fit into the available bytes.
         * This test must not be gated on length<sizeof(PropertyAliases):
         * upname_swap() rejects such lengths before we get here, so that
         * extra condition would make this error unreachable.
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n"
                                 " for pnames.icu NonContiguousEnumToOffset[%d] at %d\n",
                             length, tempMap->count, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap count and _enumArray[]: 1+count 32-bit units starting at the map */
        enumBytes=(int32_t)((1+tempMap->count)*sizeof(EnumValue));
        ds->swapArray32(ds, inMap, enumBytes, outMap, pErrorCode);

        /* swap _offsetArray[], which starts right behind _enumArray[] */
        ds->swapArray16(ds, inBytes+pos+enumBytes,
                        (int32_t)(tempMap->count*sizeof(Offset)),
                        outBytes+pos+enumBytes, pErrorCode);
    }

    return size;
}
|
||||
|
||||
struct NameAndIndex {
|
||||
Offset name, index;
|
||||
};
|
||||
|
||||
/*
 * Comparison callback for uprv_sortArray(): orders two NameAndIndex rows
 * by the strings that their name offsets select.
 * context is the base address to which the name offsets are added.
 */
static int32_t
upname_compareRows(const void *context, const void *left, const void *right) {
    const char *stringBase=(const char *)context;
    const NameAndIndex *leftRow=(const NameAndIndex *)left;
    const NameAndIndex *rightRow=(const NameAndIndex *)right;

    return (int32_t)uprv_strcmp(stringBase+leftRow->name,
                                stringBase+rightRow->name);
}
|
||||
|
||||
/*
 * Swap one NameToEnum map that starts at byte offset pos, and re-sort its
 * parallel enum/name arrays according to the output strings.
 *
 * ds         swapper that describes the input/output byte order
 * inBytes    start of the input PropertyAliases data (after the udata header)
 * length     number of available bytes, or negative to only compute the size
 * outBytes   start of the output data; must already contain the swapped
 *            strings because they are used as sort keys; may equal inBytes
 * temp       zero-initialized scratch array, parallel to inBytes/outBytes;
 *            receives the native-endian count (also the "already swapped"
 *            marker) and the sort table
 * pos        byte offset of the map in inBytes/outBytes/temp
 * pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the map does not fit,
 *            U_MEMORY_ALLOCATION_ERROR if in-place scratch memory cannot be
 *            allocated, or whatever uprv_sortArray() reports
 *
 * Returns the size of the map as computed by getSize(), or 0 on error.
 */
int32_t
NameToEnum::swap(const UDataSwapper *ds,
                 const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                 uint8_t *temp, int32_t pos,
                 UErrorCode *pErrorCode) {
    const NameToEnum *inMap;
    NameToEnum *outMap, *tempMap;

    const EnumValue *inEnumArray;
    EnumValue *outEnumArray;

    const Offset *inNameArray;
    Offset *outNameArray;

    NameAndIndex *sortArray;
    uint8_t *inCopy;

    int32_t i, size, oldIndex, arraysSize;

    tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        return tempMap->getSize();
    }

    inMap=(const NameToEnum *)(inBytes+pos);
    outMap=(NameToEnum *)(outBytes+pos);

    /* remember the native-endian count in temp; it doubles as the "done" marker */
    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * The map must fit into the available bytes.
         * This test must not be gated on length<sizeof(PropertyAliases):
         * upname_swap() rejects such lengths before we get here, so that
         * extra condition would make this error unreachable.
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                                 " for pnames.icu NameToEnum[%d] at %d\n",
                             length, tempMap->count, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap count */
        ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

        inEnumArray=inMap->getEnumArray();
        outEnumArray=outMap->getEnumArray();

        /* the name array starts right behind the enum array */
        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        outNameArray=(Offset *)(outEnumArray+tempMap->count);

        /*
         * In-place swapping: the permutation below reads entry [oldIndex]
         * while writing entry [i]; with shared input and output arrays it
         * would overwrite entries that have not been read yet.
         * Permute from a private copy of the input arrays instead.
         */
        inCopy=NULL;
        if(inBytes==outBytes && tempMap->count>0) {
            arraysSize=tempMap->count*(int32_t)(sizeof(EnumValue)+sizeof(Offset));
            inCopy=(uint8_t *)uprv_malloc(arraysSize);
            if(inCopy==NULL) {
                udata_printError(ds, "upname_swap(NameToEnum): unable to allocate temp memory (%d bytes)\n",
                                 arraysSize);
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
            uprv_memcpy(inCopy, inEnumArray, arraysSize);
            inEnumArray=(const EnumValue *)inCopy;
            inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        }

        /*
         * ### TODO optimize
         * After some testing, add a test
         * if(inCharset==outCharset) { only swap enums and names, do not sort; }
         * else { sort/copy/swap/permutate as below; }
         */

        /*
         * The name and enum arrays are sorted by names and must be resorted
         * if inCharset!=outCharset.
         * We use the corresponding part of the temp array to sort an array
         * of pairs of name offsets and sorting indexes.
         * Then the sorting indexes are used to permutate-swap the name and enum arrays.
         *
         * The outBytes must already contain the swapped strings.
         */
        sortArray=(NameAndIndex *)tempMap->getEnumArray();
        for(i=0; i<tempMap->count; ++i) {
            sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
            sortArray[i].index=(Offset)i;
        }

        /* sort by the output strings; name offsets are relative to outBytes */
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
                       upname_compareRows, outBytes,
                       FALSE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n",
                             tempMap->count, u_errorName(*pErrorCode));
            if(inCopy!=NULL) {
                uprv_free(inCopy);
            }
            return 0;
        }

        /* copy/swap/permutate _enumArray[] and _nameArray[] */
        for(i=0; i<tempMap->count; ++i) {
            oldIndex=sortArray[i].index;
            ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
            ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
        }

        if(inCopy!=NULL) {
            uprv_free(inCopy);
        }
    }

    return size;
}
|
||||
|
||||
/*
 * Swap the whole pnames.icu payload (everything after the udata header).
 *
 * The PropertyAliases header is first read into a native-endian copy.
 * With a negative length nothing is written and only total_size is returned.
 * Otherwise the header, the name groups, the strings, the three top-level
 * maps, every map referenced from the ValueMap array, and finally the
 * ValueMap array itself are swapped from inBytes to outBytes.
 *
 * Returns total_size from the header, or 0 if the input is too short or
 * the temporary array cannot be allocated (pErrorCode is set then).
 */
int32_t
PropertyAliases::swap(const UDataSwapper *ds,
                      const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                      UErrorCode *pErrorCode) {
    PropertyAliases header;     /* native-endian copy of the input header */
    ValueMap entry;             /* native-endian copy of one ValueMap */

    const ValueMap *srcValueMaps;
    ValueMap *destValueMaps;

    const uint16_t *rawHeader;
    uint16_t *nativeHeader;

    uint8_t *marks;             /* parallel array for the sub-structure swappers */

    int32_t k;

    /* read the input PropertyAliases - all 16-bit values */
    rawHeader=(const uint16_t *)inBytes;
    nativeHeader=(uint16_t *)&header;
    for(k=0; k<sizeof(PropertyAliases)/2; ++k) {
        nativeHeader[k]=ds->readUInt16(rawHeader[k]);
    }

    if(length<0) {
        /* size request only: do not touch outBytes */
        return header.total_size;
    }

    if(length<header.total_size) {
        udata_printError(ds, "upname_swap(): too few bytes (%d after header) for all of pnames.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, header.total_size);
    }

    /* swap the PropertyAliases class fields */
    ds->swapArray16(ds, (const PropertyAliases *)inBytes, sizeof(PropertyAliases),
                    (PropertyAliases *)outBytes, pErrorCode);

    /* swap the name groups: everything from their start up to the string pool */
    ds->swapArray16(ds, inBytes+header.nameGroupPool_offset,
                    header.stringPool_offset-header.nameGroupPool_offset,
                    outBytes+header.nameGroupPool_offset, pErrorCode);

    /* swap the strings; NameToEnum::swap() below sorts by the output strings */
    udata_swapInvStringBlock(ds, inBytes+header.stringPool_offset,
                             header.total_size-header.stringPool_offset,
                             outBytes+header.stringPool_offset, pErrorCode);

    /*
     * Allocate and zero a temp array that is as large as the data.
     * Each sub-structure swap() stores at least its count field(s) there,
     * so that a structure that is referenced several times is swapped once.
     */
    marks=(uint8_t *)uprv_malloc(header.total_size);
    if(marks==NULL) {
        udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n",
                         header.total_size);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    uprv_memset(marks, 0, header.total_size);

    /* swap properties->name groups map */
    NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
                                    marks, header.enumToName_offset, pErrorCode);

    /* swap name->properties map */
    NameToEnum::swap(ds, inBytes, length, outBytes,
                     marks, header.nameToEnum_offset, pErrorCode);

    /* swap properties->value maps map */
    NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
                                    marks, header.enumToValue_offset, pErrorCode);

    /* enumerate all ValueMaps and swap the maps that they point to */
    srcValueMaps=(const ValueMap *)(inBytes+header.valueMap_offset);
    destValueMaps=(ValueMap *)(outBytes+header.valueMap_offset);

    for(k=0; k<header.valueMap_count; ++k) {
        const ValueMap *src=srcValueMaps+k;

        entry.enumToName_offset=udata_readInt16(ds, src->enumToName_offset);
        entry.ncEnumToName_offset=udata_readInt16(ds, src->ncEnumToName_offset);
        entry.nameToEnum_offset=udata_readInt16(ds, src->nameToEnum_offset);

        /* the contiguous map takes precedence over the non-contiguous one */
        if(entry.enumToName_offset!=0) {
            EnumToOffset::swap(ds, inBytes, length, outBytes,
                               marks, entry.enumToName_offset,
                               pErrorCode);
        } else if(entry.ncEnumToName_offset!=0) {
            NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
                                            marks, entry.ncEnumToName_offset,
                                            pErrorCode);
        }

        if(entry.nameToEnum_offset!=0) {
            NameToEnum::swap(ds, inBytes, length, outBytes,
                             marks, entry.nameToEnum_offset,
                             pErrorCode);
        }
    }

    /* swap the ValueMaps array itself, only after its entries were read above */
    ds->swapArray16(ds, srcValueMaps, header.valueMap_count*sizeof(ValueMap),
                    destValueMaps, pErrorCode);

    /* name groups and strings were swapped above */

    /* release temp */
    uprv_free(marks);

    return header.total_size;
}
|
||||
|
||||
/*
 * Swap pnames.icu: swap the udata header, verify that the data format is
 * "pnam" with format version 1, then swap the PropertyAliases payload.
 *
 * length is the number of available bytes including the header; a negative
 * length is passed through to PropertyAliases::swap() unchanged.
 *
 * Returns the header size plus the value returned by PropertyAliases::swap(),
 * or 0 on error (pErrorCode is set to U_UNSUPPORTED_ERROR for an unrecognized
 * format, U_INDEX_OUTOFBOUNDS_ERROR for too few bytes).
 */
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    if(length>=0) {
        length-=headerSize;
        /*
         * Compare as signed values: without the cast, a negative remainder
         * would be converted to a huge unsigned value and pass this check.
         */
        if(length<(int32_t)sizeof(PropertyAliases)) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    return headerSize+PropertyAliases::swap(ds, inBytes, length, outBytes, pErrorCode);
}
|
||||
|
||||
//eof
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002, International Business Machines
|
||||
* Copyright (c) 2002-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -157,6 +157,11 @@ class PropertyAliases {
|
|||
|
||||
inline EnumValue getPropertyValueEnum(EnumValue prop,
|
||||
const char* alias) const;
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -189,6 +194,10 @@ class EnumToOffset {
|
|||
return sizeof(EnumToOffset) + sizeof(Offset) * (n - 1);
|
||||
}
|
||||
|
||||
int32_t getSize() {
|
||||
return getSize(enumLimit - enumStart);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
Offset getOffset(EnumValue enumProbe) const {
|
||||
|
@ -199,6 +208,12 @@ class EnumToOffset {
|
|||
const Offset* p = getOffsetArray();
|
||||
return p[enumProbe - enumStart];
|
||||
}
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
uint8_t *temp, int32_t pos,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -239,6 +254,10 @@ class NonContiguousEnumToOffset {
|
|||
return sizeof(int32_t) + (sizeof(EnumValue) + sizeof(Offset)) * n;
|
||||
}
|
||||
|
||||
int32_t getSize() {
|
||||
return getSize(count);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
Offset getOffset(EnumValue enumProbe) const {
|
||||
|
@ -253,6 +272,12 @@ class NonContiguousEnumToOffset {
|
|||
}
|
||||
return 0; // not found
|
||||
}
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
uint8_t *temp, int32_t pos,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -291,6 +316,10 @@ class NameToEnum {
|
|||
return sizeof(int32_t) + (sizeof(Offset) + sizeof(EnumValue)) * n;
|
||||
}
|
||||
|
||||
int32_t getSize() {
|
||||
return getSize(count);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
EnumValue getEnum(const char* alias, const PropertyAliases& data) const {
|
||||
|
@ -310,8 +339,23 @@ class NameToEnum {
|
|||
|
||||
return UCHAR_INVALID_CODE;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
uint8_t *temp, int32_t pos,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
/**
|
||||
* Swap pnames.icu. See udataswp.h.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
upname_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
*
|
||||
* In-memory layout. THIS IS NOT A STANDALONE DOCUMENT. It goes
|
||||
|
@ -348,7 +392,20 @@ class NameToEnum {
|
|||
* nameToEnum_offset (>2)
|
||||
* enumToValue_offset (>3)
|
||||
* (alignment padding built into header)
|
||||
*
|
||||
*
|
||||
* The header also contains the following, used by "external readers"
|
||||
* like ICU4J and icuswap.
|
||||
*
|
||||
* // The following are needed by external readers of this data.
|
||||
* // We don't use them ourselves.
|
||||
* int16_t total_size; // size in bytes excluding the udata header
|
||||
* Offset valueMap_offset; // offset to start of array
|
||||
* int16_t valueMap_count; // number of entries
|
||||
* Offset nameGroupPool_offset; // offset to start of array
|
||||
* int16_t nameGroupPool_count; // number of entries (not groups)
|
||||
* Offset stringPool_offset; // offset to start of pool
|
||||
* int16_t stringPool_count; // number of strings (not size in bytes)
|
||||
*
|
||||
* 0: # NonContiguousEnumToOffset obj for props => name groups
|
||||
* count
|
||||
* enumArray [x count]
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "ucnv_bld.h"
|
||||
#include "unormimp.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "propname.h"
|
||||
|
||||
/* swapping implementations in i18n */
|
||||
|
||||
|
@ -297,6 +298,7 @@ static const struct {
|
|||
{ { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
|
||||
{ { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
|
||||
#endif
|
||||
{ { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
|
||||
{ { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames } /* dataFormat="unam" */
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue