mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-5138 Separate the casing data from normalization data and data loading.
X-SVN-Rev: 19499
This commit is contained in:
parent
f47dea2b53
commit
7d382500f6
8 changed files with 271 additions and 247 deletions
|
@ -74,7 +74,7 @@ utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_w
|
|||
normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
|
||||
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o usc_impl.o unames.o \
|
||||
utrie.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
utrie.o utrie_swap.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
|
||||
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
|
||||
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
|
||||
|
|
|
@ -460,6 +460,9 @@
|
|||
<File
|
||||
RelativePath=".\utrie.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\utrie_swap.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uvector.cpp">
|
||||
</File>
|
||||
|
|
|
@ -196,7 +196,9 @@ ucase_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
|
|||
U_CAPI void U_EXPORT2
|
||||
ucase_close(UCaseProps *csp) {
|
||||
if(csp!=NULL) {
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
udata_close(csp->mem);
|
||||
#endif
|
||||
uprv_free(csp);
|
||||
}
|
||||
}
|
||||
|
@ -1482,3 +1484,116 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
|
|||
|
||||
return (result==c) ? ~result : result;
|
||||
}
|
||||
|
||||
/* case mapping properties API ---------------------------------------------- */
|
||||
|
||||
/* get the UCaseProps singleton, or else its dummy, once and for all */
|
||||
static const UCaseProps *
|
||||
getCaseProps() {
|
||||
/*
|
||||
* This lazy initialization with double-checked locking (without mutex protection for
|
||||
* the initial check) is transiently unsafe under certain circumstances.
|
||||
* Check the readme and use u_init() if necessary.
|
||||
*/
|
||||
|
||||
/* the initial check is performed by the GET_CASE_PROPS() macro */
|
||||
const UCaseProps *csp;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
csp=ucase_getDummy(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return csp;
|
||||
}
|
||||
|
||||
/*
|
||||
* In ICU 3.0, most Unicode properties were loaded from uprops.icu.
|
||||
* ICU 3.2 adds ucase.icu for case mapping properties.
|
||||
* ICU 3.4 adds ubidi.icu for bidi/shaping properties and
|
||||
* removes case/bidi/shaping properties from uprops.icu.
|
||||
*
|
||||
* Loading of uprops.icu was never mutex-protected and required u_init()
|
||||
* for thread safety.
|
||||
* In order to maintain performance for all such properties,
|
||||
* ucase.icu and ubidi.icu are loaded lazily, without mutexing.
|
||||
* u_init() will try to load them for thread safety,
|
||||
* but u_init() will not fail if they are missing.
|
||||
*
|
||||
* uchar.c maintains a tri-state flag for (not loaded/loaded/failed to load)
|
||||
* and an error code for load failure.
|
||||
* Instead, here we try to load at most once.
|
||||
* If it works, we use the resulting singleton object.
|
||||
* If it fails, then we get a dummy object, which always works unless
|
||||
* we are seriously out of memory.
|
||||
* After the first try, we have a never-changing pointer to either the
|
||||
* real singleton or the dummy.
|
||||
*
|
||||
* This method is used in Unicode properties APIs (uchar.h) that
|
||||
* do not have a service object and also do not have an error code parameter.
|
||||
* Other API implementations get the singleton themselves
|
||||
* (with mutexing), store it in the service object, and report errors.
|
||||
*/
|
||||
#define GET_CASE_PROPS() (gCsp!=NULL ? gCsp : getCaseProps())
|
||||
|
||||
/* public API (see uchar.h) */
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isULowercase(UChar32 c) {
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c));
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isUUppercase(UChar32 c) {
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c));
|
||||
}
|
||||
|
||||
/* Transforms the Unicode character to its lower case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_tolower(UChar32 c) {
|
||||
return ucase_tolower(GET_CASE_PROPS(), c);
|
||||
}
|
||||
|
||||
/* Transforms the Unicode character to its upper case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_toupper(UChar32 c) {
|
||||
return ucase_toupper(GET_CASE_PROPS(), c);
|
||||
}
|
||||
|
||||
/* Transforms the Unicode character to its title case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_totitle(UChar32 c) {
|
||||
return ucase_totitle(GET_CASE_PROPS(), c);
|
||||
}
|
||||
|
||||
/* return the simple case folding mapping for c */
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_foldCase(UChar32 c, uint32_t options) {
|
||||
return ucase_fold(GET_CASE_PROPS(), c, options);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t U_EXPORT2
|
||||
ucase_hasBinaryProperty(UChar32 c, UProperty which) {
|
||||
/* case mapping properties */
|
||||
const UCaseProps *csp=GET_CASE_PROPS();
|
||||
if(csp==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_LOWERCASE:
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
|
||||
case UCHAR_UPPERCASE:
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
|
||||
case UCHAR_SOFT_DOTTED:
|
||||
return ucase_isSoftDotted(csp, c);
|
||||
case UCHAR_CASE_SENSITIVE:
|
||||
return ucase_isCaseSensitive(csp, c);
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2005, International Business Machines
|
||||
* Copyright (C) 2004-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -238,6 +238,9 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
|
|||
const UChar **pString,
|
||||
uint32_t options);
|
||||
|
||||
U_CFUNC int32_t U_EXPORT2
|
||||
ucase_hasBinaryProperty(UChar32 c, UProperty which);
|
||||
|
||||
/* file definitions --------------------------------------------------------- */
|
||||
|
||||
#define UCASE_DATA_NAME "ucase"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Copyright (C) 2002-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -36,115 +36,13 @@
|
|||
|
||||
/* cleanup ------------------------------------------------------------------ */
|
||||
|
||||
static const UCaseProps *gCsp=NULL;
|
||||
static const UBiDiProps *gBdp=NULL;
|
||||
|
||||
static UBool U_CALLCONV uprops_cleanup(void) {
|
||||
gCsp=NULL;
|
||||
gBdp=NULL;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* case mapping properties API ---------------------------------------------- */
|
||||
|
||||
/* get the UCaseProps singleton, or else its dummy, once and for all */
|
||||
static const UCaseProps *
|
||||
getCaseProps() {
|
||||
/*
|
||||
* This lazy initialization with double-checked locking (without mutex protection for
|
||||
* the initial check) is transiently unsafe under certain circumstances.
|
||||
* Check the readme and use u_init() if necessary.
|
||||
*/
|
||||
|
||||
/* the initial check is performed by the GET_CASE_PROPS() macro */
|
||||
const UCaseProps *csp;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
csp=ucase_getDummy(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
umtx_lock(NULL);
|
||||
if(gCsp==NULL) {
|
||||
gCsp=csp;
|
||||
csp=NULL;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
return gCsp;
|
||||
}
|
||||
|
||||
/*
|
||||
* In ICU 3.0, most Unicode properties were loaded from uprops.icu.
|
||||
* ICU 3.2 adds ucase.icu for case mapping properties.
|
||||
* ICU 3.4 adds ubidi.icu for bidi/shaping properties and
|
||||
* removes case/bidi/shaping properties from uprops.icu.
|
||||
*
|
||||
* Loading of uprops.icu was never mutex-protected and required u_init()
|
||||
* for thread safety.
|
||||
* In order to maintain performance for all such properties,
|
||||
* ucase.icu and ubidi.icu are loaded lazily, without mutexing.
|
||||
* u_init() will try to load them for thread safety,
|
||||
* but u_init() will not fail if they are missing.
|
||||
*
|
||||
* uchar.c maintains a tri-state flag for (not loaded/loaded/failed to load)
|
||||
* and an error code for load failure.
|
||||
* Instead, here we try to load at most once.
|
||||
* If it works, we use the resulting singleton object.
|
||||
* If it fails, then we get a dummy object, which always works unless
|
||||
* we are seriously out of memory.
|
||||
* After the first try, we have a never-changing pointer to either the
|
||||
* real singleton or the dummy.
|
||||
*
|
||||
* This method is used in Unicode properties APIs (uchar.h) that
|
||||
* do not have a service object and also do not have an error code parameter.
|
||||
* Other API implementations get the singleton themselves
|
||||
* (with mutexing), store it in the service object, and report errors.
|
||||
*/
|
||||
#define GET_CASE_PROPS() (gCsp!=NULL ? gCsp : getCaseProps())
|
||||
|
||||
/* public API (see uchar.h) */
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isULowercase(UChar32 c) {
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c));
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isUUppercase(UChar32 c) {
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c));
|
||||
}
|
||||
|
||||
/* Transforms the Unicode character to its lower case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_tolower(UChar32 c) {
|
||||
return ucase_tolower(GET_CASE_PROPS(), c);
|
||||
}
|
||||
|
||||
/* Transforms the Unicode character to its upper case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_toupper(UChar32 c) {
|
||||
return ucase_toupper(GET_CASE_PROPS(), c);
|
||||
}
|
||||
|
||||
/* Transforms the Unicode character to its title case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_totitle(UChar32 c) {
|
||||
return ucase_totitle(GET_CASE_PROPS(), c);
|
||||
}
|
||||
|
||||
/* return the simple case folding mapping for c */
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_foldCase(UChar32 c, uint32_t options) {
|
||||
return ucase_fold(GET_CASE_PROPS(), c, options);
|
||||
}
|
||||
|
||||
/* bidi/shaping properties API ---------------------------------------------- */
|
||||
|
||||
/* get the UBiDiProps singleton, or else its dummy, once and for all */
|
||||
|
@ -261,23 +159,7 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||
return (u_getUnicodeProperties(c, column)&mask)!=0;
|
||||
} else {
|
||||
if(column==UPROPS_SRC_CASE) {
|
||||
/* case mapping properties */
|
||||
const UCaseProps *csp=GET_CASE_PROPS();
|
||||
if(csp==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_LOWERCASE:
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
|
||||
case UCHAR_UPPERCASE:
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
|
||||
case UCHAR_SOFT_DOTTED:
|
||||
return ucase_isSoftDotted(csp, c);
|
||||
case UCHAR_CASE_SENSITIVE:
|
||||
return ucase_isCaseSensitive(csp, c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return ucase_hasBinaryProperty(c, which);
|
||||
} else if(column==UPROPS_SRC_NORM) {
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
/* normalization properties from unorm.icu */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2005, International Business Machines
|
||||
* Copyright (C) 2001-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -23,7 +23,6 @@
|
|||
#endif
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "udataswp.h"
|
||||
#include "cmemory.h"
|
||||
#include "utrie.h"
|
||||
|
||||
|
@ -686,55 +685,6 @@ utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {
|
|||
|
||||
/* serialization ------------------------------------------------------------ */
|
||||
|
||||
/**
|
||||
* Trie data structure in serialized form:
|
||||
*
|
||||
* UTrieHeader header;
|
||||
* uint16_t index[header.indexLength];
|
||||
* uint16_t data[header.dataLength];
|
||||
*/
|
||||
struct UTrieHeader {
|
||||
/** "Trie" in big-endian US-ASCII (0x54726965) */
|
||||
uint32_t signature;
|
||||
|
||||
/**
|
||||
* options bit field:
|
||||
* 9 1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
|
||||
* 8 0=16-bit data, 1=32-bit data
|
||||
* 7..4 UTRIE_INDEX_SHIFT // 0..UTRIE_SHIFT
|
||||
* 3..0 UTRIE_SHIFT // 1..9
|
||||
*/
|
||||
uint32_t options;
|
||||
|
||||
/** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
|
||||
int32_t indexLength;
|
||||
|
||||
/** dataLength>=UTRIE_DATA_BLOCK_LENGTH */
|
||||
int32_t dataLength;
|
||||
};
|
||||
|
||||
typedef struct UTrieHeader UTrieHeader;
|
||||
|
||||
/**
|
||||
* Constants for use with UTrieHeader.options.
|
||||
*/
|
||||
enum {
|
||||
/** Mask to get the UTRIE_SHIFT value from options. */
|
||||
UTRIE_OPTIONS_SHIFT_MASK=0xf,
|
||||
|
||||
/** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */
|
||||
UTRIE_OPTIONS_INDEX_SHIFT=4,
|
||||
|
||||
/** If set, then the data (stage 2) array is 32 bits wide. */
|
||||
UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
|
||||
|
||||
/**
|
||||
* If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array
|
||||
* as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
|
||||
*/
|
||||
UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
|
||||
};
|
||||
|
||||
/*
|
||||
* Default function for the folding value:
|
||||
* Just store the offset (16 bits) if there is any non-initial-value entry.
|
||||
|
@ -1077,79 +1027,6 @@ utrie_unserializeDummy(UTrie *trie,
|
|||
return actualLength;
|
||||
}
|
||||
|
||||
/* swapping ----------------------------------------------------------------- */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UTrieHeader *inTrie;
|
||||
UTrieHeader trie;
|
||||
int32_t size;
|
||||
UBool dataIs32;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* setup and swapping */
|
||||
if(length>=0 && length<sizeof(UTrieHeader)) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
inTrie=(const UTrieHeader *)inData;
|
||||
trie.signature=ds->readUInt32(inTrie->signature);
|
||||
trie.options=ds->readUInt32(inTrie->options);
|
||||
trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
|
||||
trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
|
||||
|
||||
if( trie.signature!=0x54726965 ||
|
||||
(trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
|
||||
((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
|
||||
trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
|
||||
(trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
|
||||
trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
|
||||
(trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
|
||||
((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
|
||||
) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
|
||||
return 0;
|
||||
}
|
||||
|
||||
dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
|
||||
size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
|
||||
|
||||
if(length>=0) {
|
||||
UTrieHeader *outTrie;
|
||||
|
||||
if(length<size) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
outTrie=(UTrieHeader *)outData;
|
||||
|
||||
/* swap the header */
|
||||
ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
|
||||
|
||||
/* swap the index and the data */
|
||||
if(dataIs32) {
|
||||
ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
|
||||
ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
|
||||
(uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
|
||||
} else {
|
||||
ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/* enumeration -------------------------------------------------------------- */
|
||||
|
||||
/* default UTrieEnumValue() returns the input value itself */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2005, International Business Machines
|
||||
* Copyright (C) 2001-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -736,6 +736,57 @@ utrie_swap(const UDataSwapper *ds,
|
|||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* serialization ------------------------------------------------------------ */
|
||||
|
||||
/**
|
||||
* Trie data structure in serialized form:
|
||||
*
|
||||
* UTrieHeader header;
|
||||
* uint16_t index[header.indexLength];
|
||||
* uint16_t data[header.dataLength];
|
||||
* @internal
|
||||
*/
|
||||
typedef struct UTrieHeader {
|
||||
/** "Trie" in big-endian US-ASCII (0x54726965) */
|
||||
uint32_t signature;
|
||||
|
||||
/**
|
||||
* options bit field:
|
||||
* 9 1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
|
||||
* 8 0=16-bit data, 1=32-bit data
|
||||
* 7..4 UTRIE_INDEX_SHIFT // 0..UTRIE_SHIFT
|
||||
* 3..0 UTRIE_SHIFT // 1..9
|
||||
*/
|
||||
uint32_t options;
|
||||
|
||||
/** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
|
||||
int32_t indexLength;
|
||||
|
||||
/** dataLength>=UTRIE_DATA_BLOCK_LENGTH */
|
||||
int32_t dataLength;
|
||||
} UTrieHeader;
|
||||
|
||||
/**
|
||||
* Constants for use with UTrieHeader.options.
|
||||
* @internal
|
||||
*/
|
||||
enum {
|
||||
/** Mask to get the UTRIE_SHIFT value from options. */
|
||||
UTRIE_OPTIONS_SHIFT_MASK=0xf,
|
||||
|
||||
/** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */
|
||||
UTRIE_OPTIONS_INDEX_SHIFT=4,
|
||||
|
||||
/** If set, then the data (stage 2) array is 32 bits wide. */
|
||||
UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
|
||||
|
||||
/**
|
||||
* If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array
|
||||
* as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
|
||||
*/
|
||||
UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
|
||||
};
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
|
|
93
icu4c/source/common/utrie_swap.c
Normal file
93
icu4c/source/common/utrie_swap.c
Normal file
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: utrie_swap.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This performs data swapping for a folded trie (see utrie.c for details).
|
||||
*/
|
||||
|
||||
#include "udataswp.h"
|
||||
#include "utrie.h"
|
||||
|
||||
/* swapping ----------------------------------------------------------------- */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UTrieHeader *inTrie;
|
||||
UTrieHeader trie;
|
||||
int32_t size;
|
||||
UBool dataIs32;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* setup and swapping */
|
||||
if(length>=0 && length<sizeof(UTrieHeader)) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
inTrie=(const UTrieHeader *)inData;
|
||||
trie.signature=ds->readUInt32(inTrie->signature);
|
||||
trie.options=ds->readUInt32(inTrie->options);
|
||||
trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
|
||||
trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
|
||||
|
||||
if( trie.signature!=0x54726965 ||
|
||||
(trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
|
||||
((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
|
||||
trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
|
||||
(trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
|
||||
trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
|
||||
(trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
|
||||
((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
|
||||
) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
|
||||
return 0;
|
||||
}
|
||||
|
||||
dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
|
||||
size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
|
||||
|
||||
if(length>=0) {
|
||||
UTrieHeader *outTrie;
|
||||
|
||||
if(length<size) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
outTrie=(UTrieHeader *)outData;
|
||||
|
||||
/* swap the header */
|
||||
ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
|
||||
|
||||
/* swap the index and the data */
|
||||
if(dataIs32) {
|
||||
ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
|
||||
ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
|
||||
(uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
|
||||
} else {
|
||||
ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue