diff --git a/icu4c/source/common/common.dsp b/icu4c/source/common/common.dsp index 360782d6b06..9cbff7a38a3 100644 --- a/icu4c/source/common/common.dsp +++ b/icu4c/source/common/common.dsp @@ -365,17 +365,7 @@ SOURCE=.\ucnvsbcs.c # Begin Source File SOURCE=.\udata.c - -!IF "$(CFG)" == "common - Win32 Release" - # ADD CPP /Ze - -!ELSEIF "$(CFG)" == "common - Win32 Debug" - -# ADD CPP /Ze - -!ENDIF - # End Source File # Begin Source File @@ -400,6 +390,10 @@ SOURCE=.\uloc.c # End Source File # Begin Source File +SOURCE=.\umemstrm.c +# End Source File +# Begin Source File + SOURCE=.\umutex.c # ADD CPP /Ze # End Source File @@ -1074,6 +1068,10 @@ InputPath=.\unicode\umachine.h # End Source File # Begin Source File +SOURCE=.\umemstrm.h +# End Source File +# Begin Source File + SOURCE=.\unicode\umisc.h !IF "$(CFG)" == "common - Win32 Release" diff --git a/icu4c/source/common/umemstrm.c b/icu4c/source/common/umemstrm.c new file mode 100644 index 00000000000..bf68acba3ab --- /dev/null +++ b/icu4c/source/common/umemstrm.c @@ -0,0 +1,125 @@ +/* +******************************************************************************* +* +* Copyright (C) 1997-1999, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +******************************************************************************* +* +* File UMEMSTRM.C +* +* @author Vladimir Weinstein +* +* Modification History: +* +* Date Name Description +* 5/17/00 weiv Created +******************************************************************************* +*/ + +#include "umemstrm.h" +#include "cmemory.h" +/* Creates a writable stream owning a heap buffer of size bytes (0xFFFF when size <= 0); returns NULL if an allocation fails. */ +U_CAPI UMemoryStream * U_EXPORT2 uprv_mstrm_openNew(int32_t size) { + UMemoryStream *MS = (UMemoryStream *)uprv_malloc(sizeof(UMemoryStream)); + if(MS == NULL) { + return NULL; + } + + MS->fReadOnly = FALSE; + if (size <= 0) { + MS->fSize = 0xFFFF; + } else { + MS->fSize = size; + } + MS->fStart = NULL; + MS->fPos = 0; + MS->fReadPos = 0; + MS->fError = FALSE; + MS->fStart = (uint8_t *)uprv_malloc(MS->fSize); + if(MS->fStart == NULL) { + MS->fError = TRUE; + uprv_free(MS); + return NULL; + } + return MS; +} +/* Wraps a caller-owned buffer of len bytes as a read-only stream; uprv_mstrm_close() will not free the buffer. */ +U_CAPI UMemoryStream * U_EXPORT2 uprv_mstrm_openBuffer(uint8_t *buffer, int32_t len){ + UMemoryStream *MS = (UMemoryStream *)uprv_malloc(sizeof(UMemoryStream)); + if(MS == NULL) { + return NULL; + } + MS->fReadOnly = TRUE; + MS->fStart = buffer; + MS->fSize = MS->fPos = len; /* all len bytes are readable: uprv_mstrm_read() bounds itself by fPos */ + MS->fReadPos = 0; + MS->fError = FALSE; + return MS; +} +/* Frees the stream; the buffer is freed too unless the stream is read-only (caller-owned buffer). */ +U_CAPI void U_EXPORT2 uprv_mstrm_close(UMemoryStream *MS){ + if(MS->fReadOnly == FALSE && MS->fStart != NULL) { + uprv_free(MS->fStart); + } + uprv_free(MS); +} +/* Puts the stream into the error state; returns the error flag, which is now TRUE. */ +U_CAPI bool_t U_EXPORT2 uprv_mstrm_setError(UMemoryStream *MS){ + MS->fError = TRUE; + return MS->fError; +} +/* Returns TRUE once the stream has entered the error state. */ +U_CAPI bool_t U_EXPORT2 uprv_mstrm_error(UMemoryStream *MS){ + return MS->fError; +} +/* Copies up to len bytes from the read position into addr and advances the read position; a short read sets the error flag. Returns the number of bytes copied, 0 if already in error. */ +U_CAPI int32_t U_EXPORT2 uprv_mstrm_read(UMemoryStream *MS, void* addr, int32_t len) { + if(MS->fError == FALSE) { + if(len + MS->fReadPos > MS->fPos) { + len = MS->fPos - MS->fReadPos; + MS->fError = TRUE; + } + + uprv_memcpy(addr, MS->fStart+MS->fReadPos, len); + MS->fReadPos += len; /* consume the bytes so the next read continues after them */ + return len; + } else { + return 0; + } +} +/* Appends len bytes from buffer, growing an owned buffer when needed. Returns len on success, -1 if the buffer cannot grow, 0 if the stream is already in error or is read-only (which also sets the error flag). */ +U_CAPI int32_t U_EXPORT2 uprv_mstrm_write(UMemoryStream *MS, uint8_t *buffer, int32_t len){ + if(MS->fError == FALSE) { +
if(MS->fReadOnly == FALSE) { + if(len + MS->fPos > MS->fSize) { + uint8_t *newstart = (uint8_t *)uprv_realloc(MS->fStart, 2*MS->fSize + len); /* doubling alone is too small when len > fSize */ + if(newstart != NULL) { + MS->fSize = 2*MS->fSize + len; + MS->fStart = newstart; + } else { + MS->fError = TRUE; + return -1; + } + } + uprv_memcpy(MS->fStart + MS->fPos, buffer, len); + MS->fPos += len; + return len; + } else { + MS->fError = TRUE; + return 0; + } + } else { + return 0; + } +} +/* Returns the stream's buffer and stores the number of valid bytes (fPos) in *len; NULL and 0 when the stream is in error. */ +U_CAPI uint8_t * U_EXPORT2 uprv_mstrm_getBuffer(UMemoryStream *MS, int32_t *len){ + if(MS->fError == FALSE) { + *len = MS->fPos; + return MS->fStart; + } else { + *len = 0; + return NULL; + } +} diff --git a/icu4c/source/common/umemstrm.h b/icu4c/source/common/umemstrm.h new file mode 100644 index 00000000000..1d428e4c11f --- /dev/null +++ b/icu4c/source/common/umemstrm.h @@ -0,0 +1,56 @@ +/* +***************************************************************************************** +* +* Copyright (C) 1997-1999, International Business Machines +* Corporation and others. All Rights Reserved. +* +***************************************************************************************** +* +* File UMEMSTRM.H +* +* Contains UMemoryStream interface +* +* @author Vladimir Weinstein +* +* Modification History: +* +* Date Name Description +* 5/17/00 weiv Created.
+* +***************************************************************************************** +*/ + +#ifndef UMEMSTRM_H +#define UMEMSTRM_H + +#ifndef _UTYPES +#include "unicode/utypes.h" +#endif + +struct UMemoryStream; + +typedef struct UMemoryStream UMemoryStream; + +struct UMemoryStream{ + uint8_t *fStart; /* first byte of the stream's buffer */ + int32_t fSize; /* capacity of the buffer in bytes, as set by uprv_mstrm_openNew() */ + int32_t fPos; /* write position: uprv_mstrm_write() appends here, reads stop here */ + int32_t fReadPos; /* offset from which uprv_mstrm_read() copies */ + bool_t fReadOnly; /* TRUE when wrapping a caller-owned buffer (uprv_mstrm_openBuffer) */ + bool_t fError; /* error flag; reads and writes return 0 while it is set */ +}; + +U_CAPI UMemoryStream * U_EXPORT2 uprv_mstrm_openNew(int32_t size); +U_CAPI UMemoryStream * U_EXPORT2 uprv_mstrm_openBuffer(uint8_t *buffer, int32_t len); +U_CAPI void U_EXPORT2 uprv_mstrm_close(UMemoryStream *MS); +U_CAPI bool_t U_EXPORT2 uprv_mstrm_setError(UMemoryStream *MS); +U_CAPI bool_t U_EXPORT2 uprv_mstrm_error(UMemoryStream *MS); +U_CAPI int32_t U_EXPORT2 uprv_mstrm_read(UMemoryStream *MS, void* addr, int32_t len); +U_CAPI int32_t U_EXPORT2 uprv_mstrm_write(UMemoryStream *MS, uint8_t *buffer, int32_t len); +U_CAPI uint8_t * U_EXPORT2 uprv_mstrm_getBuffer(UMemoryStream *MS, int32_t *len); + +#endif /* UMEMSTRM_H */ + + + + diff --git a/icu4c/source/common/unicode/ures.h b/icu4c/source/common/unicode/ures.h index c490599ab40..98e51d69aab 100644 --- a/icu4c/source/common/unicode/ures.h +++ b/icu4c/source/common/unicode/ures.h @@ -372,6 +372,35 @@ U_CAPI void ures_openFillIn(UResourceBundle *r, const char* path, U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resourceBundle, int32_t* len, UErrorCode* status); +/** + * Returns binary data from a resource. Only binary resources are accepted; for any other + * resource type the status is set to U_RESOURCE_TYPE_MISMATCH and NULL is returned. + * + * @param resourceBundle: a binary resource + * @param len: fills in the length of resulting byte chunk + * @param status: fills in the outgoing error code + * could be U_MISSING_RESOURCE_ERROR if the key is not found + * could be a non-failing error + * e.g.: U_USING_FALLBACK_ERROR,U_USING_DEFAULT_ERROR + * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @draft + */ +U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resourceBundle, int32_t* len, + UErrorCode* status); + +/** + * returns an integer from a resource. + * + * @param resourceBundle: a string resource + * @param status: fills in the outgoing error code + * could be U_MISSING_RESOURCE_ERROR if the key is not found + * could be a non-failing error + * e.g.: U_USING_FALLBACK_ERROR,U_USING_DEFAULT_ERROR + * @return an integer value + * @draft + */ +U_CAPI uint32_t U_EXPORT2 ures_getInt(const UResourceBundle* resourceBundle, UErrorCode *status); + /** * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is * the number of child resources. diff --git a/icu4c/source/common/uresbund.c b/icu4c/source/common/uresbund.c index 79bd5bd4d5a..6cd1effaa8e 100644 --- a/icu4c/source/common/uresbund.c +++ b/icu4c/source/common/uresbund.c @@ -493,7 +493,9 @@ U_CFUNC UChar** ures_listInstalledLocales(const char* path, int32_t* count) { U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_t* len, UErrorCode* status) { - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -515,6 +517,46 @@ U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_ } } +U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resB, int32_t* len, + UErrorCode* status) { + + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + switch(RES_GET_TYPE(resB->fRes)) { + case RES_BINARY: + return res_getBinary(&(resB->fResData), resB->fRes, len); + break; + case RES_INT: + case RES_STRING: + case RES_INT_VECTOR: + case RES_ARRAY: + case RES_TABLE: + default: + *status = U_RESOURCE_TYPE_MISMATCH; + return NULL; + break; + } + +} + +U_CAPI uint32_t U_EXPORT2 
ures_getInt(const UResourceBundle* resB, UErrorCode *status) { + + if (status==NULL || U_FAILURE(*status)) { + return 0xffffffff; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0xffffffff; + } + return RES_GET_UINT(resB->fRes); +} + + U_CAPI UResType U_EXPORT2 ures_getType(UResourceBundle *resB) { if(resB == NULL) { return RES_BOGUS; @@ -555,7 +597,9 @@ U_CAPI bool_t U_EXPORT2 ures_hasNext(UResourceBundle *resB) { U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* len, const char ** key, UErrorCode *status) { Resource r = RES_BOGUS; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -599,7 +643,9 @@ U_CAPI UResourceBundle* U_EXPORT2 ures_getNextResource(UResourceBundle *resB, UR const char *key = NULL; Resource r = RES_BOGUS; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -642,7 +688,9 @@ U_CAPI UResourceBundle* U_EXPORT2 ures_getByIndex(const UResourceBundle *resB, i const char* key = NULL; Resource r = RES_BOGUS; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -684,7 +732,9 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByIndex(const UResourceBundle *resB, const char* key = NULL; Resource r = RES_BOGUS; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -726,7 +776,9 @@ U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, con Resource res = RES_BOGUS; const char *key = inKey; - if (status==NULL || U_FAILURE(*status)) return NULL; + if 
(status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -769,7 +821,9 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, c Resource res = RES_BOGUS; const char* key = inKey; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -820,7 +874,9 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, c U_CFUNC const char* ures_getRealLocale(const UResourceBundle* resourceBundle, UErrorCode* status) { const UResourceDataEntry *resB = resourceBundle->fData; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if (!resourceBundle) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -1003,7 +1059,9 @@ U_CAPI const UChar* ures_getArrayItem(const UResourceBundle* resB, UErrorCode* status) { UResourceBundle res; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -1026,7 +1084,9 @@ U_CAPI const UChar* ures_get2dArrayItem(const UResourceBundle* resB, UErrorCode* status) { UResourceBundle res; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -1055,7 +1115,9 @@ U_CAPI const UChar* ures_getTaggedArrayItem(const UResourceBundle* resB, UErrorCode* status) { UResourceBundle res; - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if(resB == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; @@ -1082,7 +1144,9 @@ U_CAPI int32_t ures_countArrayItems(const UResourceBundle* resourceBundle, Resource res = RES_BOGUS; 
UResourceBundle resData; - if (status==NULL || U_FAILURE(*status)) return 0; + if (status==NULL || U_FAILURE(*status)) { + return 0; + } if(resourceBundle == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -1174,7 +1238,9 @@ U_CAPI const char* ures_getVersionNumber(const UResourceBundle* resourceBundle */ U_CAPI const char* ures_getLocale(const UResourceBundle* resourceBundle, UErrorCode* status) { - if (status==NULL || U_FAILURE(*status)) return NULL; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } if (!resourceBundle) { *status = U_ILLEGAL_ARGUMENT_ERROR; diff --git a/icu4c/source/common/uresdata.c b/icu4c/source/common/uresdata.c index 4b72e652e76..8ac2f05afd6 100644 --- a/icu4c/source/common/uresdata.c +++ b/icu4c/source/common/uresdata.c @@ -28,9 +28,6 @@ /* get a const char* pointer to the key with the keyOffset byte offset from pRoot */ #define RES_GET_KEY(pRoot, keyOffset) ((const char *)(pRoot)+(keyOffset)) -/* get signed and unsigned integer values directly from the Resource handle */ -#define RES_GET_INT(res) (((int32_t)((res)<<4L))>>4L) -#define RES_GET_UINT(res) ((res)&0xfffffff) /* * All the type-access functions assume that @@ -51,12 +48,24 @@ _res_getString(Resource *pRoot, Resource res, int32_t *pLength) { *pLength=0; return &nulUChar; } else { - int32_t *p=(int32_t *)(pRoot+res); + int32_t *p=(int32_t *)RES_GET_POINTER(pRoot, res); *pLength=*p++; return (UChar *)p; } } +static const uint8_t * +_res_getBinary(Resource *pRoot, Resource res, int32_t *pLength) { + if(res==0) { + *pLength=0; + return NULL; + } else { + int32_t *p=(int32_t *)RES_GET_POINTER(pRoot, res); + *pLength=*p++; + return (uint8_t *)p; + } +} + /* * Array functions */ @@ -236,6 +245,16 @@ res_getString(const ResourceData *pResData, const Resource res, int32_t *pLength } } +U_CFUNC const uint8_t * +res_getBinary(const ResourceData *pResData, const Resource res, int32_t *pLength) { + if(res!=RES_BOGUS && RES_GET_TYPE(res)==RES_BINARY) { + return 
_res_getBinary(pResData->pRoot, res, pLength); + } else { + *pLength=0; + return NULL; + } +} + U_CFUNC Resource res_getStringArray(const ResourceData *pResData, const char *key, int32_t *pCount) { Resource res=_res_findTableItem(pResData->pRoot, pResData->rootRes, key); diff --git a/icu4c/source/common/uresdata.h b/icu4c/source/common/uresdata.h index 45765f6aa47..8e9a56a9acb 100644 --- a/icu4c/source/common/uresdata.h +++ b/icu4c/source/common/uresdata.h @@ -30,9 +30,13 @@ typedef uint32_t Resource; #define RES_BOGUS 0xffffffff #define RES_GET_TYPE(res) ((res)>>28UL) -#define RES_GET_OFFSET(res) ((res)&0xfffffff) +#define RES_GET_OFFSET(res) ((res)&0x0fffffff) #define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res)) +/* get signed and unsigned integer values directly from the Resource handle */ +#define RES_GET_INT(res) (((int32_t)((res)<<4L))>>4L) +#define RES_GET_UINT(res) ((res)&0x0fffffff) + /* * Resource types: * Most resources have their values stored at four-byte offsets from the start @@ -45,7 +49,7 @@ typedef uint32_t Resource; * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding) * or (empty string ("") if offset==0) * 1 Binary: int32_t length, uint8_t[length], (padding) - * - this value should be 16-aligned - + * - this value should be 32-aligned - * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count] * * 7 Integer: (28-bit offset is integer value) @@ -88,6 +92,9 @@ res_unload(ResourceData *pResData); U_CFUNC const UChar * res_getString(const ResourceData *pResData, const Resource res, int32_t *pLength); +U_CFUNC const uint8_t * +res_getBinary(const ResourceData *pResData, const Resource res, int32_t *pLength); + /* * Get a Resource handle for an array of strings, and get the number of strings. * Returns RES_BOGUS if not found. 
diff --git a/icu4c/source/i18n/colrules.cpp b/icu4c/source/i18n/colrules.cpp index 63260aef711..b6b0e066ec7 100644 --- a/icu4c/source/i18n/colrules.cpp +++ b/icu4c/source/i18n/colrules.cpp @@ -32,8 +32,6 @@ #define ARRAY_LENGTH(array) (sizeof array / sizeof array[0]) - - static const UChar defaultRulesArray[] = { 0x003D, 0x0027, 0x200B, 0x0027, 0x003D, 0x200C, 0x003D, 0x200D, 0x003D, 0x200E, @@ -126,3 +124,8 @@ static const UChar defaultRulesArray[] = UnicodeString RuleBasedCollator::DEFAULTRULES(defaultRulesArray, ARRAY_LENGTH(defaultRulesArray)); +/* Exposes the built-in default rules to C callers; *size receives the number of UChars (elements, not bytes). */ +U_CAPI const UChar * U_EXPORT2 ucol_getDefaultRulesArray(uint32_t *size) { + *size = ARRAY_LENGTH(defaultRulesArray); + return defaultRulesArray; +} diff --git a/icu4c/source/i18n/tblcoll.cpp b/icu4c/source/i18n/tblcoll.cpp index fce86b4bd84..edd0125f9e6 100644 --- a/icu4c/source/i18n/tblcoll.cpp +++ b/icu4c/source/i18n/tblcoll.cpp @@ -2694,10 +2694,24 @@ RuleBasedCollator::chopLocale(UnicodeString& localeName) if (i < 0) { - i = 0; + i = 0; } localeName.remove(i, size - i); } + +uint8_t * +RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &status) +{ + if(U_FAILURE(status)) { + return NULL; + } + length = 0; /* no data is produced, so never leave the caller's length unset */ + status = U_UNSUPPORTED_ERROR; /* this version always reports the operation as unsupported */ + + return NULL; +} + + //eof diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 5fc1e0985f0..99ba63c912a 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -402,3 +402,17 @@ ucol_getVersion(const UCollator* coll, { ((Collator*)coll)->getVersion(versionInfo); } + +/* C wrapper around RuleBasedCollator::cloneRuleData(); validates its pointer arguments before dereferencing them. */ +U_CAPI uint8_t * +ucol_cloneRuleData(UCollator *coll, int32_t *length, UErrorCode *status) +{ + if(status == NULL || U_FAILURE(*status)) { + return NULL; + } + if(coll == NULL || length == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + return ((RuleBasedCollator*)coll)->cloneRuleData(*length,*status); /* NOTE(review): assumes coll really is a RuleBasedCollator - confirm */ +} diff --git a/icu4c/source/i18n/unicode/tblcoll.h b/icu4c/source/i18n/unicode/tblcoll.h index 47cf162d6f5..4e5ca8f2a3d 100644 --- a/icu4c/source/i18n/unicode/tblcoll.h +++ b/icu4c/source/i18n/unicode/tblcoll.h @@ -558,6 +558,18 @@ public: */ static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; } +
+ /** + * Returns the binary format of the class's rules. The format is + * that of .col files. + * + * @param length Returns the length of the data, in bytes + * @param status the error code status. + * @return memory, owned by the caller, of size 'length' bytes. + * @draft INTERNAL USE ONLY + */ + uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); + /***************************************************************************** * PRIVATE *****************************************************************************/ diff --git a/icu4c/source/i18n/unicode/ucol.h b/icu4c/source/i18n/unicode/ucol.h index e4414e042c1..07d8fa676fa 100644 --- a/icu4c/source/i18n/unicode/ucol.h +++ b/icu4c/source/i18n/unicode/ucol.h @@ -747,4 +747,17 @@ ucol_setOffset( UCollationElements *elems, U_CAPI void U_EXPORT2 ucol_getVersion(const UCollator* coll, UVersionInfo info); +/** + * Makes a copy of the Collator's rule data. The format is + * that of .col files. + * + * @param length returns the length of the data, in bytes. + * @param status the error status + * @return memory, owned by the caller, of size 'length' bytes. 
+ * @draft INTERNAL USE ONLY + */ +U_CAPI uint8_t * +ucol_cloneRuleData(UCollator *coll, int32_t *length, UErrorCode *status); + + #endif diff --git a/icu4c/source/tools/genrb/parse.c b/icu4c/source/tools/genrb/parse.c index c9f432207d4..c525f2d77d6 100644 --- a/icu4c/source/tools/genrb/parse.c +++ b/icu4c/source/tools/genrb/parse.c @@ -25,6 +25,19 @@ #include "ustr.h" #include "reslist.h" #include "unicode/ustring.h" +#include "unicode/ucol.h" + +U_CAPI const UChar * U_EXPORT2 ucol_getDefaultRulesArray(uint32_t *size); + +U_STRING_DECL(k_start_string, "string", 6); +U_STRING_DECL(k_start_binary, "binary", 6); +U_STRING_DECL(k_start_table, "table", 5); +U_STRING_DECL(k_start_int, "int", 3); +U_STRING_DECL(k_start_array, "array", 5); +U_STRING_DECL(k_start_intvector, "intvector", 9); +U_STRING_DECL(k_start_reserved, "reserved", 8); + +static bool_t didInit=FALSE; /* Node IDs for the state transition table. */ enum ENode { @@ -32,7 +45,7 @@ enum ENode { eInitial, /* Next: Locale name */ eGotLoc, /* Next: { */ eIdle, /* Next: Tag name | } */ - eGotTag, /* Next: { */ + eGotTag, /* Next: { | : */ eNode5, /* Next: Data | Subtag */ eNode6, /* Next: } | { | , */ eList, /* Next: List data */ @@ -44,7 +57,9 @@ enum ENode { e2dArray, /* Next: Data | } */ eNode14, /* Next: , | } */ eNode15, /* Next: , | } */ - eNode16 /* Next: { | } */ + eNode16, /* Next: { | } */ + eTypeStart, /* Next: Type name */ + eGotType /* Next: { */ }; /* Action codes for the state transtiion table. */ @@ -75,7 +90,11 @@ enum EAction { string as the first subtag */ eEndTagged = 0x3200, /* Close a tagged list being build */ eSubtag = 0x3300, /* Record the last string as the subtag */ - eTaggedStr = 0x3400 /* Record the last string as a tagged string */ + eTaggedStr = 0x3400, /* Record the last string as a tagged string */ + + /* Type support */ + eBegType = 0x4100, /* Start getting a type */ + eSetType = 0x4200 /* Record and init type */ }; /* A struct which encapsulates a node ID and an action. 
*/ @@ -95,33 +114,37 @@ struct STransition { comma-delimited list (transition from eList to eIdle on kCloseBrace). */ static struct STransition gTransitionTable [] = { - /* kString kOpenBrace kCloseBrace kComma kColon*/ - /*eError*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, + /* kString kOpenBrace kCloseBrace kComma */ + /*eError*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, - /*eInitial*/ {eGotLoc,eOpen}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, - /*eGotLoc*/ {eError,eNOP}, {eIdle,eNOP}, {eError,eNOP}, {eError,eNOP}, + /*eInitial*/ {eGotLoc,eOpen}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, + /*eGotLoc*/ {eError,eNOP}, {eIdle,eNOP}, {eError,eNOP}, {eError,eNOP}, - /*eIdle*/ {eGotTag,eSetTag}, {eError,eNOP}, {eInitial,eClose}, {eError,eNOP}, - /*eGotTag*/ {eError,eNOP}, {eNode5,eNOP}, {eError,eNOP}, {eError,eNOP}, - /*eNode5*/ {eNode6,eNOP}, {e2dArray,eBeg2dList},{eError,eNOP}, {eError,eNOP}, + /*eIdle*/ {eGotTag,eSetTag}, {eError,eNOP}, {eInitial,eClose}, {eError,eNOP}, + /*eGotTag*/ {eError,eNOP}, {eNode5,eNOP}, {eError,eNOP}, {eError,eNOP}, + /*eNode5*/ {eNode6,eNOP}, {e2dArray,eBeg2dList},{eError,eNOP}, {eError,eNOP}, /*eNode6*/ {eError,eNOP}, {eTagList,eBegTagged},{eIdle,eStr}, {eList,eBegList}, - /*eList*/ {eNode8,eListStr}, {eError,eNOP}, {eIdle,eEndList}, {eError,eNOP}, - /*eNode8*/ {eError,eNOP}, {eError,eNOP}, {eIdle,eEndList}, {eList,eNOP}, + /*eList*/ {eNode8,eListStr}, {eError,eNOP}, {eIdle,eEndList}, {eError,eNOP}, + /*eNode8*/ {eError,eNOP}, {eError,eNOP}, {eIdle,eEndList}, {eList,eNOP}, - /*eTagList*/ {eNode10,eTaggedStr},{eError,eNOP}, {eError,eNOP}, {eError,eNOP}, - /*eNode10*/ {eError,eNOP}, {eError,eNOP}, {eNode11,eNOP}, {eError,eNOP}, - /*eNode11*/ {eNode12,eNOP}, {eError,eNOP}, {eIdle,eEndTagged},{eError,eNOP}, - /*eNode12*/ {eError,eNOP}, {eTagList,eSubtag}, {eError,eNOP}, {eError,eNOP}, + /*eTagList*/ {eNode10,eTaggedStr},{eError,eNOP}, {eError,eNOP}, {eError,eNOP}, + /*eNode10*/ {eError,eNOP},
{eError,eNOP}, {eNode11,eNOP}, {eError,eNOP}, + /*eNode11*/ {eNode12,eNOP}, {eError,eNOP}, {eIdle,eEndTagged},{eError,eNOP}, + /*eNode12*/ {eError,eNOP}, {eTagList,eSubtag}, {eError,eNOP}, {eError,eNOP}, - /*e2dArray*/ {eNode14,e2dStr}, {eError,eNOP}, {eNode15,eNOP}, {eError,eNOP}, - /*eNode14*/ {eError,eNOP}, {eError,eNOP}, {eNode15,eNOP}, {e2dArray,eNOP}, - /*eNode15*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eNode16,eNOP}, - /*eNode16*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eError,eNOP} + /*e2dArray*/ {eNode14,e2dStr}, {eError,eNOP}, {eNode15,eNOP}, {eError,eNOP}, + /*eNode14*/ {eError,eNOP}, {eError,eNOP}, {eNode15,eNOP}, {e2dArray,eNOP}, + /*eNode15*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eNode16,eNOP}, + /*eNode16*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eError,eNOP}, + /*eTypeStart*/{eGotType,eSetType}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, + /*eGotType*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP} }; /* Row length is 4 */ #define GETTRANSITION(row,col) (gTransitionTable[col + (row<<2)]) +/* Not anymore, it is 5 now */ +/*#define GETTRANSITION(row,col) (gTransitionTable[col + (row*5)])*/ /********************************************************************* * Hashtable glue @@ -176,6 +199,7 @@ parse(FileStream *f, const char *cp, struct SResource *temp = NULL; struct SResource *temp1 = NULL; struct SResource *temp2 = NULL; + bool_t colEl = FALSE; /* Hashtable for keeping track of seen tag names */ struct UHashtable *data; @@ -253,6 +277,11 @@ parse(FileStream *f, const char *cp, ustr_cpy(&tag, &token, status); u_UCharsToChars(tag.fChars, cTag, u_strlen(tag.fChars)+1); if(U_FAILURE(*status)) goto finish; + if(uprv_strchr(cTag, ':')) { + /* type modificator - do the type modification*/ + } else if(uprv_strcmp(cTag, "CollationElements") == 0) { + colEl = TRUE; + } /*if(uhash_get(data, uhash_hashUString(tag.fChars)) != 0) {*/ if(get(data, &tag)) { char *s; @@ -274,6 +303,38 @@ 
parse(FileStream *f, const char *cp, } temp = string_open(bundle, cTag, token.fChars, status); table_add(rootTable, temp, status); + if(colEl == TRUE) { + const UChar * defaultRulesArray; + uint32_t defaultRulesArrayLength = 0; + /* do the collation elements */ + int32_t len = 0; + uint8_t *data = NULL; + UCollator *coll = NULL; + UChar *rules = NULL; + defaultRulesArray = ucol_getDefaultRulesArray(&defaultRulesArrayLength); + rules = uprv_malloc(sizeof(defaultRulesArray[0])*(defaultRulesArrayLength + token.fLength)); + if(rules == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; goto finish; } /* never copy into a failed allocation */ + uprv_memcpy(rules, defaultRulesArray, defaultRulesArrayLength*sizeof(defaultRulesArray[0])); + uprv_memcpy(rules + defaultRulesArrayLength, token.fChars, token.fLength*sizeof(token.fChars[0])); + coll = ucol_openRules(rules, defaultRulesArrayLength + token.fLength, 0, 0, status); + if(U_SUCCESS(*status) && coll !=NULL) { + /* This is just for testing & should be removed + temp1 = bin_open(bundle, "%%Collation", sizeof(defaultRulesArray[0])*(defaultRulesArrayLength + token.fLength), (uint8_t *) rules, status); + table_add(rootTable, temp1, status); + */ +/* + data = ucol_cloneRuleData(coll, &len, status); + if(U_SUCCESS(*status) && data != NULL) { + temp1 = bin_open(bundle, "%%Collation", len, data, status); + table_add(rootTable, temp1, status); + uprv_free(data); + } +*/ + ucol_close(coll); + } + uprv_free(rules); + colEl = FALSE; + } /*uhash_put(data, tag.fChars, status);*/ put(data, &tag, status); if(U_FAILURE(*status)) goto finish; @@ -396,6 +457,37 @@ parse(FileStream *f, const char *cp, goto finish; } break; + case eSetType: + /* type recognition */ + if(!didInit) { + U_STRING_INIT(k_start_string, "string", 6); + U_STRING_INIT(k_start_binary, "binary", 6); + U_STRING_INIT(k_start_table, "table", 5); + U_STRING_INIT(k_start_int, "int", 3); + U_STRING_INIT(k_start_array, "array", 5); + U_STRING_INIT(k_start_intvector, "intvector", 9); + U_STRING_INIT(k_start_reserved, "reserved", 8); + didInit=TRUE; + } + if(u_strcmp(token.fChars,
k_start_string) == 0) { + node = eGotTag; + } else if(u_strcmp(token.fChars, k_start_array) == 0) { + node = eGotTag; + } else if(u_strcmp(token.fChars, k_start_table) == 0) { + node = eGotTag; + } else if(u_strcmp(token.fChars, k_start_binary) == 0) { + /* start of binary */ + } else if(u_strcmp(token.fChars, k_start_int) == 0) { + /* start of integer */ + } else if(u_strcmp(token.fChars, k_start_intvector) == 0) { + /* start of intvector */ + } else if(u_strcmp(token.fChars, k_start_reserved) == 0) { + /* start of reserved */ + } else { + *status = U_INTERNAL_PROGRAM_ERROR; + goto finish; + } + break; } } diff --git a/icu4c/source/tools/genrb/read.c b/icu4c/source/tools/genrb/read.c index 8fd802e45e2..2b0561f8b72 100644 --- a/icu4c/source/tools/genrb/read.c +++ b/icu4c/source/tools/genrb/read.c @@ -29,6 +29,7 @@ #define ASTERISK 0x002A #define SPACE 0x0020 #define COLON 0x003A +#define BADBOM 0xFFFE U_STRING_DECL(k_start_string, "string", 6); U_STRING_DECL(k_start_binary, "binary", 6); @@ -42,8 +43,8 @@ static bool_t didInit=FALSE; /* Protos */ static enum ETokenType getStringToken(UFILE *f, UChar initialChar, - struct UString *token, - UErrorCode *status); + struct UString *token, + UErrorCode *status); static UChar unescape(UFILE *f, UErrorCode *status); static UChar getNextChar(UFILE *f, bool_t skipwhite, UErrorCode *status); static void seekUntilNewline(UFILE *f, UErrorCode *status); @@ -61,12 +62,12 @@ static bool_t isNewline(UChar c); string tokens will be merged into one, with no intervening space. 
*/ enum ETokenType getNextToken(UFILE *f, - struct UString *token, - UErrorCode *status) + struct UString *token, + UErrorCode *status) { UChar c; - enum ETokenType tokenType; + /*enum ETokenType tokenType;*/ if(U_FAILURE(*status)) return tok_error; @@ -75,14 +76,17 @@ enum ETokenType getNextToken(UFILE *f, if(U_FAILURE(*status)) return tok_error; switch(c) { + case BADBOM: return tok_error; case OPENBRACE: return tok_open_brace; case CLOSEBRACE: return tok_close_brace; case COMMA: return tok_comma; case U_EOF: return tok_EOF; - case COLON: +/* + case COLON: return tok_colon; c = getNextChar(f, TRUE, status); tokenType = getStringToken(f, c, token, status); break; +*/ default: return getStringToken(f, c, token, status); } if(!didInit) { @@ -193,7 +197,7 @@ static enum ETokenType getStringToken(UFILE *f, || c == OPENBRACE || c == CLOSEBRACE || c == COMMA - || c == COLON) + /*|| c == COLON*/) { u_fungetc(c, f); /*u_fungetc(c, f, status);*/ @@ -215,7 +219,7 @@ static enum ETokenType getStringToken(UFILE *f, if(U_FAILURE(*status)) return tok_string; - if(c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) { + if(c == OPENBRACE || c == CLOSEBRACE || c == COMMA/* || c == COLON*/) { u_fungetc(c, f); /*u_fungetc(c, f, status);*/ return tok_string; @@ -399,8 +403,8 @@ static UChar unescape(UFILE *f, static bool_t isWhitespace(UChar c) { switch (c) { - /* ' ', '\t', '\n', '\r', 0x2029 */ - case 0x0020: case 0x0009: case 0x000A: case 0x000D: case 0x2029: + /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */ + case 0x0020: case 0x0009: case 0x000A: case 0x000D: case 0x2029: case 0xFEFF: return TRUE; default: diff --git a/icu4c/source/tools/genrb/read.h b/icu4c/source/tools/genrb/read.h index 5abbdc77e3b..390dfc1033a 100644 --- a/icu4c/source/tools/genrb/read.h +++ b/icu4c/source/tools/genrb/read.h @@ -29,6 +29,7 @@ enum ETokenType tok_open_brace, /* An opening brace character */ tok_close_brace, /* A closing brace character */ tok_comma, /* A comma */ + tok_colon, /* A 
colon */ tok_start_string, /* :String */ tok_start_binary, /* :Binary */ tok_start_table, /* :Table */ @@ -39,7 +40,7 @@ enum ETokenType tok_EOF, /* End of the file has been reached successfully */ tok_error, /* An error, such an unterminated quoted string */ - tok_token_type_count = 11 /* Number of "real" token types */ + tok_token_type_count = 12 /* Number of "real" token types */ }; enum ETokenType getNextToken(UFILE *f, diff --git a/icu4c/source/tools/genrb/reslist.c b/icu4c/source/tools/genrb/reslist.c index fb39e446b57..8da527dda20 100644 --- a/icu4c/source/tools/genrb/reslist.c +++ b/icu4c/source/tools/genrb/reslist.c @@ -19,6 +19,8 @@ #include "unewdata.h" #include "unicode/ures.h" +#define BIN_ALIGNMENT 16 + uint32_t res_write(UNewDataMemory *mem, struct SResource *res, uint32_t usedOffset, UErrorCode *status); @@ -75,6 +77,11 @@ uint32_t array_write(UNewDataMemory *mem, struct SResource *res, while(current != NULL) { if(current->fType == RES_INT) { *(resources+i) = (current->fType)<<28 | (current->u.fIntValue.fValue & 0xFFFFFFF); + } else if(current->fType == RES_BINARY) { + uint32_t uo = usedOffset; + usedOffset = res_write(mem, current, usedOffset, status); + *(resources+i) = (current->fType)<<28 | (usedOffset>>2) ; + usedOffset += (current->fSize) + calcPadding(current->fSize) - (usedOffset-uo); } else { usedOffset = res_write(mem, current, usedOffset, status); *(resources+i) = (current->fType)<<28 | (usedOffset>>2); @@ -104,6 +111,18 @@ uint32_t intvector_write(UNewDataMemory *mem, struct SResource *res, uint32_t bin_write(UNewDataMemory *mem, struct SResource *res, uint32_t usedOffset, UErrorCode *status) { + uint32_t pad = 0; + uint32_t extrapad = calcPadding(res->fSize); + uint32_t dataStart = usedOffset+sizeof(res->u.fBinaryValue.fLength); + if(dataStart%BIN_ALIGNMENT) { + pad = (BIN_ALIGNMENT-dataStart%BIN_ALIGNMENT); + udata_writePadding(mem, pad); + usedOffset += pad; + } + + udata_write32(mem, res->u.fBinaryValue.fLength); + 
udata_writeBlock(mem, res->u.fBinaryValue.fData, res->u.fBinaryValue.fLength); + udata_writePadding(mem, (BIN_ALIGNMENT - pad + extrapad)); return usedOffset; } @@ -149,7 +168,12 @@ uint32_t table_write(UNewDataMemory *mem, struct SResource *res, *(keys+i) = (current->fKey)+sizeof(uint32_t); /*where the key is plus root pointer*/ if(current->fType == RES_INT) { *(resources+i) = (current->fType)<<28 | (current->u.fIntValue.fValue & 0xFFFFFFF); - } else { + } else if(current->fType == RES_BINARY) { + uint32_t uo = usedOffset; + usedOffset = res_write(mem, current, usedOffset, status); + *(resources+i) = (current->fType)<<28 | (usedOffset>>2) ; + usedOffset += (current->fSize) + calcPadding(current->fSize) - (usedOffset-uo); + } else { usedOffset = res_write(mem, current, usedOffset, status); *(resources+i) = (current->fType)<<28 | (usedOffset>>2) ; usedOffset += (current->fSize) + calcPadding(current->fSize); @@ -428,7 +452,7 @@ struct SResource *bin_open(struct SRBRoot *bundle, char *tag, uint32_t length, u return NULL; } - res->fType = RES_STRING; + res->fType = RES_BINARY; res->fKey = bundle_addtag(bundle, tag, status); if(U_FAILURE(*status)) { @@ -447,7 +471,7 @@ struct SResource *bin_open(struct SRBRoot *bundle, char *tag, uint32_t length, u } uprv_memcpy(res->u.fBinaryValue.fData, data, length); - res->fSize = sizeof(int32_t) + sizeof(uint8_t) * length; + res->fSize = sizeof(int32_t) + sizeof(uint8_t) * length + BIN_ALIGNMENT; return res; }