From 180d06a431db2e1f6a883ee48d7ff03b7c456e66 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 26 Aug 2005 15:30:42 +0000 Subject: [PATCH] ICU-4739 move data swapping implementation code from the icuswap tool to the toolutil library for sharing among multiple tools X-SVN-Rev: 18466 --- icu4c/source/tools/icuswap/Makefile.in | 2 +- icu4c/source/tools/icuswap/icuswap.cpp | 217 ++++-------------- icu4c/source/tools/icuswap/icuswap.vcproj | 21 +- .../tools/{icuswap => toolutil}/swapimpl.cpp | 136 ++++++++++- icu4c/source/tools/toolutil/swapimpl.h | 43 ++++ 5 files changed, 223 insertions(+), 196 deletions(-) rename icu4c/source/tools/{icuswap => toolutil}/swapimpl.cpp (75%) create mode 100644 icu4c/source/tools/toolutil/swapimpl.h diff --git a/icu4c/source/tools/icuswap/Makefile.in b/icu4c/source/tools/icuswap/Makefile.in index 073a2abb457..7ceecd9cc88 100644 --- a/icu4c/source/tools/icuswap/Makefile.in +++ b/icu4c/source/tools/icuswap/Makefile.in @@ -33,7 +33,7 @@ TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT) CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil $(BIR_CPPFLAGS) LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M) -OBJECTS = icuswap.o swapimpl.o +OBJECTS = icuswap.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/tools/icuswap/icuswap.cpp b/icu4c/source/tools/icuswap/icuswap.cpp index f2c8869ee7f..dfa01528da0 100644 --- a/icu4c/source/tools/icuswap/icuswap.cpp +++ b/icu4c/source/tools/icuswap/icuswap.cpp @@ -31,29 +31,14 @@ #include "uarrsort.h" #include "ucmndata.h" #include "udataswp.h" +#include "swapimpl.h" #include "toolutil.h" #include "uoptions.h" -/* swapping implementations in common */ - -#include "uresdata.h" -#include "ucnv_io.h" -#include "uprops.h" -#include "ucase.h" -#include "ubidi_props.h" -#include "ucol_swp.h" -#include "ucnv_bld.h" -#include "unormimp.h" -#include "sprpimpl.h" -#include "propname.h" -#include "rbbidata.h" - #include #include #include -/* swapping 
implementations in i18n */ - /* definitions */ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -80,39 +65,15 @@ fileSize(FILE *f) { return size; } -/** - * Identifies and then transforms the ICU data piece in-place, or determines - * its length. See UDataSwapFn. - * This function handles .dat data packages as well as single data pieces - * and internally dispatches to per-type swap functions. - * Sets a U_UNSUPPORTED_ERROR if the data format is not recognized. - * - * @see UDataSwapFn - * @see udata_openSwapper - * @see udata_openSwapperForInputData - * @internal ICU 2.8 - */ -static int32_t -udata_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - /** * Swap an ICU .dat package, including swapping of enclosed items. */ U_CFUNC int32_t U_CALLCONV -udata_swapPackage(const UDataSwapper *ds, +udata_swapPackage(const char *inFilename, const char *outFilename, + const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode); -/* - * udata_swapPackage() needs to rename ToC name entries from the old package - * name to the new one. - * We store the filenames here, and udata_swapPackage() will extract the - * package names. 
- */ -static const char *inFilename, *outFilename; - U_CDECL_BEGIN static void U_CALLCONV printError(void *context, const char *fmt, va_list args) { @@ -148,6 +109,7 @@ main(int argc, char *argv[]) { int rc; UDataSwapper *ds; + const UDataInfo *pInfo; UErrorCode errorCode; uint8_t outCharset; UBool outIsBigEndian; @@ -197,10 +159,6 @@ main(int argc, char *argv[]) { in=out=NULL; data=NULL; - /* udata_swapPackage() needs the filenames */ - inFilename=argv[1]; - outFilename=argv[2]; - /* open the input file, get its length, allocate memory for it, read the file */ in=fopen(argv[1], "rb"); if(in==NULL) { @@ -254,13 +212,40 @@ main(int argc, char *argv[]) { ds->printError=printError; ds->printErrorContext=stderr; - length=udata_swap(ds, data, length, data, &errorCode); - udata_closeSwapper(ds); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", - pname, argv[1], u_errorName(errorCode)); - rc=4; - goto done; + /* speculative cast, protected by the following length check */ + pInfo=(const UDataInfo *)((const char *)data+4); + + if( length>=20 && + pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ + pInfo->dataFormat[1]==0x6d && + pInfo->dataFormat[2]==0x6e && + pInfo->dataFormat[3]==0x44 + ) { + /* + * swap the .dat package + * udata_swapPackage() needs to rename ToC name entries from the old package + * name to the new one. + * We pass it the filenames, and udata_swapPackage() will extract the + * package names. 
+ */ + length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode); + udata_closeSwapper(ds); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n", + pname, argv[1], u_errorName(errorCode)); + rc=4; + goto done; + } + } else { + /* swap the data, which is not a .dat package */ + length=udata_swap(ds, data, length, data, &errorCode); + udata_closeSwapper(ds); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", + pname, argv[1], u_errorName(errorCode)); + rc=4; + goto done; + } } out=fopen(argv[2], "wb"); @@ -295,127 +280,6 @@ done: return rc; } -/* swap the data ------------------------------------------------------------ */ - -static const struct { - uint8_t dataFormat[4]; - UDataSwapFn *swapFn; -} swapFns[]={ - { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ -#if !UCONFIG_NO_LEGACY_CONVERSION - { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ - { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ -#endif - { { 0x43, 0x6d, 0x6e, 0x44 }, udata_swapPackage }, /* dataFormat="CmnD" */ -#if !UCONFIG_NO_IDNA - { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ -#endif - /* insert data formats here, descending by expected frequency of occurrence */ - { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ - - { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, - ucase_swap }, /* dataFormat="cAsE" */ - - { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, - ubidi_swap }, /* dataFormat="BiDi" */ - -#if !UCONFIG_NO_NORMALIZATION - { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ -#endif -#if !UCONFIG_NO_COLLATION - { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ - { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ -#endif -#if !UCONFIG_NO_BREAK_ITERATION - { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ 
-#endif - { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ - { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames } /* dataFormat="unam" */ -}; - -static int32_t -udata_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - char dataFormatChars[4]; - const UDataInfo *pInfo; - int32_t headerSize, i, swappedLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* - * Preflight the header first; checks for illegal arguments, too. - * Do not swap the header right away because the format-specific swapper - * will swap it, get the headerSize again, and also use the header - * information. Otherwise we would have to pass some of the information - * and not be able to use the UDataSwapFn signature. - */ - headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); - - /* - * If we wanted udata_swap() to also handle non-loadable data like a UTrie, - * then we could check here for further known magic values and structures. 
- */ - if(U_FAILURE(*pErrorCode)) { - return 0; /* the data format was not recognized */ - } - - pInfo=(const UDataInfo *)((const char *)inData+4); - - { - /* convert the data format from ASCII to Unicode to the system charset */ - UChar u[4]={ - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3] - }; - - if(uprv_isInvariantUString(u, 4)) { - u_UCharsToChars(u, dataFormatChars, 4); - } else { - dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; - } - } - - /* dispatch to the swap function for the dataFormat */ - for(i=0; i<LENGTHOF(swapFns); ++i) { - if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { - swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); - - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - dataFormatChars[0], dataFormatChars[1], - dataFormatChars[2], dataFormatChars[3], - u_errorName(*pErrorCode)); - } else if(swappedLength<(length-15)) { - /* swapped less than expected */ - udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", - swappedLength, length, - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - dataFormatChars[0], dataFormatChars[1], - dataFormatChars[2], dataFormatChars[3], - u_errorName(*pErrorCode)); - } - - return swappedLength; - } - } - - /* the dataFormat was not recognized */ - udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - dataFormatChars[0], dataFormatChars[1], - dataFormatChars[2], dataFormatChars[3]); - - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; -} - /* swap .dat package files -------------------------------------------------- */ static int32_t @@ -465,7 +329,8
@@ compareToCEntries(const void *context, const void *left, const void *right) { U_CDECL_END U_CFUNC int32_t U_CALLCONV -udata_swapPackage(const UDataSwapper *ds, +udata_swapPackage(const char *inFilename, const char *outFilename, + const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; @@ -577,7 +442,7 @@ udata_swapPackage(const UDataSwapper *ds, } } if((uint32_t)length - - - - - - - - - - + + diff --git a/icu4c/source/tools/icuswap/swapimpl.cpp b/icu4c/source/tools/toolutil/swapimpl.cpp similarity index 75% rename from icu4c/source/tools/icuswap/swapimpl.cpp rename to icu4c/source/tools/toolutil/swapimpl.cpp index 41e6d4cafed..189f8b6e35d 100644 --- a/icu4c/source/tools/icuswap/swapimpl.cpp +++ b/icu4c/source/tools/toolutil/swapimpl.cpp @@ -34,11 +34,25 @@ #include "ucmndata.h" #include "udataswp.h" +/* swapping implementations in common */ + +#include "uresdata.h" +#include "ucnv_io.h" #include "uprops.h" #include "ucase.h" #include "ubidi_props.h" +#include "ucol_swp.h" +#include "ucnv_bld.h" #include "unormimp.h" -#include "utrie.h" +#include "sprpimpl.h" +#include "propname.h" +#include "rbbidata.h" + +/* swapping implementations in i18n */ + +/* definitions */ + +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* Unicode properties data swapping ----------------------------------------- */ @@ -503,3 +517,123 @@ unorm_swap(const UDataSwapper *ds, } #endif + +/* swap any data (except a .dat package) ------------------------------------ */ + +static const struct { + uint8_t dataFormat[4]; + UDataSwapFn *swapFn; +} swapFns[]={ + { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ +#if !UCONFIG_NO_LEGACY_CONVERSION + { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ + { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ +#endif +#if !UCONFIG_NO_IDNA + { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ 
+#endif + /* insert data formats here, descending by expected frequency of occurrence */ + { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ + + { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, + ucase_swap }, /* dataFormat="cAsE" */ + + { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, + ubidi_swap }, /* dataFormat="BiDi" */ + +#if !UCONFIG_NO_NORMALIZATION + { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ +#endif +#if !UCONFIG_NO_COLLATION + { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ + { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ +#endif +#if !UCONFIG_NO_BREAK_ITERATION + { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ +#endif + { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ + { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames } /* dataFormat="unam" */ +}; + +U_CAPI int32_t U_EXPORT2 +udata_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + char dataFormatChars[4]; + const UDataInfo *pInfo; + int32_t headerSize, i, swappedLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* + * Preflight the header first; checks for illegal arguments, too. + * Do not swap the header right away because the format-specific swapper + * will swap it, get the headerSize again, and also use the header + * information. Otherwise we would have to pass some of the information + * and not be able to use the UDataSwapFn signature. + */ + headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); + + /* + * If we wanted udata_swap() to also handle non-loadable data like a UTrie, + * then we could check here for further known magic values and structures. 
+ */ + if(U_FAILURE(*pErrorCode)) { + return 0; /* the data format was not recognized */ + } + + pInfo=(const UDataInfo *)((const char *)inData+4); + + { + /* convert the data format from ASCII to Unicode to the system charset */ + UChar u[4]={ + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3] + }; + + if(uprv_isInvariantUString(u, 4)) { + u_UCharsToChars(u, dataFormatChars, 4); + } else { + dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; + } + } + + /* dispatch to the swap function for the dataFormat */ + for(i=0; i<LENGTHOF(swapFns); ++i) { + if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { + swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); + + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + dataFormatChars[0], dataFormatChars[1], + dataFormatChars[2], dataFormatChars[3], + u_errorName(*pErrorCode)); + } else if(swappedLength<(length-15)) { + /* swapped less than expected */ + udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", + swappedLength, length, + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + dataFormatChars[0], dataFormatChars[1], + dataFormatChars[2], dataFormatChars[3], + u_errorName(*pErrorCode)); + } + + return swappedLength; + } + } + + /* the dataFormat was not recognized */ + udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + dataFormatChars[0], dataFormatChars[1], + dataFormatChars[2], dataFormatChars[3]); + + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; +} diff --git a/icu4c/source/tools/toolutil/swapimpl.h b/icu4c/source/tools/toolutil/swapimpl.h new file mode 100644
index 00000000000..b36ec2609da --- /dev/null +++ b/icu4c/source/tools/toolutil/swapimpl.h @@ -0,0 +1,43 @@ +/* +******************************************************************************* +* +* Copyright (C) 2005, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: swapimpl.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005jul29 +* created by: Markus W. Scherer +* +* Declarations for data file swapping functions not declared in internal +* library headers. +*/ + +#ifndef __SWAPIMPL_H__ +#define __SWAPIMPL_H__ + +#include "unicode/utypes.h" +#include "udataswp.h" + +/** + * Identifies and then transforms the ICU data piece in-place, or determines + * its length. See UDataSwapFn. + * This function handles single data pieces (but not .dat data packages) + * and internally dispatches to per-type swap functions. + * Sets a U_UNSUPPORTED_ERROR if the data format is not recognized. + * + * @see UDataSwapFn + * @see udata_openSwapper + * @see udata_openSwapperForInputData + * @internal ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +udata_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#endif