diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 1c81f620381..705aabd4bb2 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -27,7 +27,7 @@ DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"' LIBS = $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) OBJECTS = allcoll.o apicoll.o callimts.o calregts.o caltest.o \ -caltztst.o canittst.o citrtest.o cntabcol.o cputilst.o currcoll.o dacoll.o \ +caltztst.o canittst.o citrtest.o cntabcol.o convtest.o cputilst.o currcoll.o dacoll.o \ dadrcoll.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \ encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \ itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \ diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp new file mode 100644 index 00000000000..dbe54db7334 --- /dev/null +++ b/icu4c/source/test/intltest/convtest.cpp @@ -0,0 +1,899 @@ +/* +******************************************************************************* +* +* Copyright (C) 2003, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: convtest.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jul15 +* created by: Markus W. Scherer +* +* Test file for data-driven conversion tests. 
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ucnv.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "unicode/ures.h"
+#include "convtest.h"
+#include "tstdtmod.h"
+// NOTE(review): the two system header names were missing from this patch text.
+// <stdlib.h> is required for exit(); <string.h> is presumed - confirm against the original file.
+#include <string.h>
+#include <stdlib.h>
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+enum {
+    // characters used in test data for callbacks
+    SUB_CB='?',
+    SKIP_CB='0',
+    STOP_CB='.',
+    ESC_CB='&'
+};
+
+/**
+ * Test dispatcher: stores the name of the test with the given index in name
+ * and runs that test when exec is TRUE.
+ * An unknown index yields an empty name, which ends the caller's enumeration loop.
+ */
+void
+ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+    // the label used to say "StringCaseTest", copied from another test suite
+    if (exec) logln("TestSuite ConversionTest: ");
+    switch (index) {
+        case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
+        case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
+        default: name=""; break; //needed to end loop
+    }
+}
+
+// test data interface ----------------------------------------------------- ***
+
+/**
+ * Runs every case of the "toUnicode" table in the "conversion" test data module.
+ * Each case is parsed into a ConversionCase and passed to ToUnicodeCase().
+ * A case that cannot be parsed is reported with errln() and skipped;
+ * failure to load the test data itself is reported as a test error as well.
+ */
+void
+ConversionTest::TestToUnicode() {
+    ConversionCase cc;
+    char charset[100], cbopt[4];
+    const char *option;
+    UnicodeString s, unicode;
+    int32_t offsetsLength;
+    UConverterToUCallback callback;
+
+    TestLog testLog;
+    TestDataModule *dataModule;
+    TestData *testData;
+    const DataMap *testCase;
+    UErrorCode errorCode;
+    int32_t i;
+
+    errorCode=U_ZERO_ERROR;
+    dataModule=TestDataModule::getTestDataModule("conversion", testLog, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        testData=dataModule->createTestData("toUnicode", errorCode);
+        if(U_SUCCESS(errorCode)) {
+            for(i=0; testData->nextCase(testCase, errorCode); ++i) {
+                cc.caseNr=i;
+
+                s=testCase->getString("charset", errorCode);
+                s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
+                // extract() NUL-terminates only if the text fits; force termination
+                // because the buffer is later printed with %s
+                charset[sizeof(charset)-1]=0;
+                cc.charset=charset;
+
+                cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
+                unicode=testCase->getString("unicode", errorCode);
+                cc.unicode=unicode.getBuffer();
+                cc.unicodeLength=unicode.length();
+
+                // offsets are optional, but if present there must be one per output UChar
+                offsetsLength=0;
+                cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
+                if(offsetsLength==0) {
+                    cc.offsets=NULL;
+                } else if(offsetsLength!=unicode.length()) {
+                    errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
+                            i, unicode.length(), offsetsLength);
+                    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                }
+
+                cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
+                cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
+
+                // map the symbolic error name to the expected UErrorCode; anything else means success
+                s=testCase->getString("errorCode", errorCode);
+                if(s==UNICODE_STRING("invalid", 7)) {
+                    cc.outErrorCode=U_INVALID_CHAR_FOUND;
+                } else if(s==UNICODE_STRING("illegal", 7)) {
+                    cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
+                } else if(s==UNICODE_STRING("truncated", 9)) {
+                    cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
+                } else {
+                    cc.outErrorCode=U_ZERO_ERROR;
+                }
+
+                // callback string: optional callback character followed by an optional option string
+                s=testCase->getString("callback", errorCode);
+                s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
+                cbopt[sizeof(cbopt)-1]=0; // see charset above
+                cc.cbopt=cbopt;
+                switch(cbopt[0]) {
+                case SUB_CB:
+                    callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
+                    break;
+                case SKIP_CB:
+                    callback=UCNV_TO_U_CALLBACK_SKIP;
+                    break;
+                case STOP_CB:
+                    callback=UCNV_TO_U_CALLBACK_STOP;
+                    break;
+                case ESC_CB:
+                    callback=UCNV_TO_U_CALLBACK_ESCAPE;
+                    break;
+                default:
+                    // no callback character: the whole string (if any) is the option
+                    callback=NULL;
+                    break;
+                }
+                option=callback==NULL ? cbopt : cbopt+1;
+                if(*option==0) {
+                    option=NULL;
+                }
+
+                cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
+
+                if(U_FAILURE(errorCode)) {
+                    errln("error parsing conversion/toUnicode test case %d - %s",
+                            i, u_errorName(errorCode));
+                    // reset so that the next case is read and parsed
+                    errorCode=U_ZERO_ERROR;
+                } else {
+                    ToUnicodeCase(cc, callback, option);
+                }
+            }
+            delete testData;
+        } else {
+            // do not let the test pass silently when its data is missing
+            errln("unable to load conversion/toUnicode test data - %s", u_errorName(errorCode));
+        }
+        delete dataModule;
+    } else {
+        errln("unable to load the conversion test data module - %s", u_errorName(errorCode));
+    }
+}
+
+/**
+ * Runs every case of the "fromUnicode" table in the "conversion" test data module.
+ * Each case is parsed into a ConversionCase and passed to FromUnicodeCase().
+ * A case that cannot be parsed is reported with errln() and skipped;
+ * failure to load the test data itself is reported as a test error as well.
+ */
+void
+ConversionTest::TestFromUnicode() {
+    ConversionCase cc;
+    char charset[100], cbopt[4];
+    const char *option;
+    UnicodeString s, unicode, invalidUChars;
+    int32_t offsetsLength;
+    UConverterFromUCallback callback;
+
+    TestLog testLog;
+    TestDataModule *dataModule;
+    TestData *testData;
+    const DataMap *testCase;
+    UErrorCode errorCode;
+    int32_t i;
+
+    errorCode=U_ZERO_ERROR;
+    dataModule=TestDataModule::getTestDataModule("conversion", testLog, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        testData=dataModule->createTestData("fromUnicode", errorCode);
+        if(U_SUCCESS(errorCode)) {
+            for(i=0; testData->nextCase(testCase, errorCode); ++i) {
+                cc.caseNr=i;
+
+                s=testCase->getString("charset", errorCode);
+                s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
+                // extract() NUL-terminates only if the text fits; force termination
+                // because the buffer is later printed with %s
+                charset[sizeof(charset)-1]=0;
+                cc.charset=charset;
+
+                unicode=testCase->getString("unicode", errorCode);
+                cc.unicode=unicode.getBuffer();
+                cc.unicodeLength=unicode.length();
+                cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
+
+                // offsets are optional, but if present there must be one per output byte
+                offsetsLength=0;
+                cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
+                if(offsetsLength==0) {
+                    cc.offsets=NULL;
+                } else if(offsetsLength!=cc.bytesLength) {
+                    errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
+                            i, cc.bytesLength, offsetsLength);
+                    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                }
+
+                cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
+                cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
+
+                // map the symbolic error name to the expected UErrorCode; anything else means success
+                s=testCase->getString("errorCode", errorCode);
+                if(s==UNICODE_STRING("invalid", 7)) {
+                    cc.outErrorCode=U_INVALID_CHAR_FOUND;
+                } else if(s==UNICODE_STRING("illegal", 7)) {
+                    cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
+                } else if(s==UNICODE_STRING("truncated", 9)) {
+                    cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
+                } else {
+                    cc.outErrorCode=U_ZERO_ERROR;
+                }
+
+                // callback string: optional callback character followed by an optional option string
+                s=testCase->getString("callback", errorCode);
+                s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
+                cbopt[sizeof(cbopt)-1]=0; // see charset above
+                cc.cbopt=cbopt;
+                switch(cbopt[0]) {
+                case SUB_CB:
+                    callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
+                    break;
+                case SKIP_CB:
+                    callback=UCNV_FROM_U_CALLBACK_SKIP;
+                    break;
+                case STOP_CB:
+                    callback=UCNV_FROM_U_CALLBACK_STOP;
+                    break;
+                case ESC_CB:
+                    callback=UCNV_FROM_U_CALLBACK_ESCAPE;
+                    break;
+                default:
+                    // no callback character: the whole string (if any) is the option
+                    callback=NULL;
+                    break;
+                }
+                option=callback==NULL ? cbopt : cbopt+1;
+                if(*option==0) {
+                    option=NULL;
+                }
+
+                invalidUChars=testCase->getString("invalidUChars", errorCode);
+                cc.invalidUChars=invalidUChars.getBuffer();
+                cc.invalidLength=invalidUChars.length();
+
+                if(U_FAILURE(errorCode)) {
+                    errln("error parsing conversion/fromUnicode test case %d - %s",
+                            i, u_errorName(errorCode));
+                    // reset so that the next case is read and parsed
+                    errorCode=U_ZERO_ERROR;
+                } else {
+                    FromUnicodeCase(cc, callback, option);
+                }
+            }
+            delete testData;
+        } else {
+            // do not let the test pass silently when its data is missing
+            errln("unable to load conversion/fromUnicode test data - %s", u_errorName(errorCode));
+        }
+        delete dataModule;
+    } else {
+        errln("unable to load the conversion test data module - %s", u_errorName(errorCode));
+    }
+}
+
+// output helpers ---------------------------------------------------------- ***
+
+static U_INLINE char
+hexDigit(uint8_t digit) {
+    return digit<=9 ? 
(char)('0'+digit) : (char)('a'-10+digit); +} + +static char * +printBytes(const uint8_t *bytes, int32_t length, char *out) { + uint8_t b; + + if(length>0) { + b=*bytes++; + --length; + *out++=hexDigit((uint8_t)(b>>4)); + *out++=hexDigit((uint8_t)(b&0xf)); + } + + while(length>0) { + b=*bytes++; + --length; + *out++=' '; + *out++=hexDigit((uint8_t)(b>>4)); + *out++=hexDigit((uint8_t)(b&0xf)); + } + *out++=0; + return out; +} + +static char * +printUnicode(const UChar *unicode, int32_t length, char *out) { + UChar32 c; + int32_t i; + + for(i=0; i0) { + *out++=' '; + } + U16_NEXT(unicode, i, length, c); + // write 4..6 digits + if(c>=0x100000) { + *out++='1'; + } + if(c>=0x10000) { + *out++=hexDigit((uint8_t)((c>>16)&0xf)); + } + *out++=hexDigit((uint8_t)((c>>12)&0xf)); + *out++=hexDigit((uint8_t)((c>>8)&0xf)); + *out++=hexDigit((uint8_t)((c>>4)&0xf)); + *out++=hexDigit((uint8_t)(c&0xf)); + } + *out++=0; + return out; +} + +static char * +printOffsets(const int32_t *offsets, int32_t length, char *out) { + int32_t i, o, d; + + if(offsets==NULL) { + length=0; + } + + for(i=0; i0) { + *out++=' '; + } + o=offsets[i]; + + // print all offsets with 2 characters each (-x, -9..99, xx) + if(o<-9) { + *out++='-'; + *out++='x'; + } else if(o<0) { + *out++='-'; + *out++=(char)('0'-o); + } else if(o<=99) { + *out++=(d=o/10)==0 ? 
' ' : (char)('0'+d); + *out++=(char)('0'+o%10); + } else /* o>99 */ { + *out++='x'; + *out++='x'; + } + } + *out++=0; + return out; +} + +// toUnicode test worker functions ----------------------------------------- *** + +static int32_t +stepToUnicode(ConversionCase &cc, UConverter *cnv, + UChar *result, int32_t resultCapacity, + int32_t *resultOffsets, /* also resultCapacity */ + int32_t step, + UErrorCode *pErrorCode) { + const char *source, *sourceLimit, *bytesLimit; + UChar *target, *targetLimit, *resultLimit; + UBool flush; + + source=(const char *)cc.bytes; + target=result; + bytesLimit=source+cc.bytesLength; + resultLimit=result+resultCapacity; + + if(step>=0) { + // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time + // move only one buffer (in vs. out) at a time to be extra mean + // step==0 performs bulk conversion and generates offsets + + // initialize the partial limits for the loop + if(step==0) { + // use the entire buffers + sourceLimit=bytesLimit; + targetLimit=resultLimit; + flush=cc.finalFlush; + } else { + // start with empty partial buffers + sourceLimit=source; + targetLimit=target; + flush=FALSE; + + // output offsets only for bulk conversion + resultOffsets=NULL; + } + + for(;;) { + // resetting the opposite conversion direction must not affect this one + ucnv_resetFromUnicode(cnv); + + // convert + ucnv_toUnicode(cnv, + &target, targetLimit, + &source, sourceLimit, + resultOffsets, + flush, pErrorCode); + + // check pointers and errors + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + if(target!=targetLimit) { + // buffer overflow must only be set when the target is filled + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + break; + } else if(targetLimit==resultLimit) { + // not just a partial overflow + break; + } + + // the partial target is filled, set a new limit, reset the error and continue + targetLimit=(resultLimit-target)>=step ? 
target+step : resultLimit; + *pErrorCode=U_ZERO_ERROR; + } else if(U_FAILURE(*pErrorCode)) { + // some other error occurred, done + break; + } else { + if(source!=sourceLimit) { + // when no error occurs, then the input must be consumed + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + break; + } + + if(sourceLimit==bytesLimit) { + // we are done + break; + } + + // the partial conversion succeeded, set a new limit and continue + sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit; + flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit); + } + } + } else /* step<0 */ { + // step==-1 or -2: call ucnv_toUnicode() and ucnv_getNextUChar() alternatingly + // step==-3: call only ucnv_getNextUChar() + UChar32 c; + + // end the loop by getting an index out of bounds error + for(;;) { + // resetting the opposite conversion direction must not affect this one + ucnv_resetFromUnicode(cnv); + + // convert + if((step&1)!=0 /* odd: -1 or -3 */) { + sourceLimit=source; // use sourceLimit not as a real limit + // but to remember the pre-getNextUChar source pointer + c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode); + + // check pointers and errors + if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) { + if(source!=bytesLimit) { + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + } else { + *pErrorCode=U_ZERO_ERROR; + } + break; + } else if(U_FAILURE(*pErrorCode)) { + break; + } + // source may not move if c is from previous overflow + + if(target==resultLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + if(c<=0xffff) { + *target++=(UChar)c; + } else { + *target++=U16_LEAD(c); + if(target==resultLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + *target++=U16_TRAIL(c); + } + + // alternate between -1 and -2 but leave -3 alone + if(step==-1) { + step=-2; + } + } else /* step==-2 */ { + // allow only one UChar output + targetLimit=targetsizeof(buffer)) { + errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer 
overflow writing %d chars\n", + cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); + exit(1); + } + + errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" + " bytes <%s>[%d]\n" + " expected <%s>[%d]\n" + " result <%s>[%d]\n" + " offsets <%s>\n" + " result offsets <%s>\n" + " error code expected %s got %s\n" + " invalidChars expected <%s> got <%s>\n", + cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg, + bytesString, cc.bytesLength, + unicodeString, cc.unicodeLength, + resultString, resultLength, + offsetsString, + resultOffsetsString, + u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), + invalidCharsString, resultInvalidCharsString); + + return FALSE; + } +} + +// fromUnicode test worker functions --------------------------------------- *** + +static int32_t +stepFromUnicode(ConversionCase &cc, UConverter *cnv, + char *result, int32_t resultCapacity, + int32_t *resultOffsets, /* also resultCapacity */ + int32_t step, + UErrorCode *pErrorCode) { + const UChar *source, *sourceLimit, *unicodeLimit; + char *target, *targetLimit, *resultLimit; + UBool flush; + + source=cc.unicode; + target=result; + unicodeLimit=source+cc.unicodeLength; + resultLimit=result+resultCapacity; + + // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time + // move only one buffer (in vs. 
out) at a time to be extra mean + // step==0 performs bulk conversion and generates offsets + + // initialize the partial limits for the loop + if(step==0) { + // use the entire buffers + sourceLimit=unicodeLimit; + targetLimit=resultLimit; + flush=cc.finalFlush; + } else { + // start with empty partial buffers + sourceLimit=source; + targetLimit=target; + flush=FALSE; + + // output offsets only for bulk conversion + resultOffsets=NULL; + } + + for(;;) { + // resetting the opposite conversion direction must not affect this one + ucnv_resetToUnicode(cnv); + + // convert + ucnv_fromUnicode(cnv, + &target, targetLimit, + &source, sourceLimit, + resultOffsets, + flush, pErrorCode); + + // check pointers and errors + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + if(target!=targetLimit) { + // buffer overflow must only be set when the target is filled + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + break; + } else if(targetLimit==resultLimit) { + // not just a partial overflow + break; + } + + // the partial target is filled, set a new limit, reset the error and continue + targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; + *pErrorCode=U_ZERO_ERROR; + } else if(U_FAILURE(*pErrorCode)) { + // some other error occurred, done + break; + } else { + if(source!=sourceLimit) { + // when no error occurs, then the input must be consumed + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + break; + } + + if(sourceLimit==unicodeLimit) { + // we are done + break; + } + + // the partial conversion succeeded, set a new limit and continue + sourceLimit=(unicodeLimit-source)>=step ? 
source+step : unicodeLimit; + flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit); + } + } + + return (int32_t)(target-result); +} + +UBool +ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) { + UConverter *cnv; + UErrorCode errorCode; + + // open the converter + errorCode=U_ZERO_ERROR; + cnv=ucnv_open(cc.charset, &errorCode); + if(U_FAILURE(errorCode)) { + errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", + cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); + return FALSE; + } + + // set the callback + if(callback!=NULL) { + ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode); + if(U_FAILURE(errorCode)) { + errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s", + cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); + ucnv_close(cnv); + return FALSE; + } + } + + // set the fallbacks flag + // TODO change with Jitterbug 2401, then add a similar call for toUnicode too + ucnv_setFallback(cnv, cc.fallbacks); + + int32_t resultOffsets[200]; + char result[200]; + int32_t resultLength; + + static const struct { + int32_t step; + const char *name; + } steps[]={ + { 0, "bulk" }, // must be first for offsets to be checked + { 1, "step=1" }, + { 3, "step=3" }, + { 7, "step=7" } + }; + int32_t i, step; + + for(i=0; isizeof(buffer)) { + errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n", + cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); + exit(1); + } + + errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" + " unicode <%s>[%d]\n" + " expected <%s>[%d]\n" + " result <%s>[%d]\n" + " offsets <%s>\n" + " result offsets <%s>\n" + " error code expected %s got %s\n" + " invalidChars expected <%s> got <%s>\n", + cc.caseNr, cc.charset, 
cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg, + unicodeString, cc.unicodeLength, + bytesString, cc.bytesLength, + resultString, resultLength, + offsetsString, + resultOffsetsString, + u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), + invalidCharsString, resultInvalidUCharsString); + + return FALSE; + } +} diff --git a/icu4c/source/test/intltest/convtest.h b/icu4c/source/test/intltest/convtest.h new file mode 100644 index 00000000000..511aa5e53eb --- /dev/null +++ b/icu4c/source/test/intltest/convtest.h @@ -0,0 +1,81 @@ +/* +******************************************************************************* +* +* Copyright (C) 2003, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: convtest.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jul15 +* created by: Markus W. Scherer +* +* Test file for data-driven conversion tests. 
+*/
+
+#ifndef __CONVTEST_H__
+#define __CONVTEST_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ucnv.h"
+#include "intltest.h"
+
+// One data-driven conversion test case: the inputs and expected outputs
+// read from the test data, plus storage for results.
+struct ConversionCase {
+    // index of the case within its test data table; used in error messages
+    int32_t caseNr;
+    // charset: converter name passed to ucnv_open()
+    // cbopt: the raw callback+option string from the test data
+    // name: NOTE(review): not set by the test data readers - presumably the step name; confirm
+    const char *charset, *cbopt, *name;
+
+    // byte sequence (toUnicode input / fromUnicode expected output) and its length
+    const uint8_t *bytes;
+    int32_t bytesLength;
+    // Unicode text (toUnicode expected output / fromUnicode input) and its length
+    const UChar *unicode;
+    int32_t unicodeLength;
+    // expected offsets, or NULL when the test data provides none
+    const int32_t *offsets;
+
+    // test data "flush" and "fallbacks" fields as booleans
+    UBool finalFlush;
+    UBool fallbacks;
+    // expected error code (U_ZERO_ERROR when the test data names none)
+    UErrorCode outErrorCode;
+    // expected invalid input: invalidChars for toUnicode, invalidUChars for fromUnicode;
+    // invalidLength is the length of whichever one is set
+    const uint8_t *invalidChars;
+    const UChar *invalidUChars;
+    int32_t invalidLength;
+
+    // NOTE(review): the result* members look like storage for conversion output - confirm usage
+    uint8_t resultBytes[200];
+    UChar resultUnicode[200];
+    int32_t resultOffsets[200];
+    int32_t resultLength;
+
+    UErrorCode resultErrorCode;
+};
+
+// intltest suite that runs the data-driven conversion tests
+class ConversionTest : public IntlTest {
+public:
+    ConversionTest() {}
+    virtual ~ConversionTest() {}
+    
+    // IntlTest dispatcher: selects and optionally runs the test with the given index
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
+
+    // read the "toUnicode" / "fromUnicode" test data tables and run each case
+    void TestToUnicode();
+    void TestFromUnicode();
+
+private:
+    // run one bytes-to-Unicode case with the given callback and callback option
+    UBool
+    ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option);
+
+    // run one Unicode-to-bytes case with the given callback and callback option
+    UBool
+    FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option);
+
+    // compare a toUnicode result with the expectations in cc; reports a failure via errln()
+    UBool
+    checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
+                   const UChar *result, int32_t resultLength,
+                   const int32_t *resultOffsets,
+                   UErrorCode resultErrorCode);
+
+    // compare a fromUnicode result with the expectations in cc; reports a failure via errln()
+    UBool
+    checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
+                     const uint8_t *result, int32_t resultLength,
+                     const int32_t *resultOffsets,
+                     UErrorCode resultErrorCode);
+};
+
+#endif
diff --git a/icu4c/source/test/intltest/intltest.dsp b/icu4c/source/test/intltest/intltest.dsp index 38598e26a85..870d7bf0e49 100644 --- a/icu4c/source/test/intltest/intltest.dsp +++ b/icu4c/source/test/intltest/intltest.dsp @@ -411,6 +411,18 @@ SOURCE=.\tsputil.cpp SOURCE=.\tsputil.h # End Source File # End Group +# Begin Group "conversion" + +# PROP Default_Filter "" +# Begin Source File + +SOURCE=.\convtest.cpp +# End Source File +# Begin Source File + +SOURCE=.\convtest.h +# End Source File +# 
End Group # Begin Group "data & memory" # PROP Default_Filter "" @@ -703,6 +715,46 @@ SOURCE=.\restsnew.cpp SOURCE=.\restsnew.h # End Source File # End Group +# Begin Group "idna" + +# PROP Default_Filter "*.c,*.h" +# Begin Source File + +SOURCE=.\idnaref.cpp +# End Source File +# Begin Source File + +SOURCE=.\idnaref.h +# End Source File +# Begin Source File + +SOURCE=.\nptrans.cpp +# End Source File +# Begin Source File + +SOURCE=.\nptrans.h +# End Source File +# Begin Source File + +SOURCE=.\punyref.c +# End Source File +# Begin Source File + +SOURCE=.\punyref.h +# End Source File +# Begin Source File + +SOURCE=.\testidn.cpp +# End Source File +# Begin Source File + +SOURCE=.\testidna.cpp +# End Source File +# Begin Source File + +SOURCE=.\testidna.h +# End Source File +# End Group # Begin Group "misc" # PROP Default_Filter "" @@ -963,45 +1015,5 @@ SOURCE=.\unhxtrts.cpp SOURCE=.\unhxtrts.h # End Source File # End Group -# Begin Group "idna" - -# PROP Default_Filter "*.c,*.h" -# Begin Source File - -SOURCE=.\idnaref.cpp -# End Source File -# Begin Source File - -SOURCE=.\idnaref.h -# End Source File -# Begin Source File - -SOURCE=.\nptrans.cpp -# End Source File -# Begin Source File - -SOURCE=.\nptrans.h -# End Source File -# Begin Source File - -SOURCE=.\punyref.c -# End Source File -# Begin Source File - -SOURCE=.\punyref.h -# End Source File -# Begin Source File - -SOURCE=.\testidn.cpp -# End Source File -# Begin Source File - -SOURCE=.\testidna.cpp -# End Source File -# Begin Source File - -SOURCE=.\testidna.h -# End Source File -# End Group # End Target # End Project diff --git a/icu4c/source/test/intltest/itmajor.cpp b/icu4c/source/test/intltest/itmajor.cpp index 2873de91d45..3de30c92d26 100644 --- a/icu4c/source/test/intltest/itmajor.cpp +++ b/icu4c/source/test/intltest/itmajor.cpp @@ -20,7 +20,6 @@ #include "itutil.h" #include "tscoll.h" #include "itformat.h" -//%#include "itconv.h" #include "ittrans.h" #include "itrbbi.h" #include "itrbnf.h" @@ -31,6 
+30,8 @@ #include "canittst.h" #include "icusvtst.h" #include "testidna.h" +#include "convtest.h" + #define CASE_SUITE(id, suite) case id: \ name = #suite; \ if(exec) { \ @@ -147,6 +148,14 @@ void MajorTestLevel::runIndexedTest( int32_t index, UBool exec, const char* &nam } #endif break; + case 11: name = "conversion"; + if (exec) { + logln("TestSuite Conversion---"); logln(); + ConversionTest test; + callTest( test, par ); + } + break; + default: name = ""; break; } } diff --git a/icu4c/source/test/testdata/conversion.txt b/icu4c/source/test/testdata/conversion.txt new file mode 100644 index 00000000000..de661874f59 --- /dev/null +++ b/icu4c/source/test/testdata/conversion.txt @@ -0,0 +1,66 @@ +//******************************************************************************* +// +// Copyright (C) 2003, International Business Machines +// Corporation and others. All Rights Reserved. +// +//******************************************************************************* + +conversion { + Info { + Description { "Test data for conversion" } + LongDescription { + "Test data for data-driven conversion tests in icu/source/test/intltest/convtest.cpp\n" + "Run intltest conversion\n" + + "ICU callbacks are specified as strings with pairs of characters, each optional.\n" + "Callback function - '?'=Sub '0'=Skip '.'=Stop '&'=Escape\n" + "Callback option - a letter is passed in directly as const char * see ucnv_err.h\n" + "Empty string: Sub callback with NULL option\n" + + "fallbacks: per-direction boolean, currently only for fromUnicode; see Jitterbug 2401\n" + + "errorCode: (empty)==zero | invalid | illegal | truncated\n" + } + } + TestData { + toUnicode { + Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" } + Cases { + // surrogates in CESU-8 + { "CESU-8", :bin{ eda080eda081edb081 }, "\ud800\U00010401", :intvector{ 0, 3, 6 }, :int{1}, :int{0}, "", "", :bin{""} } + // e080 is a partial sequence + { "UTF-8", :bin{ 
31ffe4ba8ce08061 }, "1\ufffd\u4e8c\ufffda", :intvector{ 0, 1, 2, 5, 7 }, :int{1}, :int{0}, "", "", :bin{ e080 } } + // fbbfbfbfbf exceeds U+10ffff + { "UTF-8", :bin{ 31fbbfbfbfbf61 }, "1\ufffda", :intvector{ 0, 1, 6 }, :int{1}, :int{0}, "", "", :bin{ fbbfbfbfbf } } + + // lead byte a2 without trail byte + { "ibm-1363", :bin{ a2aea2 }, "\u00a1", :intvector{ 0 }, :int{1}, :int{0}, "truncated", ".", :bin{ a2 } } + { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "", "?", :bin{ a2 } } + + // e4b8 is a partial sequence + { "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c", :intvector{ 0, 1 }, :int{1}, :int{0}, "truncated", ".", :bin{ e4b8 } } + { "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c\ufffd", :intvector{ 0, 1, 4 }, :int{1}, :int{0}, "", "?", :bin{ e4b8 } } + + // simple sample, no error handling + { "UTF-8", :bin{ 61F48FBFBF }, "a\U0010FFFF", :intvector{ 0, 1, 1 }, :int{1}, :int{0}, "", "", :bin{""} } + } + } + + // --------------------------------------------------------------------- *** + + fromUnicode { + Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" } + Cases { + // sub callback for supplementary code point + { "LATIN1", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "", "" } + { "ibm-920", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "", "" } + // same but not flushing + { "LATIN1", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{0}, :int{0}, "", "", "\U00010401" } + { "ibm-920", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{0}, :int{0}, "", "", "\U00010401" } + + // simple sample, no error handling + { "UTF-8", "a\U0010FFFF", :bin{ 61F48FBFBF }, :intvector{ 0, 1, 1, 1, 1 }, :int{1}, :int{0}, "", "", "" } + } + } + } +} diff --git a/icu4c/source/test/testdata/testdata.mk b/icu4c/source/test/testdata/testdata.mk index 9cbee624bb3..00a8cf6c57d 100644 --- 
a/icu4c/source/test/testdata/testdata.mk +++ b/icu4c/source/test/testdata/testdata.mk @@ -14,11 +14,12 @@ TESTDT=$(TESTPKG)_ ALL : "$(TESTDATAOUT)\testdata.dat" @echo Test data is built. -"$(TESTDATAOUT)\testdata.dat" : "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\$(TESTDT)iscii.res" "$(TESTDATABLD)\$(TESTDT)idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" $(TESTDATABLD)\$(TESTDT)test.icu "$(TESTDATABLD)\$(TESTDT)test1.cnv" "$(TESTDATABLD)\$(TESTDT)test3.cnv" "$(TESTDATABLD)\$(TESTDT)test4.cnv" "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv" +"$(TESTDATAOUT)\testdata.dat" : "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\conversion.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\$(TESTDT)iscii.res" "$(TESTDATABLD)\$(TESTDT)idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" $(TESTDATABLD)\$(TESTDT)test.icu "$(TESTDATABLD)\$(TESTDT)test1.cnv" "$(TESTDATABLD)\$(TESTDT)test3.cnv" "$(TESTDATABLD)\$(TESTDT)test4.cnv" "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv" @echo Building test data @copy "$(TESTDATABLD)\$(TESTDT)te.res" "$(TESTDATAOUT)\$(TESTDT)nam.typ" @"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -f -v -m common -c -p"$(TESTPKG)" -O "$(PKGOPT)" -d "$(TESTDATAOUT)" -T "$(TESTDATABLD)" -s "$(TESTDATABLD)" << $(TESTDT)casing.res +$(TESTDT)conversion.res $(TESTDT)mc.res $(TESTDT)root.res $(TESTDT)te.res