diff --git a/icu4c/source/common/uinvchar.c b/icu4c/source/common/uinvchar.c index 55a644570bb..d465b2efd41 100644 --- a/icu4c/source/common/uinvchar.c +++ b/icu4c/source/common/uinvchar.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -534,3 +534,51 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { } } } + + +U_INTERNAL uint8_t* U_EXPORT2 +uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) +{ + uint8_t *orig_dst = dst; + + if(n==-1) { + n = uprv_strlen((const char*)src)+1; /* copy NUL */ + } + /* copy non-null */ + while(*src && n>0) { + *(dst++) = asciiFromEbcdic[*(src++)]; + n--; + } + /* pad */ + while(n>0) { + *(dst++) = 0; + n--; + } + return orig_dst; +} + +U_INTERNAL uint8_t* U_EXPORT2 +uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) +{ + uint8_t *orig_dst = dst; + + if(n==-1) { + n = uprv_strlen((const char*)src)+1; /* copy NUL */ + } + /* copy non-null */ + while(*src && n>0) { + char ch = ebcdicFromAscii[*(src++)]; + if(ch == 0) { + ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ + } + *(dst++) = ch; + n--; + } + /* pad */ + while(n>0) { + *(dst++) = 0; + n--; + } + return orig_dst; +} + diff --git a/icu4c/source/common/uinvchar.h b/icu4c/source/common/uinvchar.h index 9e246345429..0f405727dd0 100644 --- a/icu4c/source/common/uinvchar.h +++ b/icu4c/source/common/uinvchar.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -83,4 +83,23 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); # error Unknown charset family! #endif +/** + * Copy EBCDIC to ASCII + * @internal + * @see uprv_strncpy + */ +U_INTERNAL uint8_t* U_EXPORT2 +uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); + + +/** + * Copy ASCII to EBCDIC + * @internal + * @see uprv_strncpy + */ +U_INTERNAL uint8_t* U_EXPORT2 +uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); + + + #endif diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h index ea6685af915..f97adedac4f 100644 --- a/icu4c/source/common/unicode/uloc.h +++ b/icu4c/source/common/unicode/uloc.h @@ -281,22 +281,44 @@ #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 /** - * Character separating keywords from the locale string - * different for EBCDIC - TODO + * Invariant character separating keywords from the locale string * @stable ICU 2.8 */ #define ULOC_KEYWORD_SEPARATOR '@' + /** - * Character for assigning value to a keyword + * Unicode code point for '@' separating keywords from the locale string. + * @see ULOC_KEYWORD_SEPARATOR + * @internal + */ +#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40 + +/** + * Invariant character for assigning value to a keyword * @stable ICU 2.8 */ #define ULOC_KEYWORD_ASSIGN '=' + /** - * Character separating keywords + * Unicode code point for '=' for assigning value to a keyword. + * @see ULOC_KEYWORD_ASSIGN + * @internal + */ +#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D + +/** + * Invariant character separating keywords * @stable ICU 2.8 */ #define ULOC_KEYWORD_ITEM_SEPARATOR ';' +/** + * Unicode code point for ';' separating keywords + * @see ULOC_KEYWORD_ITEM_SEPARATOR + * @internal + */ +#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B + /** * Constants for *_getLocale() * Allow user to select whether she wants information on diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp index 249ab66c8f5..a2bd12ef41a 100644 --- a/icu4c/source/i18n/decimfmt.cpp +++ b/icu4c/source/i18n/decimfmt.cpp @@ -3414,7 +3414,7 @@ DecimalFormat::toPattern(UnicodeString& result, UBool localized) const } if (fRoundingIncrement != NULL) { for(i=0; igetCount(); ++i) { - roundingDigits.append((UChar)fRoundingIncrement->getDigit(i)); + roundingDigits.append(zero+(fRoundingIncrement->getDigitValue(i))); // Convert to Unicode digit } roundingDecimalPos = fRoundingIncrement->getDecimalAt(); } diff --git a/icu4c/source/i18n/digitlst.cpp b/icu4c/source/i18n/digitlst.cpp index 7e5eb8e1c1c..4a146f44281 100644 --- a/icu4c/source/i18n/digitlst.cpp +++ b/icu4c/source/i18n/digitlst.cpp @@ -47,6 +47,7 @@ /** * This is the zero digit. The base for the digits returned by getDigit() + * Note that it is the platform invariant digit, and is not Unicode. */ #define kZero '0' @@ -329,6 +330,14 @@ DigitList::getDigit(int32_t i) { return fDecNumber->lsu[count-i-1] + '0'; } +// copied from DigitList::getDigit() +uint8_t +DigitList::getDigitValue(int32_t i) { + int32_t count = fDecNumber->digits; + U_ASSERT(ilsu[count-i-1]; +} + // ------------------------------------- // Appends the digit to the digit list if it's not out of scope. // Ignores the digit, otherwise. diff --git a/icu4c/source/i18n/digitlst.h b/icu4c/source/i18n/digitlst.h index 7b1507696b0..c95fb5f13c1 100644 --- a/icu4c/source/i18n/digitlst.h +++ b/icu4c/source/i18n/digitlst.h @@ -113,7 +113,7 @@ template class U_I18N_API MaybeStackHeaderAndArray + static UBool compareWithNAN(double x, double y); static void doAssert(double expect, double got, const char *message); @@ -452,6 +455,56 @@ static void TestErrorName(void){ } } +#define AESTRNCPY_SIZE 13 + +static const char * dump_binline(uint8_t *bytes) { + static char buf[512]; + int32_t i; + for(i=0;i<13;i++) { + sprintf(buf+(i*3), "%02x ", bytes[i]); + } + return buf; +} + +static void Test_aestrncpy(int32_t line, const uint8_t *expect, const uint8_t *src, int32_t len) +{ + uint8_t str_buf[AESTRNCPY_SIZE] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + uint8_t *ret; + + log_verbose("\n%s:%d: Beginning test of uprv_aestrncpy(dst, src, %d)\n", __FILE__, line, len); + ret = uprv_aestrncpy(str_buf, src, len); + if(ret != str_buf) { + log_err("\n%s:%d: FAIL: uprv_aestrncpy returned %p expected %p\n", __FILE__, line, (void*)ret, (void*)str_buf); + } + if(!uprv_memcmp(str_buf, expect, AESTRNCPY_SIZE)) { + log_verbose("\n%s:%d: OK - compared OK.", __FILE__, line); + log_verbose("\n%s:%d: expected: %s", __FILE__, line, dump_binline(expect)); + log_verbose("\n%s:%d: got : %s\n", __FILE__, line, dump_binline(str_buf)); + } else { + log_err ("\n%s:%d: FAIL: uprv_aestrncpy output differs", __FILE__, line); + log_err ("\n%s:%d: expected: %s", __FILE__, line, dump_binline(expect)); + log_err ("\n%s:%d: got : %s\n", __FILE__, line, dump_binline(str_buf)); + } +} + +static void TestString(void) +{ + + uint8_t str_tst[AESTRNCPY_SIZE] = { 0x81, 0x4b, 0x5c, 0x82, 0x25, 0x00, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; + + uint8_t str_exp1[AESTRNCPY_SIZE] = { 0x61, 0x2e, 0x2a, 0x62, 0x0a, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + uint8_t str_exp2[AESTRNCPY_SIZE] = { 0x61, 0x2e, 0x2a, 0x62, 0x0a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + uint8_t str_exp3[AESTRNCPY_SIZE] = { 0x61, 0x2e, 0x2a, 0x62, 0x0a, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff }; + + + + /* test #1- copy with -1 length */ + Test_aestrncpy(__LINE__, str_exp1, str_tst, -1); + Test_aestrncpy(__LINE__, str_exp1, str_tst, 6); + Test_aestrncpy(__LINE__, str_exp2, str_tst, 5); + Test_aestrncpy(__LINE__, str_exp3, str_tst, 8); +} + void addPUtilTest(TestNode** root); static void addToolUtilTests(TestNode** root); @@ -464,7 +517,7 @@ addPUtilTest(TestNode** root) /* addTest(root, &testIEEEremainder, "putiltst/testIEEEremainder"); */ addTest(root, &TestErrorName, "putiltst/TestErrorName"); addTest(root, &TestPUtilAPI, "putiltst/TestPUtilAPI"); - + addTest(root, &TestString, "putiltst/TestString"); addToolUtilTests(root); } diff --git a/icu4c/source/test/intltest/dcfmtest.cpp b/icu4c/source/test/intltest/dcfmtest.cpp index 41983574e9b..00b1b6fe633 100644 --- a/icu4c/source/test/intltest/dcfmtest.cpp +++ b/icu4c/source/test/intltest/dcfmtest.cpp @@ -482,7 +482,7 @@ UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen amtReadNoBOM = amtRead - 3; if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) { // TODO: restore this check. - // errln("Test data file %s is missing its BOM", fileName); + errln("Test data file %s is missing its BOM", fileName); fileBufNoBOM = fileBuf; amtReadNoBOM = amtRead; } diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index f7c839b10e6..6730874c0ee 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -3244,19 +3244,22 @@ void DateFormatTest::Test6726(void) strm = fmtm->format(dt, strm); strs = fmts->format(dt, strs); + /* Locale data is not yet updated - if (strf.charAt(13) == UChar(' ')) { - errln((UnicodeString)"FAIL: Improper formated date: " + strf); + if (strf.charAt(13) == UChar(0x20)) { + errln((UnicodeString)"FAIL: Improper formatted date: " + strf); } - if (strl.charAt(10) == UChar(' ')) { - errln((UnicodeString)"FAIL: Improper formated date: " + strl); + if (strl.charAt(10) == UChar(0x20)) { + errln((UnicodeString)"FAIL: Improper formatted date: " + strl); } */ - if (strm.charAt(10) != UChar(' ')) { - errln((UnicodeString)"FAIL: Improper formated date: " + strm); + logln("strm.charAt(10)=%04X wanted 0x20\n", strm.charAt(10)); + if (strm.charAt(10) != UChar(0x0020)) { + errln((UnicodeString)"FAIL: Improper formatted date: " + strm ); } - if (strs.charAt(8) != UChar(' ')) { - errln((UnicodeString)"FAIL: Improper formated date: " + strs); + logln("strs.charAt(10)=%04X wanted 0x20\n", strs.charAt(8)); + if (strs.charAt(8) != UChar(0x0020)) { + errln((UnicodeString)"FAIL: Improper formatted date: " + strs); } delete fmtf; diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 987f677804c..22ac90daf23 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -2841,6 +2841,7 @@ NumberFormatTest::TestCurrencyFormatForMixParsing() { const CurrencyAmount* curramt = NULL; for (uint32_t i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) { UnicodeString stringToBeParsed = ctou(formats[i]); + logln(UnicodeString("stringToBeParsed: ") + stringToBeParsed); Formattable result; UErrorCode status = U_ZERO_ERROR; curFmt->parseObject(stringToBeParsed, result, status); @@ -2961,12 +2962,8 @@ NumberFormatTest::TestCurrencyIsoPluralFormat() { dataerrln((UnicodeString)"can not create instance, locale:" + localeString + ", style: " + k + " - " + u_errorName(status)); continue; } - // TODO: need to be UChar* UChar currencyCode[4]; - currencyCode[0] = currencyISOCode[0]; - currencyCode[1] = currencyISOCode[1]; - currencyCode[2] = currencyISOCode[2]; - currencyCode[3] = currencyISOCode[3]; + u_charsToUChars(currencyISOCode, currencyCode, 4); numFmt->setCurrency(currencyCode, status); if (U_FAILURE(status)) { delete numFmt; diff --git a/icu4c/source/test/intltest/numrgts.cpp b/icu4c/source/test/intltest/numrgts.cpp index f31e5a5feca..ab55bc2d2f9 100644 --- a/icu4c/source/test/intltest/numrgts.cpp +++ b/icu4c/source/test/intltest/numrgts.cpp @@ -2424,6 +2424,11 @@ void NumberFormatRegressionTest::Test4212072(void) { errln(UnicodeString("FAIL: ") + type[j] + avail[i].getDisplayName(l) + " -> \"" + pat + "\" -> \"" + f2.toPattern(p) + "\""); + } else { + UnicodeString l, p; + logln(UnicodeString("PASS: ") + type[j] + avail[i].getDisplayName(l) + + " -> \"" + pat + + "\""); } // Test toLocalizedPattern/applyLocalizedPattern round trip diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index bfda5a5ec79..eb7f462b058 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -2250,7 +2250,16 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator * // Scan through each test case, building up the string to be broken in testString, // and the positions that should be boundaries in the breakPositions vector. // + int spin = 0; while (tokenMatcher.find()) { + if(tokenMatcher.hitEnd()) { + /* Shouldnt Happen(TM). This means we didn't find the symbols we were looking for. + This occurred when the text file was corrupt (wasn't marked as UTF-8) + and caused an infinite loop here on EBCDIC systems! + */ + fprintf(stderr,"FAIL: hit end of file %s for the %8dth time- corrupt data file?\r", fileName, ++spin); + // return; + } if (tokenMatcher.start(1, status) >= 0) { // Scanned a divide sign, indicating a break position in the test data. if (testString.length()>0) { diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index d665549ebf4..0decca1126e 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -23,6 +23,8 @@ #include #include #include +#include "cstring.h" +#include "uinvchar.h" #define SUPPORT_MUTATING_INPUT_STRING 0 @@ -114,15 +116,84 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch } +/** + * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage + * into ASCII. + * @see utext_openUTF8 + */ +static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status); + +static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) { +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + return utext_openUTF8(ut, inv, length, status); +#else + char buf[1024]; + + uprv_aestrncpy((uint8_t*)buf, (const uint8_t*)inv, length); + + return utext_openUTF8(ut, buf, length, status); +#endif +} + //--------------------------------------------------------------------------- // // Error Checking / Reporting macros used in all of the tests. // //--------------------------------------------------------------------------- -#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("RegexTest failure at line %d. status=%s", \ -__LINE__, u_errorName(status)); return;}} -#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("RegexTest failure at line %d.\n", __LINE__);};} +static const char * dump_hex(UChar32 *s, int32_t len) { + static char buf[512]; + int32_t i; + for(i=0;imatcher(&input1, RegexPattern::PATTERN_IS_UTEXT, status); REGEX_CHECK_STATUS; REGEX_ASSERT(m1->lookingAt(status) == TRUE); - REGEX_ASSERT_UTEXT("abcdef this is a test", m1->inputText()); + const char str_abcdefthisisatest[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x00 }; /* abcdef this is a test */ + REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); m1->reset(&input2); REGEX_ASSERT(m1->lookingAt(status) == FALSE); - REGEX_ASSERT_UTEXT("not abc", m1->inputText()); + const char str_notabc[] = { 0x6e, 0x6f, 0x74, 0x20, 0x61, 0x62, 0x63, 0x00 }; /* not abc */ + REGEX_ASSERT_UTEXT_UTF8(str_notabc, m1->inputText()); m1->reset(&input1); - REGEX_ASSERT_UTEXT("abcdef this is a test", m1->inputText()); + REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); REGEX_ASSERT(m1->lookingAt(status) == TRUE); m1->reset(&empty); REGEX_ASSERT(m1->lookingAt(status) == FALSE); @@ -1649,7 +1755,7 @@ void RegexTest::API_Match_UTF8() { m1->reset(&input1); m1->reset(4, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("abcdef this is a test", m1->inputText()); + REGEX_ASSERT_UTEXT_UTF8(str_abcdefthisisatest, m1->inputText()); REGEX_ASSERT(m1->lookingAt(status) == TRUE); m1->reset(-1, status); @@ -1744,13 +1850,15 @@ void RegexTest::API_Match_UTF8() { UParseError pe; UErrorCode status=U_ZERO_ERROR; UText re=UTEXT_INITIALIZER; - utext_openUTF8(&re, "01(23(45)67)(.*)", -1, &status); + const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */ + utext_openUTF8(&re, str_01234567_pat, -1, &status); RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; UText input = UTEXT_INITIALIZER; - utext_openUTF8(&input, "0123456789", -1, &status); + const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ + utext_openUTF8(&input, str_0123456789, -1, &status); RegexMatcher *matcher = pat->matcher(&input, RegexPattern::PATTERN_IS_UTEXT, status); REGEX_CHECK_STATUS; @@ -1762,14 +1870,14 @@ void RegexTest::API_Match_UTF8() { int32_t actualStart = matcher->start(i, status); REGEX_CHECK_STATUS; if (actualStart != matchStarts[i]) { - errln("RegexTest failure at line %d, index %d. Expected %d, got %d\n", - __LINE__, i, matchStarts[i], actualStart); + errln("RegexTest failure at %s:%d, index %d. Expected %d, got %d\n", + __FILE__, __LINE__, i, matchStarts[i], actualStart); } int32_t actualEnd = matcher->end(i, status); REGEX_CHECK_STATUS; if (actualEnd != matchEnds[i]) { - errln("RegexTest failure at line %d index %d. Expected %d, got %d\n", - __LINE__, i, matchEnds[i], actualEnd); + errln("RegexTest failure at %s:%d index %d. Expected %d, got %d\n", + __FILE__, __LINE__, i, matchEnds[i], actualEnd); } } @@ -1787,51 +1895,54 @@ void RegexTest::API_Match_UTF8() { UText destText = UTEXT_INITIALIZER; utext_openUnicodeString(&destText, &dest, &status); UText *result; - + //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ result = matcher->group((UText *)NULL, RegexMatcher::MATCHER_DEST_IS_UTEXT, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("0123456789", result); + REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); utext_close(result); result = matcher->group(&destText, RegexMatcher::MATCHER_DEST_IS_UTEXT, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("0123456789", result); + REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); result = matcher->group(0, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("0123456789", result); + REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); utext_close(result); result = matcher->group(0, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("0123456789", result); + REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); result = matcher->group(1, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("234567", result); + const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */ + REGEX_ASSERT_UTEXT_UTF8(str_234567, result); utext_close(result); result = matcher->group(1, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("234567", result); + REGEX_ASSERT_UTEXT_UTF8(str_234567, result); result = matcher->group(2, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("45", result); + const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ + REGEX_ASSERT_UTEXT_UTF8(str_45, result); utext_close(result); result = matcher->group(2, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("45", result); + REGEX_ASSERT_UTEXT_UTF8(str_45, result); result = matcher->group(3, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("89", result); + const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ + REGEX_ASSERT_UTEXT_UTF8(str_89, result); utext_close(result); result = matcher->group(3, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("89", result); + REGEX_ASSERT_UTEXT_UTF8(str_89, result); REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR); @@ -1854,12 +1965,14 @@ void RegexTest::API_Match_UTF8() { UParseError pe; UErrorCode status=U_ZERO_ERROR; UText re=UTEXT_INITIALIZER; - utext_openUTF8(&re, "abc", -1, &status); + const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ + utext_openUTF8(&re, str_abc, -1, &status); RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; UText input = UTEXT_INITIALIZER; - utext_openUTF8(&input, ".abc..abc...abc..", -1, &status); + const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ + utext_openUTF8(&input, str_abcabcabc, -1, &status); // 012345678901234567 RegexMatcher *matcher = pat->matcher(&input, RegexPattern::PATTERN_IS_UTEXT, status); @@ -1913,13 +2026,15 @@ void RegexTest::API_Match_UTF8() { UParseError pe; UErrorCode status=U_ZERO_ERROR; UText re=UTEXT_INITIALIZER; - utext_openUTF8(&re, ".*?(?:(\\Gabc)|(abc))", -1, &status); + const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */ + utext_openUTF8(&re, str_Gabcabc, -1, &status); RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; UText input = UTEXT_INITIALIZER; - utext_openUTF8(&input, ".abcabc.abc..", -1, &status); + const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ + utext_openUTF8(&input, str_abcabcabc, -1, &status); // 012345678901234567 RegexMatcher *matcher = pat->matcher(&input, RegexPattern::PATTERN_IS_UTEXT, status); @@ -2035,8 +2150,10 @@ void RegexTest::API_Match_UTF8() { UErrorCode status = U_ZERO_ERROR; UText testPattern = UTEXT_INITIALIZER; UText testText = UTEXT_INITIALIZER; - utext_openUTF8(&testPattern, ".*", -1, &status); - utext_openUTF8(&testText, "This is test data", -1, &status); + regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); + REGEX_VERBOSE_TEXT(&testPattern); + regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status); + REGEX_VERBOSE_TEXT(&testText); RegexMatcher m(&testPattern, &testText, 0, status); REGEX_CHECK_STATUS; @@ -2056,7 +2173,8 @@ void RegexTest::API_Match_UTF8() { REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); - utext_openUTF8(&testText, "short", -1, &status); + regextst_openUTF8FromInvariant(&testText, "short", -1, &status); + REGEX_VERBOSE_TEXT(&testText); m.reset(&testText); REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); @@ -2094,8 +2212,10 @@ void RegexTest::API_Match_UTF8() { UErrorCode status = U_ZERO_ERROR; UText testPattern = UTEXT_INITIALIZER; UText testText = UTEXT_INITIALIZER; - utext_openUTF8(&testPattern, ".*", -1, &status); - utext_openUTF8(&testText, "aabb", -1, &status); + const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ + const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ + utext_openUTF8(&testPattern, str_, -1, &status); + utext_openUTF8(&testText, str_aabb, -1, &status); RegexMatcher m1(&testPattern, &testText, 0, status); REGEX_ASSERT(m1.lookingAt(status) == TRUE); @@ -2104,7 +2224,8 @@ void RegexTest::API_Match_UTF8() { REGEX_CHECK_STATUS; status = U_ZERO_ERROR; - utext_openUTF8(&testPattern, "a*", -1, &status); + const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ + utext_openUTF8(&testPattern, str_a, -1, &status); RegexMatcher m2(&testPattern, &testText, 0, status); REGEX_ASSERT(m2.lookingAt(status) == TRUE); REGEX_ASSERT(m2.hitEnd() == FALSE); @@ -2112,7 +2233,8 @@ void RegexTest::API_Match_UTF8() { REGEX_CHECK_STATUS; status = U_ZERO_ERROR; - utext_openUTF8(&testPattern, ".*$", -1, &status); + const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ + utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); RegexMatcher m3(&testPattern, &testText, 0, status); REGEX_ASSERT(m3.lookingAt(status) == TRUE); REGEX_ASSERT(m3.hitEnd() == TRUE); @@ -2140,14 +2262,17 @@ void RegexTest::API_Replace_UTF8() { UErrorCode status=U_ZERO_ERROR; UText re=UTEXT_INITIALIZER; - utext_openUTF8(&re, "abc", -1, &status); + regextst_openUTF8FromInvariant(&re, "abc", -1, &status); + REGEX_VERBOSE_TEXT(&re); RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; - char data[] = ".abc..abc...abc.."; + char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ // 012345678901234567 UText dataText = UTEXT_INITIALIZER; utext_openUTF8(&dataText, data, -1, &status); + REGEX_CHECK_STATUS; + REGEX_VERBOSE_TEXT(&dataText); RegexMatcher *matcher = pat->matcher(&dataText, RegexPattern::PATTERN_IS_UTEXT, status); // @@ -2160,51 +2285,56 @@ void RegexTest::API_Replace_UTF8() { UText replText = UTEXT_INITIALIZER; - utext_openUTF8(&replText, "yz", -1, &status); + const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ + utext_openUTF8(&replText, str_yz, -1, &status); + REGEX_VERBOSE_TEXT(&replText); result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT(".yz..abc...abc..", result); + const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ + REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); utext_close(result); result = matcher->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT(".yz..abc...abc..", result); + REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT(".yz..yz...yz..", result); + const char str_yzyzyz[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x00 }; /* .yz..yz...yz.. */ + REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT(".yz..yz...yz..", result); + REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); // // Plain vanilla non-matches. // - utext_openUTF8(&dataText, ".abx..abx...abx..", -1, &status); + const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */ + utext_openUTF8(&dataText, str_abxabxabx, -1, &status); matcher->reset(&dataText); result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT(".abx..abx...abx..", result); + REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); utext_close(result); result = matcher->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT(".abx..abx...abx..", result); + REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT(".abx..abx...abx..", result); + REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT(".abx..abx...abx..", result); + REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); // // Empty source string @@ -2214,21 +2344,21 @@ void RegexTest::API_Replace_UTF8() { result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("", result); + REGEX_ASSERT_UTEXT_UTF8("", result); utext_close(result); result = matcher->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("", result); + REGEX_ASSERT_UTEXT_UTF8("", result); result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("", result); + REGEX_ASSERT_UTEXT_UTF8("", result); utext_close(result); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("", result); + REGEX_ASSERT_UTEXT_UTF8("", result); // // Empty substitution string @@ -2239,95 +2369,107 @@ void RegexTest::API_Replace_UTF8() { utext_openUTF8(&replText, NULL, 0, &status); result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("...abc...abc..", result); + const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */ + REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); utext_close(result); result = matcher->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("...abc...abc..", result); + REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("........", result); + const char str_dots[] = { 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x00 }; /* ........ */ + REGEX_ASSERT_UTEXT_UTF8(str_dots, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("........", result); + REGEX_ASSERT_UTEXT_UTF8(str_dots, result); // // match whole string // - utext_openUTF8(&dataText, "abc", -1, &status); + const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ + utext_openUTF8(&dataText, str_abc, -1, &status); matcher->reset(&dataText); - utext_openUTF8(&replText, "xyz", -1, &status); + const char str_xyz[] = { 0x78, 0x79, 0x7a, 0x00 }; /* xyz */ + utext_openUTF8(&replText, str_xyz, -1, &status); result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("xyz", result); + REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("xyz", result); + REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("xyz", result); + REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("xyz", result); + REGEX_ASSERT_UTEXT_UTF8(str_xyz, result); // // Capture Group, simple case // - utext_openUTF8(&re, "a(..)", -1, &status); + const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ + utext_openUTF8(&re, str_add, -1, &status); RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; - - utext_openUTF8(&dataText, "abcdefg", -1, &status); + + const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */ + utext_openUTF8(&dataText, str_abcdefg, -1, &status); RegexMatcher *matcher2 = pat2->matcher(&dataText, RegexPattern::PATTERN_IS_UTEXT, status); REGEX_CHECK_STATUS; - utext_openUTF8(&replText, "$1$1", -1, &status); + const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ + utext_openUTF8(&replText, str_11, -1, &status); result = matcher2->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("bcbcdefg", result); + const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* bcbcdefg */ + REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher2->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("bcbcdefg", result); + REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); - utext_openUTF8(&replText, "The value of \\$1 is $1.", -1, &status); + regextst_openUTF8FromInvariant(&replText, "The value of \\$1 is $1.", -1, &status); result = matcher2->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("The value of $1 is bc.defg", result); + const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */ + REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher2->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("The value of $1 is bc.defg", result); + REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); - utext_openUTF8(&replText, "$ by itself, no group number $$$", -1, &status); + const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */ + utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); result = matcher2->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("$ by itself, no group number $$$defg", result); + const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ + REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher2->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("$ by itself, no group number $$$defg", result); + REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); - unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE + unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */ + //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE // 012345678901234567890123456 supplDigitChars[22] = 0xF0; supplDigitChars[23] = 0x9D; @@ -2337,47 +2479,53 @@ void RegexTest::API_Replace_UTF8() { result = matcher2->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("Supplemental Digit 1 bc.defg", result); + const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */ + REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher2->replaceFirst(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("Supplemental Digit 1 bc.defg", result); - - utext_openUTF8(&replText, "bad capture group number $5...", -1, &status); + REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); + const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */ + utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)), U_INDEX_OUTOFBOUNDS_ERROR); -// REGEX_ASSERT_UTEXT("abcdefg", result); +// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR); REGEX_ASSERT(result == &destText); -// REGEX_ASSERT_UTEXT("abcdefg", result); +// REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); // // Replacement String with \u hex escapes // { - utext_openUTF8(&dataText, "abc 1 abc 2 abc 3", -1, &status); - utext_openUTF8(&replText, "--\\u0043--", -1, &status); + const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */ + const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */ + utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); + utext_openUTF8(&replText, str_u0043, -1, &status); matcher->reset(&dataText); result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("--C-- 1 --C-- 2 --C-- 3", result); + const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ + REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT("--C-- 1 --C-- 2 --C-- 3", result); + REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); } { - utext_openUTF8(&dataText, "abc !", -1, &status); - utext_openUTF8(&replText, "--\\U00010000--", -1, &status); + const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ + utext_openUTF8(&dataText, str_abc, -1, &status); + const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ + utext_openUTF8(&replText, str_U00010000, -1, &status); matcher->reset(&dataText); - unsigned char expected[] = "--xxxx-- !"; // \U00010000, "LINEAR B SYLLABLE B008 A" + unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A" // 0123456789 expected[2] = 0xF0; expected[3] = 0x90; @@ -2386,13 +2534,13 @@ void RegexTest::API_Replace_UTF8() { result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT((char *)expected, result); + REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); utext_close(result); utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status); result = matcher->replaceAll(&replText, &destText, status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); - REGEX_ASSERT_UTEXT((char *)expected, result); + REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); } // TODO: need more through testing of capture substitutions. @@ -2400,9 +2548,12 @@ void RegexTest::API_Replace_UTF8() { // { status = U_ZERO_ERROR; - utext_openUTF8(&re, "ss(.*?)ee", -1, &status); - utext_openUTF8(&dataText, "The matches start with ss and end with ee ss stuff ee fin", -1, &status); - utext_openUTF8(&replText, "ooh", -1, &status); +const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */ +const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */ +const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ + utext_openUTF8(&re, str_ssee, -1, &status); + utext_openUTF8(&dataText, str_blah, -1, &status); + utext_openUTF8(&replText, str_ooh, -1, &status); RegexMatcher m(&re, 0, status); REGEX_CHECK_STATUS; @@ -2417,7 +2568,8 @@ void RegexTest::API_Replace_UTF8() { m.find(); m.appendReplacement(&resultText, &replText, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("The matches start with ss and end with ee ooh", &resultText); + const char str_blah2[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ + REGEX_ASSERT_UTEXT_UTF8(str_blah2, &resultText); // After a reset into the interior of a string, appendReplacement still starts at beginning. status = U_ZERO_ERROR; @@ -2428,7 +2580,8 @@ void RegexTest::API_Replace_UTF8() { m.find(); m.appendReplacement(&resultText, &replText, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("The matches start with ss and end with ee ooh", &resultText); + const char str_blah3[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ + REGEX_ASSERT_UTEXT_UTF8(str_blah3, &resultText); // find() at interior of string, appendReplacement still starts at beginning. status = U_ZERO_ERROR; @@ -2439,10 +2592,12 @@ void RegexTest::API_Replace_UTF8() { m.find(); m.appendReplacement(&resultText, &replText, status); REGEX_CHECK_STATUS; - REGEX_ASSERT_UTEXT("The matches start with ss and end with ee ooh", &resultText); + const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ + REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); m.appendTail(&resultText); - REGEX_ASSERT_UTEXT("The matches start with ss and end with ee ooh fin", &resultText); + const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ + REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); utext_close(&resultText); } @@ -2477,8 +2632,10 @@ void RegexTest::API_Pattern_UTF8() { UErrorCode status = U_ZERO_ERROR; UParseError pe; - utext_openUTF8(&re1, "abc[a-l][m-z]", -1, &status); - utext_openUTF8(&re2, "def", -1, &status); + const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ + const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ + utext_openUTF8(&re1, str_abcalmz, -1, &status); + utext_openUTF8(&re2, str_def, -1, &status); RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); @@ -2535,7 +2692,8 @@ void RegexTest::API_Pattern_UTF8() { { UErrorCode status = U_ZERO_ERROR; UText pattern = UTEXT_INITIALIZER; - utext_openUTF8(&pattern, "\\p{L}+", -1, &status); + const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */ + utext_openUTF8(&pattern, str_pL, -1, &status); RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); RegexPattern *pClone = pSource->clone(); @@ -2544,7 +2702,8 @@ void RegexTest::API_Pattern_UTF8() { REGEX_CHECK_STATUS; UText input = UTEXT_INITIALIZER; - utext_openUTF8(&input, "Hello World", -1, &status); + const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ + utext_openUTF8(&input, str_HelloWorld, -1, &status); mFromClone->reset(&input); REGEX_ASSERT(mFromClone->find() == TRUE); REGEX_ASSERT(mFromClone->group(status) == "Hello"); @@ -2566,30 +2725,35 @@ void RegexTest::API_Pattern_UTF8() { UText pattern = UTEXT_INITIALIZER; UText input = UTEXT_INITIALIZER; - utext_openUTF8(&input, "random input", -1, &status); + const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ + utext_openUTF8(&input, str_randominput, -1, &status); - utext_openUTF8(&pattern, ".*", -1, &status); + const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ + utext_openUTF8(&pattern, str_dotstar, -1, &status); REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE); REGEX_CHECK_STATUS; - utext_openUTF8(&pattern, "abc", -1, &status); + const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ + utext_openUTF8(&pattern, str_abc, -1, &status); REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); REGEX_CHECK_STATUS; - utext_openUTF8(&pattern, ".*nput", -1, &status); + const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */ + utext_openUTF8(&pattern, str_nput, -1, &status); REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); REGEX_CHECK_STATUS; - utext_openUTF8(&pattern, "random input", -1, &status); + utext_openUTF8(&pattern, str_randominput, -1, &status); REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); REGEX_CHECK_STATUS; - utext_openUTF8(&pattern, ".*u", -1, &status); + const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ + utext_openUTF8(&pattern, str_u, -1, &status); REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); REGEX_CHECK_STATUS; - utext_openUTF8(&input, "abc", -1, &status); - utext_openUTF8(&pattern, "abc", -1, &status); + utext_openUTF8(&input, str_abc, -1, &status); + utext_openUTF8(&pattern, str_abc, -1, &status); status = U_INDEX_OUTOFBOUNDS_ERROR; REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); @@ -2603,7 +2767,8 @@ void RegexTest::API_Pattern_UTF8() { // Split() // status = U_ZERO_ERROR; - utext_openUTF8(&re1, " +", -1, &status); + const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ + utext_openUTF8(&re1, str_spaceplus, -1, &status); pat1 = RegexPattern::compile(&re1, pe, status); REGEX_CHECK_STATUS; UnicodeString fields[10]; @@ -2658,7 +2823,7 @@ void RegexTest::API_Pattern_UTF8() { delete pat1; // split, with a pattern with (capture) - utext_openUTF8(&re1, "<(\\w*)>", -1, &status); + regextst_openUTF8FromInvariant(&re1, "<(\\w*)>", -1, &status); pat1 = RegexPattern::compile(&re1, pe, status); REGEX_CHECK_STATUS; @@ -2734,7 +2899,7 @@ void RegexTest::API_Pattern_UTF8() { status = U_ZERO_ERROR; delete pat1; - utext_openUTF8(&re1, "([-,])", -1, &status); + regextst_openUTF8FromInvariant(&re1, "([-,])", -1, &status); pat1 = RegexPattern::compile(&re1, pe, status); REGEX_CHECK_STATUS; n = pat1->split("1-10,20", fields, 10, status); @@ -2753,14 +2918,14 @@ void RegexTest::API_Pattern_UTF8() { // pat1 = new RegexPattern(); REGEX_ASSERT(pat1->pattern() == ""); - REGEX_ASSERT_UTEXT("", pat1->patternText()); + REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText()); delete pat1; - utext_openUTF8(&re1, "(Hello, world)*", -1, &status); + regextst_openUTF8FromInvariant(&re1, "(Hello, world)*", -1, &status); pat1 = RegexPattern::compile(&re1, pe, status); REGEX_CHECK_STATUS; REGEX_ASSERT(pat1->pattern() == "(Hello, world)*"); - REGEX_ASSERT_UTEXT("(Hello, world)*", pat1->patternText()); + REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText()); delete pat1; utext_close(&re1); @@ -2836,7 +3001,7 @@ void RegexTest::Extended() { while (lineMat.find()) { lineNum++; if (U_FAILURE(status)) { - errln("line %d: ICU Error \"%s\"", lineNum, u_errorName(status)); + errln("%s:%d: ICU Error \"%s\"", srcPath, lineNum, u_errorName(status)); } status = U_ZERO_ERROR; @@ -2864,7 +3029,7 @@ void RegexTest::Extended() { testPattern = quotedStuffMat.group(2, status); testLine.remove(0, quotedStuffMat.end(0, status)); } else { - errln("Bad pattern (missing quotes?) at test file line %d", lineNum); + errln("Bad pattern (missing quotes?) at %s:%d", srcPath, lineNum); continue; } @@ -2907,7 +3072,7 @@ void RegexTest::Extended() { // // Run the test // - regex_find(testPattern, testFlags, matchString, lineNum); + regex_find(testPattern, testFlags, matchString, srcPath, lineNum); } delete [] testData; @@ -2946,6 +3111,7 @@ static void set(UVector &vec, int32_t val, UnicodeString index) { void RegexTest::regex_find(const UnicodeString &pattern, const UnicodeString &flags, const UnicodeString &inputString, + const char *srcPath, int32_t line) { UnicodeString unEscapedInput; UnicodeString deTaggedInput; @@ -3056,7 +3222,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, if (UTF8Pattern == NULL) { // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine - logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for line %d", line); + logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line); status = U_ZERO_ERROR; } @@ -3154,7 +3320,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, if (UTF8Matcher == NULL) { // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine - logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for line %d", line); + logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line); status = U_ZERO_ERROR; } } @@ -4552,12 +4718,12 @@ void RegexTest::PreAllocatedUTextCAPI () { UText *resultText; status = U_ZERO_ERROR; - utext_openUTF8(&text1, "abcccd", -1, &status); - utext_openUTF8(&text2, "abcccxd", -1, &status); + regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); + regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); utext_openUChars(&text2, text2Chars, -1, &status); - utext_openUTF8(&patternText, "abc*d", -1, &status); + regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); re = uregex_openUText(&patternText, 0, NULL, &status); /* First set a UText */ @@ -4612,14 +4778,14 @@ void RegexTest::PreAllocatedUTextCAPI () { actual = uregex_groupUText(re, 0, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(actual == &bufferText); - REGEX_ASSERT_UTEXT("abc interior def", actual); + REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual); /* Capture group #1. Should succeed. */ status = U_ZERO_ERROR; actual = uregex_groupUText(re, 1, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(actual == &bufferText); - REGEX_ASSERT_UTEXT(" interior ", actual); + REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual); /* Capture group out of range. Error. */ status = U_ZERO_ERROR; @@ -4643,7 +4809,7 @@ void RegexTest::PreAllocatedUTextCAPI () { status = U_ZERO_ERROR; u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text2, "No match here.", sizeof(text2)/2); - utext_openUTF8(&replText, "<$1>", -1, &status); + regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status); REGEX_CHECK_STATUS; @@ -4654,7 +4820,7 @@ void RegexTest::PreAllocatedUTextCAPI () { result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &bufferText); - REGEX_ASSERT_UTEXT("Replace x1x x...x.", result); + REGEX_ASSERT_UTEXT_INVARIANT("Replace x1x x...x.", result); /* No match. Text should copy to output with no changes. */ uregex_setText(re, text2, -1, &status); @@ -4662,16 +4828,16 @@ void RegexTest::PreAllocatedUTextCAPI () { result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &bufferText); - REGEX_ASSERT_UTEXT("No match here.", result); + REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); /* Unicode escapes */ uregex_setText(re, text1, -1, &status); - utext_openUTF8(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status); + regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status); utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &bufferText); - REGEX_ASSERT_UTEXT("Replace \\AaaB$a x1x x...x.", result); + REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); uregex_close(re); utext_close(&replText); @@ -4690,7 +4856,7 @@ void RegexTest::PreAllocatedUTextCAPI () { status = U_ZERO_ERROR; u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text2, "No match here.", sizeof(text2)/2); - utext_openUTF8(&replText, "<$1>", -1, &status); + regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status); REGEX_CHECK_STATUS; @@ -4701,7 +4867,7 @@ void RegexTest::PreAllocatedUTextCAPI () { result = uregex_replaceAllUText(re, &replText, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &bufferText); - REGEX_ASSERT_UTEXT("Replace <1> <...>.", result); + REGEX_ASSERT_UTEXT_INVARIANT("Replace <1> <...>.", result); /* No match. Text should copy to output with no changes. */ uregex_setText(re, text2, -1, &status); @@ -4709,7 +4875,7 @@ void RegexTest::PreAllocatedUTextCAPI () { result = uregex_replaceAllUText(re, &replText, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(result == &bufferText); - REGEX_ASSERT_UTEXT("No match here.", result); + REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); uregex_close(re); utext_close(&replText); diff --git a/icu4c/source/test/intltest/regextst.h b/icu4c/source/test/intltest/regextst.h index 0758c2d7eb8..cbae0e4a152 100644 --- a/icu4c/source/test/intltest/regextst.h +++ b/icu4c/source/test/intltest/regextst.h @@ -46,10 +46,11 @@ public: // The following functions are internal to the regexp tests. virtual void assertUText(const char *expected, UText *actual, const char *file, int line); + virtual void assertUTextInvariant(const char *invariant, UText *actual, const char *file, int line); virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line); virtual UBool doRegexLMTestUTF8(const char *pat, const char *text, UBool looking, UBool match, int32_t line); virtual void regex_find(const UnicodeString &pat, const UnicodeString &flags, - const UnicodeString &input, int32_t line); + const UnicodeString &input, const char *srcPath, int32_t line); virtual void regex_err(const char *pat, int32_t errline, int32_t errcol, UErrorCode expectedStatus, int32_t line); virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, const char *charset, UErrorCode &status); diff --git a/icu4c/source/test/intltest/sdtfmtts.cpp b/icu4c/source/test/intltest/sdtfmtts.cpp index 6ca0748be54..0d566e0cce2 100644 --- a/icu4c/source/test/intltest/sdtfmtts.cpp +++ b/icu4c/source/test/intltest/sdtfmtts.cpp @@ -58,13 +58,13 @@ void IntlTestSimpleDateFormatAPI::testAPI(/*char *par*/) } status = U_ZERO_ERROR; - const UnicodeString pattern("yyyy.MM.dd G 'at' hh:mm:ss z"); - const UnicodeString override("y=hebr;d=thai;s=arab"); - const UnicodeString override_bogus("y=hebr;d=thai;s=bogus"); + const UnicodeString pattern("yyyy.MM.dd G 'at' hh:mm:ss z", ""); + const UnicodeString override("y=hebr;d=thai;s=arab", ""); /* use invariant converter */ + const UnicodeString override_bogus("y=hebr;d=thai;s=bogus", ""); SimpleDateFormat pat(pattern, status); if(U_FAILURE(status)) { - errln("ERROR: Could not create SimpleDateFormat (pattern)"); + errln("ERROR: Could not create SimpleDateFormat (pattern) - %s", u_errorName(status)); } status = U_ZERO_ERROR; @@ -93,18 +93,20 @@ void IntlTestSimpleDateFormatAPI::testAPI(/*char *par*/) } status = U_ZERO_ERROR; + logln(UnicodeString("Override with: ") + override); SimpleDateFormat ovr1(pattern, override, status); if(U_FAILURE(status)) { - errln("ERROR: Could not create SimpleDateFormat (pattern, override)"); + errln("ERROR: Could not create SimpleDateFormat (pattern, override) - %s", u_errorName(status)); } status = U_ZERO_ERROR; SimpleDateFormat ovr2(pattern, override, Locale::getGerman(), status); if(U_FAILURE(status)) { - errln("ERROR: Could not create SimpleDateFormat (pattern, override, locale)"); + errln("ERROR: Could not create SimpleDateFormat (pattern, override, locale) - %s", u_errorName(status)); } status = U_ZERO_ERROR; + logln(UnicodeString("Override with: ") + override_bogus); SimpleDateFormat ovr3(pattern, override_bogus, Locale::getGerman(), status); if(U_SUCCESS(status)) { errln("ERROR: Should not have been able to create SimpleDateFormat (pattern, override, locale) with a bogus override"); diff --git a/icu4c/source/test/intltest/selfmts.cpp b/icu4c/source/test/intltest/selfmts.cpp index 31b39ae11e2..46aca86c2de 100644 --- a/icu4c/source/test/intltest/selfmts.cpp +++ b/icu4c/source/test/intltest/selfmts.cpp @@ -14,12 +14,18 @@ #include "unicode/selfmt.h" #include "stdio.h" -const UnicodeString SIMPLE_PATTERN = UnicodeString("feminine {feminineVerbValue} other{otherVerbValue}"); +#define SIMPLE_PATTERN_STRING "feminine {feminineVerbValue} other{otherVerbValue}" + + #define SELECT_PATTERN_DATA 4 #define SELECT_SYNTAX_DATA 10 #define EXP_FORMAT_RESULT_DATA 12 #define NUM_OF_FORMAT_ARGS 3 +#define VERBOSE_INT(x) {logln("%s:%d: int %s=%d\n", __FILE__, __LINE__, #x, (x));} +#define VERBOSE_USTRING(text) {logln("%s:%d: UnicodeString %s(%d) = ", __FILE__, __LINE__, #text, text.length()); logln(UnicodeString(" \"")+text+UnicodeString("\";"));} + + void SelectFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) { if (exec) logln("TestSuite SelectFormat"); @@ -36,6 +42,8 @@ void SelectFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n */ void SelectFormatTest::selectFormatUnitTest(/*char *par*/) { + const UnicodeString SIMPLE_PATTERN(SIMPLE_PATTERN_STRING); /* Don't static init this! */ + UnicodeString patternTestData[SELECT_PATTERN_DATA] = { UNICODE_STRING_SIMPLE("fem {femValue} other{even}"), UNICODE_STRING_SIMPLE("other{odd or even}"), @@ -99,6 +107,7 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/) }; UErrorCode status = U_ZERO_ERROR; + VERBOSE_USTRING(SIMPLE_PATTERN); SelectFormat* selFmt = new SelectFormat( SIMPLE_PATTERN , status); if (U_FAILURE(status)) { dataerrln("ERROR: SelectFormat Unit Test constructor failed in unit tests.- exitting"); @@ -109,6 +118,8 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/) logln("SelectFormat Unit Test : Testing SelectFormat pattern syntax."); for (int32_t i=0; iapplyPattern(checkSyntaxData[i], status); if( status!= expErrorCodes[i] ){ errln("\nERROR: Unexpected result - SelectFormat Unit Test failed to detect syntax error with pattern: "+checkSyntaxData[i]+" and expected status="+ u_errorName(expErrorCodes[i]) + " and resulted status="+u_errorName(status)); @@ -188,6 +199,7 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/) */ void SelectFormatTest::selectFormatAPITest(/*char *par*/) { + const UnicodeString SIMPLE_PATTERN(SIMPLE_PATTERN_STRING); /* Don't static init this! */ int numOfConstructors =3; UErrorCode status[3]; SelectFormat* selFmt[3] = { NULL, NULL, NULL }; @@ -200,7 +212,7 @@ void SelectFormatTest::selectFormatAPITest(/*char *par*/) selFmt[0]= new SelectFormat(SIMPLE_PATTERN, status[0]); if ( U_FAILURE(status[0]) ) { - errln("ERROR: SelectFormat API test constructor with pattern and status failed!"); + errln("ERROR: SelectFormat API test constructor with pattern and status failed! with %s\n", u_errorName(status[0])); return; } diff --git a/icu4c/source/test/testdata/GraphemeBreakTest.txt b/icu4c/source/test/testdata/GraphemeBreakTest.txt index 924528aaa86..79ef404537f 100644 --- a/icu4c/source/test/testdata/GraphemeBreakTest.txt +++ b/icu4c/source/test/testdata/GraphemeBreakTest.txt @@ -1,3 +1,4 @@ +# Note: Please make sure that this utf-8 file contains a BOM. # GraphemeBreakTest-5.2.0.txt # Date: 2009-05-28, 20:37:56 GMT [MD] # diff --git a/icu4c/source/test/testdata/LineBreakTest.txt b/icu4c/source/test/testdata/LineBreakTest.txt index d9803377c73..22602c937ba 100644 --- a/icu4c/source/test/testdata/LineBreakTest.txt +++ b/icu4c/source/test/testdata/LineBreakTest.txt @@ -1,3 +1,4 @@ +# Note: Please make sure that this utf-8 file contains a BOM. # LineBreakTest-5.2.0.txt # Date: 2009-08-26, 01:14:01 GMT [MD] # diff --git a/icu4c/source/test/testdata/SentenceBreakTest.txt b/icu4c/source/test/testdata/SentenceBreakTest.txt index 1b75d75a1b8..a78f7695a1a 100644 --- a/icu4c/source/test/testdata/SentenceBreakTest.txt +++ b/icu4c/source/test/testdata/SentenceBreakTest.txt @@ -1,3 +1,4 @@ +# Note: Please make sure that this utf-8 file contains a BOM. # SentenceBreakTest-5.2.0.txt # Date: 2009-05-28, 20:38:05 GMT [MD] # diff --git a/icu4c/source/test/testdata/WordBreakTest.txt b/icu4c/source/test/testdata/WordBreakTest.txt index f3cb41ef997..5211f5dfc80 100644 --- a/icu4c/source/test/testdata/WordBreakTest.txt +++ b/icu4c/source/test/testdata/WordBreakTest.txt @@ -1,3 +1,4 @@ +# Note: Please make sure that this utf-8 file contains a BOM. # WordBreakTest-5.2.0.txt # Date: 2009-05-28, 20:38:06 GMT [MD] # diff --git a/icu4c/source/test/testdata/dcfmtest.txt b/icu4c/source/test/testdata/dcfmtest.txt index 31fb770ea6f..aed22fa532c 100644 --- a/icu4c/source/test/testdata/dcfmtest.txt +++ b/icu4c/source/test/testdata/dcfmtest.txt @@ -1,4 +1,5 @@ - + +# Note: Please make sure that this utf-8 file contains a BOM. # Copyright (c) 2010, International Business Machines Corporation and # others. All Rights Reserved. #