From a8678f38b6723aff90a5c6854247f0b675c9f580 Mon Sep 17 00:00:00 2001 From: Eric Mader Date: Tue, 17 Jun 2008 00:55:35 +0000 Subject: [PATCH] ICU-6337 Use invariant code page to create UnicodeStrings from string literals that contain "\\". X-SVN-Rev: 24201 --- icu4c/source/test/intltest/csdetest.cpp | 2 +- icu4c/source/test/intltest/dtfmttst.cpp | 2 +- icu4c/source/test/intltest/itrbnf.cpp | 4 +- icu4c/source/test/intltest/jamotest.cpp | 8 +- icu4c/source/test/intltest/numfmtst.cpp | 4 +- icu4c/source/test/intltest/numrgts.cpp | 4 +- icu4c/source/test/intltest/rbbiapts.cpp | 6 +- icu4c/source/test/intltest/rbbitst.cpp | 177 +++++++++++----------- icu4c/source/test/intltest/regextst.cpp | 54 +++---- icu4c/source/test/intltest/ssearch.cpp | 8 +- icu4c/source/test/intltest/testidna.cpp | 8 +- icu4c/source/test/intltest/transapi.cpp | 6 +- icu4c/source/test/intltest/transrt.cpp | 18 +-- icu4c/source/test/intltest/transtst.cpp | 84 +++++----- icu4c/source/test/intltest/usettest.cpp | 70 ++++----- icu4c/source/test/intltest/utxttest.cpp | 6 +- icu4c/source/tools/toolutil/xmlparser.cpp | 24 +-- 17 files changed, 242 insertions(+), 243 deletions(-) diff --git a/icu4c/source/test/intltest/csdetest.cpp b/icu4c/source/test/intltest/csdetest.cpp index c1f49c85b0a..1e3b64ddc32 100644 --- a/icu4c/source/test/intltest/csdetest.cpp +++ b/icu4c/source/test/intltest/csdetest.cpp @@ -426,7 +426,7 @@ void CharsetDetectionTest::C1BytesTest() #if !UCONFIG_NO_LEGACY_CONVERSION UErrorCode status = U_ZERO_ERROR; UnicodeString sISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly."; - UnicodeString ssWindows = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes."; + UnicodeString ssWindows("This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.", -1, US_INV); UnicodeString sWindows = ssWindows.unescape(); int32_t lISO = 0, lWindows = 0; char *bISO = extractBytes(sISO, "ISO-8859-1", lISO); diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index aa13785ff85..ab5a5cc79b8 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -1047,7 +1047,7 @@ DateFormatTest::TestLocaleDateFormat() // Bug 495 DateFormat::FULL, Locale::getFrench()); DateFormat *dfUS = DateFormat::createDateTimeInstance(DateFormat::FULL, DateFormat::FULL, Locale::getUS()); - UnicodeString expectedFRENCH ( "lundi 15 septembre 1997 00:00:00 \\u00C9tats-Unis (Los Angeles)" ); + UnicodeString expectedFRENCH ( "lundi 15 septembre 1997 00:00:00 \\u00C9tats-Unis (Los Angeles)", -1, US_INV ); expectedFRENCH = expectedFRENCH.unescape(); //UnicodeString expectedUS ( "Monday, September 15, 1997 12:00:00 o'clock AM PDT" ); UnicodeString expectedUS ( "Monday, September 15, 1997 12:00:00 AM PT" ); diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index 58233a39255..ceb16039cbf 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 1996-2007, International Business Machines Corporation and * + * Copyright (C) 1996-2008, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -1824,7 +1824,7 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testDat errln(msg); break; } else { - UnicodeString expectedString = UnicodeString(expectedWords).unescape(); + UnicodeString expectedString = UnicodeString(expectedWords, -1, US_INV).unescape(); if (actualString != expectedString) { UnicodeString msg = "FAIL: check failed for "; decFmt.format(expectedNumber, msg, status); diff --git a/icu4c/source/test/intltest/jamotest.cpp b/icu4c/source/test/intltest/jamotest.cpp index 635794101d3..e89e34b16cb 100644 --- a/icu4c/source/test/intltest/jamotest.cpp +++ b/icu4c/source/test/intltest/jamotest.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2007, International Business Machines Corporation and + * Copyright (c) 1997-2008, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************** ************************************************************************ @@ -26,7 +26,7 @@ JamoTest::JamoTest() UParseError parseError; UErrorCode status = U_ZERO_ERROR; NAME_JAMO = Transliterator::createFromRules("Name-Jamo", - JAMO_NAMES_RULES, + UNICODE_STRING_SIMPLE(JAMO_NAMES_RULES), UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { @@ -35,7 +35,7 @@ JamoTest::JamoTest() } status = U_ZERO_ERROR; JAMO_NAME = Transliterator::createFromRules("Jamo-Name", - JAMO_NAMES_RULES, + UNICODE_STRING_SIMPLE(JAMO_NAMES_RULES), UTRANS_REVERSE, parseError, status); if (U_FAILURE(status)) { delete JAMO_NAME; @@ -400,7 +400,7 @@ JamoTest::TestRealText() { int32_t i; for (i=0; i < WHAT_IS_UNICODE_length; ++i) { ++total; - UnicodeString hangul = WHAT_IS_UNICODE[i]; + UnicodeString hangul = UNICODE_STRING_SIMPLE(WHAT_IS_UNICODE[i]); hangul = hangul.unescape(); // Parse backslash-u escapes UnicodeString hangulX = hangul; rt.transliterate(hangulX); diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index ef41c27b190..fe70fa16da9 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -696,7 +696,7 @@ NumberFormatTest::TestCurrency(void) for(int i=0; i < (int)(sizeof(testCases)/sizeof(testCases[i])); i++){ status = U_ZERO_ERROR; const char *localeID = testCases[i][0]; - UnicodeString expected(testCases[i][1]); + UnicodeString expected(testCases[i][1], -1, US_INV); expected = expected.unescape(); s.truncate(0); char loc[256]={0}; @@ -1358,7 +1358,7 @@ void NumberFormatTest::TestSurrogateSupport(void) { custom.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, (UChar)0x30); custom.setSymbol(DecimalFormatSymbols::kCurrencySymbol, "units of money"); custom.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, "money separator"); - patternStr = "0.00 \\u00A4' in your bank account'"; + patternStr = UNICODE_STRING_SIMPLE("0.00 \\u00A4' in your bank account'"); patternStr = patternStr.unescape(); expStr = UnicodeString(" minus 20money separator00 units of money in your bank account", ""); status = U_ZERO_ERROR; diff --git a/icu4c/source/test/intltest/numrgts.cpp b/icu4c/source/test/intltest/numrgts.cpp index e93eb5ce1d6..10c9a56587c 100644 --- a/icu4c/source/test/intltest/numrgts.cpp +++ b/icu4c/source/test/intltest/numrgts.cpp @@ -1,5 +1,5 @@ /*********************************************************************** - * Copyright (c) 1997-2007, International Business Machines Corporation + * Copyright (c) 1997-2008, International Business Machines Corporation * and others. All Rights Reserved. ***********************************************************************/ @@ -1085,7 +1085,7 @@ void NumberFormatRegressionTest::Test4071859 (void) String expectedPercent = "-578.998%"; */ UnicodeString expectedDefault("-5.789,988"); - UnicodeString expectedCurrency("-\\u20A4 5.790"); + UnicodeString expectedCurrency("-\\u20A4 5.790", -1, US_INV); UnicodeString expectedPercent("-578.999%"); expectedCurrency = expectedCurrency.unescape(); diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp index 36305991862..f9ffcf84657 100644 --- a/icu4c/source/test/intltest/rbbiapts.cpp +++ b/icu4c/source/test/intltest/rbbiapts.cpp @@ -1,5 +1,5 @@ /******************************************************************** - * Copyright (c) 1999-2007, International Business Machines + * Copyright (c) 1999-2008, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************** * Date Name Description @@ -744,12 +744,12 @@ void RBBIAPITest::TestRuleStatus() { // Test the vector form of break rule status. // void RBBIAPITest::TestRuleStatusVec() { - UnicodeString rulesString = "[A-N]{100}; \n" + UnicodeString rulesString( "[A-N]{100}; \n" "[a-w]{200}; \n" "[\\p{L}]{300}; \n" "[\\p{N}]{400}; \n" "[0-5]{500}; \n" - "!.*;\n"; + "!.*;\n", -1, US_INV); UnicodeString testString1 = "Aapz5?"; int32_t statusVals[10]; int32_t numStatuses; diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index df44ec1003c..8c512e21578 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -330,13 +330,13 @@ static const int T_IDEO = 400; // //----------------------------------------------------------------------------------- void RBBITest::TestStatusReturn() { - UnicodeString rulesString1 = "$Letters = [:L:];\n" + UnicodeString rulesString1("$Letters = [:L:];\n" "$Numbers = [:N:];\n" "$Letters+{1};\n" "$Numbers+{2};\n" "Help\\ {4}/me\\!;\n" "[^$Letters $Numbers];\n" - "!.*;\n"; + "!.*;\n", -1, US_INV); UnicodeString testString1 = "abc123..abc Help me Help me!"; // 01234567890123456789012345678 int32_t bounds1[] = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1}; @@ -1189,7 +1189,7 @@ void RBBITest::TestBug5775() { return; } - UnicodeString s("One.\\u00ad Two."); + UnicodeString s("One.\\u00ad Two.", -1, US_INV); // 01234 56789 s = s.unescape(); bi->setText(s); @@ -1422,7 +1422,7 @@ void RBBITest::TestExtended() { tp.srcLine = new UVector32(status); tp.srcCol = new UVector32(status); - RegexMatcher localeMatcher("", 0, status); + RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE(""), 0, status); TEST_ASSERT_SUCCESS(status); @@ -1597,7 +1597,7 @@ void RBBITest::TestExtended() { break; } - if (testString.compare(charIdx-1, 3, "\\N{") == 0) { + if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) { // Named character, e.g. \N{COMBINING GRAVE ACCENT} // Get the code point from the name and insert it into the test data. // (Damn, no API takes names in Unicode !!! @@ -2016,7 +2016,7 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator * // Caputure Group # 1 2 3 4 5 // Parses this item: divide x hex digits comment \n unrecognized \n // - UnicodeString tokenExpr = "[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))"; + UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV); RegexMatcher tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status); UnicodeString testString; UVector32 breakPositions(status); @@ -2245,23 +2245,23 @@ RBBICharMonkey::RBBICharMonkey() { fText = NULL; - fCRLFSet = new UnicodeSet("[\\r\\n]", status); - fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]", status); - fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]", status); - fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]", status); - fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]", status); - fLSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]", status); - fVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]", status); - fTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]", status); - fLVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]", status); - fLVTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]", status); + fCRLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status); + fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status); + fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status); + fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status); + fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status); + fLSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status); + fVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status); + fTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status); + fLVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status); + fLVTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status); fHangulSet = new UnicodeSet(); fHangulSet->addAll(*fLSet); fHangulSet->addAll(*fVSet); fHangulSet->addAll(*fTSet); fHangulSet->addAll(*fLVSet); fHangulSet->addAll(*fLVTSet); - fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]", status); + fAnySet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0000-\\U0010ffff]"), status); fSets = new UVector(status); fSets->addElement(fCRLFSet, status); @@ -2457,18 +2457,18 @@ RBBIWordMonkey::RBBIWordMonkey() fSets = new UVector(status); - fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]", status); - fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]", status); - fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]", status); - fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]", status); - fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]", status); - fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]", status); - fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]", status); - fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]", status); - fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]", status); - fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]", status); - fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]", status); - fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]", status); + fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status); + fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status); + fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status); + fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status); + fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), status); + fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status); + fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status); + fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status); + fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"), status); + fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status); + fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status); + fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status); fOtherSet = new UnicodeSet(); if(U_FAILURE(status)) { @@ -2489,7 +2489,7 @@ RBBIWordMonkey::RBBIWordMonkey() fOtherSet->removeAll(*fFormatSet); fOtherSet->removeAll(*fExtendSet); // Inhibit dictionary characters from being tested at all. - fOtherSet->removeAll(UnicodeSet("[\\p{LineBreak = Complex_Context}]", status)); + fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status)); fSets->addElement(fCRSet, status); fSets->addElement(fLFSet, status); @@ -2736,18 +2736,18 @@ RBBISentMonkey::RBBISentMonkey() // Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator // set and made into character classes of their own. For the monkey impl, // they remain in SEP, since Sep always appears with CR and LF in the rules. - fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]", status); - fFormatSet = new UnicodeSet("[\\p{Sentence_Break = Format}]", status); - fSpSet = new UnicodeSet("[\\p{Sentence_Break = Sp}]", status); - fLowerSet = new UnicodeSet("[\\p{Sentence_Break = Lower}]", status); - fUpperSet = new UnicodeSet("[\\p{Sentence_Break = Upper}]", status); - fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}]", status); - fNumericSet = new UnicodeSet("[\\p{Sentence_Break = Numeric}]", status); - fATermSet = new UnicodeSet("[\\p{Sentence_Break = ATerm}]", status); - fSContinueSet = new UnicodeSet("[\\p{Sentence_Break = SContinue}]", status); - fSTermSet = new UnicodeSet("[\\p{Sentence_Break = STerm}]", status); - fCloseSet = new UnicodeSet("[\\p{Sentence_Break = Close}]", status); - fExtendSet = new UnicodeSet("[\\p{Sentence_Break = Extend}]", status); + fSepSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"), status); + fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"), status); + fSpSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"), status); + fLowerSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"), status); + fUpperSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"), status); + fOLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"), status); + fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"), status); + fATermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"), status); + fSContinueSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status); + fSTermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"), status); + fCloseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"), status); + fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"), status); fOtherSet = new UnicodeSet(); if(U_FAILURE(status)) { @@ -3082,42 +3082,42 @@ RBBILineMonkey::RBBILineMonkey() fSets = new UVector(status); - fBK = new UnicodeSet("[\\p{Line_Break=BK}]", status); - fCR = new UnicodeSet("[\\p{Line_break=CR}]", status); - fLF = new UnicodeSet("[\\p{Line_break=LF}]", status); - fCM = new UnicodeSet("[\\p{Line_break=CM}]", status); - fNL = new UnicodeSet("[\\p{Line_break=NL}]", status); - fWJ = new UnicodeSet("[\\p{Line_break=WJ}]", status); - fZW = new UnicodeSet("[\\p{Line_break=ZW}]", status); - fGL = new UnicodeSet("[\\p{Line_break=GL}]", status); - fCB = new UnicodeSet("[\\p{Line_break=CB}]", status); - fSP = new UnicodeSet("[\\p{Line_break=SP}]", status); - fB2 = new UnicodeSet("[\\p{Line_break=B2}]", status); - fBA = new UnicodeSet("[\\p{Line_break=BA}]", status); - fBB = new UnicodeSet("[\\p{Line_break=BB}]", status); - fHY = new UnicodeSet("[\\p{Line_break=HY}]", status); - fH2 = new UnicodeSet("[\\p{Line_break=H2}]", status); - fH3 = new UnicodeSet("[\\p{Line_break=H3}]", status); - fCL = new UnicodeSet("[\\p{Line_break=CL}]", status); - fEX = new UnicodeSet("[\\p{Line_break=EX}]", status); - fIN = new UnicodeSet("[\\p{Line_break=IN}]", status); - fJL = new UnicodeSet("[\\p{Line_break=JL}]", status); - fJV = new UnicodeSet("[\\p{Line_break=JV}]", status); - fJT = new UnicodeSet("[\\p{Line_break=JT}]", status); - fNS = new UnicodeSet("[\\p{Line_break=NS}]", status); - fOP = new UnicodeSet("[\\p{Line_break=OP}]", status); - fQU = new UnicodeSet("[\\p{Line_break=QU}]", status); - fIS = new UnicodeSet("[\\p{Line_break=IS}]", status); - fNU = new UnicodeSet("[\\p{Line_break=NU}]", status); - fPO = new UnicodeSet("[\\p{Line_break=PO}]", status); - fPR = new UnicodeSet("[\\p{Line_break=PR}]", status); - fSY = new UnicodeSet("[\\p{Line_break=SY}]", status); - fAI = new UnicodeSet("[\\p{Line_break=AI}]", status); - fAL = new UnicodeSet("[\\p{Line_break=AL}]", status); - fID = new UnicodeSet("[\\p{Line_break=ID}]", status); - fSA = new UnicodeSet("[\\p{Line_break=SA}]", status); - fSG = new UnicodeSet("[\\ud800-\\udfff]", status); - fXX = new UnicodeSet("[\\p{Line_break=XX}]", status); + fBK = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status); + fCR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status); + fLF = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status); + fCM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status); + fNL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status); + fWJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status); + fZW = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status); + fGL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status); + fCB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status); + fSP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status); + fB2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status); + fBA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status); + fBB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status); + fHY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status); + fH2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status); + fH3 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status); + fCL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status); + fEX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status); + fIN = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status); + fJL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status); + fJV = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status); + fJT = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status); + fNS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status); + fOP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status); + fQU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status); + fIS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), status); + fNU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status); + fPO = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status); + fPR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status); + fSY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status); + fAI = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status); + fAL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status); + fID = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status); + fSA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status); + fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status); + fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status); if (U_FAILURE(status)) { deferredStatus = status; @@ -3169,13 +3169,14 @@ RBBILineMonkey::RBBILineMonkey() fSets->addElement(fSG, status); fNumberMatcher = new RegexMatcher( - "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?" - "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?" - "\\p{Line_Break=NU}\\p{Line_Break=CM}*" - "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*" - "(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?" - "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?", - 0, status); + UNICODE_STRING_SIMPLE( + "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?" + "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?" + "\\p{Line_Break=NU}\\p{Line_Break=CM}*" + "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*" + "(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?" + "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?" + ), 0, status); fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status); @@ -3758,7 +3759,6 @@ void RBBITest::TestWordBreaks(void) UErrorCode status = U_ZERO_ERROR; // BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); BreakIterator *bi = BreakIterator::createWordInstance(locale, status); - UChar str[300]; static const char *strlist[] = { "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d", @@ -3808,8 +3808,7 @@ void RBBITest::TestWordBreaks(void) } for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { // printf("looping %d\n", loop); - u_unescape(strlist[loop], str, 25); - UnicodeString ustr(str); + UnicodeString ustr = CharsToUnicodeString(strlist[loop]); // RBBICharMonkey monkey; RBBIWordMonkey monkey; @@ -4105,7 +4104,7 @@ void RBBITest::TestMonkey(char *params) { // m.reset(p); - if (RegexMatcher("\\S", p, 0, status).find()) { + if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) { // Each option is stripped out of the option string as it is processed. // All options have been checked. The option string should have been completely emptied.. char buf[100]; diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index c26b7396f9c..ba389063d88 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -115,15 +115,15 @@ if (status!=errcode) {errln("RegexTest failure at line %d. Expected status=%s, #define REGEX_TESTLM(pat, text, looking, match) doRegexLMTest(pat, text, looking, match, __LINE__); UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) { - const UnicodeString pattern(pat); - const UnicodeString inputText(text); + const UnicodeString pattern(pat, -1, US_INV); + const UnicodeString inputText(text, -1, US_INV); UErrorCode status = U_ZERO_ERROR; UParseError pe; RegexPattern *REPattern = NULL; RegexMatcher *REMatcher = NULL; UBool retVal = TRUE; - UnicodeString patString(pat); + UnicodeString patString(pat, -1, US_INV); REPattern = RegexPattern::compile(patString, 0, pe, status); if (U_FAILURE(status)) { errln("RegexTest failure in RegexPattern::compile() at line %d. Status = %s\n", @@ -636,7 +636,7 @@ void RegexTest::API_Match() { UParseError pe; UErrorCode status=U_ZERO_ERROR; - UnicodeString re(".*?(?:(\\Gabc)|(abc))"); + UnicodeString re(".*?(?:(\\Gabc)|(abc))", -1, US_INV); RegexPattern *pat = RegexPattern::compile(re, flags, pe, status); REGEX_CHECK_STATUS; UnicodeString data = ".abcabc.abc.."; @@ -681,7 +681,7 @@ void RegexTest::API_Match() { REGEX_ASSERT(i==5); // Check that the bump goes over surrogate pairs OK - s = "\\U00010001\\U00010002\\U00010003\\U00010004"; + s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004"); s = s.unescape(); m.reset(s); for (i=0; ; i+=2) { @@ -1027,7 +1027,7 @@ void RegexTest::API_Replace() { REGEX_CHECK_STATUS; REGEX_ASSERT(dest == "bcbcdefg"); - dest = matcher2->replaceFirst("The value of \\$1 is $1.", status); + dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status); REGEX_CHECK_STATUS; REGEX_ASSERT(dest == "The value of $1 is bc.defg"); @@ -1035,7 +1035,7 @@ void RegexTest::API_Replace() { REGEX_CHECK_STATUS; REGEX_ASSERT(dest == "$ by itself, no group number $$$defg"); - UnicodeString replacement = "Supplemental Digit 1 $\\U0001D7CF."; + UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF."); replacement = replacement.unescape(); dest = matcher2->replaceFirst(replacement, status); REGEX_CHECK_STATUS; @@ -1049,7 +1049,7 @@ void RegexTest::API_Replace() { // { UnicodeString src = "abc 1 abc 2 abc 3"; - UnicodeString substitute = "--\\u0043--"; + UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\u0043--"); matcher->reset(src); UnicodeString result = matcher->replaceAll(substitute, status); REGEX_CHECK_STATUS; @@ -1057,7 +1057,7 @@ void RegexTest::API_Replace() { } { UnicodeString src = "abc !"; - UnicodeString substitute = "--\\U00010000--"; + UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\U00010000--"); matcher->reset(src); UnicodeString result = matcher->replaceAll(substitute, status); REGEX_CHECK_STATUS; @@ -1186,7 +1186,7 @@ void RegexTest::API_Pattern() { // { UErrorCode status = U_ZERO_ERROR; - RegexPattern *pSource = RegexPattern::compile("\\p{L}+", 0, status); + RegexPattern *pSource = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status); RegexPattern *pClone = pSource->clone(); delete pSource; RegexMatcher *mFromClone = pClone->matcher(status); @@ -1278,7 +1278,7 @@ void RegexTest::API_Pattern() { delete pat1; // split, with a pattern with (capture) - pat1 = RegexPattern::compile("<(\\w*)>", pe, status); + pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"), pe, status); REGEX_CHECK_STATUS; status = U_ZERO_ERROR; @@ -1444,11 +1444,11 @@ void RegexTest::Extended() { // UnicodeString testString(FALSE, testData, len); - RegexMatcher quotedStuffMat("\\s*([\\'\\\"/])(.*?)\\1", 0, status); - RegexMatcher commentMat ("\\s*(#.*)?$", 0, status); - RegexMatcher flagsMat ("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)", 0, status); + RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status); + RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status); + RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)"), 0, status); - RegexMatcher lineMat("(.*?)\\r?\\n", testString, 0, status); + RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status); UnicodeString testPattern; // The pattern for test from the test file. UnicodeString testFlags; // the flags for a test. UnicodeString matchString; // The marked up string to be used as input @@ -2073,7 +2073,7 @@ void RegexTest::PerlTests() { // Regex to break the input file into lines, and strip the new lines. // One line per match, capture group one is the desired data. // - RegexPattern* linePat = RegexPattern::compile("(.+?)[\\r\\n]+", 0, pe, status); + RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status); if (U_FAILURE(status)) { dataerrln("RegexPattern::compile() error"); return; @@ -2084,7 +2084,7 @@ void RegexTest::PerlTests() { // Regex to split a test file line into fields. // There are six fields, separated by tabs. // - RegexPattern* fieldPat = RegexPattern::compile("\\t", 0, pe, status); + RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status); // // Regex to identify test patterns with flag settings, and to separate them. @@ -2092,7 +2092,7 @@ void RegexTest::PerlTests() { // Test patterns without flags are not quoted: pattern // Coming out, capture group 2 is the pattern, capture group 3 is the flags. // - RegexPattern *flagPat = RegexPattern::compile("('?)(.*)\\1(.*)", 0, pe, status); + RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status); RegexMatcher* flagMat = flagPat->matcher(status); // @@ -2101,19 +2101,19 @@ void RegexTest::PerlTests() { // are string constants and REs for these constructs. // UnicodeString nulnulSrc("${nulnul}"); - UnicodeString nulnul("\\u0000\\u0000"); + UnicodeString nulnul("\\u0000\\u0000", -1, US_INV); nulnul = nulnul.unescape(); UnicodeString ffffSrc("${ffff}"); - UnicodeString ffff("\\uffff"); + UnicodeString ffff("\\uffff", -1, US_INV); ffff = ffff.unescape(); // regexp for $-[0], $+[2], etc. - RegexPattern *groupsPat = RegexPattern::compile("\\$([+\\-])\\[(\\d+)\\]", 0, pe, status); + RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status); RegexMatcher *groupsMat = groupsPat->matcher(status); // regexp for $0, $1, $2, etc. - RegexPattern *cgPat = RegexPattern::compile("\\$(\\d+)", 0, pe, status); + RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status); RegexMatcher *cgMat = cgPat->matcher(status); @@ -2138,7 +2138,7 @@ void RegexTest::PerlTests() { flagMat->matches(status); UnicodeString pattern = flagMat->group(2, status); pattern.findAndReplace("${bang}", "!"); - pattern.findAndReplace(nulnulSrc, "\\u0000\\u0000"); + pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000")); pattern.findAndReplace(ffffSrc, ffff); // @@ -2218,7 +2218,7 @@ void RegexTest::PerlTests() { // Replace any \n in the match string with an actual new-line char. // Don't do full unescape, as this unescapes more than Perl does, which // causes other spurious failures in the tests. - matchString.findAndReplace("\\n", "\n"); + matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); @@ -2315,7 +2315,7 @@ void RegexTest::PerlTests() { perlExpr.remove(0, 2); } - else if (perlExpr.startsWith("\\")) { // \Escape. Take following char as a literal. + else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal. // or as an escaped sequence (e.g. \n) if (perlExpr.length() > 1) { perlExpr.remove(0, 1); // Remove the '\', but only if not last char. @@ -2349,7 +2349,7 @@ void RegexTest::PerlTests() { UnicodeString expectedS(fields[4]); expectedS.findAndReplace(nulnulSrc, nulnul); expectedS.findAndReplace(ffffSrc, ffff); - expectedS.findAndReplace("\\n", "\n"); + expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n"); if (expectedS.compare(resultString) != 0) { @@ -2437,7 +2437,7 @@ void RegexTest::Callbacks() { const void *returnedContext; URegexMatchCallback *returnedFn; UErrorCode status = U_ZERO_ERROR; - RegexMatcher matcher("((.)+\\2)+x", 0, status); // A pattern that can run long. + RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. REGEX_CHECK_STATUS; matcher.setMatchCallback(testCallBackFn, &cbInfo, status); REGEX_CHECK_STATUS; diff --git a/icu4c/source/test/intltest/ssearch.cpp b/icu4c/source/test/intltest/ssearch.cpp index 9e8a1fc72d9..caf4e4d6149 100644 --- a/icu4c/source/test/intltest/ssearch.cpp +++ b/icu4c/source/test/intltest/ssearch.cpp @@ -546,7 +546,7 @@ static char *printOrders(char *buffer, OrderList &list) void SSearchTest::offsetTest() { - UnicodeString test[] = { + const char *test[] = { "\\ua191\\u16ef\\u2036\\u017a", #if 0 @@ -610,7 +610,7 @@ void SSearchTest::offsetTest() col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); for(int32_t i = 0; i < testCount; i += 1) { - UnicodeString ts = test[i].unescape(); + UnicodeString ts = CharsToUnicodeString(test[i]); CollationElementIterator *iter = col->createCollationElementIterator(ts); OrderList forwardList; OrderList backwardList; @@ -644,7 +644,7 @@ void SSearchTest::offsetTest() backwardList.reverse(); if (forwardList.compare(backwardList)) { - logln("Works with \"%s\"", test[i].getTerminatedBuffer()); + logln("Works with \"%s\"", test[i]); logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); @@ -653,7 +653,7 @@ void SSearchTest::offsetTest() logln(); } else { - errln("Fails with \"%S\"", test[i].getTerminatedBuffer()); + errln("Fails with \"%s\"", test[i]); infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); diff --git a/icu4c/source/test/intltest/testidna.cpp b/icu4c/source/test/intltest/testidna.cpp index 202923a94e5..3e77baf09d5 100644 --- a/icu4c/source/test/intltest/testidna.cpp +++ b/icu4c/source/test/intltest/testidna.cpp @@ -1480,7 +1480,7 @@ void TestIDNA::TestIDNAMonkeyTest(){ /* for debugging */ for (i=0; i<(int)(sizeof(failures)/sizeof(failures[0])); i++){ source.truncate(0); - source.append( failures[i] ); + source.append( UNICODE_STRING_SIMPLE(failures[i]) ); source = source.unescape(); source.append((UChar)0x0000); const UChar *src = source.getBuffer(); @@ -1490,13 +1490,13 @@ void TestIDNA::TestIDNAMonkeyTest(){ source.truncate(0); - source.append("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C"); + source.append(UNICODE_STRING_SIMPLE("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C")); debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED); { // test deletion of code points - UnicodeString source("\\u043f\\u00AD\\u034f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000"); + UnicodeString source("\\u043f\\u00AD\\u034f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000", -1, US_INV); source = source.unescape(); - UnicodeString expected("\\u043f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000"); + UnicodeString expected("\\u043f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000", -1, US_INV); expected = expected.unescape(); UnicodeString ascii("xn--b1abfaaepdrnnbgefbadotcwatmq2g4l"); ascii.append((UChar)0x0000); diff --git a/icu4c/source/test/intltest/transapi.cpp b/icu4c/source/test/intltest/transapi.cpp index faf995c16fc..6f497c68b5a 100644 --- a/icu4c/source/test/intltest/transapi.cpp +++ b/icu4c/source/test/intltest/transapi.cpp @@ -1,6 +1,6 @@ /************************************************************************ * COPYRIGHT: - * Copyright (c) 2000-2007, International Business Machines Corporation + * Copyright (c) 2000-2008, International Business Machines Corporation * and others. All Rights Reserved. ************************************************************************/ /************************************************************************ @@ -274,7 +274,7 @@ void TransliteratorAPITest::TestTransliterate1(){ "Latin-Devanagari",CharsToUnicodeString("bha\\u0304rata"), CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") , "Latin-Devanagari",UnicodeString("kra ksha khra gra cra dya dhya",""), CharsToUnicodeString("\\u0915\\u094D\\u0930 \\u0915\\u094D\\u0936 \\u0916\\u094D\\u0930 \\u0917\\u094D\\u0930 \\u091a\\u094D\\u0930 \\u0926\\u094D\\u092F \\u0927\\u094D\\u092F") , - "Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), UnicodeString("bh\\u0101rata"), + "Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), CharsToUnicodeString("bh\\u0101rata"), // "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") , // "Expanded-Contracted", CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042"), CharsToUnicodeString("\\u00C0\\u00C1\\u0042") , //"Latin-Arabic", "aap", CharsToUnicodeString("\\u0627\\u06A4") , @@ -325,7 +325,7 @@ void TransliteratorAPITest::TestTransliterate2(){ "Hex-Any", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " , // "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), "1", "2", CharsToUnicodeString("\\u0041\\u0301"), CharsToUnicodeString("\\u00C0\\u0041\\u0301\\u0042") , "Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "0", "1", "bha", CharsToUnicodeString("bha\\u093E\\u0930\\u0924") , - "Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", "\\u0314\\u0101", CharsToUnicodeString("\\u092D\\u0314\\u0101\\u0930\\u0924") + "Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", CharsToUnicodeString("\\u0314\\u0101"), CharsToUnicodeString("\\u092D\\u0314\\u0101\\u0930\\u0924") }; logln("\n Testing transliterate(String, int, int, StringBuffer)"); diff --git a/icu4c/source/test/intltest/transrt.cpp b/icu4c/source/test/intltest/transrt.cpp index 11d2c3731da..0a629f0ae5e 100644 --- a/icu4c/source/test/intltest/transrt.cpp +++ b/icu4c/source/test/intltest/transrt.cpp @@ -522,7 +522,7 @@ void RTTest::test(const UnicodeString& sourceRangeVal, this->roundtripExclusionsSet.clear(); if (roundtripExclusions != NULL && strlen(roundtripExclusions) > 0) { - this->roundtripExclusionsSet.applyPattern(roundtripExclusions, status); + this->roundtripExclusionsSet.applyPattern(UNICODE_STRING_SIMPLE(roundtripExclusions), status); if (U_FAILURE(status)) { parent->errln("FAIL: UnicodeSet::applyPattern(%s)", roundtripExclusions); return; @@ -991,7 +991,7 @@ void TransliteratorRoundTripTest::TestHiragana() { RTTest test("Latin-Hiragana"); Legal *legal = new Legal(); test.test(UnicodeString("[a-zA-Z]", ""), - HIRAGANA, + UNICODE_STRING_SIMPLE(HIRAGANA), HIRAGANA_ITERATION, this, quick, legal); delete legal; } @@ -1005,7 +1005,7 @@ void TransliteratorRoundTripTest::TestKatakana() { strcat(temp, HALFWIDTH_KATAKANA); strcat(temp, "]"); test.test(UnicodeString("[a-zA-Z]", ""), - KATAKANA, + UNICODE_STRING_SIMPLE(KATAKANA), temp, this, quick, legal); delete legal; @@ -1105,7 +1105,7 @@ void TransliteratorRoundTripTest::TestHan() { UnicodeString nfded = target2; nfd->transliterate(nfded); - UnicodeSet allMarks("[\\u0304\\u0301\\u030C\\u0300\\u0306]", status); // look only for Pinyin tone marks, not all marks (there are some others in there) + UnicodeSet allMarks(UNICODE_STRING_SIMPLE("[\\u0304\\u0301\\u030C\\u0300\\u0306]"), status); // look only for Pinyin tone marks, not all marks (there are some others in there) ASSERT_SUCCESS(status); assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfded)); @@ -1231,10 +1231,10 @@ void TransliteratorRoundTripTest::Testel() { void TransliteratorRoundTripTest::TestArabic() { - UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]"); + UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]", -1, US_INV); Legal *legal = new Legal(); RTTest test("Latin-Arabic"); - test.test("[a-zA-Z\\u02BE\\u02BF\\u207F]", ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); // + test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BE\\u02BF\\u207F]"), ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); // delete legal; } class LegalHebrew : public Legal { @@ -1249,8 +1249,8 @@ public: }; LegalHebrew::LegalHebrew(UErrorCode& error){ - FINAL.applyPattern("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]", error); - NON_FINAL.applyPattern("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]", error); + FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]"), error); + NON_FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]"), error); LETTER.applyPattern("[:letter:]", error); } UBool LegalHebrew::is(const UnicodeString& sourceString)const{ @@ -1285,7 +1285,7 @@ void TransliteratorRoundTripTest::TestHebrew() { return; } RTTest test("Latin-Hebrew"); - test.test("[a-zA-Z\\u02BC\\u02BB]", "[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]", "[\\u05F0\\u05F1\\u05F2]", this, quick, legal); + test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BC\\u02BB]"), UNICODE_STRING_SIMPLE("[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]"), "[\\u05F0\\u05F1\\u05F2]", this, quick, legal); //showElapsed(start, "TestHebrew"); delete legal; diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 5ecc5a155d5..94f02c29144 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -1269,7 +1269,7 @@ void TransliteratorTest::TestNameMap(void) { // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{}\\\\N{}")); - expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{}\\N{}\\N{}\\N{", + expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{}\\N{}\\N{}\\N{"), CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); delete uni2name; @@ -1516,7 +1516,7 @@ void TransliteratorTest::TestCompoundRBT(void) { errln("FAIL: createFromRules failed"); return; } - expect(*t, "\\u0043at in the hat, bat on the mat", + expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"), "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); UnicodeString r; t->toRules(r, TRUE); @@ -1728,7 +1728,7 @@ void TransliteratorTest::TestToRules(void) { UParseError parseError; UErrorCode status = U_ZERO_ERROR; Transliterator *t = Transliterator::createFromRules("ID", - DATA[d+1], UTRANS_FORWARD, parseError, status); + UNICODE_STRING_SIMPLE(DATA[d+1]), UTRANS_FORWARD, parseError, status); if (t == 0) { errln("FAIL: createFromRules failed"); return; @@ -1737,19 +1737,19 @@ void TransliteratorTest::TestToRules(void) { t->toRules(rules, FALSE); t->toRules(escapedRules, TRUE); UnicodeString expRules = CharsToUnicodeString(DATA[d+2]); - UnicodeString expEscapedRules(DATA[d+2]); + UnicodeString expEscapedRules(DATA[d+2], -1, US_INV); if (rules == expRules) { - logln((UnicodeString)"Ok: " + DATA[d+1] + + logln((UnicodeString)"Ok: " + UNICODE_STRING_SIMPLE(DATA[d+1]) + " => " + rules); } else { - errln((UnicodeString)"FAIL: " + DATA[d+1] + + errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[d+1]) + " => " + rules + ", exp " + expRules); } if (escapedRules == expEscapedRules) { - logln((UnicodeString)"Ok: " + DATA[d+1] + + logln((UnicodeString)"Ok: " + UNICODE_STRING_SIMPLE(DATA[d+1]) + " => " + escapedRules); } else { - errln((UnicodeString)"FAIL: " + DATA[d+1] + + errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[d+1]) + " => " + escapedRules + ", exp " + expEscapedRules); } delete t; @@ -1757,8 +1757,8 @@ void TransliteratorTest::TestToRules(void) { } else { // UnicodeSet test UErrorCode status = U_ZERO_ERROR; - UnicodeString pat(DATA[d+1]); - UnicodeString expToPat(DATA[d+2]); + UnicodeString pat(DATA[d+1], -1, US_INV); + UnicodeString expToPat(DATA[d+2], -1, US_INV); UnicodeSet set(pat, status); if (U_FAILURE(status)) { errln("FAIL: UnicodeSet ct failed"); @@ -1820,23 +1820,23 @@ void TransliteratorTest::TestSupplemental() { expectT("Any-Name", CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), - "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); + UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}")); expectT("Any-Hex/Unicode", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "U+10330U+10FF00U+E0061U+00A0"); + UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0")); expectT("Any-Hex/C", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); + UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0")); expectT("Any-Hex/Perl", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); + UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}")); expectT("Any-Hex/Java", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); + UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0")); expectT("Any-Hex/XML", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), @@ -1846,7 +1846,7 @@ void TransliteratorTest::TestSupplemental() { CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), "𐌰􏼀󠁡 "); - expectT("[\\U000E0000-\\U000E0FFF] Remove", + expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"), CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); } @@ -2363,7 +2363,7 @@ void TransliteratorTest::TestCompoundFilterID(void) { * Test new property set syntax */ void TransliteratorTest::TestPropertySet() { - expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx"); + expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx"); expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", "[ a stitch ]\n[ in time ]\r[ saves 9]"); } @@ -2838,8 +2838,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){ // (\u0902) (when preceded by vowel) ---> (\u0A02) // (\u0902) (when preceded by consonant) ---> (\u0A70) UErrorCode status = U_ZERO_ERROR; - UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status); - UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status); + UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status); + UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status); UParseError parseError; UnicodeSetIterator vIter(vowel); @@ -2850,8 +2850,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){ delete trans; return; } - UnicodeString src (" \\u0902"); - UnicodeString expected(" \\u0A02"); + UnicodeString src (" \\u0902", -1, US_INV); + UnicodeString expected(" \\u0A02", -1, US_INV); src = src.unescape(); expected= expected.unescape(); @@ -3165,8 +3165,8 @@ void TransliteratorTest::TestToRulesMark() { UParseError pe; UErrorCode ec = U_ZERO_ERROR; - Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec); - Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec); + Transliterator *t2 = Transliterator::createFromRules("source-target", UNICODE_STRING_SIMPLE(testRules), UTRANS_FORWARD, pe, ec); + Transliterator *t3 = Transliterator::createFromRules("target-source", UNICODE_STRING_SIMPLE(testRules), UTRANS_REVERSE, pe, ec); if (U_FAILURE(ec)) { delete t2; @@ -3178,8 +3178,8 @@ void TransliteratorTest::TestToRulesMark() { expect(*t2, source, target); expect(*t3, target, source); - checkRules("Failed toRules FORWARD", *t2, testRulesForward); - checkRules("Failed toRules BACKWARD", *t3, testRulesBackward); + checkRules("Failed toRules FORWARD", *t2, UNICODE_STRING_SIMPLE(testRulesForward)); + checkRules("Failed toRules BACKWARD", *t3, UNICODE_STRING_SIMPLE(testRulesBackward)); delete t2; delete t3; @@ -3199,7 +3199,7 @@ void TransliteratorTest::TestEscape() { errln((UnicodeString)"FAIL: createInstance"); } else { expect(*t, - "\\x{40}\\U000000312Q", + UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"), "@12Q"); } delete t; @@ -3211,7 +3211,7 @@ void TransliteratorTest::TestEscape() { } else { expect(*t, CharsToUnicodeString("A\\U0010BEEF\\uFEED"), - "\\u0041\\U0010BEEF\\uFEED"); + UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED")); } delete t; @@ -3222,7 +3222,7 @@ void TransliteratorTest::TestEscape() { } else { expect(*t, CharsToUnicodeString("A\\U0010BEEF\\uFEED"), - "\\u0041\\uDBEF\\uDEEF\\uFEED"); + UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED")); } delete t; @@ -3233,7 +3233,7 @@ void TransliteratorTest::TestEscape() { } else { expect(*t, CharsToUnicodeString("A\\U0010BEEF\\uFEED"), - "\\x{41}\\x{10BEEF}\\x{FEED}"); + UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}")); } delete t; } @@ -3638,7 +3638,7 @@ void TransliteratorTest::TestFunction() { } expect(*t, "The Quick Brown Fox", - "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); + UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox")); delete t; } @@ -3756,7 +3756,7 @@ void TransliteratorTest::TestUserFunction() { // There's no need to register inverses if we don't use them t = Transliterator::createFromRules("gif", - "'\\'u(..)(..) > '';", + UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '';"), UTRANS_FORWARD, pe, ec); if (t == NULL || U_FAILURE(ec)) { errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec)); @@ -3765,13 +3765,13 @@ void TransliteratorTest::TestUserFunction() { _TUFReg("Any-gif", t, 0); t = Transliterator::createFromRules("RemoveCurly", - "[\\{\\}] > ; '\\N' > ;", + UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"), UTRANS_FORWARD, pe, ec); if (t == NULL || U_FAILURE(ec)) { errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec)); goto FAIL; } - expect(*t, "\\N{name}", "name"); + expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name"); _TUFReg("Any-RemoveCurly", t, 1); logln("Trying &hex"); @@ -3789,7 +3789,7 @@ void TransliteratorTest::TestUserFunction() { errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec)); goto FAIL; } - expect(*t, "abc", "\\u0061\\u0062\\u0063"); + expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063")); delete t; logln("Trying &gif"); @@ -3820,7 +3820,7 @@ void TransliteratorTest::TestUserFunction() { goto FAIL; } expect(*t, "abc", - "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); + UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ")); delete t; FAIL: @@ -4039,7 +4039,7 @@ void TransliteratorTest::TestAlternateSyntax() { "xbz"); expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), - "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); + UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}")); } static const char* BEGIN_END_RULES[] = { @@ -4276,9 +4276,9 @@ void TransliteratorTest::TestBeginEnd() { int32_t i = 0; for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { expect((UnicodeString)"Test case #" + (i / 3), - UnicodeString(BEGIN_END_TEST_CASES[i]), - UnicodeString(BEGIN_END_TEST_CASES[i + 1]), - UnicodeString(BEGIN_END_TEST_CASES[i + 2])); + UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), + UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), + UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); } // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing @@ -4319,7 +4319,7 @@ void TransliteratorTest::TestBeginEndToRules() { for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { UParseError parseError; UErrorCode status = U_ZERO_ERROR; - Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]), + Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status); @@ -4334,8 +4334,8 @@ void TransliteratorTest::TestBeginEndToRules() { delete t; } else { expect(*t2, - UnicodeString(BEGIN_END_TEST_CASES[i + 1]), - UnicodeString(BEGIN_END_TEST_CASES[i + 2])); + UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), + UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); delete t; delete t2; } diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp index 8f02ac363c5..696f9898581 100644 --- a/icu4c/source/test/intltest/usettest.cpp +++ b/icu4c/source/test/intltest/usettest.cpp @@ -170,19 +170,19 @@ void UnicodeSetTest::TestToPattern() { const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL}; expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2); - s->applyPattern("[a-z {\\{l} {r\\}}]", ec); + s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec); if (U_FAILURE(ec)) break; const char* exp3[] = {"{l", "r}", NOT, "xy", NULL}; - expectToPattern(*s, "[a-z{r\\}}{\\{l}]", exp3); + expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3); s->add("[]"); const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL}; - expectToPattern(*s, "[a-z{\\[\\]}{r\\}}{\\{l}]", exp4); + expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4); - s->applyPattern("[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec); + s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec); if (U_FAILURE(ec)) break; const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL}; - expectToPattern(*s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]", exp5); + expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5); // j2189 s->clear(); @@ -318,14 +318,14 @@ UnicodeSetTest::TestCloneEqualHash(void) { // set1 and set2 used to be built with the obsolete constructor taking // UCharCategory values; replaced with pattern constructors // markus 20030502 - UnicodeSet *set1=new UnicodeSet("\\p{Lowercase Letter}", status); // :Ll: Letter, lowercase - UnicodeSet *set1a=new UnicodeSet("[:Ll:]", status); // Letter, lowercase + UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase + UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase if (U_FAILURE(status)){ errln((UnicodeString)"FAIL: Can't construst set with category->Ll"); return; } - UnicodeSet *set2=new UnicodeSet("\\p{Decimal Number}", status); //Number, Decimal digit - UnicodeSet *set2a=new UnicodeSet("[:Nd:]", status); //Number, Decimal digit + UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit + UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status); //Number, Decimal digit if (U_FAILURE(status)){ errln((UnicodeString)"FAIL: Can't construct set with category->Nd"); return; @@ -705,7 +705,7 @@ void UnicodeSetTest::TestIteration() { // 6 code points, 3 ranges, 2 strings, 8 total elements // Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2" - UnicodeSet set("[zabyc\\U0001abcd{str1}{str2}]", ec); + UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec); TEST_ASSERT_SUCCESS(ec); UnicodeSetIterator it(set); @@ -822,12 +822,12 @@ void UnicodeSetTest::TestStrings() { * Test the [:Latin:] syntax. */ void UnicodeSetTest::TestScriptSet() { - expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1")); + expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1")); - expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA"); + expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA"); /* Jitterbug 1423 */ - expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA"); + expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA"); } @@ -1002,7 +1002,7 @@ void UnicodeSetTest::TestPropertySet() { static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); for (int32_t i=0; i " + newpat); } else { errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat); @@ -1587,7 +1587,7 @@ void UnicodeSetTest::TestSymbolTable() { // Set up variables while (DATA[i+2] != NULL) { - sym.add(DATA[i], DATA[i+1], ec); + sym.add(UNICODE_STRING_SIMPLE(DATA[i]), UNICODE_STRING_SIMPLE(DATA[i+1]), ec); if (U_FAILURE(ec)) { errln("FAIL: couldn't add to TokenSymbolTable"); continue; @@ -1596,7 +1596,7 @@ void UnicodeSetTest::TestSymbolTable() { } // Input pattern and expected output pattern - UnicodeString inpat = DATA[i], exppat = DATA[i+1]; + UnicodeString inpat = UNICODE_STRING_SIMPLE(DATA[i]), exppat = UNICODE_STRING_SIMPLE(DATA[i+1]); i += 2; ParsePosition pos(0); @@ -1640,8 +1640,8 @@ void UnicodeSetTest::TestSurrogate() { }; for (int i=0; DATA[i] != 0; ++i) { UErrorCode ec = U_ZERO_ERROR; - logln((UnicodeString)"Test pattern " + i + " :" + DATA[i]); - UnicodeSet set(DATA[i], ec); + logln((UnicodeString)"Test pattern " + i + " :" + UNICODE_STRING_SIMPLE(DATA[i])); + UnicodeSet set(UNICODE_STRING_SIMPLE(DATA[i]), ec); if (U_FAILURE(ec)) { errln("FAIL: UnicodeSet constructor"); continue; @@ -1650,7 +1650,7 @@ void UnicodeSetTest::TestSurrogate() { CharsToUnicodeString("abc\\U00010000"), CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair if (set.size() != 4) { - errln((UnicodeString)"FAIL: " + DATA[i] + ".size() == " + + errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[i]) + ".size() == " + set.size() + ", expected 4"); } } diff --git a/icu4c/source/test/intltest/utxttest.cpp b/icu4c/source/test/intltest/utxttest.cpp index d6264d9e6be..35564ca08ba 100644 --- a/icu4c/source/test/intltest/utxttest.cpp +++ b/icu4c/source/test/intltest/utxttest.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2005-2007, International Business Machines Corporation and + * Copyright (c) 2005-2008, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /************************************************************************ @@ -1044,7 +1044,7 @@ void UTextTest::ErrorTest() { // Similar test, with utf16 instead of utf8 // TODO: merge the common parts of these tests. - UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000"); + UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV); int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6}; int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6}; int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4}; @@ -1112,7 +1112,7 @@ void UTextTest::ErrorTest() { // Similar test, with UText over Replaceable // TODO: merge the common parts of these tests. - UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000"); + UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV); int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6}; int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6}; int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4}; diff --git a/icu4c/source/tools/toolutil/xmlparser.cpp b/icu4c/source/tools/toolutil/xmlparser.cpp index 01213db46bd..55688320213 100644 --- a/icu4c/source/tools/toolutil/xmlparser.cpp +++ b/icu4c/source/tools/toolutil/xmlparser.cpp @@ -61,16 +61,16 @@ UXMLParser::UXMLParser(UErrorCode &status) : // example: " // This is a sloppy implementation - just look for the leading // allow for a possible leading BOM. - mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>"), 0, status), + mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status), // XML Comment production #15 // example: " // note, does not detect an illegal "--" within comments - mXMLComment(UnicodeString("(?s)"), 0, status), + mXMLComment(UnicodeString("(?s)", -1, US_INV), 0, status), // XML Spaces // production [3] - mXMLSP(UnicodeString(XML_SPACES "+"), 0, status), + mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status), // XML Doctype decl production #28 // example " @@ -81,12 +81,12 @@ UXMLParser::UXMLParser(UErrorCode &status) : // of closeing square brackets. These could appear in comments, // or in parameter entity declarations, for example. mXMLDoctype(UnicodeString( - "(?s)|\\[.*?\\].*?>)" + "(?s)|\\[.*?\\].*?>)", -1, US_INV ), 0, status), // XML PI production #16 // example " - mXMLPI(UnicodeString("(?s)<\\?.+?\\?>"), 0, status), + mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status), // XML Element Start Productions #40, #41 // example @@ -97,11 +97,11 @@ UXMLParser::UXMLParser(UErrorCode &status) : XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = " "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"' ")*" // * for zero or more attributes. - XML_SPACES "*?>"), 0, status), // match " >" + XML_SPACES "*?>", -1, US_INV), 0, status), // match " >" // XML Element End production #42 // example - mXMLElemEnd (UnicodeString(""), 0, status), + mXMLElemEnd (UnicodeString("", -1, US_INV), 0, status), // XML Element Empty production #44 // example @@ -110,11 +110,11 @@ UXMLParser::UXMLParser(UErrorCode &status) : XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = " "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"' ")*" // * for zero or more attributes. - XML_SPACES "*?/>"), 0, status), // match " />" + XML_SPACES "*?/>", -1, US_INV), 0, status), // match " />" // XMLCharData. Everything but '<'. Note that & will be dealt with later. - mXMLCharData(UnicodeString("(?s)[^<]*"), 0, status), + mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status), // Attribute name = "value". XML Productions 10, 40/41 // Capture group 1 is name, @@ -126,14 +126,14 @@ UXMLParser::UXMLParser(UErrorCode &status) : // Here, we match a single attribute, and make its name and // attribute value available to the parser code. mAttrValue(UnicodeString(XML_SPACES "+(" XML_NAME ")" XML_SPACES "*=" XML_SPACES "*" - "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"), 0, status), + "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status), - mAttrNormalizer(UnicodeString(XML_SPACES), 0, status), + mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status), // Match any of the new-line sequences in content. // All are changed to \u000a. - mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028"), 0, status), + mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status), // & char references // We will figure out what we've got based on which capture group has content.