From 87643423be862dee019d6518ef6da6e7556dd7b9 Mon Sep 17 00:00:00 2001 From: Rich Gillam <62772518+richgillam@users.noreply.github.com> Date: Wed, 21 Oct 2020 19:23:46 -0700 Subject: [PATCH] ICU-21343 Added code to keep us from messing up quoted literal pattern text when performing find-and-replace operations on it. --- icu4c/source/i18n/dtitvfmt.cpp | 63 ++++++++++++++----- icu4c/source/i18n/unicode/dtitvfmt.h | 11 ++++ icu4c/source/test/intltest/dtifmtts.cpp | 3 + .../com/ibm/icu/text/DateIntervalFormat.java | 51 ++++++++++++--- .../test/format/DateIntervalFormatTest.java | 3 + 5 files changed, 107 insertions(+), 24 deletions(-) diff --git a/icu4c/source/i18n/dtitvfmt.cpp b/icu4c/source/i18n/dtitvfmt.cpp index 8dd2bcdca4b..7cba167130f 100644 --- a/icu4c/source/i18n/dtitvfmt.cpp +++ b/icu4c/source/i18n/dtitvfmt.cpp @@ -1698,27 +1698,23 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); if (suppressDayPeriodField) { - adjustedPtn.findAndReplace(UnicodeString(LOW_A), UnicodeString()); - adjustedPtn.findAndReplace(UnicodeString(" "), UnicodeString(" ")); + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString()); + findReplaceInPattern(adjustedPtn, UnicodeString(" "), UnicodeString(" ")); adjustedPtn.trim(); } if ( differenceInfo == 2 ) { if (inputSkeleton.indexOf(LOW_Z) != -1) { - adjustedPtn.findAndReplace(UnicodeString(LOW_V), - UnicodeString(LOW_Z)); - } - if (inputSkeleton.indexOf(CAP_K) != -1) { - adjustedPtn.findAndReplace(UnicodeString(LOW_H), - UnicodeString(CAP_K)); - } - if (inputSkeleton.indexOf(LOW_K) != -1) { - adjustedPtn.findAndReplace(UnicodeString(CAP_H), - UnicodeString(LOW_K)); - } - if (inputSkeleton.indexOf(LOW_B) != -1) { - adjustedPtn.findAndReplace(UnicodeString(LOW_A), - UnicodeString(LOW_B)); - } + findReplaceInPattern(adjustedPtn, 
UnicodeString(LOW_V), UnicodeString(LOW_Z)); + } + if (inputSkeleton.indexOf(CAP_K) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_H), UnicodeString(CAP_K)); + } + if (inputSkeleton.indexOf(LOW_K) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(CAP_H), UnicodeString(LOW_K)); + } + if (inputSkeleton.indexOf(LOW_B) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString(LOW_B)); + } } if (adjustedPtn.indexOf(LOW_A) != -1 && bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] == 0) { bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] = 1; @@ -1796,6 +1792,39 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, } } +void +DateIntervalFormat::findReplaceInPattern(UnicodeString& targetString, + const UnicodeString& strToReplace, + const UnicodeString& strToReplaceWith) { + int32_t firstQuoteIndex = targetString.indexOf(u'\''); + if (firstQuoteIndex == -1) { + targetString.findAndReplace(strToReplace, strToReplaceWith); + } else { + UnicodeString result; + UnicodeString source = targetString; + + while (firstQuoteIndex >= 0) { + int32_t secondQuoteIndex = source.indexOf(u'\'', firstQuoteIndex + 1); + if (secondQuoteIndex == -1) { + secondQuoteIndex = source.length() - 1; + } + + UnicodeString unquotedText(source, 0, firstQuoteIndex); + UnicodeString quotedText(source, firstQuoteIndex, secondQuoteIndex - firstQuoteIndex + 1); + + unquotedText.findAndReplace(strToReplace, strToReplaceWith); + result += unquotedText; + result += quotedText; + + source.remove(0, secondQuoteIndex + 1); + firstQuoteIndex = source.indexOf(u'\''); + } + source.findAndReplace(strToReplace, strToReplaceWith); + result += source; + targetString = result; + } +} + void diff --git a/icu4c/source/i18n/unicode/dtitvfmt.h b/icu4c/source/i18n/unicode/dtitvfmt.h index c997c31afc6..f45e68038ac 100644 --- a/icu4c/source/i18n/unicode/dtitvfmt.h +++ b/icu4c/source/i18n/unicode/dtitvfmt.h @@ -1037,6 +1037,17 @@ private: UBool 
suppressDayPeriodField, UnicodeString& adjustedIntervalPattern); + /** + * Does the same thing as UnicodeString::findAndReplace(), except that it won't perform + * the substitution inside quoted literal text. + * @param targetString The string to perform the find-replace operation on. + * @param strToReplace The string to search for and replace in the target string. + * @param strToReplaceWith The string to substitute in wherever `strToReplace` was found. + */ + static void U_EXPORT2 findReplaceInPattern(UnicodeString& targetString, + const UnicodeString& strToReplace, + const UnicodeString& strToReplaceWith); + /** * Concat a single date pattern with a time interval pattern, * set it into the intervalPatterns, while field is time field. diff --git a/icu4c/source/test/intltest/dtifmtts.cpp b/icu4c/source/test/intltest/dtifmtts.cpp index 878eca1291e..22a1cb3bcac 100644 --- a/icu4c/source/test/intltest/dtifmtts.cpp +++ b/icu4c/source/test/intltest/dtifmtts.cpp @@ -1166,6 +1166,9 @@ void DateIntervalFormatTest::testHourMetacharacters() { "en_GB", "CE 2010 09 27 00:00:00", "CE 2010 09 27 11:00:00", "kk", "24\\u201311", "en_GB", "CE 2010 09 27 00:00:00", "CE 2010 09 27 12:00:00", "kk", "24\\u201312", "en_GB", "CE 2010 09 27 00:00:00", "CE 2010 09 27 13:00:00", "kk", "24\\u201313", + + // regression test for ICU-21343 + "de", "CE 2010 09 27 01:00:00", "CE 2010 09 27 10:00:00", "KK", "1 \\u2013 10 Uhr AM", }; expect(DATA, UPRV_LENGTHOF(DATA)); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java index 2b4e8599514..4a59b01e84b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java @@ -2105,24 +2105,24 @@ public class DateIntervalFormat extends UFormat { DateIntervalInfo.parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); if (suppressDayPeriodField) { if 
(bestMatchIntervalPattern.indexOf(" a") != -1) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace(" a", ""); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, " a", ""); } else if (bestMatchIntervalPattern.indexOf("a ") != -1) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace("a ", ""); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, "a ", ""); } - bestMatchIntervalPattern = bestMatchIntervalPattern.replace("a", ""); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, "a", ""); } if ( differenceInfo == 2 ) { if (inputSkeleton.indexOf('z') != -1) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace('v', 'z'); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, "v", "z"); } if (inputSkeleton.indexOf('K') != -1) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace('h', 'K'); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, "h", "K"); } if (inputSkeleton.indexOf('k') != -1) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace('H', 'k'); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, "H", "k"); } if (inputSkeleton.indexOf('b') != -1) { - bestMatchIntervalPattern = bestMatchIntervalPattern.replace('a', 'b'); + bestMatchIntervalPattern = findReplaceInPattern(bestMatchIntervalPattern, "a", "b"); } } if (bestMatchIntervalPattern.indexOf('a') != -1 && bestMatchSkeletonFieldWidth['a' - PATTERN_CHAR_BASE] == 0) { @@ -2197,6 +2197,43 @@ public class DateIntervalFormat extends UFormat { } return adjustedPtn.toString(); } + + /** + * Does the same thing as String.replace(), except that it won't perform the + * substitution inside quoted literal text. + * @param targetString The string to perform the find-replace operation on. + * @param strToReplace The string to search for and replace in the target string. 
+ * @param strToReplaceWith The string to substitute in wherever `strToReplace` was found. + */ + private static String findReplaceInPattern(String targetString, + String strToReplace, + String strToReplaceWith) { + int firstQuoteIndex = targetString.indexOf("\'"); + if (firstQuoteIndex < 0) { + return targetString.replace(strToReplace, strToReplaceWith); + } else { + StringBuilder result = new StringBuilder(); + String source = targetString; + + while (firstQuoteIndex >= 0) { + int secondQuoteIndex = source.indexOf("\'", firstQuoteIndex + 1); + if (secondQuoteIndex < 0) { + secondQuoteIndex = source.length() - 1; + } + + String unquotedText = source.substring(0, firstQuoteIndex); + String quotedText = source.substring(firstQuoteIndex, secondQuoteIndex + 1); + + result.append(unquotedText.replace(strToReplace, strToReplaceWith)); + result.append(quotedText); + + source = source.substring(secondQuoteIndex + 1); + firstQuoteIndex = source.indexOf("\'"); + } + result.append(source.replace(strToReplace, strToReplaceWith)); + return result.toString(); + } + } /* diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java index d9d9522c275..8d3ad1f5be5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java @@ -811,6 +811,9 @@ public class DateIntervalFormatTest extends TestFmwk { "en_GB", "CE 2010 09 27 00:00:00", "CE 2010 09 27 11:00:00", "kk", "24\\u201311", "en_GB", "CE 2010 09 27 00:00:00", "CE 2010 09 27 12:00:00", "kk", "24\\u201312", "en_GB", "CE 2010 09 27 00:00:00", "CE 2010 09 27 13:00:00", "kk", "24\\u201313", + + // regression test for ICU-21343 + "de", "CE 2010 09 27 01:00:00", "CE 2010 09 27 10:00:00", "KK", "1 \\u2013 10 Uhr AM", }; expect(DATA, DATA.length); }