ICU-21650 Modified DateTimePatternGenerator to handle skeletons with "e" and "c" better (allowing support for numeric day-of-week abbreviations).
This commit is contained in:
Rich Gillam 2021-07-06 13:56:15 -07:00
parent cfffa2b83b
commit ac51a1921f
5 changed files with 143 additions and 2 deletions

View file

@ -1648,7 +1648,11 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern,
(typeValue==UDATPG_MINUTE_FIELD && (options & UDATPG_MATCH_MINUTE_FIELD_LENGTH)==0) ||
(typeValue==UDATPG_SECOND_FIELD && (options & UDATPG_MATCH_SECOND_FIELD_LENGTH)==0) ) {
adjFieldLen = field.length();
} else if (specifiedSkeleton) {
} else if (specifiedSkeleton && reqFieldChar != LOW_C && reqFieldChar != LOW_E) {
// (we skip this section for 'c' and 'e' because unlike the other characters considered in this function,
// they have no minimum field length-- 'E' and 'EE' are equivalent to 'EEE', but 'e' and 'ee' are not
// equivalent to 'eee' -- see the entries for "week day" in
// https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table for more info)
int32_t skelFieldLen = specifiedSkeleton->original.getFieldLength(typeValue);
UBool patFieldIsNumeric = (row->type > 0);
UBool skelFieldIsNumeric = (specifiedSkeleton->type[typeValue] > 0);
@ -1663,6 +1667,9 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern,
&& (typeValue!= UDATPG_YEAR_FIELD || reqFieldChar==CAP_Y))
? reqFieldChar
: field.charAt(0);
if (c == CAP_E && adjFieldLen < 3) {
c = LOW_E;
}
if (typeValue == UDATPG_HOUR_FIELD && fDefaultHourFormatChar != 0) {
// The adjustment here is required to match spec (https://www.unicode.org/reports/tr35/tr35-dates.html#dfst-hour).
// It is necessary to match the hour-cycle preferred by the Locale.

View file

@ -45,6 +45,7 @@ void IntlTestDateTimePatternGeneratorAPI::runIndexedTest( int32_t index, UBool e
TESTCASE(9, testFallbackWithDefaultRootLocale);
TESTCASE(10, testGetDefaultHourCycle_OnEmptyInstance);
TESTCASE(11, test_jConsistencyOddLocales);
TESTCASE(12, testBestPattern);
default: name = ""; break;
}
}
@ -1557,4 +1558,69 @@ void IntlTestDateTimePatternGeneratorAPI::test_jConsistencyOddLocales() { // ICU
}
}
void IntlTestDateTimePatternGeneratorAPI::testBestPattern() {
// generic test for DateTimePatternGenerator::getBestPattern() that can be used to test multiple
// bugs in the resource data
static const char* testCases[] = {
// ICU-21650: (See the "week day" section of https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
// for a full explanation of why this is the desired behavior)
// if the user asks for E, the minimum field length is 3, but if he asks for c or e, it's 1
"en_US", "E", "ccc",
"en_US", "c", "c",
"en_US", "e", "c",
"en_US", "EE", "ccc",
"en_US", "cc", "cc",
"en_US", "ee", "cc",
"en_US", "EEE", "ccc",
"en_US", "ccc", "ccc",
"en_US", "eee", "ccc",
// and if the user asked for c or e and the field length is 1 or 2, the output pattern should contain
// e instead of E (e supports numeric abbreviations; E doesn't)
"en_US", "yMEd", "EEE, M/d/y",
"en_US", "yMcd", "e, M/d/y",
"en_US", "yMed", "e, M/d/y",
"en_US", "yMMEEdd", "EEE, MM/dd/y",
"en_US", "yMMccdd", "ee, MM/dd/y",
"en_US", "yMMeedd", "ee, MM/dd/y",
"en_US", "yMMMEd", "EEE, MMM d, y",
"en_US", "yMMMcccd", "EEE, MMM d, y",
"en_US", "yMMMeeed", "EEE, MMM d, y",
"en_US", "yMMMMEEEEd", "EEEE, MMMM d, y",
"en_US", "yMMMMccccd", "EEEE, MMMM d, y",
"en_US", "yMMMMeeeed", "EEEE, MMMM d, y",
};
for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); i += 3) {
const char* localeID(testCases[i]);
const char* skeleton(testCases[i + 1]);
const char* expectedPattern(testCases[i + 2]);
UErrorCode err = U_ZERO_ERROR;
UnicodeString actualPattern;
if (uprv_strcmp(skeleton, "full") != 0) {
LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstance(localeID, err), err);
actualPattern = dtpg->getBestPattern(UnicodeString(skeleton), err);
} else {
LocalPointer<DateFormat> df(DateFormat::createDateInstance(DateFormat::kFull, localeID));
SimpleDateFormat* sdf = dynamic_cast<SimpleDateFormat*>(df.getAlias());
if (sdf != NULL) {
sdf->toPattern(actualPattern);
}
}
if (U_FAILURE(err)) {
errln("Failure for test case %s/%s: %s", localeID, skeleton, u_errorName(err));
} else {
char failureMessage[100];
strcpy(failureMessage, "Wrong result for test case ");
strcat(failureMessage, localeID);
strcat(failureMessage, "/");
strcat(failureMessage, skeleton);
assertEquals(failureMessage, UnicodeString(expectedPattern), actualPattern);
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -37,6 +37,7 @@ private:
void testFallbackWithDefaultRootLocale();
void testGetDefaultHourCycle_OnEmptyInstance();
void test_jConsistencyOddLocales();
void testBestPattern();
};
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -2200,7 +2200,11 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
(type == MINUTE && (options & MATCH_MINUTE_FIELD_LENGTH)==0) ||
(type == SECOND && (options & MATCH_SECOND_FIELD_LENGTH)==0) ) {
adjFieldLen = fieldBuilder.length();
} else if (matcherWithSkeleton != null) {
} else if (matcherWithSkeleton != null && reqFieldChar != 'c' && reqFieldChar != 'e') {
// (we skip this section for 'c' and 'e' because unlike the other characters considered in this function,
// they have no minimum field length-- 'E' and 'EE' are equivalent to 'EEE', but 'e' and 'ee' are not
// equivalent to 'eee' -- see the entries for "week day" in
// https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table for more info)
int skelFieldLen = matcherWithSkeleton.original.getFieldLength(type);
boolean patFieldIsNumeric = variableField.isNumeric();
boolean skelFieldIsNumeric = matcherWithSkeleton.fieldIsNumeric(type);
@ -2217,6 +2221,12 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
&& (type != YEAR || reqFieldChar=='Y'))
? reqFieldChar
: fieldBuilder.charAt(0);
if (c == 'E' && adjFieldLen < 3) {
// see https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table:
// If we want a numeric day-of-week field, we have to use 'e'-- 'E' doesn't support
// numeric day-of-week abbreviations
c = 'e';
}
if (type == HOUR) {
// The adjustment here is required to match spec (https://www.unicode.org/reports/tr35/tr35-dates.html#dfst-hour).
// It is necessary to match the hour-cycle preferred by the Locale.

View file

@ -1793,4 +1793,61 @@ public class DateTimeGeneratorTest extends TestFmwk {
}
}
}
/**
 * Table-driven test for DateTimePatternGenerator.getBestPattern().
 *
 * The table is a flat list of (locale ID, skeleton, expected pattern) triples.
 * For each triple the skeleton is passed to getBestPattern() for that locale and
 * the result is compared with the expected pattern. The special skeleton "full"
 * instead compares against the pattern of the locale's full-length date format.
 */
@Test
public void testBestPattern() {
    // generic test for DateTimePatternGenerator::getBestPattern() that can be used to test multiple
    // bugs in the resource data
    String[] testCases = {
        // ICU-21650: (See the "week day" section of https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
        // for a full explanation of why this is the desired behavior)
        // if the user asks for E, the minimum field length is 3, but if they ask for c or e, it's 1
        "en_US", "E",   "ccc",
        "en_US", "c",   "c",
        "en_US", "e",   "c",
        "en_US", "EE",  "ccc",
        "en_US", "cc",  "cc",
        "en_US", "ee",  "cc",
        "en_US", "EEE", "ccc",
        "en_US", "ccc", "ccc",
        "en_US", "eee", "ccc",
        // and if the user asked for c or e and the field length is 1 or 2, the output pattern should contain
        // e instead of E (e supports numeric abbreviations; E doesn't)
        "en_US", "yMEd",       "EEE, M/d/y",
        "en_US", "yMcd",       "e, M/d/y",
        "en_US", "yMed",       "e, M/d/y",
        "en_US", "yMMEEdd",    "EEE, MM/dd/y",
        "en_US", "yMMccdd",    "ee, MM/dd/y",
        "en_US", "yMMeedd",    "ee, MM/dd/y",
        "en_US", "yMMMEd",     "EEE, MMM d, y",
        "en_US", "yMMMcccd",   "EEE, MMM d, y",
        "en_US", "yMMMeeed",   "EEE, MMM d, y",
        "en_US", "yMMMMEEEEd", "EEEE, MMMM d, y",
        "en_US", "yMMMMccccd", "EEEE, MMMM d, y",
        "en_US", "yMMMMeeeed", "EEEE, MMMM d, y",
    };

    // Each test case occupies three consecutive entries of the table.
    for (int i = 0; i < testCases.length; i += 3) {
        String localeID = testCases[i];
        ULocale locale = new ULocale(localeID);
        String skeleton = testCases[i + 1];
        String expectedPattern = testCases[i + 2];

        String actualPattern = null;
        if (!skeleton.equals("full")) {
            DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale);
            actualPattern = dtpg.getBestPattern(skeleton);
        } else {
            DateFormat df = DateFormat.getDateInstance(DateFormat.FULL, locale);
            // Use instanceof rather than an unconditional cast: a formatter that is not a
            // SimpleDateFormat then leaves actualPattern null, so the assertion below fails
            // with the test-case message instead of the loop dying on a ClassCastException.
            if (df instanceof SimpleDateFormat) {
                actualPattern = ((SimpleDateFormat) df).toPattern();
            }
        }
        assertEquals("Wrong result for test case " + localeID + "/" + skeleton, expectedPattern, actualPattern);
    }
}
}