ICU-20383 DateTimePatternGen should map 'j' using timeData, not short time cycle (#419)

This commit is contained in:
Peter Edberg 2019-02-11 22:22:08 -08:00 committed by pedberg-icu
parent f0e374cbb1
commit 2b82c096e0
5 changed files with 239 additions and 66 deletions

View file

@ -471,9 +471,14 @@ enum AllowedHourFormat{
ALLOWED_HOUR_FORMAT_UNKNOWN = -1,
ALLOWED_HOUR_FORMAT_h,
ALLOWED_HOUR_FORMAT_H,
ALLOWED_HOUR_FORMAT_K, // Added ICU-20383, used by JP
ALLOWED_HOUR_FORMAT_k, // Added ICU-20383, not currently used
ALLOWED_HOUR_FORMAT_hb,
ALLOWED_HOUR_FORMAT_Hb,
ALLOWED_HOUR_FORMAT_hB,
ALLOWED_HOUR_FORMAT_Kb, // Added ICU-20383, not currently used
ALLOWED_HOUR_FORMAT_KB, // Added ICU-20383, not currently used
// ICU-20383 The following are unlikely and not currently used
ALLOWED_HOUR_FORMAT_Hb,
ALLOWED_HOUR_FORMAT_HB
};
@ -511,36 +516,55 @@ struct AllowedHourFormatsSink : public ResourceSink {
const char *regionOrLocale = key;
ResourceTable formatList = value.getTable(errorCode);
if (U_FAILURE(errorCode)) { return; }
// below we construct a list[] that has an entry for the "preferred" value at [0],
// followed by 1 or more entries for the "allowed" values, terminated with an
// entry for ALLOWED_HOUR_FORMAT_UNKNOWN (not included in length below)
LocalMemory<int32_t> list;
int32_t length = 0;
int32_t preferredFormat = ALLOWED_HOUR_FORMAT_UNKNOWN;
for (int32_t j = 0; formatList.getKeyAndValue(j, key, value); ++j) {
if (uprv_strcmp(key, "allowed") == 0) { // Ignore "preferred" list.
LocalMemory<int32_t> list;
int32_t length;
if (uprv_strcmp(key, "allowed") == 0) {
if (value.getType() == URES_STRING) {
if (list.allocateInsteadAndReset(2) == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
list[0] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
length = 1;
}
else {
ResourceArray allowedFormats = value.getArray(errorCode);
length = allowedFormats.getSize();
length = 2; // 1 preferred to add later, 1 allowed to add now
if (list.allocateInsteadAndReset(length + 1) == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
for (int32_t k = 0; k < length; ++k) {
allowedFormats.getValue(k, value);
list[1] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
}
else {
ResourceArray allowedFormats = value.getArray(errorCode);
length = allowedFormats.getSize() + 1; // 1 preferred, getSize allowed
if (list.allocateInsteadAndReset(length + 1) == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
for (int32_t k = 1; k < length; ++k) {
allowedFormats.getValue(k-1, value);
list[k] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
}
}
list[length] = ALLOWED_HOUR_FORMAT_UNKNOWN;
uhash_put(localeToAllowedHourFormatsMap,
const_cast<char *>(regionOrLocale), list.orphan(), &errorCode);
if (U_FAILURE(errorCode)) { return; }
} else if (uprv_strcmp(key, "preferred") == 0) {
preferredFormat = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
}
}
if (length > 1) {
list[0] = (preferredFormat!=ALLOWED_HOUR_FORMAT_UNKNOWN)? preferredFormat: list[1];
} else {
// fallback handling for missing data
length = 2; // 1 preferred, 1 allowed
if (list.allocateInsteadAndReset(length + 1) == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
list[0] = (preferredFormat!=ALLOWED_HOUR_FORMAT_UNKNOWN)? preferredFormat: ALLOWED_HOUR_FORMAT_H;
list[1] = list[0];
}
list[length] = ALLOWED_HOUR_FORMAT_UNKNOWN;
// At this point list[] will have at least two non-ALLOWED_HOUR_FORMAT_UNKNOWN entries,
// followed by ALLOWED_HOUR_FORMAT_UNKNOWN.
uhash_put(localeToAllowedHourFormatsMap, const_cast<char *>(regionOrLocale), list.orphan(), &errorCode);
if (U_FAILURE(errorCode)) { return; }
}
}
@ -548,10 +572,14 @@ struct AllowedHourFormatsSink : public ResourceSink {
if (s.length() == 1) {
if (s[0] == LOW_H) { return ALLOWED_HOUR_FORMAT_h; }
if (s[0] == CAP_H) { return ALLOWED_HOUR_FORMAT_H; }
if (s[0] == CAP_K) { return ALLOWED_HOUR_FORMAT_K; }
if (s[0] == LOW_K) { return ALLOWED_HOUR_FORMAT_k; }
} else if (s.length() == 2) {
if (s[0] == LOW_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_hb; }
if (s[0] == CAP_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Hb; }
if (s[0] == LOW_H && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_hB; }
if (s[0] == CAP_K && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Kb; }
if (s[0] == CAP_K && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_KB; }
if (s[0] == CAP_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Hb; }
if (s[0] == CAP_H && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_HB; }
}
@ -614,13 +642,23 @@ void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErro
}
if (allowedFormats != nullptr) { // Lookup is successful
// Here allowedFormats points to a list consisting of key for preferredFormat,
// followed by one or more keys for allowedFormats, then followed by ALLOWED_HOUR_FORMAT_UNKNOWN.
switch (allowedFormats[0]) {
case ALLOWED_HOUR_FORMAT_h: fDefaultHourFormatChar = LOW_H; break;
case ALLOWED_HOUR_FORMAT_H: fDefaultHourFormatChar = CAP_H; break;
case ALLOWED_HOUR_FORMAT_K: fDefaultHourFormatChar = CAP_K; break;
case ALLOWED_HOUR_FORMAT_k: fDefaultHourFormatChar = LOW_K; break;
default: fDefaultHourFormatChar = CAP_H; break;
}
for (int32_t i = 0; i < UPRV_LENGTHOF(fAllowedHourFormats); ++i) {
fAllowedHourFormats[i] = allowedFormats[i];
if (allowedFormats[i] == ALLOWED_HOUR_FORMAT_UNKNOWN) {
fAllowedHourFormats[i] = allowedFormats[i + 1];
if (fAllowedHourFormats[i] == ALLOWED_HOUR_FORMAT_UNKNOWN) {
break;
}
}
} else { // Lookup failed, twice
fDefaultHourFormatChar = CAP_H;
fAllowedHourFormats[0] = ALLOWED_HOUR_FORMAT_H;
fAllowedHourFormats[1] = ALLOWED_HOUR_FORMAT_UNKNOWN;
}
@ -750,8 +788,6 @@ DateTimePatternGenerator::hackTimes(const UnicodeString& hackPattern, UErrorCode
#define ULOC_LOCALE_IDENTIFIER_CAPACITY (ULOC_FULLNAME_CAPACITY + 1 + ULOC_KEYWORD_AND_VALUES_CAPACITY)
static const UChar hourFormatChars[] = { CAP_H, LOW_H, CAP_K, LOW_K, 0 }; // HhKk, the hour format characters
void
DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& destination, UErrorCode& err) {
destination.clear().append(DT_DateTimeGregorianTag, -1, err); // initial default
@ -791,18 +827,9 @@ void
DateTimePatternGenerator::consumeShortTimePattern(const UnicodeString& shortTimePattern,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
// set fDefaultHourFormatChar to the hour format character from this pattern
int32_t tfIdx, tfLen = shortTimePattern.length();
UBool ignoreChars = FALSE;
for (tfIdx = 0; tfIdx < tfLen; tfIdx++) {
UChar tfChar = shortTimePattern.charAt(tfIdx);
if ( tfChar == SINGLE_QUOTE ) {
ignoreChars = !ignoreChars; // toggle (handle quoted literals & '' for single quote)
} else if ( !ignoreChars && u_strchr(hourFormatChars, tfChar) != nullptr ) {
fDefaultHourFormatChar = tfChar;
break;
}
}
// ICU-20383 No longer set fDefaultHourFormatChar to the hour format character from
// this pattern; instead it is set from localeToAllowedHourFormatsMap which now
// includes entries for both preferred and allowed formats.
// HACK for hh:ss
hackTimes(shortTimePattern, status);
@ -1140,20 +1167,24 @@ DateTimePatternGenerator::mapSkeletonMetacharacters(const UnicodeString& pattern
if (patChr == LOW_J) {
hourChar = fDefaultHourFormatChar;
} else {
AllowedHourFormat preferred;
AllowedHourFormat bestAllowed;
if (fAllowedHourFormats[0] != ALLOWED_HOUR_FORMAT_UNKNOWN) {
preferred = (AllowedHourFormat)fAllowedHourFormats[0];
bestAllowed = (AllowedHourFormat)fAllowedHourFormats[0];
} else {
status = U_INVALID_FORMAT_ERROR;
return UnicodeString();
}
if (preferred == ALLOWED_HOUR_FORMAT_H || preferred == ALLOWED_HOUR_FORMAT_HB || preferred == ALLOWED_HOUR_FORMAT_Hb) {
if (bestAllowed == ALLOWED_HOUR_FORMAT_H || bestAllowed == ALLOWED_HOUR_FORMAT_HB || bestAllowed == ALLOWED_HOUR_FORMAT_Hb) {
hourChar = CAP_H;
} else if (bestAllowed == ALLOWED_HOUR_FORMAT_K || bestAllowed == ALLOWED_HOUR_FORMAT_KB || bestAllowed == ALLOWED_HOUR_FORMAT_Kb) {
hourChar = CAP_K;
} else if (bestAllowed == ALLOWED_HOUR_FORMAT_k) {
hourChar = LOW_K;
}
// in #13183 just add b/B to skeleton, no longer need to set special flags
if (preferred == ALLOWED_HOUR_FORMAT_HB || preferred == ALLOWED_HOUR_FORMAT_hB) {
if (bestAllowed == ALLOWED_HOUR_FORMAT_HB || bestAllowed == ALLOWED_HOUR_FORMAT_hB || bestAllowed == ALLOWED_HOUR_FORMAT_KB) {
dayPeriodChar = CAP_B;
} else if (preferred == ALLOWED_HOUR_FORMAT_Hb || preferred == ALLOWED_HOUR_FORMAT_hb) {
} else if (bestAllowed == ALLOWED_HOUR_FORMAT_Hb || bestAllowed == ALLOWED_HOUR_FORMAT_hb || bestAllowed == ALLOWED_HOUR_FORMAT_Kb) {
dayPeriodChar = LOW_B;
}
}

View file

@ -19,7 +19,9 @@
#include "unicode/dtfmtsym.h"
#include "unicode/dtptngen.h"
#include "unicode/ustring.h"
#include "unicode/datefmt.h"
#include "cmemory.h"
#include "cstring.h"
#include "loctest.h"
@ -38,6 +40,7 @@ void IntlTestDateTimePatternGeneratorAPI::runIndexedTest( int32_t index, UBool e
TESTCASE(4, testC);
TESTCASE(5, testSkeletonsWithDayPeriods);
TESTCASE(6, testGetFieldDisplayNames);
TESTCASE(7, testJjMapping);
default: name = ""; break;
}
}
@ -1259,4 +1262,88 @@ void IntlTestDateTimePatternGeneratorAPI::testGetFieldDisplayNames() {
}
}
// Hour-cycle pattern letters 'H' (0x48), 'h' (0x68), 'K' (0x4B), 'k' (0x6B) as UChar
// code units, followed by a 0 terminator so the array can be walked until the sentinel.
static const UChar timeCycleChars[] = { (UChar)0x0048, (UChar)0x0068, (UChar)0x004B, (UChar)0x006B, (UChar)0 };
/**
 * Checks how the skeleton metacharacter 'j' is resolved to a concrete hour-cycle letter.
 *
 * Part 1: for locale "de_US" the pattern produced for skeleton "j" must contain 'h'.
 * Part 2: for every locale returned by DateFormat::getAvailableLocales (minus a list of
 *         known-issue locales), the first hour-cycle letter from timeCycleChars found in the
 *         'j' pattern's skeleton must also occur in the skeleton of the locale's short time
 *         pattern.
 *
 * Failures are reported through errln/dataerrln; the function returns nothing.
 */
void IntlTestDateTimePatternGeneratorAPI::testJjMapping() {
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString jSkeleton("j");
    // First test that j maps correctly by region in a locale for which we do not have data.
    {
        const char* testLocaleID = "de_US"; // short patterns from fallback locale "de" have "HH"
        Locale testLocale(testLocaleID);
        LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstance(testLocale, status));
        if (U_FAILURE(status)) {
            dataerrln("FAIL: DateTimePatternGenerator::createInstance failed for locale %s: %s", testLocaleID, u_errorName(status));
        } else {
            UnicodeString jPattern = dtpg->getBestPattern(jSkeleton, UDATPG_MATCH_ALL_FIELDS_LENGTH, status); // get pattern with h e.g. "h 'Uhr' a"
            if (U_FAILURE(status)) {
                errln("FAIL: DateTimePatternGenerator::getBestPattern locale %s, pattern j: %s", testLocaleID, u_errorName(status));
            } else {
                UnicodeString jPatSkeleton = DateTimePatternGenerator::staticGetSkeleton(jPattern, status); // strip literals, get e.g. "ah"
                if (U_FAILURE(status)) {
                    errln("FAIL: DateTimePatternGenerator::staticGetSkeleton locale %s: %s", testLocaleID, u_errorName(status));
                } else if (jPatSkeleton.indexOf(u'h') < 0) { // expect US preferred cycle 'h', not H or other cycle
                    errln("ERROR: DateTimePatternGenerator::getBestPattern locale %s, pattern j did not use 'h'", testLocaleID);
                }
            }
        }
    }

    // Next test that in all available Locales, the actual short time pattern uses the same cycle as produced by 'j'
    int32_t locCount;
    const Locale* localePtr = DateFormat::getAvailableLocales(locCount);
    for (; locCount-- > 0; localePtr++) {
        const char* localeID = localePtr->getName();
        // logKnownIssue is evaluated first for every locale, exactly as before, so its
        // reporting side effects are unchanged.
        if ( logKnownIssue("cldrbug:11853", "locales with known timeData vs short time format mismatch") && ( uprv_strcmp(localeID,"af_NA")==0
                || uprv_strcmp(localeID,"ar_001")==0 || uprv_strcmp(localeID,"ar_SA")==0 || uprv_strcmp(localeID,"ckb_IR")==0
                || uprv_strcmp(localeID,"en_001")==0 || uprv_strcmp(localeID,"en_BI")==0 || uprv_strcmp(localeID,"en_NG")==0
                || uprv_strcmp(localeID,"fr_CA")==0 || uprv_strcmp(localeID,"ha_GH")==0 || uprv_strncmp(localeID,"lkt",3)==0 ) ) {
            continue;
        }
        status = U_ZERO_ERROR;
        LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstance(*localePtr, status));
        if (U_FAILURE(status)) {
            dataerrln("FAIL: DateTimePatternGenerator::createInstance failed for locale %s: %s", localeID, u_errorName(status));
            continue;
        }
        // This createTimeInstance overload takes no UErrorCode, so `status` cannot report its
        // failure; a null formatter is the only failure signal available here.
        LocalPointer<DateFormat> dfmt(DateFormat::createTimeInstance(DateFormat::kShort, *localePtr));
        if (dfmt.isNull()) {
            dataerrln("FAIL: DateFormat::createTimeInstance kShort failed for locale %s", localeID);
            continue;
        }
        // Only SimpleDateFormat exposes toPattern(); silently skip any other formatter type.
        const SimpleDateFormat* sdfmt = dynamic_cast<const SimpleDateFormat*>(dfmt.getAlias());
        if (sdfmt == nullptr) {
            continue;
        }
        UnicodeString shortPattern;
        shortPattern = sdfmt->toPattern(shortPattern);
        UnicodeString jPattern = dtpg->getBestPattern(jSkeleton, status);
        if (U_FAILURE(status)) {
            errln("FAIL: DateTimePatternGenerator::getBestPattern locale %s, pattern j: %s", localeID, u_errorName(status));
            continue;
        }
        // Now check that shortPattern and jPattern use the same hour cycle
        UnicodeString jPatSkeleton = DateTimePatternGenerator::staticGetSkeleton(jPattern, status);
        UnicodeString shortPatSkeleton = DateTimePatternGenerator::staticGetSkeleton(shortPattern, status);
        if (U_FAILURE(status)) {
            errln("FAIL: DateTimePatternGenerator::staticGetSkeleton locale %s: %s", localeID, u_errorName(status));
            continue;
        }
        // Walk the 0-terminated list of hour-cycle letters; only the first one present in the
        // 'j' skeleton is compared against the short-pattern skeleton.
        for (const UChar* charPtr = timeCycleChars; *charPtr != (UChar)0; charPtr++) {
            if (jPatSkeleton.indexOf(*charPtr) < 0) {
                continue;
            }
            if (shortPatSkeleton.indexOf(*charPtr) < 0) {
                char jcBuf[2], spBuf[32];
                u_austrncpy(jcBuf, charPtr, 1);
                jcBuf[1] = 0;
                shortPattern.extract(0, shortPattern.length(), spBuf, static_cast<uint32_t>(sizeof(spBuf)));
                // extract() NUL-terminates only when the output fits; force termination so
                // the %s below can never read past the buffer for an unusually long pattern.
                spBuf[sizeof(spBuf) - 1] = 0;
                const char* dfmtCalType = (dfmt->getCalendar())->getType();
                errln("ERROR: locale %s, expected j resolved char %s to occur in short time pattern %s for %s", localeID, jcBuf, spBuf, dfmtCalType);
            }
            break;
        }
    }
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -32,6 +32,7 @@ private:
void testC();
void testSkeletonsWithDayPeriods();
void testGetFieldDisplayNames();
void testJjMapping();
};
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -178,21 +178,9 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
private void consumeShortTimePattern(String shortTimePattern, PatternInfo returnInfo) {
// keep this pattern to populate other time field
// combination patterns by hackTimes later in this method.
// use hour style in SHORT time pattern as the default
// hour style for the locale
FormatParser fp = new FormatParser();
fp.set(shortTimePattern);
List<Object> items = fp.getItems();
for (int idx = 0; idx < items.size(); idx++) {
Object item = items.get(idx);
if (item instanceof VariableField) {
VariableField fld = (VariableField)item;
if (fld.getType() == HOUR) {
defaultHourFormatChar = fld.toString().charAt(0);
break;
}
}
}
// ICU-20383 No longer set defaultHourFormatChar to the hour format character from
// this pattern; instead it is set from LOCALE_TO_ALLOWED_HOUR which now
// includes entries for both preferred and allowed formats.
// some languages didn't add mm:ss or HH:mm, so put in a hack to compute that from the short time.
hackTimes(returnInfo, shortTimePattern);
@ -359,11 +347,14 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
String[] list = LOCALE_TO_ALLOWED_HOUR.get(langCountry);
if (list == null) {
list = LOCALE_TO_ALLOWED_HOUR.get(country);
if (list == null) {
list = LAST_RESORT_ALLOWED_HOUR_FORMAT;
}
}
allowedHourFormats = list;
if (list != null) {
defaultHourFormatChar = list[0].charAt(0);
allowedHourFormats = Arrays.copyOfRange(list, 1, list.length-1);
} else {
allowedHourFormats = LAST_RESORT_ALLOWED_HOUR_FORMAT;
defaultHourFormatChar = allowedHourFormats[0].charAt(0);
}
}
private static class DayPeriodAllowedHoursSink extends UResource.Sink {
@ -379,11 +370,29 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
for (int i = 0; timeData.getKeyAndValue(i, key, value); ++i) {
String regionOrLocale = key.toString();
UResource.Table formatList = value.getTable();
String[] allowed = null;
String preferred = null;
for (int j = 0; formatList.getKeyAndValue(j, key, value); ++j) {
if (key.contentEquals("allowed")) { // Ignore "preferred" list.
tempMap.put(regionOrLocale, value.getStringArrayOrStringAsArray());
if (key.contentEquals("allowed")) {
allowed = value.getStringArrayOrStringAsArray();
} else if (key.contentEquals("preferred")) {
preferred = value.getString();
}
}
// below we construct a list[] that has an entry for the "preferred" value at [0],
// followed by 1 or more entries for the "allowed" values.
String[] list = null;
if (allowed!=null && allowed.length > 0) {
list = new String[allowed.length + 1];
list[0] = (preferred != null)? preferred: allowed[0];
System.arraycopy(allowed, 0, list, 1, allowed.length);
} else {
// fallback handling for missing data
list = new String[2];
list[0] = (preferred != null)? preferred: LAST_RESORT_ALLOWED_HOUR_FORMAT[0];
list[1] = list[0];
}
tempMap.put(regionOrLocale, list);
}
}
}
@ -623,10 +632,10 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
if (patChr == 'j') {
hourChar = defaultHourFormatChar;
} else { // patChr == 'C'
String preferred = allowedHourFormats[0];
hourChar = preferred.charAt(0);
String bestAllowed = allowedHourFormats[0];
hourChar = bestAllowed.charAt(0);
// in #13183 just add b/B to skeleton, no longer need to set special flags
char last = preferred.charAt(preferred.length()-1);
char last = bestAllowed.charAt(bestAllowed.length()-1);
if (last=='b' || last=='B') {
dayPeriodChar = last;
}

View file

@ -1668,4 +1668,49 @@ public class DateTimeGeneratorTest extends TestFmwk {
}
}
}
/**
 * Checks how the skeleton metacharacter 'j' is resolved to a concrete hour-cycle letter.
 *
 * Part 1: for locale "de_US" the pattern produced for skeleton "j" must contain 'h'.
 * Part 2: for every locale from DateFormat.getAvailableULocales() (minus a list of
 *         known-issue locales), the first of H/h/K/k found in the 'j' pattern's skeleton
 *         must also occur in the skeleton of the locale's short time pattern.
 */
@Test
public void testJjMapping() {
    final String jSkeleton = "j";
    final char[] timeCycleChars = { 'H', 'h', 'K', 'k' };
    // First test that j maps correctly by region in a locale for which we do not have data.
    {
        String testLocaleID = "de_US"; // short patterns from fallback locale "de" have "HH"
        ULocale testLocale = new ULocale(testLocaleID);
        DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(testLocale);
        String jPattern = dtpg.getBestPattern(jSkeleton);
        String jPatSkeleton = dtpg.getSkeleton(jPattern);
        if (jPatSkeleton.indexOf('h') < 0) { // expect US preferred cycle 'h', not H or other cycle
            errln("DateTimePatternGeneratorgetBestpattern locale " + testLocaleID + ", pattern j did not use 'h'");
        }
    }

    // Next test that in all available Locales, the actual short time pattern uses the same cycle as produced by 'j'
    ULocale[] locales = DateFormat.getAvailableULocales();
    for (ULocale locale: locales) {
        String localeID = locale.getName();
        // logKnownIssue is evaluated first for every locale, exactly as before.
        if ( logKnownIssue("cldrbug:11853", "locales with known timeData vs short time format mismatch") &&
                ( localeID.equals("af_NA") || localeID.equals("ar_001") || localeID.equals("ckb_IR") // ar_SA not a problem in J
                || localeID.equals("en_001") || localeID.equals("en_BI") || localeID.equals("en_NG")
                || localeID.equals("fr_CA") || localeID.equals("ha_GH") || localeID.startsWith("lkt") ) ) {
            continue;
        }
        DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale);
        DateFormat dfmt = DateFormat.getTimeInstance(DateFormat.SHORT, locale);
        // Only SimpleDateFormat exposes toPattern(); skip any other formatter type rather than
        // aborting the whole test with a ClassCastException (mirrors the C++ test).
        if (!(dfmt instanceof SimpleDateFormat)) {
            continue;
        }
        String shortPattern = ((SimpleDateFormat)dfmt).toPattern();
        String jPattern = dtpg.getBestPattern(jSkeleton);
        // Now check that shortPattern and jPattern use the same hour cycle
        String jPatSkeleton = dtpg.getSkeleton(jPattern);
        String shortPatSkeleton = dtpg.getSkeleton(shortPattern);
        for (char timeCycleChar: timeCycleChars) {
            if (jPatSkeleton.indexOf(timeCycleChar) < 0) {
                continue;
            }
            if (shortPatSkeleton.indexOf(timeCycleChar) < 0) {
                String dfmtCalType = dfmt.getCalendar().getType();
                errln("locale " + localeID + ", expected j resolved char " + timeCycleChar +
                        " to occur in short time pattern " + shortPattern + " for " + dfmtCalType);
            }
            // Only the first cycle letter present in the 'j' skeleton is compared, matching
            // the C++ test.
            break;
        }
    }
}
}