ICU-22452 API for resolving preferred IANA zone ID from a zone ID.

This commit is contained in:
yumaoka 2023-08-09 18:39:08 -04:00 committed by Yoshito Umaoka
parent d91859de21
commit 6338b704ff
23 changed files with 420 additions and 38 deletions

View file

@ -8,6 +8,9 @@ keyTypeData:table(nofallback){
}
tz:alias{"/ICUDATA/timezoneTypes/bcpTypeAlias/tz"}
}
ianaMap{
timezone:alias{"/ICUDATA/timezoneTypes/ianaMap/timezone"}
}
keyInfo{
deprecated{
kh{"true"}

View file

@ -5,14 +5,47 @@ timezoneTypes:table(nofallback){
bcpTypeAlias{
tz{
aqams{"nzakl"}
aukns{"auhba"}
caffs{"cawnp"}
camtr{"cator"}
canpg{"cator"}
capnt{"caiql"}
cathu{"cator"}
cayzf{"caedm"}
cnckg{"cnsha"}
cnhrb{"cnsha"}
cnkhg{"cnurc"}
gaza{"gazastrp"}
mxstis{"mxtij"}
uaozh{"uaiev"}
uauzh{"uaiev"}
umjon{"ushnl"}
usnavajo{"usden"}
}
}
ianaMap{
timezone{
"Africa:Asmera"{"Africa/Asmara"}
"America:Buenos_Aires"{"America/Argentina/Buenos_Aires"}
"America:Catamarca"{"America/Argentina/Catamarca"}
"America:Coral_Harbour"{"America/Atikokan"}
"America:Cordoba"{"America/Argentina/Cordoba"}
"America:Godthab"{"America/Nuuk"}
"America:Indianapolis"{"America/Indiana/Indianapolis"}
"America:Jujuy"{"America/Argentina/Jujuy"}
"America:Louisville"{"America/Kentucky/Louisville"}
"America:Mendoza"{"America/Argentina/Mendoza"}
"Asia:Calcutta"{"Asia/Kolkata"}
"Asia:Katmandu"{"Asia/Kathmandu"}
"Asia:Rangoon"{"Asia/Yangon"}
"Asia:Saigon"{"Asia/Ho_Chi_Minh"}
"Atlantic:Faeroe"{"Atlantic/Faroe"}
"Europe:Kiev"{"Europe/Kyiv"}
"Pacific:Enderbury"{"Pacific/Kanton"}
"Pacific:Ponape"{"Pacific/Pohnpei"}
"Pacific:Truk"{"Pacific/Chuuk"}
}
}
typeAlias{
timezone{
"Africa:Asmara"{"Africa/Asmera"}
@ -31,11 +64,17 @@ timezoneTypes:table(nofallback){
"America:Kentucky:Louisville"{"America/Louisville"}
"America:Knox_IN"{"America/Indiana/Knox"}
"America:Montreal"{"America/Toronto"}
"America:Nipigon"{"America/Toronto"}
"America:Nuuk"{"America/Godthab"}
"America:Pangnirtung"{"America/Iqaluit"}
"America:Porto_Acre"{"America/Rio_Branco"}
"America:Rainy_River"{"America/Winnipeg"}
"America:Rosario"{"America/Cordoba"}
"America:Santa_Isabel"{"America/Tijuana"}
"America:Shiprock"{"America/Denver"}
"America:Thunder_Bay"{"America/Toronto"}
"America:Virgin"{"America/St_Thomas"}
"America:Yellowknife"{"America/Edmonton"}
"Antarctica:South_Pole"{"Pacific/Auckland"}
"Asia:Ashkhabad"{"Asia/Ashgabat"}
"Asia:Chongqing"{"Asia/Shanghai"}
@ -57,6 +96,7 @@ timezoneTypes:table(nofallback){
"Atlantic:Jan_Mayen"{"Arctic/Longyearbyen"}
"Australia:ACT"{"Australia/Sydney"}
"Australia:Canberra"{"Australia/Sydney"}
"Australia:Currie"{"Australia/Hobart"}
"Australia:LHI"{"Australia/Lord_Howe"}
"Australia:NSW"{"Australia/Sydney"}
"Australia:North"{"Australia/Darwin"}
@ -92,10 +132,13 @@ timezoneTypes:table(nofallback){
"Europe:Kyiv"{"Europe/Kiev"}
"Europe:Nicosia"{"Asia/Nicosia"}
"Europe:Tiraspol"{"Europe/Chisinau"}
"Europe:Uzhgorod"{"Europe/Kiev"}
"Europe:Zaporozhye"{"Europe/Kiev"}
"Mexico:BajaNorte"{"America/Tijuana"}
"Mexico:BajaSur"{"America/Mazatlan"}
"Mexico:General"{"America/Mexico_City"}
"Pacific:Chuuk"{"Pacific/Truk"}
"Pacific:Johnston"{"Pacific/Honolulu"}
"Pacific:Kanton"{"Pacific/Enderbury"}
"Pacific:Pohnpei"{"Pacific/Ponape"}
"Pacific:Samoa"{"Pacific/Pago_Pago"}
@ -307,7 +350,6 @@ timezoneTypes:table(nofallback){
"America:Montserrat"{"msmni"}
"America:Nassau"{"bsnas"}
"America:New_York"{"usnyc"}
"America:Nipigon"{"canpg"}
"America:Nome"{"usome"}
"America:Noronha"{"brfen"}
"America:North_Dakota:Beulah"{"usxul"}
@ -315,7 +357,6 @@ timezoneTypes:table(nofallback){
"America:North_Dakota:New_Salem"{"usndnsl"}
"America:Ojinaga"{"mxoji"}
"America:Panama"{"papty"}
"America:Pangnirtung"{"capnt"}
"America:Paramaribo"{"srpbm"}
"America:Phoenix"{"usphx"}
"America:Port-au-Prince"{"htpap"}
@ -323,13 +364,11 @@ timezoneTypes:table(nofallback){
"America:Porto_Velho"{"brpvh"}
"America:Puerto_Rico"{"prsju"}
"America:Punta_Arenas"{"clpuq"}
"America:Rainy_River"{"caffs"}
"America:Rankin_Inlet"{"cayek"}
"America:Recife"{"brrec"}
"America:Regina"{"careg"}
"America:Resolute"{"careb"}
"America:Rio_Branco"{"brrbr"}
"America:Santa_Isabel"{"mxstis"}
"America:Santarem"{"brstm"}
"America:Santiago"{"clscl"}
"America:Santo_Domingo"{"dosdq"}
@ -345,7 +384,6 @@ timezoneTypes:table(nofallback){
"America:Swift_Current"{"cayyn"}
"America:Tegucigalpa"{"hntgu"}
"America:Thule"{"glthu"}
"America:Thunder_Bay"{"cathu"}
"America:Tijuana"{"mxtij"}
"America:Toronto"{"cator"}
"America:Tortola"{"vgtov"}
@ -353,7 +391,6 @@ timezoneTypes:table(nofallback){
"America:Whitehorse"{"cayxy"}
"America:Winnipeg"{"cawnp"}
"America:Yakutat"{"usyak"}
"America:Yellowknife"{"cayzf"}
"Antarctica:Casey"{"aqcas"}
"Antarctica:Davis"{"aqdav"}
"Antarctica:DumontDUrville"{"aqddu"}
@ -462,7 +499,6 @@ timezoneTypes:table(nofallback){
"Australia:Adelaide"{"auadl"}
"Australia:Brisbane"{"aubne"}
"Australia:Broken_Hill"{"aubhq"}
"Australia:Currie"{"aukns"}
"Australia:Darwin"{"audrw"}
"Australia:Eucla"{"aueuc"}
"Australia:Hobart"{"auhba"}
@ -550,7 +586,6 @@ timezoneTypes:table(nofallback){
"Europe:Tallinn"{"eetll"}
"Europe:Tirane"{"altia"}
"Europe:Ulyanovsk"{"ruuly"}
"Europe:Uzhgorod"{"uauzh"}
"Europe:Vaduz"{"livdz"}
"Europe:Vatican"{"vavat"}
"Europe:Vienna"{"atvie"}
@ -558,7 +593,6 @@ timezoneTypes:table(nofallback){
"Europe:Volgograd"{"ruvog"}
"Europe:Warsaw"{"plwaw"}
"Europe:Zagreb"{"hrzag"}
"Europe:Zaporozhye"{"uaozh"}
"Europe:Zurich"{"chzrh"}
"Indian:Antananarivo"{"mgtnr"}
"Indian:Chagos"{"iodga"}
@ -586,7 +620,6 @@ timezoneTypes:table(nofallback){
"Pacific:Guadalcanal"{"sbhir"}
"Pacific:Guam"{"gugum"}
"Pacific:Honolulu"{"ushnl"}
"Pacific:Johnston"{"umjon"}
"Pacific:Kiritimati"{"kicxi"}
"Pacific:Kosrae"{"fmksa"}
"Pacific:Kwajalein"{"mhkwa"}

View file

@ -1591,6 +1591,22 @@ TimeZone::getCanonicalID(const UnicodeString& id, UnicodeString& canonicalID, UB
return canonicalID;
}
/**
 * Resolves the preferred IANA time zone ID for the given zone ID.
 *
 * The output string is always cleared first. If status already indicates a
 * failure on entry, nothing else is done. "Etc/Unknown" is rejected with
 * U_ILLEGAL_ARGUMENT_ERROR and a bogus result; every other ID is delegated
 * to ZoneMeta::getIanaID().
 *
 * @param id     The input time zone ID.
 * @param ianaID Receives the result.
 * @param status Receives the status.
 * @return A reference to ianaID.
 */
UnicodeString&
TimeZone::getIanaID(const UnicodeString& id, UnicodeString& ianaID, UErrorCode& status)
{
    ianaID.remove();
    if (U_SUCCESS(status)) {
        const bool isUnknownZone =
            id.compare(ConstChar16Ptr(UNKNOWN_ZONE_ID), UNKNOWN_ZONE_ID_LENGTH) == 0;
        if (!isUnknownZone) {
            ZoneMeta::getIanaID(id, ianaID, status);
        } else {
            // "Etc/Unknown" is treated as an illegal argument for this API.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            ianaID.setToBogus();
        }
    }
    return ianaID;
}
UnicodeString&
TimeZone::getWindowsID(const UnicodeString& id, UnicodeString& winid, UErrorCode& status) {
winid.remove();

View file

@ -628,6 +628,16 @@ ucal_getCanonicalTimeZoneID(const char16_t* id, int32_t len,
return reslen;
}
U_DRAFT int32_t U_EXPORT2
ucal_getIanaTimeZoneID(const char16_t* id, int32_t len,
char16_t* result, int32_t resultCapacity, UErrorCode* status)
{
UnicodeString ianaID;
TimeZone::getIanaID(UnicodeString(id, len), ianaID, *status);
return ianaID.extract(result, resultCapacity, *status);
}
U_CAPI const char * U_EXPORT2
ucal_getType(const UCalendar *cal, UErrorCode* status)
{

View file

@ -444,6 +444,37 @@ public:
static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id,
UnicodeString& canonicalID, UBool& isSystemID, UErrorCode& status);
#ifndef U_HIDE_DRAFT_API
/**
* Returns the preferred time zone ID in the IANA time zone database for the given time zone ID.
* There are two types of preferred IDs. The first type is the one defined in the zone.tab file,
* such as "America/Los_Angeles". The second type is the one defined for zones not associated
* with a specific region, but not defined with "Link" syntax, such as "Etc/GMT+10".
*
* <p>Note: For most valid time zone IDs, this method returns the same ID as getCanonicalID().
* getCanonicalID() is based on canonical time zone IDs defined in Unicode CLDR.
* These canonical time zone IDs in CLDR were based on a very old version of the time zone database.
* In the IANA time zone database, some IDs have been updated since then. This API returns the newer
* time zone ID. For example, CLDR defines "Asia/Calcutta" as the canonical time zone ID. This
* method returns "Asia/Kolkata" instead.
* <p> "Etc/Unknown" is a special time zone ID defined by CLDR. There is no corresponding zone
* in the IANA time zone database. Therefore, this API sets U_ILLEGAL_ARGUMENT_ERROR when the
* input ID is "Etc/Unknown".
*
* @param id The input time zone ID.
* @param ianaID Receives the preferred time zone ID in the IANA time zone database. When
* the given time zone ID is not a known time zone ID, this method sets an
* invalid (bogus) string.
* @param status Receives the status. When the given time zone ID is not a known time zone
* ID, U_ILLEGAL_ARGUMENT_ERROR is set.
* @return A reference to the result.
* @draft ICU 74
*/
static UnicodeString& U_EXPORT2 getIanaID(const UnicodeString&id, UnicodeString& ianaID,
UErrorCode& status);
#endif // U_HIDE_DRAFT_API
/**
* Converts a system time zone ID to an equivalent Windows time zone ID. For example,
* Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles".

View file

@ -1393,6 +1393,38 @@ ucal_getTZDataVersion(UErrorCode* status);
U_CAPI int32_t U_EXPORT2
ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len,
UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status);
#ifndef U_HIDE_DRAFT_API
/**
* Returns the preferred time zone ID in the IANA time zone database for the given time zone ID.
* There are two types of preferred IDs. The first type is the one defined in the zone.tab file,
* such as "America/Los_Angeles". The second type is the one defined for zones not associated
* with a specific region, but not defined with "Link" syntax, such as "Etc/GMT+10".
*
* <p>Note: For most valid time zone IDs, this method returns the same ID as ucal_getCanonicalTimeZoneID().
* ucal_getCanonicalTimeZoneID() is based on canonical time zone IDs defined in Unicode CLDR.
* These canonical time zone IDs in CLDR were based on a very old version of the time zone database.
* In the IANA time zone database, some IDs have been updated since then. This API returns the newer
* time zone ID. For example, CLDR defines "Asia/Calcutta" as the canonical time zone ID. This
* method returns "Asia/Kolkata" instead.
* <p> "Etc/Unknown" is a special time zone ID defined by CLDR. There is no corresponding zone
* in the IANA time zone database. Therefore, this API sets U_ILLEGAL_ARGUMENT_ERROR when the
* input ID is "Etc/Unknown".
*
* @param id The input time zone ID.
* @param len The length of the input time zone ID, or -1 if the ID is NUL-terminated.
* @param result The buffer that receives the preferred time zone ID in the IANA time zone database.
* @param resultCapacity The capacity of the result buffer, counted in UChars (not bytes).
* @param status Receives the status. When the given time zone ID is not a known system time zone
* ID, U_ILLEGAL_ARGUMENT_ERROR is set.
* @return The result string length, not including the terminating null.
* @draft ICU 74
*/
U_CAPI int32_t U_EXPORT2
ucal_getIanaTimeZoneID(const UChar* id, int32_t len,
UChar* result, int32_t resultCapacity, UErrorCode* status);
#endif // U_HIDE_DRAFT_API
/**
* Get the resource keyword value string designating the calendar type for the UCalendar.
* @param cal The UCalendar to query.

View file

@ -120,6 +120,7 @@ static const char gKeyTypeData[] = "keyTypeData";
static const char gTypeAliasTag[] = "typeAlias";
static const char gTypeMapTag[] = "typeMap";
static const char gTimezoneTag[] = "timezone";
static const char gIanaMapTag[] = "ianaMap";
static const char gPrimaryZonesTag[] = "primaryZones";
@ -389,6 +390,35 @@ ZoneMeta::getCanonicalCLDRID(const TimeZone& tz) {
return getCanonicalCLDRID(tz.getID(tzID), status);
}
/**
 * Maps a zone ID to its primary IANA zone ID.
 *
 * The input is first resolved to its CLDR canonical ID. If that fails (error
 * status or no canonical ID), ianaID is set to bogus. Otherwise the canonical
 * ID, with '/' replaced by ':', is looked up under keyTypeData/ianaMap/timezone.
 * When the lookup succeeds the mapped value is used; when it fails the
 * canonical ID itself is used. Lookup failures are tracked in a local status
 * and are never reported through the caller's status.
 *
 * @param tzid   Zone ID to resolve.
 * @param ianaID Receives the result.
 * @param status Receives the status of the canonical ID resolution.
 * @return A reference to ianaID.
 */
UnicodeString& U_EXPORT2
ZoneMeta::getIanaID(const UnicodeString& tzid, UnicodeString& ianaID, UErrorCode& status) {
    // Step 1: resolve the CLDR canonical ID.
    const char16_t *cldrID = getCanonicalCLDRID(tzid, status);
    if (cldrID == nullptr || U_FAILURE(status)) {
        ianaID.setToBogus();
        return ianaID;
    }

    // Step 2: build the resource key; keys in the map use ':' where the ID uses '/'.
    UnicodeString resKey(cldrID);
    resKey.findAndReplace(UnicodeString("/"), UnicodeString(":"));
    char resKeyChars[ZID_KEY_MAX + 1];
    resKey.extract(0, resKey.length(), resKeyChars, sizeof(resKeyChars), US_INV);

    // Step 3: look the key up; any failure here simply means "no IANA override".
    UErrorCode lookupStatus = U_ZERO_ERROR;
    StackUResourceBundle bundle;
    ures_openDirectFillIn(bundle.getAlias(), nullptr, gKeyTypeData, &lookupStatus);
    ures_getByKey(bundle.getAlias(), gIanaMapTag, bundle.getAlias(), &lookupStatus);
    ures_getByKey(bundle.getAlias(), gTimezoneTag, bundle.getAlias(), &lookupStatus);
    int32_t mappedLen = 0;
    const char16_t *mappedID =
        ures_getStringByKey(bundle.getAlias(), resKeyChars, &mappedLen, &lookupStatus);

    const char16_t *chosenID = U_SUCCESS(lookupStatus) ? mappedID : cldrID;
    ianaID.setTo(true, chosenID, -1);
    return ianaID;
}
static void U_CALLCONV countryInfoVectorsInit(UErrorCode &status) {
// Create empty vectors
// No deleters for these UVectors, it's a reference to a resource bundle string.

View file

@ -54,6 +54,17 @@ public:
*/
static const char16_t* U_EXPORT2 getCanonicalCLDRID(const TimeZone& tz);
/**
* Returns the primary IANA zone ID for the input zone ID, which might be the id itself.
* If the given system tzid is not known, U_ILLEGAL_ARGUMENT_ERROR is set in the status
* and the output string is set to bogus.
*
* @param tzid Zone ID
* @param ianaID Receives the primary IANA zone ID equivalent to the input zone ID
* @param status Receives the status
* @return A reference to ianaID.
*/
static UnicodeString& U_EXPORT2 getIanaID(const UnicodeString& tzid, UnicodeString& ianaID, UErrorCode& status);
/**
* Return the canonical country code for this tzid. If we have none, or if the time zone
* is not associated with a country, return bogus string.

View file

@ -71,6 +71,7 @@ void addCalTest(TestNode** root)
addTest(root, &TestUcalOpenBufferRead, "tsformat/ccaltst/TestUcalOpenBufferRead");
addTest(root, &TestGetTimeZoneOffsetFromLocal, "tsformat/ccaltst/TestGetTimeZoneOffsetFromLocal");
addTest(root, &TestFWWithISO8601, "tsformat/ccaltst/TestFWWithISO8601");
addTest(root, &TestGetIanaTimeZoneID, "tsformat/ccaltst/TestGetIanaTimeZoneID");
}
/* "GMT" */
@ -2826,4 +2827,53 @@ TestFWWithISO8601() {
}
}
void
TestGetIanaTimeZoneID() {
const UChar* UNKNOWN = u"Etc/Unknown";
typedef struct {
const UChar* id;
const UChar* expected;
} IanaTimeZoneIDTestData;
const IanaTimeZoneIDTestData TESTDATA[] = {
{u"", UNKNOWN},
{0, UNKNOWN},
{UNKNOWN, UNKNOWN},
{u"America/New_York", u"America/New_York"},
{u"Asia/Calcutta", u"Asia/Kolkata"},
{u"Europe/Kiev", u"Europe/Kyiv"},
{u"Europe/Zaporozhye", u"Europe/Kyiv"},
{u"Etc/GMT-1", u"Etc/GMT-1"},
{u"Etc/GMT+20", UNKNOWN},
{u"PST8PDT", u"PST8PDT"},
{u"GMT-08:00", UNKNOWN},
{0, 0}
};
for (int32_t i = 0; TESTDATA[i].expected != 0; i++) {
UErrorCode sts = U_ZERO_ERROR;
UChar ianaID[128];
int32_t ianaLen = 0;
ianaLen = ucal_getIanaTimeZoneID(TESTDATA[i].id, -1, ianaID, sizeof(ianaID), &sts);
if (u_strcmp(TESTDATA[i].expected, UNKNOWN) == 0) {
if (sts != U_ILLEGAL_ARGUMENT_ERROR) {
log_err("Expected U_ILLEGAL_ERROR: TESTDATA[%d]", i);
}
} else {
if (u_strlen(TESTDATA[i].expected) != ianaLen || u_strncmp(TESTDATA[i].expected, ianaID, ianaLen) != 0) {
log_err("Error: TESTDATA[%d]", i);
}
// Calling ucal_getIanaTimeZoneID with an IANA ID should return the same
UChar ianaID2[128];
int32_t ianaLen2 = 0;
ianaLen2 = ucal_getIanaTimeZoneID(ianaID, ianaLen, ianaID2, sizeof(ianaID2), &sts);
if (U_FAILURE(sts) || ianaLen != ianaLen2 || u_strncmp(ianaID, ianaID2, ianaLen) != 0) {
log_err("Error: IANA ID for IANA ID %s", ianaID);
}
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -72,6 +72,10 @@
* Test ambiguous wall time
*/
static void TestAmbiguousWallTime(void);
/**
* Test ucal_getIanaTimeZoneID()
*/
static void TestGetIanaTimeZoneID(void);
/*Internal functions used*/
/**

View file

@ -17,6 +17,7 @@
#include "unicode/localpointer.h"
#include "unicode/resbund.h"
#include "unicode/strenum.h"
#include "unicode/ustring.h"
#include "unicode/uversion.h"
#include "tztest.h"
#include "cmemory.h"
@ -78,6 +79,7 @@ void TimeZoneTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
TESTCASE_AUTO(TestGetIDForWindowsID);
TESTCASE_AUTO(TestCasablancaNameAndOffset22041);
TESTCASE_AUTO(TestRawOffsetAndOffsetConsistency22041);
TESTCASE_AUTO(TestGetIanaID);
TESTCASE_AUTO_END;
}
@ -2623,4 +2625,43 @@ void TimeZoneTest::TestRawOffsetAndOffsetConsistency22041() {
zone->getRawOffset(), raw);
}
}
void TimeZoneTest::TestGetIanaID() {
const char16_t* UNKNOWN = u"Etc/Unknown";
static const struct {
const char16_t* id;
const char16_t* expected;
} TESTDATA[] = {
{u"", UNKNOWN},
{0, UNKNOWN},
{UNKNOWN, UNKNOWN},
{u"America/New_York", u"America/New_York"},
{u"Asia/Calcutta", u"Asia/Kolkata"},
{u"Europe/Kiev", u"Europe/Kyiv"},
{u"Europe/Zaporozhye", u"Europe/Kyiv"},
{u"Etc/GMT-1", u"Etc/GMT-1"},
{u"Etc/GMT+20", UNKNOWN},
{u"PST8PDT", u"PST8PDT"},
{u"GMT-08:00", UNKNOWN},
{0, 0}
};
for (int32_t i = 0; TESTDATA[i].expected != 0; i++) {
UErrorCode sts = U_ZERO_ERROR;
UnicodeString inputID(TESTDATA[i].id);
UnicodeString ianaID;
TimeZone::getIanaID(inputID, ianaID, sts);
if (u_strcmp(TESTDATA[i].expected, UNKNOWN) == 0) {
assertEquals(inputID + " should fail", (int32_t)U_ILLEGAL_ARGUMENT_ERROR, sts);
assertTrue(inputID + " should set bogus", ianaID.isBogus());
} else {
assertEquals(inputID, UnicodeString(TESTDATA[i].expected), ianaID);
// Calling getIanaID with an IANA ID should return the same
UnicodeString ianaID2;
TimeZone::getIanaID(ianaID, ianaID2, sts);
assertEquals(ianaID, ianaID, ianaID2);
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -106,6 +106,8 @@ public:
void TestCasablancaNameAndOffset22041();
void TestRawOffsetAndOffsetConsistency22041();
void TestGetIanaID();
static const UDate INTERVAL;
private:

View file

@ -69,7 +69,7 @@ public final class ZoneMeta {
systemZones = REF_SYSTEM_ZONES.get();
}
if (systemZones == null) {
Set<String> systemIDs = new TreeSet<String>();
Set<String> systemIDs = new TreeSet<>();
String[] allIDs = getZoneIDs();
for (String id : allIDs) {
// exclude Etc/Unknown
@ -79,7 +79,7 @@ public final class ZoneMeta {
systemIDs.add(id);
}
systemZones = Collections.unmodifiableSet(systemIDs);
REF_SYSTEM_ZONES = new SoftReference<Set<String>>(systemZones);
REF_SYSTEM_ZONES = new SoftReference<>(systemZones);
}
return systemZones;
}
@ -96,7 +96,7 @@ public final class ZoneMeta {
canonicalSystemZones = REF_CANONICAL_SYSTEM_ZONES.get();
}
if (canonicalSystemZones == null) {
Set<String> canonicalSystemIDs = new TreeSet<String>();
Set<String> canonicalSystemIDs = new TreeSet<>();
String[] allIDs = getZoneIDs();
for (String id : allIDs) {
// exclude Etc/Unknown
@ -109,7 +109,7 @@ public final class ZoneMeta {
}
}
canonicalSystemZones = Collections.unmodifiableSet(canonicalSystemIDs);
REF_CANONICAL_SYSTEM_ZONES = new SoftReference<Set<String>>(canonicalSystemZones);
REF_CANONICAL_SYSTEM_ZONES = new SoftReference<>(canonicalSystemZones);
}
return canonicalSystemZones;
}
@ -128,7 +128,7 @@ public final class ZoneMeta {
canonicalSystemLocationZones = REF_CANONICAL_SYSTEM_LOCATION_ZONES.get();
}
if (canonicalSystemLocationZones == null) {
Set<String> canonicalSystemLocationIDs = new TreeSet<String>();
Set<String> canonicalSystemLocationIDs = new TreeSet<>();
String[] allIDs = getZoneIDs();
for (String id : allIDs) {
// exclude Etc/Unknown
@ -144,7 +144,7 @@ public final class ZoneMeta {
}
}
canonicalSystemLocationZones = Collections.unmodifiableSet(canonicalSystemLocationIDs);
REF_CANONICAL_SYSTEM_LOCATION_ZONES = new SoftReference<Set<String>>(canonicalSystemLocationZones);
REF_CANONICAL_SYSTEM_LOCATION_ZONES = new SoftReference<>(canonicalSystemLocationZones);
}
return canonicalSystemLocationZones;
}
@ -182,7 +182,7 @@ public final class ZoneMeta {
}
// Filter by region/rawOffset
Set<String> result = new TreeSet<String>();
Set<String> result = new TreeSet<>();
for (String id : baseSet) {
if (region != null) {
String r = getRegion(id);
@ -339,9 +339,9 @@ public final class ZoneMeta {
return zoneIdx;
}
private static ICUCache<String, String> CANONICAL_ID_CACHE = new SimpleCache<String, String>();
private static ICUCache<String, String> REGION_CACHE = new SimpleCache<String, String>();
private static ICUCache<String, Boolean> SINGLE_COUNTRY_CACHE = new SimpleCache<String, Boolean>();
private static ICUCache<String, String> CANONICAL_ID_CACHE = new SimpleCache<>();
private static ICUCache<String, String> REGION_CACHE = new SimpleCache<>();
private static ICUCache<String, Boolean> SINGLE_COUNTRY_CACHE = new SimpleCache<>();
public static String getCanonicalCLDRID(TimeZone tz) {
if (tz instanceof OlsonTimeZone) {
@ -353,7 +353,7 @@ public final class ZoneMeta {
/**
* Return the canonical id for this tzid defined by CLDR, which might be
* the id itself. If the given tzid is not known, return null.
*
*
* Note: This internal API supports all known system IDs and "Etc/Unknown" (which is
* NOT a system ID).
*/
@ -419,6 +419,33 @@ public final class ZoneMeta {
return canonical;
}
/**
 * Maps a zone ID to its primary IANA zone ID.
 *
 * <p>The input is first resolved to its CLDR canonical ID; if there is none,
 * this method returns null. The canonical ID (with '/' replaced by ':') is
 * then looked up under keyTypeData/ianaMap/timezone. If no entry exists,
 * the canonical ID itself is returned.
 *
 * @param tzid An input zone ID.
 * @return A primary IANA zone ID equivalent to the input zone ID, or null
 *         when the input zone ID is not known.
 */
public static String getIanaID(String tzid) {
    // Resolve the CLDR canonical ID first; unknown IDs end here.
    final String cldrId = getCanonicalCLDRID(tzid);
    if (cldrId == null) {
        return null;
    }

    // Walk down to the timezone table of the IANA map.
    UResourceBundle ianaTzTable = UResourceBundle
            .getBundleInstance(ICUData.ICU_BASE_NAME, "keyTypeData", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
            .get("ianaMap")
            .get("timezone");

    final String lookupKey = cldrId.replace('/', ':');
    String resolved;
    try {
        resolved = ianaTzTable.getString(lookupKey);
    } catch (MissingResourceException e) {
        // No IANA zone ID mapping. In this case, the CLDR canonical ID
        // is also a primary IANA id.
        resolved = cldrId;
    }
    return resolved;
}
/**
* Return the region code for this tzid.
* If tzid is not a system zone ID, this method returns null.

View file

@ -1166,6 +1166,41 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable<Tim
return canonicalID;
}
/**
 * Returns the preferred time zone ID in the IANA database for the given time zone ID.
 * There are two types of preferred IDs. The first type is the one defined in the zone.tab file,
 * such as "America/Los_Angeles". The second type is the one defined for zones not associated
 * with a specific region, but not defined with "Link" syntax, such as "Etc/GMT+10".
 *
 * <p>Note: For most system time zone IDs, this method returns the same ID as {@link TimeZone#getCanonicalID(String)}.
 * {@link TimeZone#getCanonicalID(String)} is based on canonical time zone IDs defined in Unicode CLDR.
 * These canonical time zone IDs in CLDR were based on a very old version of the time zone database.
 * In the IANA time zone database, some IDs have been updated since then. This API returns the newer
 * time zone ID. For example, CLDR defines "Asia/Calcutta" as the canonical time zone ID. This
 * method returns "Asia/Kolkata" instead.
 * <p> "Etc/Unknown" is a special time zone ID defined by CLDR. There is no corresponding zone
 * in the IANA time zone database. When the input is "Etc/Unknown", this method returns "Etc/Unknown",
 * but it really means that no mapping is available. Callers of this method should interpret
 * "Etc/Unknown" as an error.
 *
 * @param id The input time zone ID.
 * @return The preferred time zone ID in the IANA time zone database, or {@link TimeZone#UNKNOWN_ZONE_ID}
 * if the input ID is not a system ID.
 * @see #getCanonicalID(String)
 * @draft ICU 74
 */
public static String getIanaID(String id) {
    String ianaId = TimeZone.UNKNOWN_ZONE_ID;
    // Compare against the ID string; comparing a String with the UNKNOWN_ZONE
    // TimeZone object could never match.
    if (id == null || id.length() == 0 || id.equals(TimeZone.UNKNOWN_ZONE_ID)) {
        return ianaId;
    }
    String tmpIanaId = ZoneMeta.getIanaID(id);
    if (tmpIanaId != null) {
        ianaId = tmpIanaId;
    }
    return ianaId;
}
/**
* {@icu} Returns the region code associated with the given
* system time zone ID. The region code is either ISO 3166

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:061f157df826b782de9c9a9b6f1d1a18e8cfce1987fc00de508fc131052883b1
size 14335588
oid sha256:9c37b86f8c6383d6bdc6115586fb10b81ecf9d0ee92b79218d528b1986c4d7ce
size 14335623

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:154021a877dc34aa7bc932095b10daba1ccd142409c162c9caace5d4e0278461
size 94813
oid sha256:26fca028e85a299bbfdc05fafdada697d1bf94f290bfa26c86b85072bce92175
size 95136

View file

@ -2390,6 +2390,35 @@ public class TimeZoneTest extends TestFmwk
zone.getRawOffset(), raw);
}
}
/**
 * Checks TimeZone.getIanaID() against a table of {input, expected} pairs.
 * An expected value of TimeZone.UNKNOWN_ZONE_ID marks inputs that have no
 * IANA mapping. For every input that does resolve, the resolved ID is passed
 * back in to confirm that an IANA ID maps to itself.
 */
@Test
public void TestGetIanaID() {
    final String UNKNOWN = TimeZone.UNKNOWN_ZONE_ID;
    final String[][] TESTDATA = {
        {"", UNKNOWN},
        {null, UNKNOWN},
        {UNKNOWN, UNKNOWN},
        {"America/New_York", "America/New_York"},
        {"Asia/Calcutta", "Asia/Kolkata"},
        {"Europe/Kiev", "Europe/Kyiv"},
        {"Europe/Zaporozhye", "Europe/Kyiv"},
        {"Etc/GMT-1", "Etc/GMT-1"},
        {"Etc/GMT+20", UNKNOWN},
        {"PST8PDT", "PST8PDT"},
        {"GMT-08:00", UNKNOWN},
    };

    for (String[] test : TESTDATA) {
        String ianaId = TimeZone.getIanaID(test[0]);
        assertEquals("IANA ID for " + test[0], test[1], ianaId);
        // The round trip is only meaningful for inputs that resolved to a real IANA ID.
        if (!test[1].equals(UNKNOWN)) {
            // Calling getIanaID with an IANA ID should return the same
            String ianaId2 = TimeZone.getIanaID(ianaId);
            assertEquals("IANA ID for " + ianaId, ianaId, ianaId2);
        }
    }
}
}
//eof

View file

@ -1,6 +1,6 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package main.java.org.unicode.icu.tool.cldrtoicu;
package org.unicode.icu.tool.cldrtoicu;
import java.io.PrintWriter;
import java.nio.file.Path;

View file

@ -2,17 +2,20 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.ant;
import main.java.org.unicode.icu.tool.cldrtoicu.CodeGenerator;
import main.java.org.unicode.icu.tool.cldrtoicu.generator.ResourceFallbackCodeGenerator;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.*;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import static com.google.common.base.Preconditions.checkNotNull;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import org.unicode.icu.tool.cldrtoicu.CodeGenerator;
import org.unicode.icu.tool.cldrtoicu.generator.ResourceFallbackCodeGenerator;
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
public final class GenerateCodeTask extends Task {

View file

@ -1,14 +1,23 @@
package main.java.org.unicode.icu.tool.cldrtoicu.generator;
package org.unicode.icu.tool.cldrtoicu.generator;
import com.google.common.base.Splitter;
import main.java.org.unicode.icu.tool.cldrtoicu.CodeGenerator;
import org.unicode.cldr.api.*;
import static com.google.common.base.CharMatcher.whitespace;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.util.*;
import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import static com.google.common.base.CharMatcher.whitespace;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.CodeGenerator;
import com.google.common.base.Splitter;
public class ResourceFallbackCodeGenerator implements CodeGenerator {
private Map<String, String> defaultScripts;

View file

@ -48,6 +48,8 @@ public final class Bcp47Mapper {
private static final AttributeKey TYPE_ALIASES = keyOf("type", "alias");
private static final AttributeKey PREFERRED_TYPE_NAME = keyOf("type", "preferred");
private static final AttributeKey TYPE_IANA = keyOf("type", "iana");
// Deprecation of the data is not the same as deprecation of attributes themselves. This
// deprecation relates to identifying data which exists, but is no longer the right way to
// represent things (which means it can be important for clients to know about).
@ -66,6 +68,7 @@ public final class Bcp47Mapper {
private static final RbPath RB_TYPE_ALIAS = RbPath.of("typeAlias", "timezone:alias");
private static final RbPath RB_MAP_ALIAS = RbPath.of("typeMap", "timezone:alias");
private static final RbPath RB_BCP_ALIAS = RbPath.of("bcpTypeAlias", "tz:alias");
private static final RbPath RB_IANAMAP_ALIAS = RbPath.of("ianaMap", "timezone:alias");
private static final CldrDataProcessor<Bcp47Mapper> BCP47_PROCESSOR;
static {
@ -127,6 +130,7 @@ public final class Bcp47Mapper {
keyData.add(RB_TYPE_ALIAS, "/ICUDATA/timezoneTypes/typeAlias/timezone");
keyData.add(RB_MAP_ALIAS, "/ICUDATA/timezoneTypes/typeMap/timezone");
keyData.add(RB_BCP_ALIAS, "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
keyData.add(RB_IANAMAP_ALIAS, "/ICUDATA/timezoneTypes/ianaMap/timezone");
}
private final class ValueCollector {
@ -166,6 +170,7 @@ public final class Bcp47Mapper {
RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
String icuTypeName = typeName;
if (typeAliases.isEmpty()) {
// Generate type map entry using empty value (an empty value indicates same
// type name is used for both BCP47 and legacy type).
@ -179,7 +184,18 @@ public final class Bcp47Mapper {
.skip(1)
.map(Bcp47Mapper::quoteAlias)
.forEach(a -> icuData.add(typeAliasPrefix.extendBy(a), mainAlias));
icuTypeName = mainAlias;
}
// The 'iana' attribute was introduced in CLDR 44 to provide IANA zone.tab ID mapping
// for timezones.xml.
Optional<String> iana = TYPE_IANA.optionalValueFrom(value);
if (iana.isPresent()) {
assert icuTypeName != null;
RbPath ianaMapPrefix = RbPath.of("ianaMap", keyAlias);
icuData.add(ianaMapPrefix.extendBy(quoteAlias(icuTypeName)), iana.get());
}
addInfoAttributes(keyName, typeName, value.getValueAttributes());
}