ICU-2966 support equivalency classes

X-SVN-Rev: 12864
This commit is contained in:
Alan Liu 2003-08-19 00:24:49 +00:00
parent ef1d04ccb7
commit 9478f07479
9 changed files with 580 additions and 1134 deletions

File diff suppressed because it is too large Load diff

View file

@ -166,10 +166,14 @@ OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top,
// TODO remove nonconst casts below when ures_* API is fixed
setID(ures_getKey((UResourceBundle*) res)); // cast away const
// Size 3 is a purely historical zone (no final rules);
// size 5 is a hybrid zone, with historical and final elements.
// Size 1 is an alias TO another zone (int)
// HOWEVER, the caller should dereference this and never pass it in to us
// Size 3 is a purely historical zone (no final rules)
// Size 4 is like size 3, but with an alias list at the end
// Size 5 is a hybrid zone, with historical and final elements
// Size 6 is like size 5, but with an alias list at the end
int32_t size = ures_getSize((UResourceBundle*) res); // cast away const
if (size != 3 && size != 5) {
if (size < 3 || size > 6) {
ec = U_INVALID_FORMAT_ERROR;
}
@ -202,7 +206,7 @@ OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top,
}
// Process final rule and data, if any
if (size == 5) {
if (size >= 5) {
UnicodeString ruleid = ures_getUnicodeStringByIndex(res, 3, &ec);
r = ures_getByIndex(res, 4, NULL, &ec);
const int32_t* data = ures_getIntVector(r, &len, &ec);

View file

@ -298,6 +298,30 @@ TimeZone::createTimeZone(const UnicodeString& ID)
return result;
}
/**
 * Open the zoneinfo bundle and load the resource for the given zone ID
 * into a caller-supplied resource object.
 *
 * The ID is converted to a char key (at most 127 characters are used) and
 * looked up in the top-level bundle. If the resource found has size 1 it
 * is treated as an alias: its integer value is read and used as an index
 * into the top-level bundle, and the resource at that index is loaded
 * into res in its place. Only one level of aliasing is followed.
 *
 * This function closes neither object on return; the callers visible in
 * this file close both res and the returned bundle.
 *
 * @param id zone id
 * @param res resource, which must be ready for use (initialized but not open)
 * @param ec input-output error code
 * @return top-level resource bundle
 */
static UResourceBundle* openOlsonResource(const UnicodeString& id,
                                          UResourceBundle& res,
                                          UErrorCode& ec) {
    char key[128];
    id.extract(0, sizeof(key)-1, key, sizeof(key), "");

    UResourceBundle* const bundle = ures_openDirect(0, ZONEINFO, &ec);
    ures_getByKey(bundle, key, &res, &ec);

    // Anything other than a size-1 resource is used as-is.
    if (ures_getSize(&res) != 1) {
        return bundle;
    }

    // Size 1: the integer payload is the index of the zone aliased to.
    const int32_t target = ures_getInt(&res, &ec);
    ures_close(&res);
    ures_getByIndex(bundle, target, &res, &ec);
    return bundle;
}
/**
* Lookup the given name in our system zone table. If found,
* instantiate a new zone of that name and return it. If not
@ -306,13 +330,10 @@ TimeZone::createTimeZone(const UnicodeString& ID)
TimeZone*
TimeZone::createSystemTimeZone(const UnicodeString& id) {
TimeZone* z = 0;
char buf[128];
id.extract(0, sizeof(buf)-1, buf, sizeof(buf), "");
UErrorCode ec = U_ZERO_ERROR;
UResourceBundle *top = ures_openDirect(0, ZONEINFO, &ec);
UResourceBundle res;
ures_initStackObject(&res);
ures_getByKey(top, buf, &res, &ec);
UResourceBundle *top = openOlsonResource(id, res, ec);
if (U_SUCCESS(ec)) {
z = new OlsonTimeZone(top, &res, ec);
}
@ -949,18 +970,60 @@ TimeZone::createAvailableIDs(int32_t& numIDs)
/**
 * Return the number of entries in the alias list of the given zone.
 *
 * The zone's resource is opened via openOlsonResource(). Resources of
 * size 4 or 6 carry an alias list as their last element; the size of
 * that element is the result. For every other size, or if any resource
 * call fails, the result is 0.
 *
 * @param id zone id
 * @return number of alias-list entries, or 0 if there are none
 */
int32_t
TimeZone::countEquivalentIDs(const UnicodeString& id) {
    int32_t result = 0;
    UErrorCode ec = U_ZERO_ERROR;
    UResourceBundle res;
    ures_initStackObject(&res);
    UResourceBundle *top = openOlsonResource(id, res, ec);
    if (U_SUCCESS(ec)) {
        int32_t size = ures_getSize(&res);
        // Only the size-4 and size-6 layouts end with an alias list
        if (size == 4 || size == 6) {
            UResourceBundle r;
            ures_initStackObject(&r);
            ures_getByIndex(&res, size-1, &r, &ec);
            // Do not trust r unless the lookup actually succeeded
            if (U_SUCCESS(ec)) {
                result = ures_getSize(&r);
            }
            ures_close(&r);
        }
    }
    ures_close(&res);
    ures_close(top);
    return result;
}
// ---------------------------------------
/**
 * Return the ID of the index-th entry in the alias list of the given zone.
 *
 * The zone's resource is opened via openOlsonResource(). If it has size
 * 4 or 6, its last element is read as an int vector of indices into the
 * top-level bundle; the entry at position index selects a top-level
 * resource whose key is returned as the ID.
 *
 * @param id zone id
 * @param index position within the alias list, 0..count-1
 * @return the ID at that position, or an empty string if the zone has no
 * alias list, index is out of range, or a resource call fails
 */
const UnicodeString
TimeZone::getEquivalentID(const UnicodeString& id, int32_t index) {
    UnicodeString result;
    UErrorCode ec = U_ZERO_ERROR;
    UResourceBundle res;
    ures_initStackObject(&res);
    UResourceBundle *top = openOlsonResource(id, res, ec);
    int32_t zone = -1;
    if (U_SUCCESS(ec)) {
        int32_t size = ures_getSize(&res);
        // Only the size-4 and size-6 layouts end with an alias list
        if (size == 4 || size == 6) {
            UResourceBundle r;
            ures_initStackObject(&r);
            ures_getByIndex(&res, size-1, &r, &ec);
            // Use a dedicated, zeroed length: on failure the out-length is
            // not guaranteed to be written, and the array size must never
            // stand in for the vector length.
            int32_t count = 0;
            const int32_t* v = ures_getIntVector(&r, &count, &ec);
            if (U_SUCCESS(ec) && v != 0 && index >= 0 && index < count) {
                zone = v[index];
            }
            ures_close(&r);
        }
    }
    ures_close(&res);
    if (zone >= 0) {
        // res was closed above; reuse it to fetch the selected zone
        ures_getByIndex(top, zone, &res, &ec);
        if (U_SUCCESS(ec)) {
            const char* key = ures_getKey(&res);
            result = UnicodeString(key, "");
        }
        ures_close(&res);
    }
    ures_close(top);
    return result;
}
// ---------------------------------------

View file

@ -944,7 +944,7 @@ void TimeZoneRegressionTest::TestJ449() {
// specify two zones in the same equivalency group. One must have
// locale data in 'loc'; the other must not.
const char* idWithLocaleData = "America/Los_Angeles";
const char* idWithoutLocaleData = "America/Vancouver";
const char* idWithoutLocaleData = "US/Pacific";
const Locale loc("en", "", "");
TimeZone *zoneWith = TimeZone::createTimeZone(idWithLocaleData);

View file

@ -43,9 +43,10 @@ void TimeZoneTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
CASE(7, TestDisplayName);
CASE(8, TestDSTSavings);
CASE(9, TestAlternateRules);
CASE(10,TestCountries);
CASE(10,TestCountries);
CASE(11,TestHistorical);
default: name = ""; break;
CASE(12,TestEquivalentIDs);
default: name = ""; break;
}
}
@ -1243,4 +1244,13 @@ void TimeZoneTest::TestHistorical() {
}
}
void TimeZoneTest::TestEquivalentIDs() {
int32_t n = TimeZone::countEquivalentIDs("PST");
logln((UnicodeString)"PST: " + n);
for (int32_t i=0; i<n; ++i) {
UnicodeString id = TimeZone::getEquivalentID("PST", i);
logln((UnicodeString)i + " : " + id);
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -81,6 +81,8 @@ public:
void TestHistorical(void);
void TestEquivalentIDs(void);
static const UDate INTERVAL;
private:

View file

@ -68,18 +68,17 @@ HOWTO
1. Obtain the current versions of tzcodeYYYYV.tar.gz (aka `tzcode')
and tzdataYYYYV.tar.gz (aka `tzdata') from the FTP site given
above.
above. Either manually download or use wget:
2. Unpack tzcode directly into the directory gentz/tzcode:
$ cd <path_to>/icu/source/tools/gentz/tzcode
$ wget "ftp://elsie.nci.nih.gov/pub/tz*.tar.gz"
$ cd icu/source/tools/gentz/tzcode
$ tar xzvf <path_to>/tzcodeYYYYV.tar.gz
2. Unpack tzcode and tzdata directly into the directory gentz/tzcode:
3. Unpack tzdata into the same directory.
$ tar xzvf tzcode*.tar.gz
$ tar xzvf tzdata*.tar.gz
$ tar xzvf <path_to>/tzdataYYYYV.tar.gz
5. Apply the ICU patch to zic.c:
3. Apply the ICU patch to zic.c:
$ patch < patch-icu-tzcode
@ -87,20 +86,11 @@ HOWTO
manually addressed. See the CVS log of `patch-icu-tzcode' for
version details.
6. Build:
$ make
7. Build the zoneinfo data. This results in a directory `zoneinfo'
and a file `icu_zone.txt'.
$ make posix_only
8. Build the ICU data. This results in a file `zoneinfo.txt'.
4. Build:
$ make icu_data
9. Copy the data file to the correct location in the ICU source tree:
5. Copy the data file to the correct location in the ICU source tree:
$ cp zoneinfo.txt ../../../data/misc/

View file

@ -50,7 +50,7 @@ diff --unified --recursive ../tzcode.orig/Makefile ./Makefile
right_only: zic leapseconds $(TDATA)
$(ZIC) -y $(YEARISTYPE) -d $(TZDIR) -L leapseconds $(TDATA)
+icu_data: tz2icu
+icu_data: tz2icu posix_only
+ ./tz2icu zoneinfo zone.tab
+
# In earlier versions of this makefile, the other two directories were

View file

@ -204,15 +204,48 @@ struct ZoneInfo {
int finalOffset;
int finalYear; // -1 if none
ZoneInfo() : finalYear(-1) {}
// If this is an alias, then all other fields are meaningless, and
// this field will point to the "real" zone 0..n-1.
int aliasTo; // -1 if this is a "real" zone
// If there are aliases TO this zone, then the following set will
// contain their index numbers (each index >= 0).
set<int> aliases;
ZoneInfo() : finalYear(-1), aliasTo(-1) {}
void mergeFinalData(const FinalZone& fz);
void optimizeTypeList();
void print(ostream& os) const;
// Set this zone to be an alias TO another zone.
void setAliasTo(int index);
// Clear the list of aliases OF this zone.
void clearAliases();
// Add an alias to the list of aliases OF this zone.
void addAlias(int index);
void print(ostream& os, const string& id) const;
};
// Empty the set of aliases OF this zone. Asserts that this zone is not
// itself an alias (aliasTo is negative).
void ZoneInfo::clearAliases() {
    assert(!(aliasTo >= 0));
    aliases.erase(aliases.begin(), aliases.end());
}
// Record the zone with the given index as an alias OF this zone.
// Asserts that this zone is not itself an alias, that the index is
// non-negative, and that the index has not been recorded already.
void ZoneInfo::addAlias(int index) {
    assert(aliasTo < 0);
    assert(index >= 0);
    assert(aliases.count(index) == 0);
    aliases.insert(index);
}
// Mark this zone as an alias TO the zone with the given index.
// Asserts that the index is non-negative and that no aliases OF this
// zone have been recorded.
void ZoneInfo::setAliasTo(int index) {
    assert(index >= 0 && aliases.empty());
    aliasTo = index;
}
typedef map<string, ZoneInfo> ZoneMap;
typedef ZoneMap::const_iterator ZoneMapIter;
@ -739,6 +772,8 @@ map<string,FinalZone> finalZones;
map<string,FinalRule> finalRules;
map<string, set<string> > links;
map<string, string> reverseLinks;
map<string, string> linkSource; // id => "Olson link" or "ICU alias"
/**
* Predicate used to find FinalRule objects that do not have both
@ -801,18 +836,24 @@ void readFinalZonesAndRules(istream& in) {
int p = fr.part[0].isset ? 1 : 0;
fr.part[p].set(mode, month, dom, dow, time, isstd, isgmt, offset);
} else if (token == "link") {
string fromid, toid;
string fromid, toid; // fromid == "real" zone, toid == alias
in >> fromid >> toid;
// DO NOT consumeLine(in);
if (finalZones.find(toid) != finalZones.end()) {
throw invalid_argument("Bad link: `to' id known");
}
// Not all links refer to final zones; need to check
if (finalZones.find(fromid) != finalZones.end()) {
finalZones[fromid].addLink(toid);
//cout << fromid << ": alias is " << toid << endl;
throw invalid_argument("Bad link: `to' id is a \"real\" zone");
}
// TODO remove
// // Not all links refer to final zones; need to check
// if (finalZones.find(fromid) != finalZones.end()) {
// finalZones[fromid].addLink(toid);
// //cout << fromid << ": alias is " << toid << endl;
// }
links[fromid].insert(toid);
reverseLinks[toid] = fromid;
linkSource[fromid] = "Olson link";
linkSource[toid] = "Olson link";
} else if (token.length() > 0 && token[0] == '#') {
consumeLine(in);
} else {
@ -888,9 +929,19 @@ void readFinalZonesAndRules(istream& in) {
// TODO update format docs
*/
void ZoneInfo::print(ostream& os) const {
void ZoneInfo::print(ostream& os, const string& id) const {
// Implement compressed format #2:
os << " " << id;
if (aliasTo >= 0) {
assert(aliases.size() == 0);
os << ":int { " << aliasTo << " }" << endl;
return;
}
os << ":array {" << endl;
vector<Transition>::const_iterator trn;
vector<ZoneType>::const_iterator typ;
@ -924,12 +975,20 @@ void ZoneInfo::print(ostream& os) const {
os << " :intvector { " << finalOffset << ", "
<< finalYear << " }" << endl;
}
}
inline ostream&
operator<<(ostream& os, const ZoneInfo& info) {
info.print(os);
return os;
// Alias list, if any
if (aliases.size() != 0) {
first = true;
os << " :intvector { ";
for (set<int>::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) {
if (!first) os << ", ";
first = false;
os << *i;
}
os << " }" << endl;
}
os << " }" << endl;
}
inline ostream&
@ -937,9 +996,7 @@ operator<<(ostream& os, const ZoneMap& zoneinfo) {
for (ZoneMapIter it = zoneinfo.begin();
it != zoneinfo.end();
++it) {
os << " " << it->first << ":array {" << endl;
os << it->second;
os << " }" << endl;
it->second.print(os, it->first);
}
return os;
}
@ -948,33 +1005,6 @@ operator<<(ostream& os, const ZoneMap& zoneinfo) {
// main
//--------------------------------------------------------------------
// Function object that accumulates size statistics over the entries of a
// ZoneMap. Apply it to each entry (e.g. with for_each), then read the
// public counters.
class Stats {
public:
    int n;                 // number of zones
    int Nsum, Nmin, Nmax;  // transitions count: total, smallest, largest
    int Msum, Mmin, Mmax;  // types count: total, smallest, largest
    int idsum;             // total ID chars

    // Minima start at INT_MAX and maxima at INT_MIN so that the first
    // zone visited sets both.
    Stats()
        : n(0),
          Nsum(0),
          Nmin(numeric_limits<int>::max()),
          Nmax(numeric_limits<int>::min()),
          Msum(0),
          Mmin(numeric_limits<int>::max()),
          Mmax(numeric_limits<int>::min()),
          idsum(0)
    {}

    // Fold one (id, zone) entry into the running totals.
    void operator() (const ZoneMap::value_type& p) {
        const int transitionCount = p.second.transitions.size();
        const int typeCount = p.second.types.size();

        n += 1;

        Nsum += transitionCount;
        if (Nmin > transitionCount) { Nmin = transitionCount; }
        if (Nmax < transitionCount) { Nmax = transitionCount; }

        Msum += typeCount;
        if (Mmin > typeCount) { Mmin = typeCount; }
        if (Mmax < typeCount) { Mmax = typeCount; }

        // One extra per ID -- presumably for a terminator; confirm
        idsum += p.first.size() + 1;
    }
};
set<string> zoneIDset;
void insertZoneID(const pair<string,FinalZone>& p) {
zoneIDset.insert(p.first);
}
// Unary predicate for finding transitions after a given time
bool isAfter(const Transition t, long thresh) {
return t.time >= thresh;
@ -1022,6 +1052,8 @@ void ZoneInfo::optimizeTypeList() {
// corresponding to transitions that have been trimmed (during
// merging of final data).
if (aliasTo >= 0) return; // Nothing to do for aliases
// If there are zero transitions and one type, then leave that as-is.
if (transitions.size() == 0) {
if (types.size() != 1) {
@ -1101,9 +1133,10 @@ void mergeFinalZone(const pair<string,FinalZone>& p) {
mergeOne(id, fz);
for (set<string>::const_iterator i=fz.aliases.begin(); i!=fz.aliases.end(); ++i) {
mergeOne(*i, fz);
}
// TODO remove
// for (set<string>::const_iterator i=fz.aliases.begin(); i!=fz.aliases.end(); ++i) {
// mergeOne(*i, fz);
// }
}
/**
@ -1159,6 +1192,64 @@ int main(int argc, char *argv[]) {
return 1;
}
// Read the legacy alias list and process it. Each non-blank line (after
// stripping '#' comments) must hold exactly two tokens: the alias and the
// Olson ID it maps to. Legacy mappings are treated like links: they are
// added to `links' and `reverseLinks', and their origin is recorded in
// `linkSource' for later error reporting.
try {
    ifstream aliases(ICU_TZ_ALIAS);
    if (!aliases) {
        cerr << "Error: Unable to open " ICU_TZ_ALIAS << endl;
        return 1;
    }
    int n = 0;
    string line;
    while (getline(aliases, line)) {
        string::size_type lb = line.find('#');
        if (lb != string::npos) {
            line.resize(lb); // trim comments
        }
        // Split the line into whitespace-separated tokens
        vector<string> a;
        istringstream is(line);
        copy(istream_iterator<string>(is),istream_iterator<string>(),
             back_inserter(a));
        if (a.size() == 0) continue; // blank line
        if (a.size() != 2) {
            cerr << "Error: Can't parse \"" << line << "\" in "
                ICU_TZ_ALIAS << endl;
            exit(1);
        }
        ++n;

        string alias(a[0]), olson(a[1]);
        // An alias must not collide with an ID that already has links...
        if (links.find(alias) != links.end()) {
            cerr << "Error: Alias \"" << alias
                 << "\" is an Olson zone in "
                ICU_TZ_ALIAS << endl;
            return 1;
        }
        // ...nor with an ID that is already a link to something else.
        // Report the target of the entry we actually found (keyed by
        // alias), and use the iterator so the map is not modified.
        map<string,string>::const_iterator existing = reverseLinks.find(alias);
        if (existing != reverseLinks.end()) {
            cerr << "Error: Alias \"" << alias
                 << "\" is an Olson link to \"" << existing->second
                 << "\" in " << ICU_TZ_ALIAS << endl;
            return 1;
        }

        // Record source for error reporting
        if (linkSource.find(olson) == linkSource.end()) {
            linkSource[olson] = "ICU alias";
        }
        assert(linkSource.find(alias) == linkSource.end());
        linkSource[alias] = "ICU alias";

        links[olson].insert(alias);
        reverseLinks[alias] = olson;
    }
    cout << "Finished reading " << n
         << " aliases from " ICU_TZ_ALIAS << endl;
} catch (const exception& error) {
    cerr << "Error: While reading " ICU_TZ_ALIAS ": " << error.what() << endl;
    return 1;
}
try {
// Recursively scan all files below the given path, accumulating
// their data into ZONEINFO. All files must be TZif files. Any
@ -1173,22 +1264,6 @@ int main(int argc, char *argv[]) {
<< (ZONEINFO.begin())->first << ".."
<< (--ZONEINFO.end())->first << "]" << endl;
#if 0
// Output some stats
Stats s = for_each (ZONEINFO.begin(), ZONEINFO.end(), Stats());
cout << "Transitions per zone: " << s.Nmin << ".." << s.Nmax
<< ", sum " << s.Nsum
<< ", mean " << ((double)s.Nsum / s.n) << endl;
cout << "Types per zone: " << s.Mmin << ".." << s.Mmax
<< ", sum " << s.Msum
<< ", mean " << ((double)s.Msum / s.n) << endl;
cout << "ID characters: " << s.idsum << endl;
cout << "Raw size = ~" << (s.Nsum * 5 + (int)(s.Msum * 4.5) +
s.idsum * 2) << endl;
/* allocate 2 bytes/char for IDs */
#endif
try {
for_each(finalZones.begin(), finalZones.end(), mergeFinalZone);
} catch (const exception& error) {
@ -1196,6 +1271,81 @@ int main(int argc, char *argv[]) {
return 1;
}
// Process links (including ICU aliases). For each link set we have
// a canonical ID (e.g., America/Los_Angeles) and a set of one or more
// aliases (e.g., PST, PST8PDT, ...).
// 1. Add all aliases as zone objects in ZONEINFO
for (map<string,set<string> >::const_iterator i = links.begin();
i!=links.end(); ++i) {
const string& olson = i->first;
const set<string>& aliases = i->second;
if (ZONEINFO.find(olson) == ZONEINFO.end()) {
cerr << "Error: Invalid " << linkSource[olson] << " to non-existent \""
<< olson << "\"" << endl;
return 1;
}
for (set<string>::const_iterator j=aliases.begin();
j!=aliases.end(); ++j) {
ZONEINFO[*j] = ZoneInfo();
}
}
// 2. Create a mapping from zones to index numbers 0..n-1.
map<string,int> zoneIDs;
vector<string> zoneIDlist;
int z=0;
for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
zoneIDs[i->first] = z++;
zoneIDlist.push_back(i->first);
}
assert(z == (int) ZONEINFO.size());
// 3. Merge aliases. Sometimes aliases link to other aliases; we
// resolve these into simplest possible sets.
map<string,set<string> > links2;
map<string,string> reverse2;
for (map<string,set<string> >::const_iterator i = links.begin();
i!=links.end(); ++i) {
string olson = i->first;
while (reverseLinks.find(olson) != reverseLinks.end()) {
olson = reverseLinks[olson];
}
for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
links2[olson].insert(*j);
reverse2[*j] = olson;
}
}
links = links2;
reverseLinks = reverse2;
if (false) { // Debugging: Emit link map
for (map<string,set<string> >::const_iterator i = links.begin();
i!=links.end(); ++i) {
cout << i->first << ": ";
for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
cout << *j << ", ";
}
cout << endl;
}
}
// 4. Update aliases
for (map<string,set<string> >::const_iterator i = links.begin();
i!=links.end(); ++i) {
const string& olson = i->first;
const set<string>& aliases = i->second;
ZONEINFO[olson].clearAliases();
for (set<string>::const_iterator j=aliases.begin();
j!=aliases.end(); ++j) {
assert(zoneIDs.find(olson) != zoneIDs.end());
assert(zoneIDs.find(*j) != zoneIDs.end());
assert(ZONEINFO.find(*j) != ZONEINFO.end());
ZONEINFO[*j].setAliasTo(zoneIDs[olson]);
ZONEINFO[olson].addAlias(zoneIDs[*j]);
}
}
// Once merging of final data is complete, we can optimize the type list
for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
i->second.optimizeTypeList();
@ -1257,62 +1407,6 @@ int main(int argc, char *argv[]) {
}
}
// Read the legacy alias list and process it.
try {
ifstream aliases(ICU_TZ_ALIAS);
if (!aliases) {
cerr << "Error: Unable to open " ICU_TZ_ALIAS << endl;
return 1;
}
int n = 0;
string line;
while (getline(aliases, line)) {
string::size_type lb = line.find('#');
if (lb != string::npos) {
line.resize(lb); // trim comments
}
vector<string> a;
istringstream is(line);
copy(istream_iterator<string>(is),istream_iterator<string>(),
back_inserter(a));
if (a.size() == 0) continue; // blank line
if (a.size() != 2) {
cerr << "Error: Can't parse \"" << line << "\" in "
ICU_TZ_ALIAS << endl;
exit(1);
}
++n;
string alias(a[0]), olson(a[1]);
if (ZONEINFO.find(olson) == ZONEINFO.end()) {
cerr << "Error: Alias to invalid zone " << olson
<< " in " ICU_TZ_ALIAS << endl;
return 1;
}
if (ZONEINFO.find(alias) != ZONEINFO.end()) {
cerr << "Error: Alias \"" << alias
<< "\" is an Olson zone in "
ICU_TZ_ALIAS << endl;
return 1;
}
// Currently we just copy the data for the alias.
// In the future, use a link. TODO Use rb alias facility.
ZONEINFO[alias] = ZONEINFO[olson]; // make alias
// Add alias to country map
if (reverseCountryMap.find(olson) != reverseCountryMap.end()) {
countryMap[reverseCountryMap[olson]].insert(alias);
reverseCountryMap[alias] = reverseCountryMap[olson];
}
}
cout << "Finished reading " << n
<< " aliases from " ICU_TZ_ALIAS << endl;
} catch (const exception& error) {
cerr << "Error: While reading " ICU_TZ_ALIAS ": " << error.what() << endl;
return 1;
}
// Create a pseudo-country containing all zones belonging to no country
set<string> nocountry;
for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
@ -1349,14 +1443,6 @@ int main(int argc, char *argv[]) {
ruleStart++;
ruleCount--;
// Create a mapping from zones to index numbers 0..n-1.
map<string,int> zoneIDs;
int z=0;
for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {
zoneIDs[i->first] = z++;
}
assert(z == zoneCount);
// Get local time & year for below
time_t sec;
time(&sec);