mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 09:21:03 +00:00
ICU-449 TimeZone equivalency support
X-SVN-Rev: 2520
This commit is contained in:
parent
9218332c3a
commit
d893f0de64
3 changed files with 910 additions and 1518 deletions
|
@ -30,75 +30,60 @@
|
|||
# - Lines may be followed by a comment; the parser must ignore
|
||||
# anything of the form /\s+#.*$/ in each line.
|
||||
# |3065,14400 # Asia/Dubai GMT+4:00
|
||||
# - The file contains a header and 5 lists.
|
||||
# - The header contains the version of the unix data, the total
|
||||
# zone count, the maximum number of zones sharing the same value
|
||||
# of gmtOffset, the length of the name table in bytes, and
|
||||
# the length of the longest name (not including the terminating
|
||||
# zero byte).
|
||||
# - The file contains a header and 3 lists.
|
||||
# - The header contains the version of this data file:
|
||||
# 2 original version, without equivalency groups
|
||||
# 3 current version, described here
|
||||
# then the version of the unix data, and other counts:
|
||||
# | 3 # format version number of this file
|
||||
# | 1999 # (tzdata1999j) version of Olson zone
|
||||
# | 10 # data from ftp://elsie.nci.nih.gov
|
||||
# | 387 # total zone count
|
||||
# | 40 # max count of zones with same gmtOffset
|
||||
# | 25 # max name length not incl final zero
|
||||
# | 5906 # length of name table in bytes
|
||||
# | 402 # total zone count
|
||||
# | 40 # maximum zones per offset (used by gentz)
|
||||
# - Lists start with a count of the records to follow, the records
|
||||
# themselves (one per line), and a single line with the keyword
|
||||
# 'end'.
|
||||
# - The first list is the list of standard zones:
|
||||
# | 208 # count of standard zones to follow
|
||||
# | 0,0 # Africa/Abidjan GMT+0:00
|
||||
# | 28,10800 # Africa/Addis_Ababa GMT+3:00
|
||||
# - The first list is the name table:
|
||||
# | 387 # count of names to follow
|
||||
# | 34,Africa/Abidjan
|
||||
# | 23,Africa/Accra
|
||||
# ...
|
||||
# | end
|
||||
# Each standard zone record contains two integers. The first
|
||||
# is a byte offset into the name table for the name of the zone.
|
||||
# The second integer is the GMT offset in SECONDS for this zone.
|
||||
# - The second list is the list of DST zones:
|
||||
# | 179 # count of dst zones to follow
|
||||
# | 15,0,8,1,0,0,w,11,31,0,0,w,20 # Africa/Accra GMT+0:00 Sep 1...
|
||||
# | 184,7200,3,-1,6,0,s,8,-1,5,1380,s,60 # Africa/Cairo GMT+2:0...
|
||||
# Each name is terminated by a newline (like all lines in the file).
|
||||
# The zone numbers in other lists refer to this table. The
|
||||
# integer that precedes the name is an index into the equivalency
|
||||
# table, with the first table entry being entry 0.
|
||||
# - The second list is the equivalency table. It lists, in sorted
|
||||
# order, the equivalency groups. Each group represents a
|
||||
# set of one or more zones that have the same GMT offset and the
|
||||
# same rules. While there are about 400 zones, there are fewer than
|
||||
# 120 equivalency groups (as of this writing).
|
||||
# | 120 # count of equivalency groups to follow
|
||||
# | s,0,1,0 # GMT+0:00
|
||||
# | d,0,8,1,0,0,w,11,31,0,0,w,20,4,15,16,17,18 # GMT+0:00 Sep 1...
|
||||
# ...
|
||||
# | end
|
||||
# Each record starts with the same two integers as a standard
|
||||
# zone record. Following this are data for the onset rule and
|
||||
# the cease rule. Each rule is described by the following integers:
|
||||
# Entries start with 's' for standard zones, or 'd' for DST zones.
|
||||
# Both zone descriptors start with the GMT offset in SECONDS. DST
|
||||
# zones contain, in addition, data for the onset rule and the cease
|
||||
# rule. Each rule is described by the following integers:
|
||||
# month (JAN = 0)
|
||||
# dowim } These two values are in SimpleTimeZone encoded
|
||||
# dow } format for DOM, DOWIM, DOW>=DOM, or DOW<=DOM.
|
||||
# time MINUTES
|
||||
# time mode ('w', 's', 'u')
|
||||
# The last integer in the record is the DST savings in MINUTES,
|
||||
# The last rule integer in the record is the DST savings in MINUTES,
|
||||
# typically 60.
|
||||
# - The third list is the name table:
|
||||
# | 387 # count of names to follow
|
||||
# | Africa/Abidjan
|
||||
# | Africa/Accra
|
||||
# ...
|
||||
# | end
|
||||
# Each name is terminated by a newline (like all lines in the file).
|
||||
# The offsets in the first two lists refer to this table.
|
||||
# - The fourth list is an index list by name. The index entries
|
||||
# themselves are of the form /[sd]\d+/, where the first character
|
||||
# indicates standard or DST, and the number that follows indexes
|
||||
# into the correpsonding array.
|
||||
# | 416 # count of name index table entries to follow
|
||||
# | d0 # ACT
|
||||
# | d1 # AET
|
||||
# | d2 # AGT
|
||||
# | d3 # ART
|
||||
# | d4 # AST
|
||||
# | s0 # Africa/Abidjan
|
||||
# ...
|
||||
# | end
|
||||
# - The fifth list is an index by GMT offset. Each line lists the
|
||||
# zones with the same offset. The first number on the line
|
||||
# is the GMT offset in seconds. The second number is the default
|
||||
# zone number in the following list, taken from tz.default. The
|
||||
# third number is the count
|
||||
# of zone numbers to follow. Each zone number is an integer from
|
||||
# 0..n-1, where n is the total number of zones. The zone numbers
|
||||
# refer to the zone list in alphabetical order.
|
||||
|
||||
# After either a standard or a DST zone, there is a list of the
|
||||
# members of the equivalency group. This consists of a number of
|
||||
# entries to follow (>=1), then the zone numbers themselves.
|
||||
# - The third list is an index by GMT offset. Each line lists the
|
||||
# zones with the same offset. The first number on the line is the
|
||||
# GMT offset in seconds. The second number is the default zone
|
||||
# number in the following list, taken from tz.default. The list
|
||||
# consists of a number of entries to follow (>=1), then the zone
|
||||
# numbers themselves.
|
||||
# | 39 # index by offset entries to follow
|
||||
# | -43200,280,1,280 # -12:00 d=Etc/GMT+12 Etc/GMT+12
|
||||
# | -39600,374,6,279,366,374,394,396,399 # -11:00 d=Pacific/Apia Etc/GMT+11 MIT Pacific/Apia Pacific/Midway Pacific/Niue Pacific/Pago_Pago
|
||||
|
@ -114,7 +99,7 @@
|
|||
# letter: -, D, GHST, GMT, HS, S, SLST
|
||||
# on: 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
|
||||
# Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
|
||||
# Sun>=16, Sun>=23, S un>=8, Sun>=9, lastFri, lastSun, lastThu
|
||||
# Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
|
||||
# save: 0, 0:20, 0:30, 1:00
|
||||
# type: -
|
||||
|
||||
|
@ -123,11 +108,19 @@ use strict;
|
|||
use Getopt::Long;
|
||||
use vars qw(@FILES $YEAR $DATA_DIR $OUT $SEP @MONTH
|
||||
$VERSION_YEAR $VERSION_SUFFIX $RAW_VERSION
|
||||
$TZ_ALIAS $TZ_DEFAULT $URL $HTML_FILE);
|
||||
$TZ_ALIAS $TZ_DEFAULT $URL $HTML_FILE
|
||||
$TZ_TXT_VERSION %ZONE_ID_TO_INDEX $END_MARKER);
|
||||
require 'dumpvar.pl';
|
||||
use tzparse;
|
||||
use tzutil;
|
||||
|
||||
# Current version of the data file. Matches formatVersion[0] in the
|
||||
# binary data file. SEE tzdat.h
|
||||
# 1 - unreleased version (?)
|
||||
# 2 - original version
|
||||
# 3 - added equivalency groups
|
||||
$TZ_TXT_VERSION = 3;
|
||||
|
||||
# File names
|
||||
$OUT = 'tz.txt';
|
||||
$TZ_ALIAS = 'tz.alias';
|
||||
|
@ -139,6 +132,9 @@ $URL = "ftp://elsie.nci.nih.gov/pub";
|
|||
# Separator between fields in the output file
|
||||
$SEP = ','; # Don't use ':'!
|
||||
|
||||
# Marker between sections
|
||||
$END_MARKER = 'end';
|
||||
|
||||
@FILES = qw(africa
|
||||
antarctica
|
||||
asia
|
||||
|
@ -194,6 +190,7 @@ $HTML_FILE = shift;
|
|||
jul aug sep oct nov dec);
|
||||
|
||||
main();
|
||||
exit();
|
||||
|
||||
sub usage {
|
||||
print STDERR "Usage: $0 data_dir [html_out]\n\n";
|
||||
|
@ -242,55 +239,21 @@ sub main {
|
|||
$ZONES{GMT} = \%GMT;
|
||||
}
|
||||
|
||||
# Write out the zone data in a compact readable format.
|
||||
|
||||
# Create a name table from the zone names. The format of
|
||||
# the name table is:
|
||||
#
|
||||
# The names are listed in lexical order, and each name
|
||||
# is assigned an offset. The first name's offset is 0.
|
||||
# The offset of name i+1 is the offset of name i + the
|
||||
# length of name i + 1 (for the zero byte).
|
||||
#
|
||||
# Store the offsets in a hash %NAME_OFFSET. Store the
|
||||
# names in a big scalar, $NAME_LIST, with "\n" between
|
||||
# each name and after the last.
|
||||
#
|
||||
# Store the length of the entire name table in $NAME_SIZE.
|
||||
#
|
||||
# Also, count the number of standard and DST zones.
|
||||
my $offset = 0;
|
||||
my $NAME_LIST = '';
|
||||
my %NAME_OFFSET;
|
||||
my $STD_COUNT = 0; # Count of standard zones
|
||||
my $DST_COUNT = 0; # Count of DST zones
|
||||
my $maxNameLen = 0;
|
||||
# IMPORTANT: This sort must correspond to the sort
|
||||
# order of UnicodeString::compare. That
|
||||
# is, it must be a plain sort.
|
||||
foreach my $z (sort keys %ZONES) {
|
||||
# Validate names and count total size
|
||||
my $NAME_SIZE = 0;
|
||||
foreach my $z (keys %ZONES) {
|
||||
# Make sure zone IDs only contain invariant chars
|
||||
assertInvariantChars($z);
|
||||
|
||||
my $len = length($z);
|
||||
$NAME_OFFSET{$z} = $offset;
|
||||
$offset += $len + 1;
|
||||
$NAME_LIST .= "$z\n";
|
||||
$maxNameLen = $len if ($len > $maxNameLen);
|
||||
if ($ZONES{$z}->{rule} eq $TZ::STANDARD) {
|
||||
$STD_COUNT++;
|
||||
} else {
|
||||
$DST_COUNT++;
|
||||
}
|
||||
$NAME_SIZE += 1 + length($z);
|
||||
}
|
||||
my $NAME_SIZE = $offset;
|
||||
|
||||
# Find the maximum number of zones with the same value of
|
||||
# gmtOffset.
|
||||
my %perOffset; # Hash of offset -> count
|
||||
foreach my $z (keys %ZONES) {
|
||||
# Use parseOffset to normalize values - probably unnecessary
|
||||
++$perOffset{parseOffset($ZONES{$z}->{gmtoff})};
|
||||
# Use TZ::ParseOffset to normalize values - probably unnecessary
|
||||
++$perOffset{TZ::ParseOffset($ZONES{$z}->{gmtoff})};
|
||||
}
|
||||
my $maxPerOffset = 0;
|
||||
foreach (values %perOffset) {
|
||||
|
@ -304,72 +267,116 @@ sub main {
|
|||
# zones for the offset, in sorted order, including the default.
|
||||
my $offsetIndex = createOffsetIndex(\%ZONES, $TZ_DEFAULT);
|
||||
|
||||
# Group zones into equivalency groups
|
||||
my $maxPerEquiv = 0;
|
||||
TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
|
||||
print
|
||||
"Equivalency groups (including unique zones): ",
|
||||
scalar @EQUIV, "\n";
|
||||
foreach my $eg (@EQUIV) {
|
||||
$maxPerEquiv = @$eg if (@$eg > $maxPerEquiv);
|
||||
}
|
||||
|
||||
# Sort equivalency table first by GMT offset, then by
|
||||
# alphabetic order of encoded rule string.
|
||||
@EQUIV = sort { my $x = $ZONES{$a->[0]};
|
||||
my $y = $ZONES{$b->[0]};
|
||||
TZ::ParseOffset($x->{gmtoff}) <=>
|
||||
TZ::ParseOffset($y->{gmtoff}) ||
|
||||
TZ::ZoneCompare($x, $y, \%RULES); } @EQUIV;
|
||||
|
||||
# Sort the zones in each equivalency table entry
|
||||
foreach my $eg (@EQUIV) {
|
||||
next unless (@$eg > 1); # Skip single-zone entries
|
||||
my @zoneList = sort @$eg;
|
||||
$eg = \@zoneList;
|
||||
}
|
||||
|
||||
# Create an index from zone ID to index #
|
||||
my $i = 0;
|
||||
foreach my $z (sort keys %ZONES) {
|
||||
$ZONE_ID_TO_INDEX{$z} = $i++;
|
||||
}
|
||||
|
||||
open(OUT,">$OUT") or die "Can't open $OUT for writing: $!";
|
||||
|
||||
############################################################
|
||||
# EMIT HEADER
|
||||
############################################################
|
||||
# Zone data version
|
||||
print OUT "#--- Header ---\n";
|
||||
print OUT $TZ_TXT_VERSION, " # format version number of this file\n";
|
||||
print OUT $VERSION_YEAR, " # ($RAW_VERSION) version of Olson zone\n";
|
||||
print OUT $VERSION_SUFFIX, " # data from $URL\n";
|
||||
print OUT scalar keys %ZONES, " # total zone count\n";
|
||||
print OUT $maxPerOffset, " # max count of zones with same gmtOffset\n";
|
||||
print OUT $maxNameLen, " # max name length not incl final zero\n";
|
||||
# The following counts are all used by gentz during its parse
|
||||
# of the tz.txt file and creation of the tz.dat file, even
|
||||
# if they don't show up in the tz.dat file header. For example,
|
||||
# gentz needs the maxPerOffset to preallocate the offset index
|
||||
# entries. It needs the NAME_SIZE to allocate the big buffer
|
||||
# that will receive all the names.
|
||||
print OUT scalar @EQUIV, " # equivalency groups count\n";
|
||||
print OUT $maxPerOffset, " # max zones with same gmtOffset\n";
|
||||
print OUT $maxPerEquiv, " # max zones in an equivalency group\n";
|
||||
print OUT $NAME_SIZE, " # length of name table in bytes\n";
|
||||
print OUT $END_MARKER, "\n\n";
|
||||
|
||||
############################################################
|
||||
# EMIT ZONE TABLES
|
||||
############################################################
|
||||
# Output first the standard zones, then the dst zones.
|
||||
# Precede each list with the count of zones to follow,
|
||||
# and follow it with the keyword 'end'.
|
||||
for my $type (qw(standard dst)) {
|
||||
print OUT ($type eq 'standard'
|
||||
? $STD_COUNT : $DST_COUNT), " # count of $type zones to follow\n";
|
||||
foreach my $z (sort keys %ZONES) {
|
||||
my $isStd = ($ZONES{$z}->{rule} eq $TZ::STANDARD);
|
||||
next if ($isStd ne ($type eq 'standard'));
|
||||
print OUT $NAME_OFFSET{$z}, ",";
|
||||
print OUT formatZone($z, $ZONES{$z}, \%RULES), "\n";
|
||||
}
|
||||
print OUT "end\n"; # 'end' keyword for error checking
|
||||
}
|
||||
|
||||
############################################################
|
||||
# EMIT NAME TABLE
|
||||
# EMIT ZONE TABLE
|
||||
############################################################
|
||||
# Output the name table, followed by 'end' keyword
|
||||
print OUT scalar keys %ZONES, " # count of names to follow\n";
|
||||
print OUT $NAME_LIST, "end\n";
|
||||
print OUT "#--- Zone table ---\n";
|
||||
print OUT "#| equiv_index,name\n";
|
||||
print OUT scalar keys %ZONES, " # count of zones to follow\n";
|
||||
|
||||
# IMPORTANT: This sort must correspond to the sort
|
||||
# order of UnicodeString::compare. That
|
||||
# is, it must be a plain sort.
|
||||
foreach my $z (sort keys %ZONES) {
|
||||
# Make sure zone IDs only contain invariant chars
|
||||
assertInvariantChars($z);
|
||||
|
||||
print OUT equivIndexOf($z, \@EQUIV), ',', $z, "\n";
|
||||
}
|
||||
print OUT $END_MARKER, "\n\n";
|
||||
|
||||
############################################################
|
||||
# EMIT INDEX BY NAME
|
||||
# EMIT EQUIVALENCY TABLE
|
||||
############################################################
|
||||
# Output the name index table. Since we don't know structure
|
||||
# sizes, we output the index number of each zone. For example,
|
||||
# "s0" is the first standard zone, "s1" is the second, etc.
|
||||
# Likewise, "d0" is the first DST zone, "d1" is the second, etc.
|
||||
|
||||
# First compute index IDs, as described above.
|
||||
my %indexID;
|
||||
my $s = 0;
|
||||
my $d = 0;
|
||||
foreach my $z (sort keys %ZONES) {
|
||||
if ($ZONES{$z}->{rule} eq $TZ::STANDARD) {
|
||||
$indexID{$z} = "s$s";
|
||||
$s++;
|
||||
} else {
|
||||
$indexID{$z} = "d$d";
|
||||
$d++;
|
||||
print OUT "#--- Equivalency table ---\n";
|
||||
print OUT "#| ('s'|'d'),zone_spec,id_count,id_list\n";
|
||||
print OUT scalar @EQUIV, " # count of equivalency groups to follow\n";
|
||||
$i = 0;
|
||||
foreach my $aref (@EQUIV) {
|
||||
# $aref is an array ref; the array is full of zone IDs
|
||||
# Use the ID of the first array element
|
||||
my $z = $aref->[0];
|
||||
|
||||
# Output either 's' or 'd' to indicate standard or DST
|
||||
my $isStd = ($ZONES{$z}->{rule} eq $TZ::STANDARD);
|
||||
print OUT $isStd ? 's,' : 'd,';
|
||||
|
||||
# Format the zone
|
||||
my ($spec, $notes) = formatZone($z, $ZONES{$z}, \%RULES);
|
||||
|
||||
# Now add the equivalency list
|
||||
push @$spec, scalar @$aref;
|
||||
push @$notes, "[";
|
||||
my $min = -1;
|
||||
foreach $z (@$aref) {
|
||||
my $index = $ZONE_ID_TO_INDEX{$z};
|
||||
# Make sure they are in order
|
||||
die("Unsorted equiv table indices") if ($index <= $min);
|
||||
$min = $index;
|
||||
push @$spec, $index;
|
||||
push @$notes, $z;
|
||||
}
|
||||
push @$notes, "]";
|
||||
|
||||
unshift @$notes, $i++; # Insert index of this group at front
|
||||
print OUT join($SEP, @$spec) . " # " . join(' ', @$notes), "\n";
|
||||
}
|
||||
|
||||
# Now emit table sorted by name
|
||||
print OUT scalar keys %ZONES, " # count of name index table entries to follow\n";
|
||||
foreach my $z (sort keys %ZONES) {
|
||||
print OUT $indexID{$z}, " # $z\n";
|
||||
}
|
||||
print OUT "end\n";
|
||||
print OUT $END_MARKER, "\n\n";
|
||||
|
||||
############################################################
|
||||
# EMIT INDEX BY GMT OFFSET
|
||||
|
@ -378,13 +385,15 @@ sub main {
|
|||
# Create an array mapping zone number -> name.
|
||||
my %zoneNumber;
|
||||
my @zoneName;
|
||||
my $i = 0;
|
||||
$i = 0;
|
||||
foreach (sort keys %ZONES) {
|
||||
$zoneName[$i] = $_;
|
||||
$zoneNumber{$_} = $i++;
|
||||
}
|
||||
|
||||
# Emit offset index
|
||||
print OUT "#--- Offset index ---\n";
|
||||
print OUT "#| gmt_offset,default_id,id_count,id_list\n";
|
||||
print OUT scalar keys %{$offsetIndex}, " # index by offset entries to follow\n";
|
||||
foreach (sort {$a <=> $b} keys %{$offsetIndex}) {
|
||||
my $aref = $offsetIndex->{$_};
|
||||
|
@ -399,7 +408,7 @@ sub main {
|
|||
join(" ", @b), "\n";
|
||||
}
|
||||
|
||||
print OUT "end\n";
|
||||
print OUT $END_MARKER, "\n";
|
||||
|
||||
############################################################
|
||||
# END
|
||||
|
@ -409,17 +418,10 @@ sub main {
|
|||
|
||||
# Emit the HTML file
|
||||
if ($HTML_FILE) {
|
||||
emitHTML($HTML_FILE, \%ZONES, \%RULES, $offsetIndex, $aliases);
|
||||
emitHTML($HTML_FILE, \%ZONES, \%RULES, \@EQUIV, $offsetIndex, $aliases);
|
||||
print "$HTML_FILE written.\n";
|
||||
}
|
||||
|
||||
if (0) {
|
||||
TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
|
||||
print
|
||||
"Equivalency groups (including unique zones): ",
|
||||
scalar @EQUIV, "\n";
|
||||
}
|
||||
|
||||
#::dumpValue($ZONES{"America/Los_Angeles"});
|
||||
#::dumpValue($RULES{"US"});
|
||||
#::dumpValue($RULES{"Tonga"});
|
||||
|
@ -466,7 +468,7 @@ sub createOffsetIndex {
|
|||
# Create an index by gmtoff.
|
||||
my %offsetMap;
|
||||
foreach (sort keys %{$zones}) {
|
||||
my $offset = parseOffset($zones->{$_}->{gmtoff});
|
||||
my $offset = TZ::ParseOffset($zones->{$_}->{gmtoff});
|
||||
push @{$offsetMap{$offset}}, $_;
|
||||
}
|
||||
|
||||
|
@ -487,7 +489,7 @@ sub createOffsetIndex {
|
|||
$ok = 0;
|
||||
next;
|
||||
}
|
||||
my $offset = parseOffset($zones->{$z}->{gmtoff});
|
||||
my $offset = TZ::ParseOffset($zones->{$z}->{gmtoff});
|
||||
if (exists $defaults{$offset}) {
|
||||
print
|
||||
"Error: Offset ", formatOffset($offset), " has both ",
|
||||
|
@ -560,7 +562,7 @@ sub isDefault {
|
|||
my $name = shift;
|
||||
my $offset = shift;
|
||||
my $offsetIndex = shift;
|
||||
my $aref = $offsetIndex->{parseOffset($offset)};
|
||||
my $aref = $offsetIndex->{TZ::ParseOffset($offset)};
|
||||
return ($aref->[0] eq $name);
|
||||
}
|
||||
|
||||
|
@ -568,17 +570,20 @@ sub isDefault {
|
|||
# Param: File name
|
||||
# Param: ref to zone hash
|
||||
# Param: ref to rule hash
|
||||
# Param: ref to equiv table
|
||||
# Param: ref to offset index
|
||||
# Param: ref to alias hash
|
||||
sub emitHTML {
|
||||
my $file = shift;
|
||||
my $zones = shift;
|
||||
my $rules = shift;
|
||||
my $equiv = shift;
|
||||
my $offsetIndex = shift;
|
||||
my $aliases = shift;
|
||||
|
||||
# These are variables for the template
|
||||
my $_count = scalar keys %{$zones};
|
||||
my $_equiv = scalar @$equiv;
|
||||
|
||||
# Build table in order of zone offset
|
||||
my $_offsetTable = "<p><table>\n";
|
||||
|
@ -617,6 +622,21 @@ sub emitHTML {
|
|||
}
|
||||
$_nameTable .= "</table>\n";
|
||||
|
||||
# Build equivalency group table
|
||||
my $_equivTable = "<p><table>\n";
|
||||
$_equivTable .= "<tr><td>Offset</td><td>DST Begins</td><td>DST Ends</td>";
|
||||
$_equivTable .= "<td>Savings</td><td>Zones</td></tr>\n";
|
||||
|
||||
$_equivTable .= "<tr><td><hr></td>";
|
||||
$_equivTable .= "<td><hr></td><td><hr></td>";
|
||||
$_equivTable .= "<td><hr></td><td><hr></td><td><hr></td></tr>\n";
|
||||
|
||||
# Equiv table is sorted elsewhere -- output it in native order
|
||||
foreach my $eg (@$equiv) {
|
||||
$_equivTable .= emitHTMLEquiv($eg, $zones, $rules);
|
||||
}
|
||||
$_equivTable .= "</table>\n";
|
||||
|
||||
# Time stamp
|
||||
my $_timeStamp = localtime;
|
||||
|
||||
|
@ -641,7 +661,7 @@ sub emitHTML {
|
|||
</tr>
|
||||
<tr>
|
||||
<td>Total zone count</td>
|
||||
<td><strong>$_count</strong></td>
|
||||
<td><strong>$_count</strong> in <strong>$_equiv</strong> equivalency groups</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Original source</td>
|
||||
|
@ -757,6 +777,20 @@ Times without suffixes are in wall time (that is, either standard time or daylig
|
|||
time, depending on which is in effect).</p>
|
||||
|
||||
$_nameTable
|
||||
<hr>
|
||||
|
||||
<h2>Time Zone Equivalency Groups</h2>
|
||||
|
||||
<p>ICU groups zones into <em>equivalency groups</em>. These are
|
||||
groups of zones that are identical in GMT offset and in rules, but
|
||||
that have different IDs. Knowledge of equivalency groups allows ICU
|
||||
to reduce the amount of data stored. More importantly, it allows ICU
|
||||
to apply data for one zone to other equivalent zones when appropriate
|
||||
(e.g., in formatting). Equivalency groups are formed at build time,
|
||||
not at runtime, so the runtime cost to lookup the equivalency group of
|
||||
a given zone is negligible.</p>
|
||||
|
||||
$_equivTable
|
||||
</body>
|
||||
</html>
|
||||
END
|
||||
|
@ -779,6 +813,40 @@ sub bookmark {
|
|||
$_;
|
||||
}
|
||||
|
||||
# Emit an equivalency group as an HTML table row. Return the string.
# The descriptive cells (offset, DST rules, savings) are taken from the
# FIRST zone of the group; the final cell lists every zone ID in the group.
# Param: ref to array of zone IDs
# Param: ref to zone hash
# Param: ref to rule hash
# Return: string holding one complete <tr>...</tr> row, newline-terminated
sub emitHTMLEquiv {
    my ($eg, $zone, $rule) = @_;

    # Build the row in a lexical rather than a localized $_, so that the
    # helper called below cannot clobber the partially built row.
    my $row = "<tr valign=top>";
    $row .= _emitHTMLZone($zone->{$eg->[0]}, $rule);

    # Don't sort @$eg -- output in native order
    $row .= "<td>" . join(" ", @$eg) . "</td>";
    $row .= "</tr>\n";

    return $row;
}
|
||||
|
||||
# Emit a zone description without ID, alias info etc.
# Produces the cells shared by the per-zone table and the equivalency
# table: a GMT-offset cell linking to the offset bookmark, followed by
# either three cells (onset rule, cease rule, savings) for a DST zone or
# a single empty cell spanning three columns for a standard zone.
# Param: zone OBJECT hash ref
# Param: rule hash ref
# Return: string of <td> cells (no enclosing <tr>)
sub _emitHTMLZone {
    my ($zone, $rules) = @_;

    my $gmtoff = "GMT" . formatOffset(TZ::ParseOffset($zone->{gmtoff}));

    # Accumulate into a lexical rather than a localized $_, so that the
    # helpers called while building the cells cannot clobber the result.
    my $cells = "<td><a href=\"#" . bookmark($gmtoff) . "\">$gmtoff</a></td>";

    if ($zone->{rule} ne $TZ::STANDARD) {
        # Element 0 of the rule is passed to emitHTMLRule for the
        # "DST Begins" cell, element 1 for the "DST Ends" cell.
        my $rule = $rules->{$zone->{rule}};
        $cells .= "<td nowrap>" . emitHTMLRule($rule->[0]) . "</td>";
        $cells .= "<td nowrap>" . emitHTMLRule($rule->[1]) . "</td>";
        $cells .= "<td>" . $rule->[0]->{save} . "</td>";
    } else {
        $cells .= "<td colspan=3></td>";
    }

    return $cells;
}
|
||||
|
||||
# Emit a single zone description as HTML table row. Return the string.
|
||||
# Param: Zone name
|
||||
# Param: Zone hash object ref
|
||||
|
@ -793,16 +861,7 @@ sub emitHTMLZone {
|
|||
my $revalias = exists $revaliases->{$name} ? $revaliases->{$name} : '';
|
||||
local $_ = "<tr><td>" . ($isDefault?"<b>":"") .
|
||||
"<a name=\"" . bookmark($name) . "\">$name</a>" . ($isDefault?"</b>":"") . "</td>";
|
||||
my $gmtoff = "GMT" . formatOffset(parseOffset($zone->{gmtoff}));
|
||||
$_ .= "<td><a href=\"#" . bookmark($gmtoff) . "\">$gmtoff</a></td>";
|
||||
if ($zone->{rule} ne $TZ::STANDARD) {
|
||||
my $rule = $rules->{$zone->{rule}};
|
||||
$_ .= "<td>" . emitHTMLRule($rule->[0]) . "</td>";
|
||||
$_ .= "<td>" . emitHTMLRule($rule->[1]) . "</td>";
|
||||
$_ .= "<td>" . $rule->[0]->{save} . "</td>";
|
||||
} else {
|
||||
$_ .= "<td colspan=3></td>";
|
||||
}
|
||||
$_ .= _emitHTMLZone($zone, $rules);
|
||||
if ($alias) {
|
||||
$_ .= "<td><em>alias for</em> <a href=\"#" .
|
||||
bookmark($alias) . "\">$alias</a></td>";
|
||||
|
@ -867,18 +926,19 @@ sub incorporateAliases {
|
|||
# Param: Zone name
|
||||
# Param: Zone hash
|
||||
# Param: Ref to hash of all rules
|
||||
# Return: One line description of this zone.
|
||||
# Return: Two array refs, one to the specs, one to the notes
|
||||
sub formatZone { # ($z, $ZONES{$z}, \%RULES)
|
||||
my $name = shift;
|
||||
my $zone = shift;
|
||||
my $rules = shift;
|
||||
|
||||
my @spec;
|
||||
my @notes = ( $name );
|
||||
#my @notes = ( $name );
|
||||
my @notes;
|
||||
|
||||
# GMT offset
|
||||
push @notes, ($zone->{gmtoff}=~/^-/?"GMT":"GMT+") . $zone->{gmtoff};
|
||||
push @spec, parseOffset($zone->{gmtoff});
|
||||
push @spec, TZ::ParseOffset($zone->{gmtoff});
|
||||
|
||||
#|rawOffset The new SimpleTimeZone's raw GMT offset
|
||||
#|ID The new SimpleTimeZone's time zone ID.
|
||||
|
@ -917,7 +977,7 @@ sub formatZone { # ($z, $ZONES{$z}, \%RULES)
|
|||
push @spec, $a[0];
|
||||
}
|
||||
|
||||
join($SEP, @spec) . " # " . join(' ', @notes);
|
||||
(\@spec, \@notes);
|
||||
}
|
||||
|
||||
# Format a rule and return the string
|
||||
|
@ -934,22 +994,6 @@ sub formatRule {
|
|||
push @$spec, parseTime($rule->{at}); # Time
|
||||
}
|
||||
|
||||
# Parse an offset of the form d, d:dd, or d:dd:dd, or any of the above
|
||||
# preceded by a '-'. Return the total number of seconds represented.
|
||||
# Param: String
|
||||
# Return: Integer number of seconds
|
||||
sub parseOffset {
|
||||
local $_ = shift;
|
||||
if (/^(-)?(\d{1,2})(:(\d\d))?(:(\d\d))?$/) {
|
||||
# 1 2 4 6
|
||||
my $a = (($2 * 60) + (defined $4?$4:0)) * 60 + (defined $6?$6:0);
|
||||
$a = -$a if (defined $1 && $1 eq '-');
|
||||
return $a;
|
||||
} else {
|
||||
die "Cannot parse offset \"$_\"";
|
||||
}
|
||||
}
|
||||
|
||||
# Format an offset in seconds and return a string of the form
|
||||
# /[+-]\d{1,2}:\d\d(:\d\d)?/.
|
||||
# Param: Offset in seconds
|
||||
|
@ -1086,4 +1130,24 @@ sub assertInvariantChars {
|
|||
}
|
||||
}
|
||||
|
||||
# Map ID to equivalency table index. Return the index of the given ID
# in the equivalency array. The array contains array refs. Each ref
# points to an array of strings.
# Param: ID to find
# Param: Ref to equiv array (ref to array of refs to arrays of IDs)
# Return: Index into array where ID is found, or -1 if not found
# NOTE: This function can be eliminated by generating a reverse
# mapping hash when we create the equivalency table.
sub equivIndexOf {
    my ($id, $groups) = @_;

    # Linear scan: the first group containing the ID wins.
    for my $index (0 .. $#{$groups}) {
        for my $member (@{ $groups->[$index] }) {
            return $index if ($member eq $id);
        }
    }

    return -1;
}
|
||||
|
||||
__END__
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -110,7 +110,10 @@ require 'dumpvar.pl';
|
|||
@ISA = qw(Exporter);
|
||||
@EXPORT = qw(ZoneEquals
|
||||
RuleEquals
|
||||
ZoneCompare
|
||||
RuleCompare
|
||||
FormZoneEquivalencyGroups
|
||||
ParseOffset
|
||||
);
|
||||
$VERSION = '0.1';
|
||||
|
||||
|
@ -120,16 +123,47 @@ $STANDARD = '-'; # Name of the Standard Time rule
|
|||
# Param: zone object (hash ref)
|
||||
# Param: zone object (hash ref)
|
||||
# Param: ref to hash of all rules
|
||||
# Return: true if two zones are equivalent
|
||||
sub ZoneEquals {
|
||||
# Return: 0, -1, or 1
|
||||
sub ZoneCompare {
|
||||
my $z1 = shift;
|
||||
my $z2 = shift;
|
||||
my $RULES = shift;
|
||||
|
||||
($z1, $z2) = ($z1->{rule}, $z2->{rule});
|
||||
|
||||
return ($z1 eq $z2) ||
|
||||
RuleEquals($RULES->{$z1}, $RULES->{$z2});
|
||||
return RuleCompare($RULES->{$z1}, $RULES->{$z2});
|
||||
}
|
||||
|
||||
######################################################################
|
||||
# Compare two rules for ordering and equivalence.
# Param: rule object (array ref whose element [2] is the precomputed
#        encoding string), or undef
# Param: rule object (same form), or undef
# Return: 0, -1, or 1.  An undefined rule compares equal to another
#         undefined rule and sorts before any defined rule; two defined
#         rules are ordered by string comparison of their encodings.
sub RuleCompare {
    my ($r1, $r2) = @_;

    # Just compare the precomputed encoding strings.
    # defined() catches undefined rules.  The only undefined
    # rule is $STANDARD; any others would be caught by
    # Postprocess().
    if (defined($r1) && defined($r2)) {
        return $r1->[2] cmp $r2->[2];
    }

    # At least one side is undefined here.
    return 1  if defined($r1);
    return -1 if defined($r2);
    return 0;

    # In theory, there's actually one more level of equivalency
    # analysis we could do.  This is to recognize that Sun >=1 is the
    # same as First Sun.  We don't do this yet, but it doesn't matter;
    # such a date is always referred to as Sun>=1, never as firstSun.
}
|
||||
|
||||
######################################################################
|
||||
# Test two zones for equivalence by delegating to ZoneCompare.
# Param: zone object (hash ref)
# Param: zone object (hash ref)
# Param: ref to hash of all rules
# Return: true if two zones are equivalent (ZoneCompare yields 0)
sub ZoneEquals {
    my ($z1, $z2, $rules) = @_;
    return ZoneCompare($z1, $z2, $rules) == 0;
}
|
||||
|
||||
######################################################################
|
||||
|
@ -137,18 +171,7 @@ sub ZoneEquals {
|
|||
# Param: rule object (hash ref)
|
||||
# Return: true if two rules are equivalent
|
||||
sub RuleEquals {
|
||||
my $r1 = shift;
|
||||
my $r2 = shift;
|
||||
|
||||
# Just compare the precomputed encoding strings.
|
||||
# defined() catches undefined rules. The only undefined
|
||||
# rule is $STANDARD; any others would be cause by
|
||||
# Postprocess().
|
||||
return defined($r1) && defined($r2) && $r1->[2] eq $r2->[2];
|
||||
|
||||
# There's actually one more level of equivalency analysis we could
|
||||
# do. This is to recognize that Sun >=1 is the same as First Sun.
|
||||
# We don't do this yet.
|
||||
RuleCompare(@_) == 0;
|
||||
}
|
||||
|
||||
######################################################################
|
||||
|
@ -162,26 +185,26 @@ sub RuleEquals {
|
|||
# Param: IN ref to hash of all rules
|
||||
# Param: OUT ref to array to receive group refs
|
||||
sub FormZoneEquivalencyGroups {
|
||||
my ($ZONES, $RULES, $EQUIV) = @_;
|
||||
my ($zones, $rules, $equiv) = @_;
|
||||
|
||||
# Group the zones by offset. This improves efficiency greatly;
|
||||
# instead of an n^2 computation, we just need to do n^2 within
|
||||
# each offset; a much smaller total number.
|
||||
my %ZONES_BY_OFFSET;
|
||||
foreach (keys %$ZONES) {
|
||||
push @{$ZONES_BY_OFFSET{$ZONES->{$_}->{gmtoff}}}, $_;
|
||||
my %zones_by_offset;
|
||||
foreach (keys %$zones) {
|
||||
push @{$zones_by_offset{ParseOffset($zones->{$_}->{gmtoff})}}, $_;
|
||||
}
|
||||
|
||||
# Find equivalent rules
|
||||
foreach my $gmtoff (keys %ZONES_BY_OFFSET) {
|
||||
foreach my $gmtoff (keys %zones_by_offset) {
|
||||
# Make an array of equivalency groups
|
||||
# (array of refs to array of names)
|
||||
my @equiv;
|
||||
foreach my $name1 (@{$ZONES_BY_OFFSET{$gmtoff}}) {
|
||||
foreach my $name1 (@{$zones_by_offset{$gmtoff}}) {
|
||||
my $found = 0;
|
||||
foreach my $group (@equiv) {
|
||||
my $name2 = $group->[0];
|
||||
if (ZoneEquals($ZONES->{$name1}, $ZONES->{$name2}, $RULES)) {
|
||||
if (ZoneEquals($zones->{$name1}, $zones->{$name2}, $rules)) {
|
||||
push @$group, $name1;
|
||||
$found = 1;
|
||||
last;
|
||||
|
@ -192,6 +215,23 @@ sub FormZoneEquivalencyGroups {
|
|||
push @equiv, \@newGroup;
|
||||
}
|
||||
}
|
||||
push @$EQUIV, @equiv;
|
||||
push @$equiv, @equiv;
|
||||
}
|
||||
}
|
||||
|
||||
######################################################################
|
||||
# Parse an offset of the form d, d:dd, or d:dd:dd, or any of the above
# preceded by a '-'.  Return the total number of seconds represented.
# The leading field (hours) may be one or two digits; the optional
# minutes and seconds fields are exactly two digits each.
# Param: String
# Return: Integer number of seconds (negative if the string began with '-')
# Dies (via confess, with a stack trace) if the string cannot be parsed.
sub ParseOffset {
    my $text = shift;

    # Captures: sign, hours, minutes, seconds.  The last two are undef
    # when the corresponding field is absent.  A failed match yields an
    # empty list, which makes the list assignment false.
    my ($sign, $hours, $minutes, $seconds) =
        $text =~ /^(-)?(\d{1,2})(?::(\d\d))?(?::(\d\d))?$/
        or confess "Cannot parse offset \"$text\"";

    $minutes = 0 unless defined $minutes;
    $seconds = 0 unless defined $seconds;

    my $total = ($hours * 60 + $minutes) * 60 + $seconds;
    return defined($sign) ? -$total : $total;
}
|
||||
|
|
Loading…
Add table
Reference in a new issue