ICU-449 TimeZone equivalency support

X-SVN-Rev: 2520
This commit is contained in:
Alan Liu 2000-09-27 16:26:41 +00:00
parent 9218332c3a
commit d893f0de64
3 changed files with 910 additions and 1518 deletions

View file

@ -30,75 +30,60 @@
# - Lines may be followed by a comment; the parser must ignore
# anything of the form /\s+#.*$/ in each line.
# |3065,14400 # Asia/Dubai GMT+4:00
# - The file contains a header and 5 lists.
# - The header contains the version of the unix data, the total
# zone count, the maximum number of zones sharing the same value
# of gmtOffset, the length of the name table in bytes, and
# the length of the longest name (not including the terminating
# zero byte).
# - The file contains a header and 3 lists.
# - The header contains the version of this data file:
# 2 original version, without equivalency groups
# 3 current version, described here
# then the version of the unix data, and other counts:
# | 3 # format version number of this file
# | 1999 # (tzdata1999j) version of Olson zone
# | 10 # data from ftp://elsie.nci.nih.gov
# | 387 # total zone count
# | 40 # max count of zones with same gmtOffset
# | 25 # max name length not incl final zero
# | 5906 # length of name table in bytes
# | 402 # total zone count
# | 40 # maximum zones per offset (used by gentz)
# - Lists start with a count of the records to follow, the records
# themselves (one per line), and a single line with the keyword
# 'end'.
# - The first list is the list of standard zones:
# | 208 # count of standard zones to follow
# | 0,0 # Africa/Abidjan GMT+0:00
# | 28,10800 # Africa/Addis_Ababa GMT+3:00
# - The first list is the name table:
# | 387 # count of names to follow
# | 34,Africa/Abidjan
# | 23,Africa/Accra
# ...
# | end
# Each standard zone record contains two integers. The first
# is a byte offset into the name table for the name of the zone.
# The second integer is the GMT offset in SECONDS for this zone.
# - The second list is the list of DST zones:
# | 179 # count of dst zones to follow
# | 15,0,8,1,0,0,w,11,31,0,0,w,20 # Africa/Accra GMT+0:00 Sep 1...
# | 184,7200,3,-1,6,0,s,8,-1,5,1380,s,60 # Africa/Cairo GMT+2:0...
# Each name is terminated by a newline (like all lines in the file).
# The zone numbers in other lists refer to this table. The
# integer that precedes the name is an index into the equivalency
# table, with the first table entry being entry 0.
# - The second list is the equivalency table. It lists, in sorted
# order, the equivalency groups. Each group represents a
# set of one or more zones that have the same GMT offset and the
# same rules. While there are about 400 zones, there are fewer than
# 120 equivalency groups (as of this writing).
# | 120 # count of equivalency groups to follow
# | s,0,1,0 # GMT+0:00
# | d,0,8,1,0,0,w,11,31,0,0,w,20,4,15,16,17,18 # GMT+0:00 Sep 1...
# ...
# | end
# Each record starts with the same two integers as a standard
# zone record. Following this are data for the onset rule and
# the cease rule. Each rule is described by the following integers:
# Entries start with 's' for standard zones, or 'd' for DST zones.
# Both zone descriptors start with the GMT offset in SECONDS. DST
# zones contain, in addition, data for the onset rule and the cease
# rule. Each rule is described by the following integers:
# month (JAN = 0)
# dowim } These two values are in SimpleTimeZone encoded
# dow } format for DOM, DOWIM, DOW>=DOM, or DOW<=DOM.
# time MINUTES
# time mode ('w', 's', 'u')
# The last integer in the record is the DST savings in MINUTES,
# The last rule integer in the record is the DST savings in MINUTES,
# typically 60.
# - The third list is the name table:
# | 387 # count of names to follow
# | Africa/Abidjan
# | Africa/Accra
# ...
# | end
# Each name is terminated by a newline (like all lines in the file).
# The offsets in the first two lists refer to this table.
# - The fourth list is an index list by name. The index entries
# themselves are of the form /[sd]\d+/, where the first character
# indicates standard or DST, and the number that follows indexes
# into the corresponding array.
# | 416 # count of name index table entries to follow
# | d0 # ACT
# | d1 # AET
# | d2 # AGT
# | d3 # ART
# | d4 # AST
# | s0 # Africa/Abidjan
# ...
# | end
# - The fifth list is an index by GMT offset. Each line lists the
# zones with the same offset. The first number on the line
# is the GMT offset in seconds. The second number is the default
# zone number in the following list, taken from tz.default. The
# third number is the count
# of zone numbers to follow. Each zone number is an integer from
# 0..n-1, where n is the total number of zones. The zone numbers
# refer to the zone list in alphabetical order.
# After either a standard or a DST zone, there is a list of the
# members of the equivalency group. This consists of a number of
# entries to follow (>=1), then the zone numbers themselves.
# - The third list is an index by GMT offset. Each line lists the
# zones with the same offset. The first number on the line is the
# GMT offset in seconds. The second number is the default zone
# number in the following list, taken from tz.default. The list
# consists of a number of entries to follow (>=1), then the zone
# numbers themselves.
# | 39 # index by offset entries to follow
# | -43200,280,1,280 # -12:00 d=Etc/GMT+12 Etc/GMT+12
# | -39600,374,6,279,366,374,394,396,399 # -11:00 d=Pacific/Apia Etc/GMT+11 MIT Pacific/Apia Pacific/Midway Pacific/Niue Pacific/Pago_Pago
@ -114,7 +99,7 @@
# letter: -, D, GHST, GMT, HS, S, SLST
# on: 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
# Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
# Sun>=16, Sun>=23, S un>=8, Sun>=9, lastFri, lastSun, lastThu
# Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
# save: 0, 0:20, 0:30, 1:00
# type: -
@ -123,11 +108,19 @@ use strict;
use Getopt::Long;
use vars qw(@FILES $YEAR $DATA_DIR $OUT $SEP @MONTH
$VERSION_YEAR $VERSION_SUFFIX $RAW_VERSION
$TZ_ALIAS $TZ_DEFAULT $URL $HTML_FILE);
$TZ_ALIAS $TZ_DEFAULT $URL $HTML_FILE
$TZ_TXT_VERSION %ZONE_ID_TO_INDEX $END_MARKER);
require 'dumpvar.pl';
use tzparse;
use tzutil;
# Current version of the data file. Matches formatVersion[0] in the
# binary data file. SEE tzdat.h
# 1 - unreleased version (?)
# 2 - original version
# 3 - added equivalency groups
$TZ_TXT_VERSION = 3;
# File names
$OUT = 'tz.txt';
$TZ_ALIAS = 'tz.alias';
@ -139,6 +132,9 @@ $URL = "ftp://elsie.nci.nih.gov/pub";
# Separator between fields in the output file
$SEP = ','; # Don't use ':'!
# Marker between sections
$END_MARKER = 'end';
@FILES = qw(africa
antarctica
asia
@ -194,6 +190,7 @@ $HTML_FILE = shift;
jul aug sep oct nov dec);
main();
exit();
sub usage {
print STDERR "Usage: $0 data_dir [html_out]\n\n";
@ -242,55 +239,21 @@ sub main {
$ZONES{GMT} = \%GMT;
}
# Write out the zone data in a compact readable format.
# Create a name table from the zone names. The format of
# the name table is:
#
# The names are listed in lexical order, and each name
# is assigned an offset. The first name's offset is 0.
# The offset of name i+1 is the offset of name i + the
# length of name i + 1 (for the zero byte).
#
# Store the offsets in a hash %NAME_OFFSET. Store the
# names in a big scalar, $NAME_LIST, with "\n" between
# each name and after the last.
#
# Store the length of the entire name table in $NAME_SIZE.
#
# Also, count the number of standard and DST zones.
my $offset = 0;
my $NAME_LIST = '';
my %NAME_OFFSET;
my $STD_COUNT = 0; # Count of standard zones
my $DST_COUNT = 0; # Count of DST zones
my $maxNameLen = 0;
# IMPORTANT: This sort must correspond to the sort
# order of UnicodeString::compare. That
# is, it must be a plain sort.
foreach my $z (sort keys %ZONES) {
# Validate names and count total size
my $NAME_SIZE = 0;
foreach my $z (keys %ZONES) {
# Make sure zone IDs only contain invariant chars
assertInvariantChars($z);
my $len = length($z);
$NAME_OFFSET{$z} = $offset;
$offset += $len + 1;
$NAME_LIST .= "$z\n";
$maxNameLen = $len if ($len > $maxNameLen);
if ($ZONES{$z}->{rule} eq $TZ::STANDARD) {
$STD_COUNT++;
} else {
$DST_COUNT++;
}
$NAME_SIZE += 1 + length($z);
}
my $NAME_SIZE = $offset;
# Find the maximum number of zones with the same value of
# gmtOffset.
my %perOffset; # Hash of offset -> count
foreach my $z (keys %ZONES) {
# Use parseOffset to normalize values - probably unnecessary
++$perOffset{parseOffset($ZONES{$z}->{gmtoff})};
# Use TZ::ParseOffset to normalize values - probably unnecessary
++$perOffset{TZ::ParseOffset($ZONES{$z}->{gmtoff})};
}
my $maxPerOffset = 0;
foreach (values %perOffset) {
@ -304,72 +267,116 @@ sub main {
# zones for the offset, in sorted order, including the default.
my $offsetIndex = createOffsetIndex(\%ZONES, $TZ_DEFAULT);
# Group zones into equivalency groups
my $maxPerEquiv = 0;
TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
print
"Equivalency groups (including unique zones): ",
scalar @EQUIV, "\n";
foreach my $eg (@EQUIV) {
$maxPerEquiv = @$eg if (@$eg > $maxPerEquiv);
}
# Sort equivalency table first by GMT offset, then by
# alphabetic order of encoded rule string.
@EQUIV = sort { my $x = $ZONES{$a->[0]};
my $y = $ZONES{$b->[0]};
TZ::ParseOffset($x->{gmtoff}) <=>
TZ::ParseOffset($y->{gmtoff}) ||
TZ::ZoneCompare($x, $y, \%RULES); } @EQUIV;
# Sort the zones in each equivalency table entry
foreach my $eg (@EQUIV) {
next unless (@$eg > 1); # Skip single-zone entries
my @zoneList = sort @$eg;
$eg = \@zoneList;
}
# Create an index from zone ID to index #
my $i = 0;
foreach my $z (sort keys %ZONES) {
$ZONE_ID_TO_INDEX{$z} = $i++;
}
open(OUT,">$OUT") or die "Can't open $OUT for writing: $!";
############################################################
# EMIT HEADER
############################################################
# Zone data version
print OUT "#--- Header ---\n";
print OUT $TZ_TXT_VERSION, " # format version number of this file\n";
print OUT $VERSION_YEAR, " # ($RAW_VERSION) version of Olson zone\n";
print OUT $VERSION_SUFFIX, " # data from $URL\n";
print OUT scalar keys %ZONES, " # total zone count\n";
print OUT $maxPerOffset, " # max count of zones with same gmtOffset\n";
print OUT $maxNameLen, " # max name length not incl final zero\n";
# The following counts are all used by gentz during its parse
# of the tz.txt file and creation of the tz.dat file, even
# if they don't show up in the tz.dat file header. For example,
# gentz needs the maxPerOffset to preallocate the offset index
# entries. It needs the NAME_SIZE to allocate the big buffer
# that will receive all the names.
print OUT scalar @EQUIV, " # equivalency groups count\n";
print OUT $maxPerOffset, " # max zones with same gmtOffset\n";
print OUT $maxPerEquiv, " # max zones in an equivalency group\n";
print OUT $NAME_SIZE, " # length of name table in bytes\n";
print OUT $END_MARKER, "\n\n";
############################################################
# EMIT ZONE TABLES
############################################################
# Output first the standard zones, then the dst zones.
# Precede each list with the count of zones to follow,
# and follow it with the keyword 'end'.
for my $type (qw(standard dst)) {
print OUT ($type eq 'standard'
? $STD_COUNT : $DST_COUNT), " # count of $type zones to follow\n";
foreach my $z (sort keys %ZONES) {
my $isStd = ($ZONES{$z}->{rule} eq $TZ::STANDARD);
next if ($isStd ne ($type eq 'standard'));
print OUT $NAME_OFFSET{$z}, ",";
print OUT formatZone($z, $ZONES{$z}, \%RULES), "\n";
}
print OUT "end\n"; # 'end' keyword for error checking
}
############################################################
# EMIT NAME TABLE
# EMIT ZONE TABLE
############################################################
# Output the name table, followed by 'end' keyword
print OUT scalar keys %ZONES, " # count of names to follow\n";
print OUT $NAME_LIST, "end\n";
print OUT "#--- Zone table ---\n";
print OUT "#| equiv_index,name\n";
print OUT scalar keys %ZONES, " # count of zones to follow\n";
# IMPORTANT: This sort must correspond to the sort
# order of UnicodeString::compare. That
# is, it must be a plain sort.
foreach my $z (sort keys %ZONES) {
# Make sure zone IDs only contain invariant chars
assertInvariantChars($z);
print OUT equivIndexOf($z, \@EQUIV), ',', $z, "\n";
}
print OUT $END_MARKER, "\n\n";
############################################################
# EMIT INDEX BY NAME
# EMIT EQUIVALENCY TABLE
############################################################
# Output the name index table. Since we don't know structure
# sizes, we output the index number of each zone. For example,
# "s0" is the first standard zone, "s1" is the second, etc.
# Likewise, "d0" is the first DST zone, "d1" is the second, etc.
# First compute index IDs, as described above.
my %indexID;
my $s = 0;
my $d = 0;
foreach my $z (sort keys %ZONES) {
if ($ZONES{$z}->{rule} eq $TZ::STANDARD) {
$indexID{$z} = "s$s";
$s++;
} else {
$indexID{$z} = "d$d";
$d++;
print OUT "#--- Equivalency table ---\n";
print OUT "#| ('s'|'d'),zone_spec,id_count,id_list\n";
print OUT scalar @EQUIV, " # count of equivalency groups to follow\n";
$i = 0;
foreach my $aref (@EQUIV) {
# $aref is an array ref; the array is full of zone IDs
# Use the ID of the first array element
my $z = $aref->[0];
# Output either 's' or 'd' to indicate standard or DST
my $isStd = ($ZONES{$z}->{rule} eq $TZ::STANDARD);
print OUT $isStd ? 's,' : 'd,';
# Format the zone
my ($spec, $notes) = formatZone($z, $ZONES{$z}, \%RULES);
# Now add the equivalency list
push @$spec, scalar @$aref;
push @$notes, "[";
my $min = -1;
foreach $z (@$aref) {
my $index = $ZONE_ID_TO_INDEX{$z};
# Make sure they are in order
die("Unsorted equiv table indices") if ($index <= $min);
$min = $index;
push @$spec, $index;
push @$notes, $z;
}
push @$notes, "]";
unshift @$notes, $i++; # Insert index of this group at front
print OUT join($SEP, @$spec) . " # " . join(' ', @$notes), "\n";
}
# Now emit table sorted by name
print OUT scalar keys %ZONES, " # count of name index table entries to follow\n";
foreach my $z (sort keys %ZONES) {
print OUT $indexID{$z}, " # $z\n";
}
print OUT "end\n";
print OUT $END_MARKER, "\n\n";
############################################################
# EMIT INDEX BY GMT OFFSET
@ -378,13 +385,15 @@ sub main {
# Create an array mapping zone number -> name.
my %zoneNumber;
my @zoneName;
my $i = 0;
$i = 0;
foreach (sort keys %ZONES) {
$zoneName[$i] = $_;
$zoneNumber{$_} = $i++;
}
# Emit offset index
print OUT "#--- Offset index ---\n";
print OUT "#| gmt_offset,default_id,id_count,id_list\n";
print OUT scalar keys %{$offsetIndex}, " # index by offset entries to follow\n";
foreach (sort {$a <=> $b} keys %{$offsetIndex}) {
my $aref = $offsetIndex->{$_};
@ -399,7 +408,7 @@ sub main {
join(" ", @b), "\n";
}
print OUT "end\n";
print OUT $END_MARKER, "\n";
############################################################
# END
@ -409,17 +418,10 @@ sub main {
# Emit the HTML file
if ($HTML_FILE) {
emitHTML($HTML_FILE, \%ZONES, \%RULES, $offsetIndex, $aliases);
emitHTML($HTML_FILE, \%ZONES, \%RULES, \@EQUIV, $offsetIndex, $aliases);
print "$HTML_FILE written.\n";
}
if (0) {
TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
print
"Equivalency groups (including unique zones): ",
scalar @EQUIV, "\n";
}
#::dumpValue($ZONES{"America/Los_Angeles"});
#::dumpValue($RULES{"US"});
#::dumpValue($RULES{"Tonga"});
@ -466,7 +468,7 @@ sub createOffsetIndex {
# Create an index by gmtoff.
my %offsetMap;
foreach (sort keys %{$zones}) {
my $offset = parseOffset($zones->{$_}->{gmtoff});
my $offset = TZ::ParseOffset($zones->{$_}->{gmtoff});
push @{$offsetMap{$offset}}, $_;
}
@ -487,7 +489,7 @@ sub createOffsetIndex {
$ok = 0;
next;
}
my $offset = parseOffset($zones->{$z}->{gmtoff});
my $offset = TZ::ParseOffset($zones->{$z}->{gmtoff});
if (exists $defaults{$offset}) {
print
"Error: Offset ", formatOffset($offset), " has both ",
@ -560,7 +562,7 @@ sub isDefault {
my $name = shift;
my $offset = shift;
my $offsetIndex = shift;
my $aref = $offsetIndex->{parseOffset($offset)};
my $aref = $offsetIndex->{TZ::ParseOffset($offset)};
return ($aref->[0] eq $name);
}
@ -568,17 +570,20 @@ sub isDefault {
# Param: File name
# Param: ref to zone hash
# Param: ref to rule hash
# Param: ref to equiv table
# Param: ref to offset index
# Param: ref to alias hash
sub emitHTML {
my $file = shift;
my $zones = shift;
my $rules = shift;
my $equiv = shift;
my $offsetIndex = shift;
my $aliases = shift;
# These are variables for the template
my $_count = scalar keys %{$zones};
my $_equiv = scalar @$equiv;
# Build table in order of zone offset
my $_offsetTable = "<p><table>\n";
@ -617,6 +622,21 @@ sub emitHTML {
}
$_nameTable .= "</table>\n";
# Build equivalency group table
my $_equivTable = "<p><table>\n";
$_equivTable .= "<tr><td>Offset</td><td>DST Begins</td><td>DST Ends</td>";
$_equivTable .= "<td>Savings</td><td>Zones</td></tr>\n";
$_equivTable .= "<tr><td><hr></td>";
$_equivTable .= "<td><hr></td><td><hr></td>";
$_equivTable .= "<td><hr></td><td><hr></td><td><hr></td></tr>\n";
# Equiv table is sorted elsewhere -- output it in native order
foreach my $eg (@$equiv) {
$_equivTable .= emitHTMLEquiv($eg, $zones, $rules);
}
$_equivTable .= "</table>\n";
# Time stamp
my $_timeStamp = localtime;
@ -641,7 +661,7 @@ sub emitHTML {
</tr>
<tr>
<td>Total zone count</td>
<td><strong>$_count</strong></td>
<td><strong>$_count</strong> in <strong>$_equiv</strong> equivalency groups</td>
</tr>
<tr>
<td>Original source</td>
@ -757,6 +777,20 @@ Times without suffixes are in wall time (that is, either standard time or daylig
time, depending on which is in effect).</p>
$_nameTable
<hr>
<h2>Time Zone Equivalency Groups</h2>
<p>ICU groups zones into <em>equivalency groups</em>. These are
groups of zones that are identical in GMT offset and in rules, but
that have different IDs. Knowledge of equivalency groups allows ICU
to reduce the amount of data stored. More importantly, it allows ICU
to apply data for one zone to other equivalent zones when appropriate
(e.g., in formatting). Equivalency groups are formed at build time,
not at runtime, so the runtime cost to lookup the equivalency group of
a given zone is negligible.</p>
$_equivTable
</body>
</html>
END
@ -779,6 +813,40 @@ sub bookmark {
$_;
}
# Emit an equivalency group as a single HTML table row.
# The descriptive cells are generated by _emitHTMLZone() from the FIRST
# member of the group; the final cell lists every member ID, separated
# by spaces, in the order in which they appear in the group array.
# Param: ref to array of zone IDs (the members of one group)
# Param: ref to zone hash (zone ID -> zone object)
# Param: ref to rule hash
# Return: string holding the complete, newline-terminated <tr> row
sub emitHTMLEquiv {
    my ($eg, $zone, $rule) = @_;

    # Build the row in a lexical rather than in a localized $_, so that
    # no sub called while the row is under construction can clobber the
    # partial result through the global $_.
    my $row = "<tr valign=top>";
    $row .= _emitHTMLZone($zone->{$eg->[0]}, $rule);

    # Don't sort @$eg -- output in native order
    $row .= "<td>" . join(" ", @$eg) . "</td>";
    $row .= "</tr>\n";

    return $row;
}
# Emit a zone description without ID, alias info etc.
# The result is a run of <td> cells with no enclosing <tr>:
#   - one cell with the GMT offset, linked to the bookmark for that offset;
#   - for a zone whose rule is not $TZ::STANDARD, two cells produced by
#     emitHTMLRule() for the rule's entries [0] and [1], then one cell
#     with the 'save' field of entry [0];
#   - for a standard zone, a single empty cell spanning three columns.
# Param: zone OBJECT hash ref (its 'gmtoff' and 'rule' fields are read)
# Param: rule hash ref (rule name -> rule object)
# Return: string of HTML table cells
sub _emitHTMLZone {
    my ($zone, $rules) = @_;

    my $gmtoff = "GMT" . formatOffset(TZ::ParseOffset($zone->{gmtoff}));

    # Accumulate the cells in a lexical rather than in a localized $_;
    # bookmark() and emitHTMLRule() are called while the string is being
    # built, and a lexical cannot be disturbed by whatever they do to $_.
    my $cells = "<td><a href=\"#" . bookmark($gmtoff) . "\">$gmtoff</a></td>";

    if ($zone->{rule} ne $TZ::STANDARD) {
        my $rule = $rules->{$zone->{rule}};
        $cells .= "<td nowrap>" . emitHTMLRule($rule->[0]) . "</td>";
        $cells .= "<td nowrap>" . emitHTMLRule($rule->[1]) . "</td>";
        $cells .= "<td>" . $rule->[0]->{save} . "</td>";
    } else {
        # Standard zones have no rule data; keep the column count aligned.
        $cells .= "<td colspan=3></td>";
    }

    return $cells;
}
# Emit a single zone description as HTML table row. Return the string.
# Param: Zone name
# Param: Zone hash object ref
@ -793,16 +861,7 @@ sub emitHTMLZone {
my $revalias = exists $revaliases->{$name} ? $revaliases->{$name} : '';
local $_ = "<tr><td>" . ($isDefault?"<b>":"") .
"<a name=\"" . bookmark($name) . "\">$name</a>" . ($isDefault?"</b>":"") . "</td>";
my $gmtoff = "GMT" . formatOffset(parseOffset($zone->{gmtoff}));
$_ .= "<td><a href=\"#" . bookmark($gmtoff) . "\">$gmtoff</a></td>";
if ($zone->{rule} ne $TZ::STANDARD) {
my $rule = $rules->{$zone->{rule}};
$_ .= "<td>" . emitHTMLRule($rule->[0]) . "</td>";
$_ .= "<td>" . emitHTMLRule($rule->[1]) . "</td>";
$_ .= "<td>" . $rule->[0]->{save} . "</td>";
} else {
$_ .= "<td colspan=3></td>";
}
$_ .= _emitHTMLZone($zone, $rules);
if ($alias) {
$_ .= "<td><em>alias for</em> <a href=\"#" .
bookmark($alias) . "\">$alias</a></td>";
@ -867,18 +926,19 @@ sub incorporateAliases {
# Param: Zone name
# Param: Zone hash
# Param: Ref to hash of all rules
# Return: One line description of this zone.
# Return: Two array refs, one to the specs, one to the notes
sub formatZone { # ($z, $ZONES{$z}, \%RULES)
my $name = shift;
my $zone = shift;
my $rules = shift;
my @spec;
my @notes = ( $name );
#my @notes = ( $name );
my @notes;
# GMT offset
push @notes, ($zone->{gmtoff}=~/^-/?"GMT":"GMT+") . $zone->{gmtoff};
push @spec, parseOffset($zone->{gmtoff});
push @spec, TZ::ParseOffset($zone->{gmtoff});
#|rawOffset The new SimpleTimeZone's raw GMT offset
#|ID The new SimpleTimeZone's time zone ID.
@ -917,7 +977,7 @@ sub formatZone { # ($z, $ZONES{$z}, \%RULES)
push @spec, $a[0];
}
join($SEP, @spec) . " # " . join(' ', @notes);
(\@spec, \@notes);
}
# Format a rule and return the string
@ -934,22 +994,6 @@ sub formatRule {
push @$spec, parseTime($rule->{at}); # Time
}
# Convert a textual GMT offset into a number of seconds.
# Accepted forms are h, h:mm and h:mm:ss (one or two hour digits), each
# optionally preceded by '-' to make the result negative.
# Param: String
# Return: Integer number of seconds
# Dies if the string does not have one of the accepted forms.
sub parseOffset {
    my $text = shift;

    # In list context the match returns all six capture groups
    # (undef for groups that did not participate), or an empty list on
    # failure, which makes the assignment false and triggers the die.
    my @field = ($text =~ /^(-)?(\d{1,2})(:(\d\d))?(:(\d\d))?$/)
        or die "Cannot parse offset \"$text\"";

    # Groups 3 and 5 only wrap the ":dd" parts and are not needed.
    my ($minus, $hours, undef, $minutes, undef, $seconds) = @field;
    $minutes = 0 unless defined $minutes;
    $seconds = 0 unless defined $seconds;

    my $total = ($hours * 60 + $minutes) * 60 + $seconds;
    return defined $minus ? -$total : $total;
}
# Format an offset in seconds and return a string of the form
# /[+-]\d{1,2}:\d\d(:\d\d)?/.
# Param: Offset in seconds
@ -1086,4 +1130,24 @@ sub assertInvariantChars {
}
}
# Map ID to equivalency table index. Return the index of the given ID
# in the equivalency array. The array contains array refs. Each ref
# points to an array of strings (zone IDs). Groups are searched in
# array order and the first group containing the ID wins.
# Param: ID to find
# Param: Ref to equiv array (ref to array of refs to arrays of IDs)
# Return: Index into array where ID is found, or -1 if not found
# NOTE: This function can be eliminated by generating a reverse
# mapping hash when we create the equivalency table.
sub equivIndexOf {
    # Named lexicals are used throughout: declaring "my $a" would shadow
    # the package variable that sort() blocks rely on.
    my ($id, $equiv) = @_;

    for my $index (0 .. $#{$equiv}) {
        # Copy the group ref first so that iterating over it can never
        # autovivify an entry in the caller's array.
        my $group = $equiv->[$index];
        for my $member (@$group) {
            return $index if $member eq $id;
        }
    }

    return -1;
}
__END__

File diff suppressed because it is too large Load diff

View file

@ -110,7 +110,10 @@ require 'dumpvar.pl';
@ISA = qw(Exporter);
@EXPORT = qw(ZoneEquals
RuleEquals
ZoneCompare
RuleCompare
FormZoneEquivalencyGroups
ParseOffset
);
$VERSION = '0.1';
@ -120,16 +123,47 @@ $STANDARD = '-'; # Name of the Standard Time rule
# Param: zone object (hash ref)
# Param: zone object (hash ref)
# Param: ref to hash of all rules
# Return: true if two zones are equivalent
sub ZoneEquals {
# Return: 0, -1, or 1
sub ZoneCompare {
my $z1 = shift;
my $z2 = shift;
my $RULES = shift;
($z1, $z2) = ($z1->{rule}, $z2->{rule});
return ($z1 eq $z2) ||
RuleEquals($RULES->{$z1}, $RULES->{$z2});
return RuleCompare($RULES->{$z1}, $RULES->{$z2});
}
######################################################################
# Order two rule objects.
# Param: rule object (array ref), or undef
# Param: rule object (array ref), or undef
# Return: 0, -1, or 1
#   both defined   -> string comparison (cmp) of element [2] of each
#   only the first -> 1  (a defined rule sorts after an undefined one)
#   only the second-> -1
#   neither        -> 0
sub RuleCompare {
    my ($left, $right) = @_;

    # defined() catches undefined rules. The only undefined
    # rule is $STANDARD; any others would be caught by
    # Postprocess().
    my $haveLeft  = defined($left);
    my $haveRight = defined($right);

    if ($haveLeft && $haveRight) {
        # Just compare the precomputed encoding strings.
        return $left->[2] cmp $right->[2];
    }
    return 1  if $haveLeft;
    return -1 if $haveRight;
    return 0;

    # In theory, there's actually one more level of equivalency
    # analysis we could do. This is to recognize that Sun >=1 is the
    # same as First Sun. We don't do this yet, but it doesn't matter;
    # such a date is always referred to as Sun>=1, never as firstSun.
}
######################################################################
# Equality test built on ZoneCompare(): the arguments are forwarded
# unchanged and the zones are considered equivalent when ZoneCompare()
# reports 0.
# Param: zone object (hash ref)
# Param: zone object (hash ref)
# Param: ref to hash of all rules
# Return: true if two zones are equivalent
sub ZoneEquals {
    my @operands = @_;
    my $order = ZoneCompare(@operands);
    return $order == 0;
}
######################################################################
@ -137,18 +171,7 @@ sub ZoneEquals {
# Param: rule object (hash ref)
# Return: true if two rules are equivalent
sub RuleEquals {
my $r1 = shift;
my $r2 = shift;
# Just compare the precomputed encoding strings.
# defined() catches undefined rules. The only undefined
# rule is $STANDARD; any others would be caught by
# Postprocess().
return defined($r1) && defined($r2) && $r1->[2] eq $r2->[2];
# There's actually one more level of equivalency analysis we could
# do. This is to recognize that Sun >=1 is the same as First Sun.
# We don't do this yet.
RuleCompare(@_) == 0;
}
######################################################################
@ -162,26 +185,26 @@ sub RuleEquals {
# Param: IN ref to hash of all rules
# Param: OUT ref to array to receive group refs
sub FormZoneEquivalencyGroups {
my ($ZONES, $RULES, $EQUIV) = @_;
my ($zones, $rules, $equiv) = @_;
# Group the zones by offset. This improves efficiency greatly;
# instead of an n^2 computation, we just need to do n^2 within
# each offset; a much smaller total number.
my %ZONES_BY_OFFSET;
foreach (keys %$ZONES) {
push @{$ZONES_BY_OFFSET{$ZONES->{$_}->{gmtoff}}}, $_;
my %zones_by_offset;
foreach (keys %$zones) {
push @{$zones_by_offset{ParseOffset($zones->{$_}->{gmtoff})}}, $_;
}
# Find equivalent rules
foreach my $gmtoff (keys %ZONES_BY_OFFSET) {
foreach my $gmtoff (keys %zones_by_offset) {
# Make an array of equivalency groups
# (array of refs to array of names)
my @equiv;
foreach my $name1 (@{$ZONES_BY_OFFSET{$gmtoff}}) {
foreach my $name1 (@{$zones_by_offset{$gmtoff}}) {
my $found = 0;
foreach my $group (@equiv) {
my $name2 = $group->[0];
if (ZoneEquals($ZONES->{$name1}, $ZONES->{$name2}, $RULES)) {
if (ZoneEquals($zones->{$name1}, $zones->{$name2}, $rules)) {
push @$group, $name1;
$found = 1;
last;
@ -192,6 +215,23 @@ sub FormZoneEquivalencyGroups {
push @equiv, \@newGroup;
}
}
push @$EQUIV, @equiv;
push @$equiv, @equiv;
}
}
######################################################################
# Turn an offset string into a signed count of seconds.
# The string must look like d, d:dd, or d:dd:dd (hours, then optional
# minutes, then optional seconds; one or two hour digits), and may be
# preceded by a '-' to negate the result.
# Param: String
# Return: Integer number of seconds
# Calls confess (error plus stack trace) if the string cannot be parsed.
sub ParseOffset {
    my $offset = shift;

    # Guard clause: reject anything that is not in one of the three forms.
    unless ($offset =~ /^(-)?(\d{1,2})(:(\d\d))?(:(\d\d))?$/) {
        confess "Cannot parse offset \"$offset\"";
    }

    # Capture groups:   1 = sign, 2 = hours, 4 = minutes, 6 = seconds
    my ($negative, $hh, $mm, $ss) = ($1, $2, $4, $6);
    $mm = 0 unless defined $mm;
    $ss = 0 unless defined $ss;

    my $seconds = ($hh * 60 + $mm) * 60 + $ss;
    $seconds = -$seconds if (defined $negative && $negative eq '-');
    return $seconds;
}