xref: /freebsd/contrib/tzdata/ziguard.awk (revision c7046f76)
1# Convert tzdata source into vanguard or rearguard form.
2
3# Contributed by Paul Eggert.  This file is in the public domain.
4
5# This is not a general-purpose converter; it is designed for current tzdata.
6# It just converts from current source to main, vanguard, and rearguard forms.
7# Although it might be nice for it to be idempotent, or to be useful
8# for converting back and forth between vanguard and rearguard formats,
9# it does not do these nonessential tasks now.
10#
11# Although main and vanguard forms are currently equivalent,
12# this need not always be the case.  When the two forms differ,
13# this script can convert either from main to vanguard form (needed then),
14# or from vanguard to main form (this conversion would be needed later,
15# after main became rearguard and vanguard became main).
16# There is no need to convert rearguard to other forms.
17#
18# When converting to vanguard form, the output can use negative SAVE
19# values.
20#
21# When converting to rearguard form, the output uses only nonnegative
22# SAVE values.  The idea is for the output data to simulate the behavior
23# of the input data as best it can within the constraints of the
24# rearguard format.
25
# Convert FIELD, an [-]hh[:mm] string such as "-0:30" or "5", to a signed
# count of minutes such as -30 or 300.  The leading "-" (if any) applies
# to the minutes part as well as to the hours part.  Anything after the
# minutes (e.g., ":ss") does not contribute to the result.
function get_minutes(field, \
		     sgn, hh, mm)
{
  # Numeric conversion stops at the first ":", so this is the hours part.
  hh = field + 0

  # The minutes are whatever follows the first ":"; absent means zero.
  mm = 0
  if (index(field, ":")) {
    mm = substr(field, index(field, ":") + 1)
  }

  # Take the sign from the text, since "-0" hours would lose it numerically.
  sgn = 1
  if (substr(field, 1, 1) == "-") {
    sgn = -1
  }

  return 60 * hh + sgn * mm
}
38
# Format OFFSET, a signed count of minutes such as 300, 330 or -210,
# as a %z-style abbreviation: "+05", "+0530" or "-0330".  The minutes
# digits are emitted only when they are nonzero.
function offset_abbr(offset, \
		     hours, minutes, sign)
{
  # Split off the sign so that hours and minutes are formatted as magnitudes.
  sign = "+"
  if (offset < 0) {
    sign = "-"
    offset = -offset
  }
  hours = int(offset / 60)
  minutes = offset % 60

  if (!minutes) {
    return sprintf("%s%.2d", sign, hours)
  }
  return sprintf("%s%.2d%.2d", sign, hours, minutes)
}
52
# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second,
# with exact .5 ties going to the even second.
# A TIMESTAMP that lacks the hh:mm:ss.dddd shape (e.g., one with no
# fractional part) is returned unchanged.
# The result has the form [-]h:mm:ss; a leading "+" is not preserved.
function round_to_second(timestamp, \
			 hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Reduce a copy to just the fraction (".dddd"); if that fails there is
  # nothing to round.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip leading components so that each string starts with its own field.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # Count whole seconds only.  SS still carries the fraction (e.g., "44.5"),
  # so it must be truncated here; otherwise "seconds % 2" below would be
  # nonzero for every tie and ties would always round up instead of to even.
  seconds = 3600 * int(hh) + 60 * int(mm) + int(ss)
  subseconds = +dot_dddd

  # Round up when past the halfway point, or at exactly halfway when the
  # whole-second count is odd (ties go to even).
  if (0.5 < subseconds || (subseconds == 0.5 && seconds % 2))
    seconds++

  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
		 int(seconds / 3600), int(seconds / 60) % 60, seconds % 60)
}
70
# Startup: record the valid DATAFORM names, load the optional PACKRATLIST
# file, and reject an unknown DATAFORM.
# DATAFORM and PACKRATLIST are expected to be set on the command line.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # Remember the third field of every non-comment line of PACKRATLIST.
    # Compare getline's result with 0 explicitly: it returns -1 on a read
    # error (e.g., a missing file), which a bare truth test would treat
    # as success and so loop forever.
    while ((packratlist_status = (getline <PACKRATLIST)) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
    # Fail rather than silently proceed with an empty list.
    if (packratlist_status < 0) exit 1
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}
86
# A line of the form "#PACKRATLIST <file> <text>" is enabled when <file>
# names the PACKRATLIST in use: strip the marker and the file name,
# leaving just <text>.
{
  if ($1 == "#PACKRATLIST" && $2 == PACKRATLIST) {
    sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
  }
}
90
# Remember the name of the most recent Zone, so that later rules can
# tell which zone a continuation line belongs to.
$0 ~ /^Zone/ {
  zone = $2
}
92
# Convert the current line to vanguard or rearguard form.
# Nothing here applies when DATAFORM is "main".
#
# Several steps below pick between alternative versions of a line, one of
# which is commented out in the input.  For Zone continuation lines the
# leading "#" of a commented-out line is a field of its own, so
# IN_COMMENT (0 or 1) is added to field numbers to address the same
# column either way: in_comment + 1 is STDOFF, + 2 is RULES, + 3 is FORMAT,
# and + 4 is the UNTIL year.
DATAFORM != "main" {
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
	 || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT"))	\
	== (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  # In a Rule line, $9 is the SAVE column and $10 is the LETTER column.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
	   || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
	       && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
		   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
	 ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
	 : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
	== (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply the decision made above.  The steps above set UNCOMMENT only on
  # lines that start with "#" and COMMENT_OUT only on lines that do not.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a FORMAT column that starts with "+" or "-" by appending a marker,
    # then either restore "-00" untouched or delete the numeric
    # abbreviation together with the marker's prefix, leaving just "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
	"&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    # Replace %z on a non-comment line with the abbreviation(s) it stands
    # for, computed from the STDOFF and RULES columns.
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
	abbr = stdabbr
      } else {
	# A RULES column that is itself an offset means the line is always
	# in DST, so only the DST abbreviation is needed.
	dstabbr_only = rules ~ /^[+0-9-]/
	if (dstabbr_only) {
	  dstoff = get_minutes(rules)
	} else {
	  # The DST offset is normally an hour, but there are special cases.
	  if (rules == "Morocco" && NF == 3) {
	    dstoff = -60
	  } else if (rules == "NBorneo") {
	    dstoff = 20
	  } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
		     || (rules == "Uruguay" \
			 && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
	    dstoff = 30
	  } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
	    dstoff = 90
	  } else {
	    dstoff = 60
	  }
	}
	dstabbr = offset_abbr(stdoff + dstoff)
	if (dstabbr_only) {
	  abbr = dstabbr
	} else {
	  abbr = stdabbr "/" dstabbr
	}
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the STDOFF text to look for on following lines
    # and stdoff_subst[1] is its replacement.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      # NOTE(review): stdoff_subst[0] is used as a regular expression here,
      # so any "." in it matches an arbitrary character.
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # A line with some other STDOFF ends the substitution.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
	sub(/Sat>=8/, "Sun>=9")
	sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
	sub(/Sun>=9/, "Sat>=8")
	sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
	if (DATAFORM == "rearguard") {
	  sub(/\t2018\t/, "\t2017\t")
	} else {
	  sub(/\t2017\t/, "\t2018\t")
	}
      }

      if (2019 <= $3) {
	if ($8 == "2:00") {
	  if (DATAFORM == "rearguard") {
	    sub(/\t0\t/, "\t1:00\t")
	  } else {
	    sub(/\t1:00\t/, "\t0\t")
	  }
	} else {
	  if (DATAFORM == "rearguard") {
	    sub(/\t-1:00\t/, "\t0\t")
	  } else {
	    sub(/\t0\t/, "\t-1:00\t")
	  }
	}
      }
    }
    # A three-field continuation line (STDOFF, RULES, FORMAT with no UNTIL)
    # that uses the Morocco rules.
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
	sub(/1:00\tMorocco/, "0:00\tMorocco")
	sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
	sub(/0:00\tMorocco/, "1:00\tMorocco")
	# Both "+" signs must be escaped; an unescaped "+" after "\/" would
	# mean "one or more slashes" and never match the literal "+00/+01".
	sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}
290
# On each Zone line decide whether the zone is to be ignored: it is when
# the line comes from the PACKRATDATA file, a PACKRATLIST is in use, and
# the zone's name is not in that list.
/^Zone/ {
  packrat_ignored = 0
  if (FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2]) {
    packrat_ignored = 1
  }
}

# While in an ignored zone, comment out every line except Rule lines.
packrat_ignored && $0 !~ /^Rule/ {
  $0 = "#" $0
}
299
# A Link line is superseded when a later Zone or Link line defines the
# same name; in that case comment out the earlier, already-buffered Link
# line.  This can happen if backzone overrides a Link with a Zone or a
# different Link.  linkline[NAME] holds the line number of the latest
# Link line that defines NAME.
/^Zone/ {
  superseded_link = linkline[$2]
  sub(/^Link/, "#Link", line[superseded_link])
}
/^Link/ {
  superseded_link = linkline[$3]
  sub(/^Link/, "#Link", line[superseded_link])
  linkline[$3] = NR
}
310
# Buffer every (possibly modified) line rather than printing it at once,
# because a later line may still cause an earlier Link line to be
# commented out.
{
  line[NR] = $0
}

# Emit the buffered lines in their original order.
END {
  i = 1
  while (i <= NR) {
    print line[i]
    i++
  }
}
317