1 extern crate parse_zoneinfo;
2 #[cfg(feature = "filter-by-regex")]
3 extern crate regex;
4
5 use std::collections::BTreeSet;
6 use std::env;
7 use std::fs::File;
8 use std::io::{self, BufRead, BufReader, Write};
9 use std::path::Path;
10
11 use parse_zoneinfo::line::{Line, LineParser};
12 use parse_zoneinfo::structure::{Child, Structure};
13 use parse_zoneinfo::table::{Table, TableBuilder};
14 use parse_zoneinfo::transitions::FixedTimespan;
15 use parse_zoneinfo::transitions::TableTransitions;
16
/// The name of the environment variable which possibly holds the filter regex.
///
/// `main` always announces this variable to Cargo through
/// `cargo:rerun-if-env-changed`; its value is only read by the `filter`
/// module that is compiled when the `filter-by-regex` feature is enabled.
const FILTER_ENV_VAR_NAME: &str = "CHRONO_TZ_TIMEZONE_FILTER";
19
// Drop everything from the first '#' to the end of the line and hand back
// what is left; a line without '#' is returned unchanged.
//
// Needed until the parse_zoneinfo line parser copes with comments itself.
// Strictly speaking a '#' between double quotes is not a comment and should
// be left alone, but the tz database never contains one as it stands.
fn strip_comments(mut line: String) -> String {
    // With no '#', cutting at the full length leaves the line as it is.
    let cut = line.find('#').unwrap_or(line.len());
    line.truncate(cut);
    line
}
30
31 // Generate a list of the time zone periods beyond the first that apply
32 // to this zone, as a string representation of a static slice.
format_rest(rest: Vec<(i64, FixedTimespan)>) -> String33 fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String {
34 let mut ret = "&[\n".to_string();
35 for (start, FixedTimespan { utc_offset, dst_offset, name }) in rest {
36 ret.push_str(&format!(
37 " ({start}, FixedTimespan {{ \
38 utc_offset: {utc}, dst_offset: {dst}, name: \"{name}\" \
39 }}),\n",
40 start = start,
41 utc = utc_offset,
42 dst = dst_offset,
43 name = name,
44 ));
45 }
46 ret.push_str(" ]");
47 ret
48 }
49
// Turn a tz database name into text usable as a Rust identifier:
//
//   * every '/' becomes "__",
//   * every '+' becomes "Plus",
//   * a '-' directly followed by a numeric character is a sign and becomes
//     "Minus" (e.g. "Etc/GMT-14" -> "Etc__GMTMinus14"),
//   * any other '-' is a plain hyphen and is removed
//     (e.g. "America/Port-au-Prince" -> "America__PortauPrince").
//
// Each '-' is judged on its own. Deciding for the whole name from the first
// '-' only would mangle a name that contains both kinds and could make two
// different names collapse into the same identifier.
fn convert_bad_chars(name: &str) -> String {
    let mut ident = String::with_capacity(name.len() + 8);
    let mut chars = name.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '/' => ident.push_str("__"),
            '+' => ident.push_str("Plus"),
            '-' => {
                // Only a '-' that introduces a number is kept (as "Minus");
                // a trailing '-' or one followed by anything else is dropped.
                if chars.peek().map_or(false, |next| next.is_numeric()) {
                    ident.push_str("Minus");
                }
            }
            other => ident.push(other),
        }
    }
    ident
}
65
// The timezone file contains impls of `TimeSpans` for all timezones in the
// database. The `Wrap` wrapper in the `timezone_impl` module then implements
// TimeZone for any contained struct that implements `TimeSpans`.
//
// Writes Rust source text to `timezone_file`, in this order:
//   1. `use` lines for `core::fmt`, `core::str::FromStr` and `::timezone_impl`,
//   2. the `Tz` enum with one variant per zone or link name,
//   3. the `TIMEZONES` phf map from raw name to variant (plus
//      `TIMEZONES_UNCASED` when the `case-insensitive` feature is enabled),
//   4. `ParseError` and `impl FromStr for Tz`,
//   5. `impl Tz` with `name()` (plus `from_str_insensitive` when the
//      `case-insensitive` feature is enabled),
//   6. `Debug`, `Display` and `TimeSpans` impls for `Tz`,
//   7. the `TZ_VARIANTS` array.
//
// Returns the first I/O error reported by a write. Panics if
// `table.timespans` yields no value for one of the collected names.
//
// In every template below `{{` and `}}` are escaped literal braces; only
// single-brace `{...}` placeholders are substituted.
fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> {
    // Zone names and link names together; the `BTreeSet` removes duplicates
    // and iterates in sorted order, so every list below uses the same order.
    let zones = table.zonesets.keys().chain(table.links.keys()).collect::<BTreeSet<_>>();
    writeln!(timezone_file, "use core::fmt::{{self, Debug, Display, Formatter}};",)?;
    writeln!(timezone_file, "use core::str::FromStr;\n",)?;
    writeln!(
        timezone_file,
        "use ::timezone_impl::{{TimeSpans, FixedTimespanSet, FixedTimespan}};\n",
    )?;
    // Doc comment that ends up on the generated `Tz` enum.
    writeln!(
        timezone_file,
        "/// TimeZones built at compile time from the tz database
///
/// This implements [`chrono::TimeZone`] so that it may be used in and to
/// construct chrono's DateTime type. See the root module documentation
/// for details."
    )?;
    writeln!(timezone_file, "#[derive(Clone, Copy, PartialEq, Eq, Hash)]\npub enum Tz {{")?;
    // One variant per name; the raw tz name becomes the variant's doc comment.
    for zone in &zones {
        let zone_name = convert_bad_chars(zone);
        writeln!(
            timezone_file,
            " /// {raw_zone_name}\n {zone},",
            zone = zone_name,
            raw_zone_name = zone
        )?;
    }
    writeln!(timezone_file, "}}")?;

    // Lookup table from the raw tz name to the source text of its variant.
    let mut map = phf_codegen::Map::new();
    for zone in &zones {
        map.entry(zone, &format!("Tz::{}", convert_bad_chars(zone)));
    }
    writeln!(timezone_file, "static TIMEZONES: ::phf::Map<&'static str, Tz> = \n{};", map.build())?;

    // Second lookup table, keyed by `UncasedStr`, only generated when this
    // crate is built with the `case-insensitive` feature.
    #[cfg(feature = "case-insensitive")]
    {
        writeln!(timezone_file, "use uncased::UncasedStr;\n",)?;
        let mut map = phf_codegen::Map::new();
        for zone in &zones {
            map.entry(uncased::UncasedStr::new(zone), &format!("Tz::{}", convert_bad_chars(zone)));
        }
        writeln!(
            timezone_file,
            "static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = \n{};",
            // FIXME(petrosagg): remove this once rust-phf/rust-phf#232 is released
            map.build().to_string().replace("::std::mem::transmute", "::core::mem::transmute")
        )?;
    }

    // `ParseError` is a `String` with the generated crate's `std` feature and
    // a `&'static str` without it; `from_str` has one `return` per case.
    writeln!(
        timezone_file,
        "#[cfg(feature = \"std\")]
pub type ParseError = String;
#[cfg(not(feature = \"std\"))]
pub type ParseError = &'static str;

impl FromStr for Tz {{
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {{
#[cfg(feature = \"std\")]
return TIMEZONES.get(s).cloned().ok_or_else(|| format!(\"'{{}}' is not a valid timezone\", s));
#[cfg(not(feature = \"std\"))]
return TIMEZONES.get(s).cloned().ok_or(\"received invalid timezone\");
}}
}}\n"
    )?;

    // `impl Tz` is opened here and only closed by the `"}}"` written after
    // the optional `from_str_insensitive` method further down.
    writeln!(
        timezone_file,
        "impl Tz {{
pub fn name(self) -> &'static str {{
match self {{"
    )?;
    // One match arm per variant, mapping it back to the raw tz name.
    for zone in &zones {
        let zone_name = convert_bad_chars(zone);
        writeln!(
            timezone_file,
            " Tz::{zone} => \"{raw_zone_name}\",",
            zone = zone_name,
            raw_zone_name = zone
        )?;
    }
    // Closes the `match` and `name()`.
    writeln!(
        timezone_file,
        " }}
}}"
    )?;

    // Case-insensitive parser, written inside the still-open `impl Tz`.
    #[cfg(feature = "case-insensitive")]
    {
        writeln!(
            timezone_file,
            r#"
#[cfg(feature = "case-insensitive")]
/// Parses a timezone string in a case-insensitive way
pub fn from_str_insensitive(s: &str) -> Result<Self, ParseError> {{
#[cfg(feature = "std")]
return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or_else(|| format!("'{{}}' is not a valid timezone", s));
#[cfg(not(feature = "std"))]
return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or("received invalid timezone");
}}"#
        )?;
    }

    // Closes `impl Tz`.
    writeln!(timezone_file, "}}")?;

    // `Debug` and `Display` both print the raw tz name returned by `name()`.
    writeln!(
        timezone_file,
        "impl Debug for Tz {{
fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
f.write_str(self.name().as_ref())
}}
}}\n"
    )?;
    writeln!(
        timezone_file,
        "impl Display for Tz {{
fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
f.write_str(self.name().as_ref())
}}
}}\n"
    )?;
    writeln!(
        timezone_file,
        "impl TimeSpans for Tz {{
fn timespans(&self) -> FixedTimespanSet {{
match *self {{"
    )?;
    // One match arm per variant: the first timespan is written inline, all
    // later ones go into a per-arm `REST` constant built by `format_rest`.
    for zone in &zones {
        // Panics if the table has no timespans for this name.
        let timespans = table.timespans(&zone).unwrap();
        let zone_name = convert_bad_chars(zone);
        writeln!(
            timezone_file,
            " Tz::{zone} => {{
const REST: &[(i64, FixedTimespan)] = {rest};
FixedTimespanSet {{
first: FixedTimespan {{
utc_offset: {utc},
dst_offset: {dst},
name: \"{name}\",
}},
rest: REST
}}
}},\n",
            zone = zone_name,
            rest = format_rest(timespans.rest),
            utc = timespans.first.utc_offset,
            dst = timespans.first.dst_offset,
            name = timespans.first.name,
        )?;
    }
    // Closes the `match`, `timespans()` and the `TimeSpans` impl.
    write!(
        timezone_file,
        " }}
}}
}}\n"
    )?;
    // Array listing every variant; its length is the number of collected names.
    write!(
        timezone_file,
        "/// An array of every known variant
///
/// Useful for iterating over known timezones:
///
/// ```
/// use chrono_tz::{{TZ_VARIANTS, Tz}};
/// assert!(TZ_VARIANTS.iter().any(|v| *v == Tz::UTC));
/// ```
pub static TZ_VARIANTS: [Tz; {num}] = [
",
        num = zones.len()
    )?;
    for zone in &zones {
        writeln!(timezone_file, " Tz::{zone},", zone = convert_bad_chars(zone))?;
    }
    write!(timezone_file, "];")?;
    Ok(())
}
246
247 // Create a file containing nice-looking re-exports such as Europe::London
248 // instead of having to use chrono_tz::timezones::Europe__London
write_directory_file(directory_file: &mut File, table: &Table) -> io::Result<()>249 fn write_directory_file(directory_file: &mut File, table: &Table) -> io::Result<()> {
250 // add the `loose' zone definitions first at the top of the file
251 writeln!(directory_file, "use timezones::Tz;\n")?;
252 let zones = table
253 .zonesets
254 .keys()
255 .chain(table.links.keys())
256 .filter(|zone| !zone.contains('/'))
257 .collect::<BTreeSet<_>>();
258 for zone in zones {
259 let zone = convert_bad_chars(zone);
260 writeln!(directory_file, "pub const {name} : Tz = Tz::{name};", name = zone)?;
261 }
262 writeln!(directory_file)?;
263
264 // now add the `structured' zone names in submodules
265 for entry in table.structure() {
266 if entry.name.contains('/') {
267 continue;
268 }
269 let module_name = convert_bad_chars(entry.name);
270 writeln!(directory_file, "pub mod {name} {{", name = module_name)?;
271 writeln!(directory_file, " use timezones::Tz;\n",)?;
272 for child in entry.children {
273 match child {
274 Child::Submodule(name) => {
275 let submodule_name = convert_bad_chars(name);
276 writeln!(directory_file, " pub mod {name} {{", name = submodule_name)?;
277 writeln!(directory_file, " use timezones::Tz;\n",)?;
278 let full_name = entry.name.to_string() + "/" + name;
279 for entry in table.structure() {
280 if entry.name == full_name {
281 for child in entry.children {
282 match child {
283 Child::Submodule(_) => {
284 panic!("Depth of > 3 nested submodules not implemented!")
285 }
286 Child::TimeZone(name) => {
287 let converted_name = convert_bad_chars(name);
288 writeln!(directory_file,
289 " pub const {name} : Tz = Tz::{module_name}__{submodule_name}__{name};",
290 module_name = module_name,
291 submodule_name = submodule_name,
292 name = converted_name,
293 )?;
294 }
295 }
296 }
297 }
298 }
299 writeln!(directory_file, " }}\n")?;
300 }
301 Child::TimeZone(name) => {
302 let name = convert_bad_chars(name);
303 writeln!(
304 directory_file,
305 " pub const {name} : Tz = Tz::{module_name}__{name};",
306 module_name = module_name,
307 name = name
308 )?;
309 }
310 }
311 }
312 writeln!(directory_file, "}}\n")?;
313 }
314 Ok(())
315 }
316
317 /// Stub module because filter-by-regex feature is not enabled
318 #[cfg(not(feature = "filter-by-regex"))]
319 mod filter {
320 /// stub function because filter-by-regex feature is not enabled
maybe_filter_timezone_table(_table: &mut super::Table)321 pub(crate) fn maybe_filter_timezone_table(_table: &mut super::Table) {}
322 }
323
/// Implementation of the `filter-by-regex` feature: trims the parsed table
/// down to the names selected through [`FILTER_ENV_VAR_NAME`].
#[cfg(feature = "filter-by-regex")]
mod filter {
    use std::collections::HashSet;
    use std::env;

    use regex::Regex;

    use crate::{Table, FILTER_ENV_VAR_NAME};

    /// Upper bound on the number of passes that grow the keep list before
    /// giving up with a build warning.
    const MAX_CLOSURE_PASSES: u32 = 50;

    /// Filter `table` with the regex from [`FILTER_ENV_VAR_NAME`], if one is
    /// configured; otherwise leave `table` untouched.
    pub(crate) fn maybe_filter_timezone_table(table: &mut Table) {
        if let Some(regex) = get_filter_regex() {
            filter_timezone_table(table, regex);
        }
    }

    /// Reads the `CHRONO_TZ_TIMEZONE_FILTER` environment variable and
    /// compiles its trimmed value into a regex.
    ///
    /// Returns `None` when the variable is unset or blank. Panics when the
    /// value is not valid Unicode or not a valid regex, because either
    /// should fail the build.
    fn get_filter_regex() -> Option<Regex> {
        let raw = match env::var(FILTER_ENV_VAR_NAME) {
            Ok(raw) => raw,
            Err(env::VarError::NotPresent) => return None,
            Err(env::VarError::NotUnicode(s)) => panic!(
                "The value '{:?}' for environment variable {} is not valid Unicode",
                s, FILTER_ENV_VAR_NAME
            ),
        };

        let val = raw.trim();
        if val.is_empty() {
            return None;
        }

        let regex = Regex::new(val).unwrap_or_else(|err| {
            panic!(
                "The value '{:?}' for environment variable {} is not a valid regex, err={}",
                val, FILTER_ENV_VAR_NAME, err
            )
        });
        Some(regex)
    }

    /// Adds `new_value` to the names to keep. A name with exactly three
    /// '/'-separated parts also adds its two-part prefix; without that, half
    /// of Indiana would be lost in `directory.rs`. One-part prefixes are
    /// deliberately *not* added, otherwise 'America' would inevitably end up
    /// in the list and pull in far too much.
    fn insert_keep_entry(keep: &mut HashSet<String>, new_value: &str) {
        let parts: Vec<&str> = new_value.split('/').collect();
        if parts.len() == 3 {
            keep.insert(format!("{}/{}", parts[0], parts[1]));
        }

        keep.insert(new_value.to_string());
    }

    /// Filter `table` by applying `filter_regex`.
    fn filter_timezone_table(table: &mut Table, filter_regex: Regex) {
        // Build the transitive closure of names to keep. Doing this, rather
        // than simply filtering `zonesets` and `links` by the regex, helps
        // to keep the `structure()` intact.
        let mut keep = HashSet::new();

        // Seed the list with every link endpoint the regex matches.
        for (alias, target) in &table.links {
            let endpoints = [alias, target];
            for name in endpoints.iter().filter(|name| filter_regex.is_match(name.as_str())) {
                insert_keep_entry(&mut keep, name);
            }
        }

        // Grow the list along the links until a pass adds nothing new.
        for pass in 1..=MAX_CLOSURE_PASSES {
            let before = keep.len();

            for (alias, target) in &table.links {
                match (keep.contains(alias), keep.contains(target)) {
                    (true, false) => insert_keep_entry(&mut keep, target),
                    (false, true) => insert_keep_entry(&mut keep, alias),
                    _ => {}
                }
            }

            if keep.len() == before {
                break;
            }

            if pass == MAX_CLOSURE_PASSES {
                println!("cargo:warning=Recursion limit reached while building filter list");
            }
        }

        // Now drop everything that is not wanted: links need one kept
        // endpoint, zones need a regex match or a kept name as their prefix.
        table.links.retain(|alias, target| keep.contains(alias) || keep.contains(target));

        table.zonesets.retain(|zone, _| {
            filter_regex.is_match(zone) || keep.iter().any(|kept| zone.starts_with(kept.as_str()))
        });
    }
}
430
main()431 pub fn main() {
432 println!("cargo:rerun-if-env-changed={}", FILTER_ENV_VAR_NAME);
433
434 let parser = LineParser::new();
435 let mut table = TableBuilder::new();
436
437 let tzfiles = [
438 "tz/africa",
439 "tz/antarctica",
440 "tz/asia",
441 "tz/australasia",
442 "tz/backward",
443 "tz/etcetera",
444 "tz/europe",
445 "tz/northamerica",
446 "tz/southamerica",
447 ];
448
449 let lines = tzfiles
450 .iter()
451 .map(Path::new)
452 .map(|p| {
453 Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| String::new())).join(p)
454 })
455 .map(|path| {
456 File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {}", path.display(), e))
457 })
458 .map(BufReader::new)
459 .flat_map(BufRead::lines)
460 .map(Result::unwrap)
461 .map(strip_comments);
462
463 for line in lines {
464 match parser.parse_str(&line).unwrap() {
465 Line::Zone(zone) => table.add_zone_line(zone).unwrap(),
466 Line::Continuation(cont) => table.add_continuation_line(cont).unwrap(),
467 Line::Rule(rule) => table.add_rule_line(rule).unwrap(),
468 Line::Link(link) => table.add_link_line(link).unwrap(),
469 Line::Space => {}
470 }
471 }
472
473 let mut table = table.build();
474 filter::maybe_filter_timezone_table(&mut table);
475
476 let timezone_path = Path::new(&env::var("OUT_DIR").unwrap()).join("timezones.rs");
477 let mut timezone_file = File::create(&timezone_path).unwrap();
478 write_timezone_file(&mut timezone_file, &table).unwrap();
479
480 let directory_path = Path::new(&env::var("OUT_DIR").unwrap()).join("directory.rs");
481 let mut directory_file = File::create(&directory_path).unwrap();
482 write_directory_file(&mut directory_file, &table).unwrap();
483 }
484