1 extern crate parse_zoneinfo;
2 #[cfg(feature = "filter-by-regex")]
3 extern crate regex;
4 
5 use std::collections::BTreeSet;
6 use std::env;
7 use std::fs::File;
8 use std::io::{self, BufRead, BufReader, Write};
9 use std::path::Path;
10 
11 use parse_zoneinfo::line::{Line, LineParser};
12 use parse_zoneinfo::structure::{Child, Structure};
13 use parse_zoneinfo::table::{Table, TableBuilder};
14 use parse_zoneinfo::transitions::FixedTimespan;
15 use parse_zoneinfo::transitions::TableTransitions;
16 
/// The name of the environment variable which possibly holds the filter regex.
///
/// `main` tells Cargo to re-run whenever this variable changes; its value is
/// only read by the `filter` module when the `filter-by-regex` feature is
/// enabled.
const FILTER_ENV_VAR_NAME: &str = "CHRONO_TZ_TIMEZONE_FILTER";
19 
// Drop a trailing `#` comment from one line of tz source text.
//
// The zoneinfo parser does not deal with comments itself, so they are removed
// here first. Strictly speaking a '#' between double quotes is not a comment
// and should survive, but the tz database as it stands never contains one, so
// everything from the first '#' onwards is simply discarded.
fn strip_comments(mut line: String) -> String {
    // With no '#' present the cut point is the end of the line, i.e. a no-op.
    let comment_start = line.find('#').unwrap_or(line.len());
    line.truncate(comment_start);
    line
}
30 
31 // Generate a list of the time zone periods beyond the first that apply
32 // to this zone, as a string representation of a static slice.
format_rest(rest: Vec<(i64, FixedTimespan)>) -> String33 fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String {
34     let mut ret = "&[\n".to_string();
35     for (start, FixedTimespan { utc_offset, dst_offset, name }) in rest {
36         ret.push_str(&format!(
37             "                    ({start}, FixedTimespan {{ \
38              utc_offset: {utc}, dst_offset: {dst}, name: \"{name}\" \
39              }}),\n",
40             start = start,
41             utc = utc_offset,
42             dst = dst_offset,
43             name = name,
44         ));
45     }
46     ret.push_str("                ]");
47     ret
48 }
49 
// Turn a tz database name into something usable as a Rust identifier:
//   * every '/' becomes "__" and every '+' becomes "Plus";
//   * if the first '-' is directly followed by a numeric character it is read
//     as a minus sign and every '-' becomes "Minus";
//   * otherwise '-' is read as a hyphen and every '-' is removed.
fn convert_bad_chars(name: &str) -> String {
    let cleaned = name.replace("/", "__").replace("+", "Plus");

    let first_dash = match cleaned.find('-') {
        Some(index) => index,
        // Nothing left to rewrite.
        None => return cleaned,
    };

    // '-' is a single byte, so `first_dash + 1` is always a char boundary.
    let followed_by_number =
        cleaned[first_dash + 1..].chars().next().map_or(false, char::is_numeric);
    let substitute = if followed_by_number { "Minus" } else { "" };
    cleaned.replace("-", substitute)
}
65 
66 // The timezone file contains impls of `Timespans` for all timezones in the
67 // database. The `Wrap` wrapper in the `timezone_impl` module then implements
68 // TimeZone for any contained struct that implements `Timespans`.
write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()>69 fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> {
70     let zones = table.zonesets.keys().chain(table.links.keys()).collect::<BTreeSet<_>>();
71     writeln!(timezone_file, "use core::fmt::{{self, Debug, Display, Formatter}};",)?;
72     writeln!(timezone_file, "use core::str::FromStr;\n",)?;
73     writeln!(
74         timezone_file,
75         "use ::timezone_impl::{{TimeSpans, FixedTimespanSet, FixedTimespan}};\n",
76     )?;
77     writeln!(
78         timezone_file,
79         "/// TimeZones built at compile time from the tz database
80 ///
81 /// This implements [`chrono::TimeZone`] so that it may be used in and to
82 /// construct chrono's DateTime type. See the root module documentation
83 /// for details."
84     )?;
85     writeln!(timezone_file, "#[derive(Clone, Copy, PartialEq, Eq, Hash)]\npub enum Tz {{")?;
86     for zone in &zones {
87         let zone_name = convert_bad_chars(zone);
88         writeln!(
89             timezone_file,
90             "    /// {raw_zone_name}\n    {zone},",
91             zone = zone_name,
92             raw_zone_name = zone
93         )?;
94     }
95     writeln!(timezone_file, "}}")?;
96 
97     let mut map = phf_codegen::Map::new();
98     for zone in &zones {
99         map.entry(zone, &format!("Tz::{}", convert_bad_chars(zone)));
100     }
101     writeln!(timezone_file, "static TIMEZONES: ::phf::Map<&'static str, Tz> = \n{};", map.build())?;
102 
103     #[cfg(feature = "case-insensitive")]
104     {
105         writeln!(timezone_file, "use uncased::UncasedStr;\n",)?;
106         let mut map = phf_codegen::Map::new();
107         for zone in &zones {
108             map.entry(uncased::UncasedStr::new(zone), &format!("Tz::{}", convert_bad_chars(zone)));
109         }
110         writeln!(
111             timezone_file,
112             "static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = \n{};",
113             // FIXME(petrosagg): remove this once rust-phf/rust-phf#232 is released
114             map.build().to_string().replace("::std::mem::transmute", "::core::mem::transmute")
115         )?;
116     }
117 
118     writeln!(
119         timezone_file,
120         "#[cfg(feature = \"std\")]
121 pub type ParseError = String;
122 #[cfg(not(feature = \"std\"))]
123 pub type ParseError = &'static str;
124 
125 impl FromStr for Tz {{
126     type Err = ParseError;
127     fn from_str(s: &str) -> Result<Self, Self::Err> {{
128         #[cfg(feature = \"std\")]
129         return TIMEZONES.get(s).cloned().ok_or_else(|| format!(\"'{{}}' is not a valid timezone\", s));
130         #[cfg(not(feature = \"std\"))]
131         return TIMEZONES.get(s).cloned().ok_or(\"received invalid timezone\");
132     }}
133 }}\n"
134     )?;
135 
136     writeln!(
137         timezone_file,
138         "impl Tz {{
139     pub fn name(self) -> &'static str {{
140         match self {{"
141     )?;
142     for zone in &zones {
143         let zone_name = convert_bad_chars(zone);
144         writeln!(
145             timezone_file,
146             "            Tz::{zone} => \"{raw_zone_name}\",",
147             zone = zone_name,
148             raw_zone_name = zone
149         )?;
150     }
151     writeln!(
152         timezone_file,
153         "        }}
154     }}"
155     )?;
156 
157     #[cfg(feature = "case-insensitive")]
158     {
159         writeln!(
160             timezone_file,
161             r#"
162     #[cfg(feature = "case-insensitive")]
163     /// Parses a timezone string in a case-insensitive way
164     pub fn from_str_insensitive(s: &str) -> Result<Self, ParseError> {{
165         #[cfg(feature = "std")]
166         return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or_else(|| format!("'{{}}' is not a valid timezone", s));
167         #[cfg(not(feature = "std"))]
168         return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or("received invalid timezone");
169     }}"#
170         )?;
171     }
172 
173     writeln!(timezone_file, "}}")?;
174 
175     writeln!(
176         timezone_file,
177         "impl Debug for Tz {{
178     fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
179         f.write_str(self.name().as_ref())
180     }}
181 }}\n"
182     )?;
183     writeln!(
184         timezone_file,
185         "impl Display for Tz {{
186     fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
187         f.write_str(self.name().as_ref())
188     }}
189 }}\n"
190     )?;
191     writeln!(
192         timezone_file,
193         "impl TimeSpans for Tz {{
194     fn timespans(&self) -> FixedTimespanSet {{
195         match *self {{"
196     )?;
197     for zone in &zones {
198         let timespans = table.timespans(&zone).unwrap();
199         let zone_name = convert_bad_chars(zone);
200         writeln!(
201             timezone_file,
202             "            Tz::{zone} => {{
203                 const REST: &[(i64, FixedTimespan)] = {rest};
204                 FixedTimespanSet {{
205                     first: FixedTimespan {{
206                         utc_offset: {utc},
207                         dst_offset: {dst},
208                         name: \"{name}\",
209                     }},
210                     rest: REST
211                 }}
212             }},\n",
213             zone = zone_name,
214             rest = format_rest(timespans.rest),
215             utc = timespans.first.utc_offset,
216             dst = timespans.first.dst_offset,
217             name = timespans.first.name,
218         )?;
219     }
220     write!(
221         timezone_file,
222         "         }}
223     }}
224 }}\n"
225     )?;
226     write!(
227         timezone_file,
228         "/// An array of every known variant
229 ///
230 /// Useful for iterating over known timezones:
231 ///
232 /// ```
233 /// use chrono_tz::{{TZ_VARIANTS, Tz}};
234 /// assert!(TZ_VARIANTS.iter().any(|v| *v == Tz::UTC));
235 /// ```
236 pub static TZ_VARIANTS: [Tz; {num}] = [
237 ",
238         num = zones.len()
239     )?;
240     for zone in &zones {
241         writeln!(timezone_file, "    Tz::{zone},", zone = convert_bad_chars(zone))?;
242     }
243     write!(timezone_file, "];")?;
244     Ok(())
245 }
246 
247 // Create a file containing nice-looking re-exports such as Europe::London
248 // instead of having to use chrono_tz::timezones::Europe__London
write_directory_file(directory_file: &mut File, table: &Table) -> io::Result<()>249 fn write_directory_file(directory_file: &mut File, table: &Table) -> io::Result<()> {
250     // add the `loose' zone definitions first at the top of the file
251     writeln!(directory_file, "use timezones::Tz;\n")?;
252     let zones = table
253         .zonesets
254         .keys()
255         .chain(table.links.keys())
256         .filter(|zone| !zone.contains('/'))
257         .collect::<BTreeSet<_>>();
258     for zone in zones {
259         let zone = convert_bad_chars(zone);
260         writeln!(directory_file, "pub const {name} : Tz = Tz::{name};", name = zone)?;
261     }
262     writeln!(directory_file)?;
263 
264     // now add the `structured' zone names in submodules
265     for entry in table.structure() {
266         if entry.name.contains('/') {
267             continue;
268         }
269         let module_name = convert_bad_chars(entry.name);
270         writeln!(directory_file, "pub mod {name} {{", name = module_name)?;
271         writeln!(directory_file, "    use timezones::Tz;\n",)?;
272         for child in entry.children {
273             match child {
274                 Child::Submodule(name) => {
275                     let submodule_name = convert_bad_chars(name);
276                     writeln!(directory_file, "    pub mod {name} {{", name = submodule_name)?;
277                     writeln!(directory_file, "        use timezones::Tz;\n",)?;
278                     let full_name = entry.name.to_string() + "/" + name;
279                     for entry in table.structure() {
280                         if entry.name == full_name {
281                             for child in entry.children {
282                                 match child {
283                                     Child::Submodule(_) => {
284                                         panic!("Depth of > 3 nested submodules not implemented!")
285                                     }
286                                     Child::TimeZone(name) => {
287                                         let converted_name = convert_bad_chars(name);
288                                         writeln!(directory_file,
289                                     "        pub const {name} : Tz = Tz::{module_name}__{submodule_name}__{name};",
290                                             module_name = module_name,
291                                             submodule_name = submodule_name,
292                                             name = converted_name,
293                                         )?;
294                                     }
295                                 }
296                             }
297                         }
298                     }
299                     writeln!(directory_file, "    }}\n")?;
300                 }
301                 Child::TimeZone(name) => {
302                     let name = convert_bad_chars(name);
303                     writeln!(
304                         directory_file,
305                         "    pub const {name} : Tz = Tz::{module_name}__{name};",
306                         module_name = module_name,
307                         name = name
308                     )?;
309                 }
310             }
311         }
312         writeln!(directory_file, "}}\n")?;
313     }
314     Ok(())
315 }
316 
317 /// Stub module because filter-by-regex feature is not enabled
318 #[cfg(not(feature = "filter-by-regex"))]
319 mod filter {
320     /// stub function because filter-by-regex feature is not enabled
maybe_filter_timezone_table(_table: &mut super::Table)321     pub(crate) fn maybe_filter_timezone_table(_table: &mut super::Table) {}
322 }
323 
/// Support code for the `filter-by-regex` feature: trims the parsed table down
/// to the time zones selected by a regex taken from the environment.
#[cfg(feature = "filter-by-regex")]
mod filter {
    use std::collections::HashSet;
    use std::env;

    use regex::Regex;

    use crate::{Table, FILTER_ENV_VAR_NAME};

    /// Filter `table` by the regex in [`FILTER_ENV_VAR_NAME`], if one is set.
    /// Without a regex the table is left as it is.
    pub(crate) fn maybe_filter_timezone_table(table: &mut Table) {
        if let Some(regex) = get_filter_regex() {
            filter_timezone_table(table, regex);
        }
    }

    /// Read the `CHRONO_TZ_TIMEZONE_FILTER` environment variable and compile
    /// it as a regex.
    ///
    /// Returns `None` when the variable is unset or blank. Panics when the
    /// value is not valid Unicode or not a valid regex, because a bad filter
    /// should fail the build rather than be silently ignored.
    fn get_filter_regex() -> Option<Regex> {
        let raw = match env::var(FILTER_ENV_VAR_NAME) {
            Ok(raw) => raw,
            Err(env::VarError::NotPresent) => return None,
            Err(env::VarError::NotUnicode(s)) => panic!(
                "The value '{:?}' for environment variable {} is not valid Unicode",
                s, FILTER_ENV_VAR_NAME
            ),
        };

        let pattern = raw.trim();
        if pattern.is_empty() {
            return None;
        }

        let regex = Regex::new(pattern).unwrap_or_else(|err| {
            panic!(
                "The value '{:?}' for environment variable {} is not a valid regex, err={}",
                pattern, FILTER_ENV_VAR_NAME, err
            )
        });
        Some(regex)
    }

    /// Record `new_value` as a name to keep.
    ///
    /// A name with exactly three '/'-separated parts also records its two-part
    /// prefix; without that we would lose half of Indiana in `directory.rs`.
    /// One-part prefixes are deliberately *not* recorded: keeping something
    /// like 'America' would inevitably include far too much.
    fn insert_keep_entry(keep: &mut HashSet<String>, new_value: &str) {
        let parts: Vec<&str> = new_value.split('/').collect();
        if let [first, second, _] = parts.as_slice() {
            keep.insert(format!("{}/{}", first, second));
        }

        keep.insert(new_value.to_owned());
    }

    /// Filter `table` by applying `filter_regex`.
    fn filter_timezone_table(table: &mut Table, filter_regex: Regex) {
        // Compute the transitive closure of things to keep. Doing this,
        // instead of just filtering `zonesets` and `links` by the regex,
        // helps to keep the `structure()` intact.

        // Seed: both ends of every link, judged independently by the regex.
        let mut keep = HashSet::new();
        for (link, target) in &table.links {
            if filter_regex.is_match(link) {
                insert_keep_entry(&mut keep, link);
            }
            if filter_regex.is_match(target) {
                insert_keep_entry(&mut keep, target);
            }
        }

        // Grow: if one end of a link is kept, keep the other end too. Repeat
        // until a pass adds nothing, giving up after 50 passes that grew.
        for pass in 1..=50 {
            let before = keep.len();

            for (link, target) in &table.links {
                match (keep.contains(link), keep.contains(target)) {
                    (true, false) => insert_keep_entry(&mut keep, target),
                    (false, true) => insert_keep_entry(&mut keep, link),
                    _ => {}
                }
            }

            if keep.len() == before {
                break;
            }

            if pass == 50 {
                println!("cargo:warning=Recursion limit reached while building filter list");
            }
        }

        // Actually do the filtering.
        table.links.retain(|link, target| keep.contains(link) || keep.contains(target));

        // A zone survives if the regex matches it directly or if its name
        // starts with any kept name.
        table.zonesets.retain(|zone, _| {
            filter_regex.is_match(zone) || keep.iter().any(|kept| zone.starts_with(kept))
        });
    }
}
430 
main()431 pub fn main() {
432     println!("cargo:rerun-if-env-changed={}", FILTER_ENV_VAR_NAME);
433 
434     let parser = LineParser::new();
435     let mut table = TableBuilder::new();
436 
437     let tzfiles = [
438         "tz/africa",
439         "tz/antarctica",
440         "tz/asia",
441         "tz/australasia",
442         "tz/backward",
443         "tz/etcetera",
444         "tz/europe",
445         "tz/northamerica",
446         "tz/southamerica",
447     ];
448 
449     let lines = tzfiles
450         .iter()
451         .map(Path::new)
452         .map(|p| {
453             Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| String::new())).join(p)
454         })
455         .map(|path| {
456             File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {}", path.display(), e))
457         })
458         .map(BufReader::new)
459         .flat_map(BufRead::lines)
460         .map(Result::unwrap)
461         .map(strip_comments);
462 
463     for line in lines {
464         match parser.parse_str(&line).unwrap() {
465             Line::Zone(zone) => table.add_zone_line(zone).unwrap(),
466             Line::Continuation(cont) => table.add_continuation_line(cont).unwrap(),
467             Line::Rule(rule) => table.add_rule_line(rule).unwrap(),
468             Line::Link(link) => table.add_link_line(link).unwrap(),
469             Line::Space => {}
470         }
471     }
472 
473     let mut table = table.build();
474     filter::maybe_filter_timezone_table(&mut table);
475 
476     let timezone_path = Path::new(&env::var("OUT_DIR").unwrap()).join("timezones.rs");
477     let mut timezone_file = File::create(&timezone_path).unwrap();
478     write_timezone_file(&mut timezone_file, &table).unwrap();
479 
480     let directory_path = Path::new(&env::var("OUT_DIR").unwrap()).join("directory.rs");
481     let mut directory_file = File::create(&directory_path).unwrap();
482     write_directory_file(&mut directory_file, &table).unwrap();
483 }
484