1 // This is a part of Chrono.
2 // See README.md and LICENSE.txt for details.
3 
4 /*!
5  * Various scanning routines for the parser.
6  */
7 
8 #![allow(deprecated)]
9 
10 use Weekday;
11 use super::{ParseResult, TOO_SHORT, INVALID, OUT_OF_RANGE};
12 
/// Compares `s` against `pattern`, ignoring ASCII letter case in `s` only.
///
/// Upper-case ASCII letters in `s` are folded to lower case before the byte-wise
/// comparison; `pattern` is compared verbatim, so it must already be lower case.
fn equals(s: &str, pattern: &str) -> bool {
    // folds `A`--`Z` to `a`--`z` and leaves every other byte untouched.
    fn fold(b: u8) -> u8 {
        if b'A' <= b && b <= b'Z' { b + 32 } else { b }
    }

    let (text, expected) = (s.as_bytes(), pattern.as_bytes());
    text.len() == expected.len()
        && text.iter().zip(expected.iter()).all(|(&t, &e)| fold(t) == e)
}
27 
28 /// Tries to parse the non-negative number from `min` to `max` digits.
29 ///
30 /// The absence of digits at all is an unconditional error.
31 /// More than `max` digits are consumed up to the first `max` digits.
32 /// Any number that does not fit in `i64` is an error.
33 #[inline]
number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)>34 pub fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
35     assert!(min <= max);
36 
37     // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
38     // the first non-numeric byte, which may be another ascii character or beginning of multi-byte
39     // UTF-8 character.
40     let bytes = s.as_bytes();
41     if bytes.len() < min {
42         return Err(TOO_SHORT);
43     }
44 
45     let mut n = 0i64;
46     for (i, c) in bytes.iter().take(max).cloned().enumerate() { // cloned() = copied()
47         if c < b'0' || b'9' < c {
48             if i < min {
49                 return Err(INVALID);
50             } else {
51                 return Ok((&s[i..], n));
52             }
53         }
54 
55         n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
56             Some(n) => n,
57             None => return Err(OUT_OF_RANGE),
58         };
59     }
60 
61     Ok((&s[::core::cmp::min(max, bytes.len())..], n))
62 }
63 
64 /// Tries to consume at least one digits as a fractional second.
65 /// Returns the number of whole nanoseconds (0--999,999,999).
nanosecond(s: &str) -> ParseResult<(&str, i64)>66 pub fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
67     // record the number of digits consumed for later scaling.
68     let origlen = s.len();
69     let (s, v) = number(s, 1, 9)?;
70     let consumed = origlen - s.len();
71 
72     // scale the number accordingly.
73     static SCALE: [i64; 10] = [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000,
74                                1_000, 100, 10, 1];
75     let v = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
76 
77     // if there are more than 9 digits, skip next digits.
78     let s = s.trim_left_matches(|c: char| '0' <= c && c <= '9');
79 
80     Ok((s, v))
81 }
82 
83 /// Tries to consume a fixed number of digits as a fractional second.
84 /// Returns the number of whole nanoseconds (0--999,999,999).
nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)>85 pub fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
86     // record the number of digits consumed for later scaling.
87     let (s, v) = number(s, digits, digits)?;
88 
89     // scale the number accordingly.
90     static SCALE: [i64; 10] = [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000,
91                                1_000, 100, 10, 1];
92     let v = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
93 
94     Ok((s, v))
95 }
96 
97 /// Tries to parse the month index (0 through 11) with the first three ASCII letters.
short_month0(s: &str) -> ParseResult<(&str, u8)>98 pub fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
99     if s.len() < 3 { return Err(TOO_SHORT); }
100     let buf = s.as_bytes();
101     let month0 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
102         (b'j',b'a',b'n') => 0,
103         (b'f',b'e',b'b') => 1,
104         (b'm',b'a',b'r') => 2,
105         (b'a',b'p',b'r') => 3,
106         (b'm',b'a',b'y') => 4,
107         (b'j',b'u',b'n') => 5,
108         (b'j',b'u',b'l') => 6,
109         (b'a',b'u',b'g') => 7,
110         (b's',b'e',b'p') => 8,
111         (b'o',b'c',b't') => 9,
112         (b'n',b'o',b'v') => 10,
113         (b'd',b'e',b'c') => 11,
114         _ => return Err(INVALID)
115     };
116     Ok((&s[3..], month0))
117 }
118 
119 /// Tries to parse the weekday with the first three ASCII letters.
short_weekday(s: &str) -> ParseResult<(&str, Weekday)>120 pub fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
121     if s.len() < 3 { return Err(TOO_SHORT); }
122     let buf = s.as_bytes();
123     let weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
124         (b'm',b'o',b'n') => Weekday::Mon,
125         (b't',b'u',b'e') => Weekday::Tue,
126         (b'w',b'e',b'd') => Weekday::Wed,
127         (b't',b'h',b'u') => Weekday::Thu,
128         (b'f',b'r',b'i') => Weekday::Fri,
129         (b's',b'a',b't') => Weekday::Sat,
130         (b's',b'u',b'n') => Weekday::Sun,
131         _ => return Err(INVALID)
132     };
133     Ok((&s[3..], weekday))
134 }
135 
136 /// Tries to parse the month index (0 through 11) with short or long month names.
137 /// It prefers long month names to short month names when both are possible.
short_or_long_month0(s: &str) -> ParseResult<(&str, u8)>138 pub fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
139     // lowercased month names, minus first three chars
140     static LONG_MONTH_SUFFIXES: [&'static str; 12] =
141         ["uary", "ruary", "ch", "il", "", "e", "y", "ust", "tember", "ober", "ember", "ember"];
142 
143     let (mut s, month0) = short_month0(s)?;
144 
145     // tries to consume the suffix if possible
146     let suffix = LONG_MONTH_SUFFIXES[month0 as usize];
147     if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) {
148         s = &s[suffix.len()..];
149     }
150 
151     Ok((s, month0))
152 }
153 
154 /// Tries to parse the weekday with short or long weekday names.
155 /// It prefers long weekday names to short weekday names when both are possible.
short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)>156 pub fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
157     // lowercased weekday names, minus first three chars
158     static LONG_WEEKDAY_SUFFIXES: [&'static str; 7] =
159         ["day", "sday", "nesday", "rsday", "day", "urday", "day"];
160 
161     let (mut s, weekday) = short_weekday(s)?;
162 
163     // tries to consume the suffix if possible
164     let suffix = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
165     if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) {
166         s = &s[suffix.len()..];
167     }
168 
169     Ok((s, weekday))
170 }
171 
172 /// Tries to consume exactly one given character.
char(s: &str, c1: u8) -> ParseResult<&str>173 pub fn char(s: &str, c1: u8) -> ParseResult<&str> {
174     match s.as_bytes().first() {
175         Some(&c) if c == c1 => Ok(&s[1..]),
176         Some(_) => Err(INVALID),
177         None => Err(TOO_SHORT),
178     }
179 }
180 
181 /// Tries to consume one or more whitespace.
space(s: &str) -> ParseResult<&str>182 pub fn space(s: &str) -> ParseResult<&str> {
183     let s_ = s.trim_left();
184     if s_.len() < s.len() {
185         Ok(s_)
186     } else if s.is_empty() {
187         Err(TOO_SHORT)
188     } else {
189         Err(INVALID)
190     }
191 }
192 
193 /// Consumes any number (including zero) of colon or spaces.
colon_or_space(s: &str) -> ParseResult<&str>194 pub fn colon_or_space(s: &str) -> ParseResult<&str> {
195     Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace()))
196 }
197 
198 /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible.
199 ///
200 /// The additional `colon` may be used to parse a mandatory or optional `:`
201 /// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>202 pub fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)>
203         where F: FnMut(&str) -> ParseResult<&str> {
204     timezone_offset_internal(s, consume_colon, false)
205 }
206 
timezone_offset_internal<F>(mut s: &str, mut consume_colon: F, allow_missing_minutes: bool) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>207 fn timezone_offset_internal<F>(mut s: &str, mut consume_colon: F, allow_missing_minutes: bool)
208 -> ParseResult<(&str, i32)>
209     where F: FnMut(&str) -> ParseResult<&str>
210 {
211     fn digits(s: &str) -> ParseResult<(u8, u8)> {
212         let b = s.as_bytes();
213         if b.len() < 2 {
214             Err(TOO_SHORT)
215         } else {
216             Ok((b[0], b[1]))
217         }
218     }
219     let negative = match s.as_bytes().first() {
220         Some(&b'+') => false,
221         Some(&b'-') => true,
222         Some(_) => return Err(INVALID),
223         None => return Err(TOO_SHORT),
224     };
225     s = &s[1..];
226 
227     // hours (00--99)
228     let hours = match digits(s)? {
229         (h1 @ b'0'...b'9', h2 @ b'0'...b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
230         _ => return Err(INVALID),
231     };
232     s = &s[2..];
233 
234     // colons (and possibly other separators)
235     s = consume_colon(s)?;
236 
237     // minutes (00--59)
238     // if the next two items are digits then we have to add minutes
239     let minutes = if let Ok(ds) = digits(s) {
240         match ds {
241             (m1 @ b'0'...b'5', m2 @ b'0'...b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
242             (b'6'...b'9', b'0'...b'9') => return Err(OUT_OF_RANGE),
243             _ => return Err(INVALID),
244         }
245     } else if allow_missing_minutes {
246         0
247     } else {
248         return Err(TOO_SHORT);
249     };
250     s = match s.len() {
251         len if len >= 2 => &s[2..],
252         len if len == 0 => s,
253         _ => return Err(TOO_SHORT),
254     };
255 
256     let seconds = hours * 3600 + minutes * 60;
257     Ok((s, if negative {-seconds} else {seconds}))
258 }
259 
260 /// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as `+00:00`.
timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>261 pub fn timezone_offset_zulu<F>(s: &str, colon: F)
262 -> ParseResult<(&str, i32)>
263     where F: FnMut(&str) -> ParseResult<&str>
264 {
265     let bytes = s.as_bytes();
266     match bytes.first() {
267         Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
268         Some(&b'u') | Some(&b'U') => {
269             if bytes.len() >= 3 {
270                 let (b, c) = (bytes[1], bytes[2]);
271                 match (b | 32, c | 32) {
272                     (b't', b'c') => Ok((&s[3..], 0)),
273                     _ => Err(INVALID),
274                 }
275             } else {
276                 Err(INVALID)
277             }
278         }
279         _ => timezone_offset(s, colon),
280     }
281 }
282 
283 /// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as
284 /// `+00:00`, and allows missing minutes entirely.
timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>285 pub fn timezone_offset_permissive<F>(s: &str, colon: F)
286 -> ParseResult<(&str, i32)>
287     where F: FnMut(&str) -> ParseResult<&str>
288 {
289     match s.as_bytes().first() {
290         Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
291         _ => timezone_offset_internal(s, colon, true),
292     }
293 }
294 
295 /// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
296 /// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>297 pub fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> {
298     // tries to parse legacy time zone names
299     let upto = s.as_bytes().iter().position(|&c| match c { b'a'...b'z' | b'A'...b'Z' => false,
300                                                            _ => true })
301         .unwrap_or_else(|| s.len());
302     if upto > 0 {
303         let name = &s[..upto];
304         let s = &s[upto..];
305         let offset_hours = |o| Ok((s, Some(o * 3600)));
306         if equals(name, "gmt") || equals(name, "ut") {
307             offset_hours(0)
308         } else if equals(name, "edt") {
309             offset_hours(-4)
310         } else if equals(name, "est") || equals(name, "cdt") {
311             offset_hours(-5)
312         } else if equals(name, "cst") || equals(name, "mdt") {
313             offset_hours(-6)
314         } else if equals(name, "mst") || equals(name, "pdt") {
315             offset_hours(-7)
316         } else if equals(name, "pst") {
317             offset_hours(-8)
318         } else {
319             Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000
320         }
321     } else {
322         let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
323         Ok((s_, Some(offset)))
324     }
325 }
326 
327