1 #![deny(missing_docs)]
2 #![cfg_attr(test, allow(unknown_lints))]
3 #![cfg_attr(test, deny(warnings))]
4 
5 //! # dtparse
6 //! The fully-featured "even I couldn't understand that" time parser.
7 //! Designed to take in strings and give back sensible dates and times.
8 //!
//! dtparse has its foundations in the [`dateutil`][dateutil] library for
10 //! Python, which excels at taking "interesting" strings and trying to make
11 //! sense of the dates and times they contain. A couple of quick examples
12 //! from the test cases should give some context:
13 //!
14 //! ```rust,ignore (tests-dont-compile-on-old-rust)
15 //! # extern crate chrono;
16 //! # extern crate dtparse;
17 //! use chrono::prelude::*;
18 //! use dtparse::parse;
19 //!
20 //! assert_eq!(
21 //!     parse("2008.12.30"),
22 //!     Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None))
23 //! );
24 //!
25 //! // It can even handle timezones!
26 //! assert_eq!(
27 //!     parse("January 4, 2024; 18:30:04 +02:00"),
28 //!     Ok((
29 //!         NaiveDate::from_ymd(2024, 1, 4).and_hms(18, 30, 4),
30 //!         Some(FixedOffset::east(7200))
31 //!     ))
32 //! );
33 //! ```
34 //!
35 //! And we can even handle fuzzy strings where dates/times aren't the
36 //! only content if we dig into the implementation a bit!
37 //!
38 //! ```rust,ignore (tests-dont-compile-on-old-rust)
39 //! # extern crate chrono;
40 //! # extern crate dtparse;
41 //! use chrono::prelude::*;
42 //! use dtparse::Parser;
43 //! # use std::collections::HashMap;
44 //!
45 //! let mut p = Parser::default();
46 //! assert_eq!(
47 //!     p.parse(
48 //!         "I first released this library on the 17th of June, 2018.",
49 //!         None, None,
50 //!         true /* turns on fuzzy mode */,
51 //!         true /* gives us the tokens that weren't recognized */,
52 //!         None, false, &HashMap::new()
53 //!     ),
54 //!     Ok((
55 //!         NaiveDate::from_ymd(2018, 6, 17).and_hms(0, 0, 0),
56 //!         None,
57 //!         Some(vec!["I first released this library on the ",
58 //!                   " of ", ", "].iter().map(|&s| s.into()).collect())
59 //!     ))
60 //! );
61 //! ```
62 //!
//! Further examples, covering international usage, can be found in the `examples` directory.
64 //!
65 //! # Usage
66 //!
//! `dtparse` requires a minimum Rust version of 1.28 to build, and is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
//! tested against them.
70 //!
71 //! [dateutil]: https://github.com/dateutil/dateutil
72 
73 #[macro_use]
74 extern crate lazy_static;
75 
76 extern crate chrono;
77 extern crate chrono_tz;
78 extern crate num_traits;
79 extern crate rust_decimal;
80 
81 use chrono::Datelike;
82 use chrono::Duration;
83 use chrono::FixedOffset;
84 use chrono::Local;
85 use chrono::NaiveDate;
86 use chrono::NaiveDateTime;
87 use chrono::NaiveTime;
88 use chrono::Timelike;
89 use num_traits::cast::ToPrimitive;
90 use rust_decimal::Decimal;
91 use rust_decimal::Error as DecimalError;
92 use std::cmp::min;
93 use std::collections::HashMap;
94 use std::error::Error;
95 use std::fmt;
96 use std::num::ParseIntError;
97 use std::str::FromStr;
98 use std::vec::Vec;
99 
100 mod tokenize;
101 mod weekday;
102 
103 #[cfg(test)]
104 mod tests;
105 
106 use tokenize::Tokenizer;
107 use weekday::day_of_week;
108 use weekday::DayOfWeek;
109 
lazy_static! {
    // Whole-number decimal constants (mantissa, scale 0), initialised lazily
    // on first access by `lazy_static`.
    static ref ZERO: Decimal = Decimal::new(0, 0);
    static ref ONE: Decimal = Decimal::new(1, 0);
    static ref TWENTY_FOUR: Decimal = Decimal::new(24, 0);
    static ref SIXTY: Decimal = Decimal::new(60, 0);
    // Shared parser instance built from `Parser::default()`.
    static ref DEFAULT_PARSER: Parser = Parser::default();
}
117 
118 impl From<DecimalError> for ParseError {
from(err: DecimalError) -> Self119     fn from(err: DecimalError) -> Self {
120         ParseError::InvalidNumeric(format!("{}", err))
121     }
122 }
123 
124 impl From<ParseIntError> for ParseError {
from(err: ParseIntError) -> Self125     fn from(err: ParseIntError) -> Self {
126         ParseError::InvalidNumeric(format!("{}", err))
127     }
128 }
129 
/// Everything that can go wrong while interpreting a time string
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// An "AM"/"PM" marker was given, but no hour for it to apply to
    AmPmWithoutHour,
    /// A component holds a value that cannot exist, such as the 32nd day of a month
    ImpossibleTimestamp(&'static str),
    /// A token that was expected to be numeric could not be parsed as a number
    InvalidNumeric(String),
    /// The string as a whole was not recognized as a date; please report it to
    /// the maintainer so new test cases can be developed
    UnrecognizedFormat,
    /// The string contained a token the parser did not recognize while fuzzy mode was off
    UnrecognizedToken(String),
    /// The timezone could not be handled; please report to the maintainer, as the
    /// timestring likely exposes a bug in the implementation
    TimezoneUnsupported,
    /// The year/month/day values in the string could not be reconciled; please report
    /// to the maintainer, as the timestring likely exposes a bug in the implementation
    YearMonthDayError(&'static str),
    /// No date/time-related content was found in the supplied string
    NoDate,
}

impl fmt::Display for ParseError {
    /// Renders the error using its `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}

impl Error for ParseError {}

type ParseResult<I> = Result<I, ParseError>;
163 
tokenize(parse_string: &str) -> Vec<String>164 pub(crate) fn tokenize(parse_string: &str) -> Vec<String> {
165     let tokenizer = Tokenizer::new(parse_string);
166     tokenizer.collect()
167 }
168 
/// Helper for building the lookup tables stored in a `ParserInfo`.
///
/// Every name is lower-cased before being used as a key. When exactly one
/// list is supplied, each name maps to its position inside that list;
/// otherwise each name maps to the index of the list it belongs to. If a
/// lower-cased name occurs more than once, the last occurrence wins.
pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
    if vec.len() == 1 {
        // A lone list: every entry is keyed to its own position.
        return vec[0]
            .iter()
            .enumerate()
            .map(|(position, name)| (name.to_lowercase(), position))
            .collect();
    }

    // Several lists (or none): every entry is keyed to its list's index.
    vec.iter()
        .enumerate()
        .flat_map(|(group, names)| names.iter().map(move |name| (name.to_lowercase(), group)))
        .collect()
}
188 
/// Container for specific tokens to be recognized during parsing.
///
/// - `jump`: Values that indicate the end of a token for parsing and can be ignored
/// - `weekday`: Names of the days of the week
/// - `months`: Names of the months
/// - `hms`: Names for the units of time - hours, minutes, seconds in English
/// - `ampm`: AM and PM tokens
/// - `utczone`: Tokens indicating a UTC-timezone string
/// - `pertain`: Tokens indicating a "belongs to" relationship; in English this is just "of"
/// - `tzoffset`: Map of timezone names to their offset in seconds
/// - `dayfirst`: Upon encountering an ambiguous date, treat the first value as the day
/// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year
/// - `year`: The current year
/// - `century`: The first year in the current century
///
/// All lookups into these maps are performed on the lower-cased token.
///
/// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence
/// and will be parsed as "YDM"
#[derive(Debug, PartialEq)]
pub struct ParserInfo {
    /// Tokens that can be safely ignored
    pub jump: HashMap<String, usize>,
    /// Names of all seven weekdays, mapped to a zero-based weekday index
    pub weekday: HashMap<String, usize>,
    /// Names of all twelve months, mapped to a zero-based month index
    /// (lookups add one to produce a calendar month number)
    pub months: HashMap<String, usize>,
    /// Tokens to indicate a value is in units of hours (0), minutes (1), or seconds (2)
    pub hms: HashMap<String, usize>,
    /// Tokens to indicate a value refers to AM (0) or PM (1) time
    pub ampm: HashMap<String, usize>,
    /// Tokens to indicate our timestamp is in the UTC timezone
    pub utczone: HashMap<String, usize>,
    /// Tokens to indicate values "belonging" to other tokens (e.g. 3rd *of* March)
    pub pertain: HashMap<String, usize>,
    /// Map of timezone names to their offset in seconds
    pub tzoffset: HashMap<String, usize>,
    /// For ambiguous year/month/day values, and `dayfirst` was not specified as
    /// an argument to `Parser`, treat the first observed value as the day.
    pub dayfirst: bool,
    /// For ambiguous year/month/day values, and `yearfirst` was not specified as
    /// an argument to `Parser`, treat the first observed value as the year.
    /// Takes priority over `dayfirst`
    pub yearfirst: bool,
    /// The current year we are parsing values for
    pub year: i32,
    /// The first year of the century containing `year` (e.g. 2000 for 2024);
    /// added to two-digit years when the century was not written out
    pub century: i32,
}
236 
237 impl Default for ParserInfo {
238     /// Create a basic `ParserInfo` object suitable for parsing dates in English
default() -> Self239     fn default() -> Self {
240         let year = Local::now().year();
241         let century = year / 100 * 100;
242 
243         ParserInfo {
244             jump: parse_info(vec![vec![
245                 " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
246                 "nd", "rd", "th",
247             ]]),
248             weekday: parse_info(vec![
249                 vec!["Mon", "Monday"],
250                 vec!["Tue", "Tues", "Tuesday"],
251                 vec!["Wed", "Wednesday"],
252                 vec!["Thu", "Thurs", "Thursday"],
253                 vec!["Fri", "Friday"],
254                 vec!["Sat", "Saturday"],
255                 vec!["Sun", "Sunday"],
256             ]),
257             months: parse_info(vec![
258                 vec!["Jan", "January"],
259                 vec!["Feb", "February"],
260                 vec!["Mar", "March"],
261                 vec!["Apr", "April"],
262                 vec!["May"],
263                 vec!["Jun", "June"],
264                 vec!["Jul", "July"],
265                 vec!["Aug", "August"],
266                 vec!["Sep", "Sept", "September"],
267                 vec!["Oct", "October"],
268                 vec!["Nov", "November"],
269                 vec!["Dec", "December"],
270             ]),
271             hms: parse_info(vec![
272                 vec!["h", "hour", "hours"],
273                 vec!["m", "minute", "minutes"],
274                 vec!["s", "second", "seconds"],
275             ]),
276             ampm: parse_info(vec![vec!["am", "a"], vec!["pm", "p"]]),
277             utczone: parse_info(vec![vec!["UTC", "GMT", "Z"]]),
278             pertain: parse_info(vec![vec!["of"]]),
279             tzoffset: parse_info(vec![vec![]]),
280             dayfirst: false,
281             yearfirst: false,
282             year,
283             century,
284         }
285     }
286 }
287 
288 impl ParserInfo {
jump_index(&self, name: &str) -> bool289     fn jump_index(&self, name: &str) -> bool {
290         self.jump.contains_key(&name.to_lowercase())
291     }
292 
weekday_index(&self, name: &str) -> Option<usize>293     fn weekday_index(&self, name: &str) -> Option<usize> {
294         self.weekday.get(&name.to_lowercase()).cloned()
295     }
296 
month_index(&self, name: &str) -> Option<usize>297     fn month_index(&self, name: &str) -> Option<usize> {
298         self.months.get(&name.to_lowercase()).map(|u| u + 1)
299     }
300 
hms_index(&self, name: &str) -> Option<usize>301     fn hms_index(&self, name: &str) -> Option<usize> {
302         self.hms.get(&name.to_lowercase()).cloned()
303     }
304 
ampm_index(&self, name: &str) -> Option<bool>305     fn ampm_index(&self, name: &str) -> Option<bool> {
306         if let Some(v) = self.ampm.get(&name.to_lowercase()) {
307             // Python technically uses numbers here, but given that the numbers are
308             // only 0 and 1, it's easier to use booleans
309             Some(*v == 1)
310         } else {
311             None
312         }
313     }
314 
pertain_index(&self, name: &str) -> bool315     fn pertain_index(&self, name: &str) -> bool {
316         self.pertain.contains_key(&name.to_lowercase())
317     }
318 
utczone_index(&self, name: &str) -> bool319     fn utczone_index(&self, name: &str) -> bool {
320         self.utczone.contains_key(&name.to_lowercase())
321     }
322 
tzoffset_index(&self, name: &str) -> Option<usize>323     fn tzoffset_index(&self, name: &str) -> Option<usize> {
324         if self.utczone.contains_key(&name.to_lowercase()) {
325             Some(0)
326         } else {
327             self.tzoffset.get(&name.to_lowercase()).cloned()
328         }
329     }
330 
convertyear(&self, year: i32, century_specified: bool) -> i32331     fn convertyear(&self, year: i32, century_specified: bool) -> i32 {
332         let mut year = year;
333 
334         if year < 100 && !century_specified {
335             year += self.century;
336             if year >= self.year + 50 {
337                 year -= 100;
338             } else if year < self.year - 50 {
339                 year += 100
340             }
341         }
342 
343         year
344     }
345 
346     // TODO: Should this be moved elsewhere?
validate(&self, res: &mut ParsingResult) -> bool347     fn validate(&self, res: &mut ParsingResult) -> bool {
348         if let Some(y) = res.year {
349             res.year = Some(self.convertyear(y, res.century_specified))
350         };
351 
352         if (res.tzoffset == Some(0) && res.tzname.is_none())
353             || (res.tzname == Some("Z".to_owned()) || res.tzname == Some("z".to_owned()))
354         {
355             res.tzname = Some("UTC".to_owned());
356             res.tzoffset = Some(0);
357         } else if res.tzoffset != Some(0)
358             && res.tzname.is_some()
359             && self.utczone_index(res.tzname.as_ref().unwrap())
360         {
361             res.tzoffset = Some(0);
362         }
363 
364         true
365     }
366 }
367 
days_in_month(year: i32, month: i32) -> Result<u32, ParseError>368 fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
369     let leap_year = match year % 4 {
370         0 => year % 400 != 0,
371         _ => false,
372     };
373 
374     match month {
375         2 => {
376             if leap_year {
377                 Ok(29)
378             } else {
379                 Ok(28)
380             }
381         }
382         1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
383         4 | 6 | 9 | 11 => Ok(30),
384         _ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
385     }
386 }
387 
/// Role assigned to a value collected in `YMD`: year, month, or day.
/// Hashable so it can key the label-to-index map built in `resolve_ymd`.
#[derive(Debug, Hash, PartialEq, Eq)]
enum YMDLabel {
    Year,
    Month,
    Day,
}
394 
/// Accumulates the candidate year/month/day values seen while parsing,
/// together with the positions of any values whose role is already known.
#[derive(Debug, Default)]
struct YMD {
    // Candidate values in the order they were appended.
    _ymd: Vec<i32>, // TODO: This seems like a super weird way to store things
    // Set once a value has been recognised as a year that includes its century.
    century_specified: bool,
    // Index into `_ymd` of the value labelled as the day, if any.
    dstridx: Option<usize>,
    // Index into `_ymd` of the value labelled as the month, if any.
    mstridx: Option<usize>,
    // Index into `_ymd` of the value labelled as the year, if any.
    ystridx: Option<usize>,
}
403 
404 impl YMD {
len(&self) -> usize405     fn len(&self) -> usize {
406         self._ymd.len()
407     }
408 
could_be_day(&self, val: i32) -> bool409     fn could_be_day(&self, val: i32) -> bool {
410         if self.dstridx.is_some() {
411             false
412         } else if self.mstridx.is_none() {
413             (1 <= val) && (val <= 31)
414         } else if self.ystridx.is_none() {
415             // UNWRAP: Earlier condition catches mstridx missing
416             let month = self._ymd[self.mstridx.unwrap()];
417             1 <= val && (val <= days_in_month(2000, month).unwrap() as i32)
418         } else {
419             // UNWRAP: Earlier conditions prevent us from unsafely unwrapping
420             let month = self._ymd[self.mstridx.unwrap()];
421             let year = self._ymd[self.ystridx.unwrap()];
422             1 <= val && (val <= days_in_month(year, month).unwrap() as i32)
423         }
424     }
425 
append(&mut self, val: i32, token: &str, label: Option<YMDLabel>) -> ParseResult<()>426     fn append(&mut self, val: i32, token: &str, label: Option<YMDLabel>) -> ParseResult<()> {
427         let mut label = label;
428 
429         // Python auto-detects strings using the '__len__' function here.
430         // We instead take in both and handle as necessary.
431         if Decimal::from_str(token).is_ok() && token.len() > 2 {
432             self.century_specified = true;
433             match label {
434                 None | Some(YMDLabel::Year) => label = Some(YMDLabel::Year),
435                 Some(YMDLabel::Month) => {
436                     return Err(ParseError::ImpossibleTimestamp("Invalid month"))
437                 }
438                 Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
439             }
440         }
441 
442         if val > 100 {
443             self.century_specified = true;
444             match label {
445                 None => label = Some(YMDLabel::Year),
446                 Some(YMDLabel::Year) => (),
447                 Some(YMDLabel::Month) => {
448                     return Err(ParseError::ImpossibleTimestamp("Invalid month"))
449                 }
450                 Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
451             }
452         }
453 
454         self._ymd.push(val);
455 
456         match label {
457             Some(YMDLabel::Month) => {
458                 if self.mstridx.is_some() {
459                     Err(ParseError::YearMonthDayError("Month already set"))
460                 } else {
461                     self.mstridx = Some(self._ymd.len() - 1);
462                     Ok(())
463                 }
464             }
465             Some(YMDLabel::Day) => {
466                 if self.dstridx.is_some() {
467                     Err(ParseError::YearMonthDayError("Day already set"))
468                 } else {
469                     self.dstridx = Some(self._ymd.len() - 1);
470                     Ok(())
471                 }
472             }
473             Some(YMDLabel::Year) => {
474                 if self.ystridx.is_some() {
475                     Err(ParseError::YearMonthDayError("Year already set"))
476                 } else {
477                     self.ystridx = Some(self._ymd.len() - 1);
478                     Ok(())
479                 }
480             }
481             None => Ok(()),
482         }
483     }
484 
resolve_from_stridxs( &mut self, strids: &mut HashMap<YMDLabel, usize>, ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)>485     fn resolve_from_stridxs(
486         &mut self,
487         strids: &mut HashMap<YMDLabel, usize>,
488     ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)> {
489         if self._ymd.len() == 3 && strids.len() == 2 {
490             let missing_key = if !strids.contains_key(&YMDLabel::Year) {
491                 YMDLabel::Year
492             } else if !strids.contains_key(&YMDLabel::Month) {
493                 YMDLabel::Month
494             } else {
495                 YMDLabel::Day
496             };
497 
498             let strids_vals: Vec<usize> = strids.values().cloned().collect();
499             let missing_val = if !strids_vals.contains(&0) {
500                 0
501             } else if !strids_vals.contains(&1) {
502                 1
503             } else {
504                 2
505             };
506 
507             strids.insert(missing_key, missing_val);
508         }
509 
510         if self._ymd.len() != strids.len() {
511             return Err(ParseError::YearMonthDayError(
512                 "Tried to resolve year, month, and day without enough information",
513             ));
514         }
515 
516         Ok((
517             strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
518             strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
519             strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
520         ))
521     }
522 
523     #[allow(clippy::needless_return)]
resolve_ymd( &mut self, yearfirst: bool, dayfirst: bool, ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)>524     fn resolve_ymd(
525         &mut self,
526         yearfirst: bool,
527         dayfirst: bool,
528     ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)> {
529         let len_ymd = self._ymd.len();
530 
531         let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
532         self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
533         self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
534         self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
535 
536         // TODO: More Rustiomatic way of doing this?
537         if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
538             return self.resolve_from_stridxs(&mut strids);
539         };
540 
541         // Received year, month, day, and ???
542         if len_ymd > 3 {
543             return Err(ParseError::YearMonthDayError(
544                 "Received extra tokens in resolving year, month, and day",
545             ));
546         }
547 
548         match (len_ymd, self.mstridx) {
549             (1, Some(val)) | (2, Some(val)) => {
550                 let other = if len_ymd == 1 {
551                     self._ymd[0]
552                 } else {
553                     self._ymd[1 - val]
554                 };
555                 if other > 31 {
556                     return Ok((Some(other), Some(self._ymd[val]), None));
557                 }
558                 return Ok((None, Some(self._ymd[val]), Some(other)));
559             }
560             (2, None) => {
561                 if self._ymd[0] > 31 {
562                     return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
563                 }
564                 if self._ymd[1] > 31 {
565                     return Ok((Some(self._ymd[1]), Some(self._ymd[0]), None));
566                 }
567                 if dayfirst && self._ymd[1] <= 12 {
568                     return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
569                 }
570                 return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
571             }
572             (3, Some(0)) => {
573                 if self._ymd[1] > 31 {
574                     return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
575                 }
576                 return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
577             }
578             (3, Some(1)) => {
579                 if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
580                     return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
581                 }
582                 return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
583             }
584             (3, Some(2)) => {
585                 // It was in the original docs, so: WTF!?
586                 if self._ymd[1] > 31 {
587                     return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
588                 }
589                 return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
590             }
591             (3, None) => {
592                 if self._ymd[0] > 31
593                     || self.ystridx == Some(0)
594                     || (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
595                 {
596                     if dayfirst && self._ymd[2] <= 12 {
597                         return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
598                     }
599                     return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
600                 } else if self._ymd[0] > 12 || (dayfirst && self._ymd[1] <= 12) {
601                     return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
602                 }
603                 return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
604             }
605             (_, _) => {
606                 return Ok((None, None, None));
607             }
608         }
609     }
610 }
611 
/// Raw components recognised in a time string, before they are combined
/// into a timestamp. Every component is optional.
#[derive(Default, Debug, PartialEq)]
struct ParsingResult {
    year: Option<i32>,
    month: Option<i32>,
    day: Option<i32>,
    // Index of the matched weekday name in `ParserInfo::weekday`.
    weekday: Option<usize>,
    hour: Option<i32>,
    minute: Option<i32>,
    second: Option<i32>,
    microsecond: Option<i32>,
    // Timezone token as it appeared in the input.
    tzname: Option<String>,
    // Timezone offset; presumably in seconds, matching `ParserInfo::tzoffset` -- TODO confirm.
    tzoffset: Option<i32>,
    // `Some(true)` for PM, `Some(false)` for AM.
    ampm: Option<bool>,
    // Whether the year was given with its century; forwarded to `ParserInfo::convertyear`.
    century_specified: bool,
    // NOTE(review): not populated in the visible part of the file -- confirm usage.
    any_unused_tokens: Vec<String>,
}
628 
// Evaluates to 1 when the given `Option` expression is `Some`, 0 otherwise.
macro_rules! option_len {
    ($o:expr) => {{
        match $o {
            Some(_) => 1,
            None => 0,
        }
    }};
}
638 
639 impl ParsingResult {
len(&self) -> usize640     fn len(&self) -> usize {
641         option_len!(self.year)
642             + option_len!(self.month)
643             + option_len!(self.day)
644             + option_len!(self.weekday)
645             + option_len!(self.hour)
646             + option_len!(self.minute)
647             + option_len!(self.second)
648             + option_len!(self.microsecond)
649             + option_len!(self.tzname)
650             + option_len!(self.ampm)
651     }
652 }
653 
/// Parser is responsible for doing the actual work of understanding a time string.
/// The root level `parse` function is responsible for constructing a default `Parser`
/// and triggering its behavior.
///
/// The derived `Default` builds a parser around `ParserInfo::default()`.
#[derive(Default)]
pub struct Parser {
    // Token tables and ambiguity preferences consulted while parsing.
    info: ParserInfo,
}
661 
662 impl Parser {
663     /// Create a new `Parser` instance using the provided `ParserInfo`.
664     ///
665     /// This method allows you to set up a parser to handle different
666     /// names for days of the week, months, etc., enabling customization
667     /// for different languages or extra values.
new(info: ParserInfo) -> Self668     pub fn new(info: ParserInfo) -> Self {
669         Parser { info }
670     }
671 
672     /// Main method to trigger parsing of a string using the previously-provided
673     /// parser information. Returns a naive timestamp along with timezone and
674     /// unused tokens if available.
675     ///
676     /// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous
677     /// dates. Consider the following scenarios where we parse the string '01.02.03'
678     ///
679     /// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003`
680     /// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001`
681     /// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001`
682     ///
683     /// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if
684     /// they are unrecognized. However, the unused tokens will not be returned
685     /// unless `fuzzy_with_tokens` is set as `true`.
686     ///
687     /// `default` is the timestamp used to infer missing values, and is midnight
688     /// of the current day by default. For example, when parsing the text '2003',
689     /// we will use the current month and day as a default value, leading to a
690     /// result of 'March 3, 2003' if the function was run using a default of
691     /// March 3rd.
692     ///
693     /// `ignoretz` forces the parser to ignore timezone information even if it
694     /// is recognized in the time string
695     ///
696     /// `tzinfos` is a map of timezone names to the offset seconds. For example,
697     /// the parser would ignore the 'EST' part of the string in '10 AM EST'
698     /// unless you added a `tzinfos` map of `{"EST": "14400"}`. Please note that
699     /// timezone name support (i.e. "EST", "BRST") is not available by default
700     /// at the moment, they must be added through `tzinfos` at the moment in
701     /// order to be resolved.
702     #[allow(clippy::too_many_arguments)]
parse( &self, timestr: &str, dayfirst: Option<bool>, yearfirst: Option<bool>, fuzzy: bool, fuzzy_with_tokens: bool, default: Option<&NaiveDateTime>, ignoretz: bool, tzinfos: &HashMap<String, i32>, ) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)>703     pub fn parse(
704         &self,
705         timestr: &str,
706         dayfirst: Option<bool>,
707         yearfirst: Option<bool>,
708         fuzzy: bool,
709         fuzzy_with_tokens: bool,
710         default: Option<&NaiveDateTime>,
711         ignoretz: bool,
712         tzinfos: &HashMap<String, i32>,
713     ) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)> {
714         let default_date = default.unwrap_or(&Local::now().naive_local()).date();
715 
716         let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms(0, 0, 0));
717 
718         let (res, tokens) =
719             self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;
720 
721         if res.len() == 0 {
722             return Err(ParseError::NoDate);
723         }
724 
725         let naive = self.build_naive(&res, &default_ts)?;
726 
727         if !ignoretz {
728             let offset = self.build_tzaware(&naive, &res, tzinfos)?;
729             Ok((naive, offset, tokens))
730         } else {
731             Ok((naive, None, tokens))
732         }
733     }
734 
735     #[allow(clippy::cognitive_complexity)] // Imitating Python API is priority
parse_with_tokens( &self, timestr: &str, dayfirst: Option<bool>, yearfirst: Option<bool>, fuzzy: bool, fuzzy_with_tokens: bool, ) -> Result<(ParsingResult, Option<Vec<String>>), ParseError>736     fn parse_with_tokens(
737         &self,
738         timestr: &str,
739         dayfirst: Option<bool>,
740         yearfirst: Option<bool>,
741         fuzzy: bool,
742         fuzzy_with_tokens: bool,
743     ) -> Result<(ParsingResult, Option<Vec<String>>), ParseError> {
744         let fuzzy = if fuzzy_with_tokens { true } else { fuzzy };
745         // This is probably a stylistic abomination
746         let dayfirst = if let Some(dayfirst) = dayfirst {
747             dayfirst
748         } else {
749             self.info.dayfirst
750         };
751         let yearfirst = if let Some(yearfirst) = yearfirst {
752             yearfirst
753         } else {
754             self.info.yearfirst
755         };
756 
757         let mut res = ParsingResult::default();
758 
759         let mut l = tokenize(&timestr);
760         let mut skipped_idxs: Vec<usize> = Vec::new();
761 
762         let mut ymd = YMD::default();
763 
764         let len_l = l.len();
765         let mut i = 0;
766 
767         while i < len_l {
768             let value_repr = l[i].clone();
769 
770             if let Ok(_v) = Decimal::from_str(&value_repr) {
771                 i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?;
772             } else if let Some(value) = self.info.weekday_index(&l[i]) {
773                 res.weekday = Some(value);
774             } else if let Some(value) = self.info.month_index(&l[i]) {
775                 ymd.append(value as i32, &l[i], Some(YMDLabel::Month))?;
776 
777                 if i + 1 < len_l {
778                     if l[i + 1] == "-" || l[i + 1] == "/" {
779                         // Jan-01[-99]
780                         let sep = &l[i + 1];
781                         // TODO: This seems like a very unsafe unwrap
782                         ymd.append(l[i + 2].parse::<i32>()?, &l[i + 2], None)?;
783 
784                         if i + 3 < len_l && &l[i + 3] == sep {
785                             // Jan-01-99
786                             ymd.append(l[i + 4].parse::<i32>()?, &l[i + 4], None)?;
787                             i += 2;
788                         }
789 
790                         i += 2;
791                     } else if i + 4 < len_l
792                         && l[i + 1] == l[i + 3]
793                         && l[i + 3] == " "
794                         && self.info.pertain_index(&l[i + 2])
795                     {
796                         // Jan of 01
797                         if let Ok(value) = l[i + 4].parse::<i32>() {
798                             let year = self.info.convertyear(value, false);
799                             ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?;
800                         }
801 
802                         i += 4;
803                     }
804                 }
805             } else if let Some(value) = self.info.ampm_index(&l[i]) {
806                 let is_ampm = self.ampm_valid(res.hour, res.ampm, fuzzy);
807 
808                 if is_ampm == Ok(true) {
809                     res.hour = res.hour.map(|h| self.adjust_ampm(h, value));
810                     res.ampm = Some(value);
811                 } else if fuzzy {
812                     skipped_idxs.push(i);
813                 }
814             } else if self.could_be_tzname(res.hour, &res.tzname, res.tzoffset, &l[i]) {
815                 res.tzname = Some(l[i].clone());
816 
817                 let tzname = res.tzname.clone().unwrap();
818                 res.tzoffset = self.info.tzoffset_index(&tzname).map(|t| t as i32);
819 
820                 if i + 1 < len_l && (l[i + 1] == "+" || l[i + 1] == "-") {
821                     // GMT+3
822                     // According to dateutil docs - reverse the size, as GMT+3 means
823                     // "my time +3 is GMT" not "GMT +3 is my time"
824 
825                     // TODO: Is there a better way of in-place modifying a vector?
826                     let item = if l[i + 1] == "+" {
827                         "-".to_owned()
828                     } else {
829                         "+".to_owned()
830                     };
831                     l[i + 1] = item;
832 
833                     res.tzoffset = None;
834 
835                     if self.info.utczone_index(&tzname) {
836                         res.tzname = None;
837                     }
838                 }
839             } else if res.hour.is_some() && (l[i] == "+" || l[i] == "-") {
840                 let signal = if l[i] == "+" { 1 } else { -1 };
841                 let len_li = l[i].len();
842 
843                 let mut hour_offset: Option<i32> = None;
844                 let mut min_offset: Option<i32> = None;
845 
846                 // TODO: check that l[i + 1] is integer?
847                 if len_li == 4 {
848                     // -0300
849                     hour_offset = Some(l[i + 1][..2].parse::<i32>()?);
850                     min_offset = Some(l[i + 1][2..4].parse::<i32>()?);
851                 } else if i + 2 < len_l && l[i + 2] == ":" {
852                     // -03:00
853                     hour_offset = Some(l[i + 1].parse::<i32>()?);
854                     min_offset = Some(l[i + 3].parse::<i32>()?);
855                     i += 2;
856                 } else if len_li <= 2 {
857                     // -[0]3
858                     let range_len = min(l[i + 1].len(), 2);
859                     hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
860                     min_offset = Some(0);
861                 }
862 
863                 res.tzoffset =
864                     Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
865 
866                 let tzname = res.tzname.clone();
867                 if i + 5 < len_l
868                     && self.info.jump_index(&l[i + 2])
869                     && l[i + 3] == "("
870                     && l[i + 5] == ")"
871                     && 3 <= l[i + 4].len()
872                     && self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
873                 {
874                     // (GMT)
875                     res.tzname = Some(l[i + 4].clone());
876                     i += 4;
877                 }
878 
879                 i += 1;
880             } else if !(self.info.jump_index(&l[i]) || fuzzy) {
881                 return Err(ParseError::UnrecognizedToken(l[i].clone()));
882             } else {
883                 skipped_idxs.push(i);
884             }
885 
886             i += 1;
887         }
888 
889         let (year, month, day) = ymd.resolve_ymd(yearfirst, dayfirst)?;
890 
891         res.century_specified = ymd.century_specified;
892         res.year = year;
893         res.month = month;
894         res.day = day;
895 
896         if !self.info.validate(&mut res) {
897             Err(ParseError::UnrecognizedFormat)
898         } else if fuzzy_with_tokens {
899             let skipped_tokens = self.recombine_skipped(skipped_idxs, l);
900             Ok((res, Some(skipped_tokens)))
901         } else {
902             Ok((res, None))
903         }
904     }
905 
could_be_tzname( &self, hour: Option<i32>, tzname: &Option<String>, tzoffset: Option<i32>, token: &str, ) -> bool906     fn could_be_tzname(
907         &self,
908         hour: Option<i32>,
909         tzname: &Option<String>,
910         tzoffset: Option<i32>,
911         token: &str,
912     ) -> bool {
913         let all_ascii_upper = token
914             .chars()
915             .all(|c| 65u8 as char <= c && c <= 90u8 as char);
916 
917         hour.is_some()
918             && tzname.is_none()
919             && tzoffset.is_none()
920             && token.len() <= 5
921             && (all_ascii_upper || self.info.utczone.contains_key(token))
922     }
923 
924     #[allow(clippy::unnecessary_unwrap)]
ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool>925     fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> {
926         let mut val_is_ampm = !(fuzzy && ampm.is_some());
927 
928         if hour.is_none() {
929             if fuzzy {
930                 val_is_ampm = false;
931             } else {
932                 return Err(ParseError::AmPmWithoutHour);
933             }
934         } else if !(0 <= hour.unwrap() && hour.unwrap() <= 12) {
935             if fuzzy {
936                 val_is_ampm = false;
937             } else {
938                 return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
939             }
940         }
941 
942         Ok(val_is_ampm)
943     }
944 
build_naive( &self, res: &ParsingResult, default: &NaiveDateTime, ) -> ParseResult<NaiveDateTime>945     fn build_naive(
946         &self,
947         res: &ParsingResult,
948         default: &NaiveDateTime,
949     ) -> ParseResult<NaiveDateTime> {
950         let y = res.year.unwrap_or_else(|| default.year());
951         let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
952 
953         let d_offset = if res.weekday.is_some() && res.day.is_none() {
954             let dow = day_of_week(y as u32, m, default.day())?;
955 
956             // UNWRAP: We've already check res.weekday() is some
957             let actual_weekday = (res.weekday.unwrap() + 1) % 7;
958             let other = DayOfWeek::from_numeral(actual_weekday as u32);
959             Duration::days(i64::from(dow.difference(&other)))
960         } else {
961             Duration::days(0)
962         };
963 
964         // TODO: Change month/day to u32
965         let d = NaiveDate::from_ymd_opt(
966             y,
967             m,
968             min(
969                 res.day.unwrap_or(default.day() as i32) as u32,
970                 days_in_month(y, m as i32)?,
971             ),
972         )
973         .ok_or_else(|| ParseError::ImpossibleTimestamp("Invalid date range given"))?;
974 
975         let d = d + d_offset;
976 
977         let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
978         let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
979         let second = res.second.unwrap_or(default.second() as i32) as u32;
980         let microsecond = res
981             .microsecond
982             .unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
983         let t =
984             NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
985                 if hour >= 24 {
986                     ParseError::ImpossibleTimestamp("Invalid hour")
987                 } else if minute >= 60 {
988                     ParseError::ImpossibleTimestamp("Invalid minute")
989                 } else if second >= 60 {
990                     ParseError::ImpossibleTimestamp("Invalid second")
991                 } else if microsecond >= 2_000_000 {
992                     ParseError::ImpossibleTimestamp("Invalid microsecond")
993                 } else {
994                     unreachable!();
995                 }
996             })?;
997 
998         Ok(NaiveDateTime::new(d, t))
999     }
1000 
build_tzaware( &self, _dt: &NaiveDateTime, res: &ParsingResult, tzinfos: &HashMap<String, i32>, ) -> ParseResult<Option<FixedOffset>>1001     fn build_tzaware(
1002         &self,
1003         _dt: &NaiveDateTime,
1004         res: &ParsingResult,
1005         tzinfos: &HashMap<String, i32>,
1006     ) -> ParseResult<Option<FixedOffset>> {
1007         if let Some(offset) = res.tzoffset {
1008             Ok(Some(FixedOffset::east(offset)))
1009         } else if res.tzoffset == None
1010             && (res.tzname == Some(" ".to_owned())
1011                 || res.tzname == Some(".".to_owned())
1012                 || res.tzname == Some("-".to_owned())
1013                 || res.tzname == None)
1014         {
1015             Ok(None)
1016         } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
1017             Ok(Some(FixedOffset::east(
1018                 *tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
1019             )))
1020         } else if let Some(tzname) = res.tzname.as_ref() {
1021             println!("tzname {} identified but not understood.", tzname);
1022             Ok(None)
1023         } else {
1024             Err(ParseError::TimezoneUnsupported)
1025         }
1026     }
1027 
1028     #[allow(clippy::unnecessary_unwrap)]
parse_numeric_token( &self, tokens: &[String], idx: usize, info: &ParserInfo, ymd: &mut YMD, res: &mut ParsingResult, fuzzy: bool, ) -> ParseResult<usize>1029     fn parse_numeric_token(
1030         &self,
1031         tokens: &[String],
1032         idx: usize,
1033         info: &ParserInfo,
1034         ymd: &mut YMD,
1035         res: &mut ParsingResult,
1036         fuzzy: bool,
1037     ) -> ParseResult<usize> {
1038         let mut idx = idx;
1039         let value_repr = &tokens[idx];
1040         let mut value = Decimal::from_str(&value_repr).unwrap();
1041 
1042         let len_li = value_repr.len();
1043         let len_l = tokens.len();
1044 
1045         // TODO: I miss the `x in y` syntax
1046         // TODO: Decompose this logic a bit
1047         if ymd.len() == 3
1048             && (len_li == 2 || len_li == 4)
1049             && res.hour.is_none()
1050             && (idx + 1 >= len_l
1051                 || (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
1052         {
1053             // 1990101T32[59]
1054             let s = &tokens[idx];
1055             res.hour = s[0..2].parse::<i32>().ok();
1056 
1057             if len_li == 4 {
1058                 res.minute = Some(s[2..4].parse::<i32>()?)
1059             }
1060         } else if len_li == 6 || (len_li > 6 && tokens[idx].find('.') == Some(6)) {
1061             // YYMMDD or HHMMSS[.ss]
1062             let s = &tokens[idx];
1063 
1064             if ymd.len() == 0 && tokens[idx].find('.') == None {
1065                 ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?;
1066                 ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?;
1067                 ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
1068             } else {
1069                 // 19990101T235959[.59]
1070                 res.hour = s[0..2].parse::<i32>().ok();
1071                 res.minute = s[2..4].parse::<i32>().ok();
1072 
1073                 let t = self.parsems(&s[4..])?;
1074                 res.second = Some(t.0);
1075                 res.microsecond = Some(t.1);
1076             }
1077         } else if vec![8, 12, 14].contains(&len_li) {
1078             // YYMMDD
1079             let s = &tokens[idx];
1080             ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?;
1081             ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
1082             ymd.append(s[6..8].parse::<i32>()?, &s[6..8], None)?;
1083 
1084             if len_li > 8 {
1085                 res.hour = Some(s[8..10].parse::<i32>()?);
1086                 res.minute = Some(s[10..12].parse::<i32>()?);
1087 
1088                 if len_li > 12 {
1089                     res.second = Some(s[12..].parse::<i32>()?);
1090                 }
1091             }
1092         } else if let Some(hms_idx) = self.find_hms_index(idx, tokens, info, true) {
1093             // HH[ ]h or MM[ ]m or SS[.ss][ ]s
1094             let (new_idx, hms) = self.parse_hms(idx, tokens, info, Some(hms_idx));
1095             if hms.is_some() {
1096                 // TODO: This unwrap is unjustified.
1097                 self.assign_hms(res, value_repr, hms.unwrap());
1098             }
1099             idx = new_idx;
1100         } else if idx + 2 < len_l && tokens[idx + 1] == ":" {
1101             // HH:MM[:SS[.ss]]
1102             // TODO: Better story around Decimal handling
1103             res.hour = Some(value.floor().to_i64().unwrap() as i32);
1104             // TODO: Rescope `value` here?
1105             value = self.to_decimal(&tokens[idx + 2]);
1106             let min_sec = self.parse_min_sec(value);
1107             res.minute = Some(min_sec.0);
1108             res.second = min_sec.1;
1109 
1110             if idx + 4 < len_l && tokens[idx + 3] == ":" {
1111                 // TODO: (x, y) = (a, b) syntax?
1112                 let ms = self.parsems(&tokens[idx + 4]).unwrap();
1113                 res.second = Some(ms.0);
1114                 res.microsecond = Some(ms.1);
1115 
1116                 idx += 2;
1117             }
1118             idx += 2;
1119         } else if idx + 1 < len_l
1120             && (tokens[idx + 1] == "-" || tokens[idx + 1] == "/" || tokens[idx + 1] == ".")
1121         {
1122             // TODO: There's got to be a better way of handling the condition above
1123             let sep = &tokens[idx + 1];
1124             ymd.append(value_repr.parse::<i32>()?, &value_repr, None)?;
1125 
1126             if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) {
1127                 if let Ok(val) = tokens[idx + 2].parse::<i32>() {
1128                     ymd.append(val, &tokens[idx + 2], None)?;
1129                 } else if let Some(val) = info.month_index(&tokens[idx + 2]) {
1130                     ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month))?;
1131                 }
1132 
1133                 if idx + 3 < len_l && &tokens[idx + 3] == sep {
1134                     if let Some(value) = info.month_index(&tokens[idx + 4]) {
1135                         ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
1136                     } else if let Ok(val) = tokens[idx + 4].parse::<i32>() {
1137                         ymd.append(val, &tokens[idx + 4], None)?;
1138                     } else {
1139                         return Err(ParseError::UnrecognizedFormat);
1140                     }
1141 
1142                     idx += 2;
1143                 }
1144 
1145                 idx += 1;
1146             }
1147 
1148             idx += 1
1149         } else if idx + 1 >= len_l || info.jump_index(&tokens[idx + 1]) {
1150             if idx + 2 < len_l && info.ampm_index(&tokens[idx + 2]).is_some() {
1151                 let hour = value.to_i64().unwrap() as i32;
1152                 let ampm = info.ampm_index(&tokens[idx + 2]).unwrap();
1153                 res.hour = Some(self.adjust_ampm(hour, ampm));
1154                 idx += 1;
1155             } else {
1156                 ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None)?;
1157             }
1158 
1159             idx += 1;
1160         } else if info.ampm_index(&tokens[idx + 1]).is_some()
1161             && (*ZERO <= value && value < *TWENTY_FOUR)
1162         {
1163             // 12am
1164             let hour = value.to_i64().unwrap() as i32;
1165             res.hour = Some(self.adjust_ampm(hour, info.ampm_index(&tokens[idx + 1]).unwrap()));
1166             idx += 1;
1167         } else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
1168             ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
1169         } else if !fuzzy {
1170             return Err(ParseError::UnrecognizedFormat);
1171         }
1172 
1173         Ok(idx)
1174     }
1175 
adjust_ampm(&self, hour: i32, ampm: bool) -> i321176     fn adjust_ampm(&self, hour: i32, ampm: bool) -> i32 {
1177         if hour < 12 && ampm {
1178             hour + 12
1179         } else if hour == 12 && !ampm {
1180             0
1181         } else {
1182             hour
1183         }
1184     }
1185 
parsems(&self, seconds_str: &str) -> ParseResult<(i32, i32)>1186     fn parsems(&self, seconds_str: &str) -> ParseResult<(i32, i32)> {
1187         if seconds_str.contains('.') {
1188             let split: Vec<&str> = seconds_str.split('.').collect();
1189             let (i, f): (&str, &str) = (split[0], split[1]);
1190 
1191             let i_parse = i.parse::<i32>()?;
1192             let f_parse = ljust(f, 6, '0').parse::<i32>()?;
1193             Ok((i_parse, f_parse))
1194         } else {
1195             Ok((seconds_str.parse::<i32>()?, 0))
1196         }
1197     }
1198 
find_hms_index( &self, idx: usize, tokens: &[String], info: &ParserInfo, allow_jump: bool, ) -> Option<usize>1199     fn find_hms_index(
1200         &self,
1201         idx: usize,
1202         tokens: &[String],
1203         info: &ParserInfo,
1204         allow_jump: bool,
1205     ) -> Option<usize> {
1206         let len_l = tokens.len();
1207         let mut hms_idx = None;
1208 
1209         // There's a super weird edge case that can happen
1210         // because Python safely handles negative array indices,
1211         // and Rust (because of usize) does not.
1212         let idx_minus_two = if idx == 1 && len_l > 0 {
1213             len_l - 1
1214         } else if idx == 0 && len_l > 1 {
1215             len_l - 2
1216         } else if idx > 1 {
1217             idx - 2
1218         } else if len_l == 0 {
1219             panic!("Attempting to find_hms_index() wih no tokens.");
1220         } else {
1221             0
1222         };
1223 
1224         if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
1225             hms_idx = Some(idx + 1)
1226         } else if allow_jump
1227             && idx + 2 < len_l
1228             && tokens[idx + 1] == " "
1229             && info.hms_index(&tokens[idx + 2]).is_some()
1230         {
1231             hms_idx = Some(idx + 2)
1232         } else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
1233             hms_idx = Some(idx - 1)
1234         } else if len_l > 0
1235             && idx > 0
1236             && idx == len_l - 1
1237             && tokens[idx - 1] == " "
1238             && info.hms_index(&tokens[idx_minus_two]).is_some()
1239         {
1240             hms_idx = Some(idx - 2)
1241         }
1242 
1243         hms_idx
1244     }
1245 
1246     #[allow(clippy::unnecessary_unwrap)]
parse_hms( &self, idx: usize, tokens: &[String], info: &ParserInfo, hms_index: Option<usize>, ) -> (usize, Option<usize>)1247     fn parse_hms(
1248         &self,
1249         idx: usize,
1250         tokens: &[String],
1251         info: &ParserInfo,
1252         hms_index: Option<usize>,
1253     ) -> (usize, Option<usize>) {
1254         if hms_index.is_none() {
1255             (idx, None)
1256         } else if hms_index.unwrap() > idx {
1257             (
1258                 hms_index.unwrap(),
1259                 info.hms_index(&tokens[hms_index.unwrap()]),
1260             )
1261         } else {
1262             (
1263                 idx,
1264                 info.hms_index(&tokens[hms_index.unwrap()]).map(|u| u + 1),
1265             )
1266         }
1267     }
1268 
assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize)1269     fn assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize) {
1270         let value = self.to_decimal(value_repr);
1271 
1272         if hms == 0 {
1273             res.hour = Some(value.to_i64().unwrap() as i32);
1274             if !close_to_integer(&value) {
1275                 res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32);
1276             }
1277         } else if hms == 1 {
1278             let (min, sec) = self.parse_min_sec(value);
1279             res.minute = Some(min);
1280             res.second = sec;
1281         } else if hms == 2 {
1282             let (sec, micro) = self.parsems(value_repr).unwrap();
1283             res.second = Some(sec);
1284             res.microsecond = Some(micro);
1285         }
1286     }
1287 
to_decimal(&self, value: &str) -> Decimal1288     fn to_decimal(&self, value: &str) -> Decimal {
1289         // TODO: Justify unwrap
1290         Decimal::from_str(value).unwrap()
1291     }
1292 
parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>)1293     fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) {
1294         // UNWRAP: i64 guaranteed to be fine because of preceding floor
1295         let minute = value.floor().to_i64().unwrap() as i32;
1296         let mut second = None;
1297 
1298         let sec_remainder = value - value.floor();
1299         if sec_remainder != *ZERO {
1300             second = Some((*SIXTY * sec_remainder).floor().to_i64().unwrap() as i32);
1301         }
1302 
1303         (minute, second)
1304     }
1305 
recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String>1306     fn recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String> {
1307         let mut skipped_tokens: Vec<String> = vec![];
1308 
1309         let mut sorted_idxs = skipped_idxs.clone();
1310         sorted_idxs.sort();
1311 
1312         for (i, idx) in sorted_idxs.iter().enumerate() {
1313             if i > 0 && idx - 1 == skipped_idxs[i - 1] {
1314                 // UNWRAP: Having an initial value and unconditional push at end guarantees value
1315                 let mut t = skipped_tokens.pop().unwrap();
1316                 t.push_str(tokens[*idx].as_ref());
1317                 skipped_tokens.push(t);
1318             } else {
1319                 skipped_tokens.push(tokens[*idx].to_owned());
1320             }
1321         }
1322 
1323         skipped_tokens
1324     }
1325 }
1326 
close_to_integer(value: &Decimal) -> bool1327 fn close_to_integer(value: &Decimal) -> bool {
1328     value % *ONE == *ZERO
1329 }
1330 
/// Force `s` to be exactly `chars` characters long: longer input is cut to
/// its first `chars` characters, shorter input is right-padded with `replace`.
///
/// Length is measured in characters rather than bytes, so multi-byte input is
/// never sliced in the middle of a character.
fn ljust(s: &str, chars: usize, replace: char) -> String {
    let mut out: String = s.chars().take(chars).collect();

    // `take` guarantees at most `chars` characters, so this cannot underflow.
    let missing = chars - out.chars().count();
    out.extend(std::iter::repeat(replace).take(missing));

    out
}
1338 
1339 /// Main entry point for using `dtparse`. The parse function is responsible for
1340 /// taking in a string representing some time value, and turning it into
1341 /// a timestamp with optional timezone information if it can be identified.
1342 ///
1343 /// The default implementation assumes English values for names of months,
1344 /// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()`
parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)>1345 pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
1346     let res = DEFAULT_PARSER.parse(
1347         timestr,
1348         None,
1349         None,
1350         false,
1351         false,
1352         None,
1353         false,
1354         &HashMap::new(),
1355     )?;
1356 
1357     Ok((res.0, res.1))
1358 }
1359