1 #![deny(missing_docs)]
2 #![cfg_attr(test, allow(unknown_lints))]
3 #![cfg_attr(test, deny(warnings))]
4
5 //! # dtparse
6 //! The fully-featured "even I couldn't understand that" time parser.
7 //! Designed to take in strings and give back sensible dates and times.
8 //!
//! dtparse has its foundations in the [`dateutil`][dateutil] library for
10 //! Python, which excels at taking "interesting" strings and trying to make
11 //! sense of the dates and times they contain. A couple of quick examples
12 //! from the test cases should give some context:
13 //!
14 //! ```rust,ignore (tests-dont-compile-on-old-rust)
15 //! # extern crate chrono;
16 //! # extern crate dtparse;
17 //! use chrono::prelude::*;
18 //! use dtparse::parse;
19 //!
20 //! assert_eq!(
21 //! parse("2008.12.30"),
22 //! Ok((NaiveDate::from_ymd(2008, 12, 30).and_hms(0, 0, 0), None))
23 //! );
24 //!
25 //! // It can even handle timezones!
26 //! assert_eq!(
27 //! parse("January 4, 2024; 18:30:04 +02:00"),
28 //! Ok((
29 //! NaiveDate::from_ymd(2024, 1, 4).and_hms(18, 30, 4),
30 //! Some(FixedOffset::east(7200))
31 //! ))
32 //! );
33 //! ```
34 //!
35 //! And we can even handle fuzzy strings where dates/times aren't the
36 //! only content if we dig into the implementation a bit!
37 //!
38 //! ```rust,ignore (tests-dont-compile-on-old-rust)
39 //! # extern crate chrono;
40 //! # extern crate dtparse;
41 //! use chrono::prelude::*;
42 //! use dtparse::Parser;
43 //! # use std::collections::HashMap;
44 //!
45 //! let mut p = Parser::default();
46 //! assert_eq!(
47 //! p.parse(
48 //! "I first released this library on the 17th of June, 2018.",
49 //! None, None,
50 //! true /* turns on fuzzy mode */,
51 //! true /* gives us the tokens that weren't recognized */,
52 //! None, false, &HashMap::new()
53 //! ),
54 //! Ok((
55 //! NaiveDate::from_ymd(2018, 6, 17).and_hms(0, 0, 0),
56 //! None,
57 //! Some(vec!["I first released this library on the ",
58 //! " of ", ", "].iter().map(|&s| s.into()).collect())
59 //! ))
60 //! );
61 //! ```
62 //!
63 //! Further examples can be found in the `examples` directory on international usage.
64 //!
65 //! # Usage
66 //!
67 //! `dtparse` requires a minimum Rust version of 1.28 to build, but is tested on Windows, OSX,
//! BSD, Linux, and WASM. The build is also compiled against the iOS and Android SDKs, but is not
69 //! tested against them.
70 //!
71 //! [dateutil]: https://github.com/dateutil/dateutil
72
73 #[macro_use]
74 extern crate lazy_static;
75
76 extern crate chrono;
77 extern crate chrono_tz;
78 extern crate num_traits;
79 extern crate rust_decimal;
80
81 use chrono::Datelike;
82 use chrono::Duration;
83 use chrono::FixedOffset;
84 use chrono::Local;
85 use chrono::NaiveDate;
86 use chrono::NaiveDateTime;
87 use chrono::NaiveTime;
88 use chrono::Timelike;
89 use num_traits::cast::ToPrimitive;
90 use rust_decimal::Decimal;
91 use rust_decimal::Error as DecimalError;
92 use std::cmp::min;
93 use std::collections::HashMap;
94 use std::error::Error;
95 use std::fmt;
96 use std::num::ParseIntError;
97 use std::str::FromStr;
98 use std::vec::Vec;
99
100 mod tokenize;
101 mod weekday;
102
103 #[cfg(test)]
104 mod tests;
105
106 use tokenize::Tokenizer;
107 use weekday::day_of_week;
108 use weekday::DayOfWeek;
109
// Process-wide constants. `lazy_static!` constructs each value on first access,
// so none of these cost anything until they are actually used.
lazy_static! {
    // Decimal constants built with `Decimal::new(mantissa, scale)` and a scale of 0.
    static ref ZERO: Decimal = Decimal::new(0, 0);
    static ref ONE: Decimal = Decimal::new(1, 0);
    static ref TWENTY_FOUR: Decimal = Decimal::new(24, 0);
    static ref SIXTY: Decimal = Decimal::new(60, 0);
    // A parser built from `Parser::default()`, i.e. using `ParserInfo::default()`.
    static ref DEFAULT_PARSER: Parser = Parser::default();
}
117
118 impl From<DecimalError> for ParseError {
from(err: DecimalError) -> Self119 fn from(err: DecimalError) -> Self {
120 ParseError::InvalidNumeric(format!("{}", err))
121 }
122 }
123
124 impl From<ParseIntError> for ParseError {
from(err: ParseIntError) -> Self125 fn from(err: ParseIntError) -> Self {
126 ParseError::InvalidNumeric(format!("{}", err))
127 }
128 }
129
/// Potential errors that come up when trying to parse time strings.
///
/// Variants carrying a `&'static str` or `String` hold a short message naming
/// the offending field or token.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// Attempted to specify "AM" or "PM" without indicating an hour
    AmPmWithoutHour,
    /// Impossible value for a category; the 32nd day of a month is impossible.
    /// The message names the field that was rejected (e.g. "Invalid month").
    ImpossibleTimestamp(&'static str),
    /// Unable to parse a numeric value from a token expected to be numeric.
    /// The message is the display text of the underlying integer/decimal error.
    InvalidNumeric(String),
    /// Generally unrecognized date string; please report to maintainer so
    /// new test cases can be developed
    UnrecognizedFormat,
    /// A token the parser did not recognize was in the string, and fuzzy mode was off.
    /// Carries the unrecognized token.
    UnrecognizedToken(String),
    /// A timezone could not be handled; please report to maintainer as the timestring
    /// likely exposes a bug in the implementation
    TimezoneUnsupported,
    /// Parser unable to make sense of year/month/day parameters in the time string;
    /// please report to maintainer as the timestring likely exposes a bug in implementation
    YearMonthDayError(&'static str),
    /// Parser unable to find any date/time-related content in the supplied string
    NoDate,
}
153
154 impl fmt::Display for ParseError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 write!(f, "{:?}", self)
157 }
158 }
159
// `ParseError` uses the default `Error` method implementations; its `Display`
// and `Debug` impls supply the text.
impl Error for ParseError {}

// Shorthand for results whose error type is `ParseError`.
type ParseResult<I> = Result<I, ParseError>;
163
tokenize(parse_string: &str) -> Vec<String>164 pub(crate) fn tokenize(parse_string: &str) -> Vec<String> {
165 let tokenizer = Tokenizer::new(parse_string);
166 tokenizer.collect()
167 }
168
/// Utility function for `ParserInfo` that helps in constructing
/// the attributes that make up the `ParserInfo` container.
///
/// Every token is lowercased and used as a key. The value depends on the
/// shape of the input:
///
/// - exactly one inner list: each token maps to its position in that list;
/// - any other number of lists: each token maps to the index of the list
///   that contains it, so all aliases in one list share a value.
///
/// When the same lowercased token appears more than once, the last
/// occurrence wins.
pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
    let single_group = vec.len() == 1;

    vec.iter()
        .enumerate()
        .flat_map(|(group, tokens)| {
            tokens.iter().enumerate().map(move |(position, token)| {
                let index = if single_group { position } else { group };
                (token.to_lowercase(), index)
            })
        })
        .collect()
}
188
/// Container for specific tokens to be recognized during parsing.
///
/// - `jump`: Values that indicate the end of a token for parsing and can be ignored
/// - `weekday`: Names of the days of the week
/// - `months`: Names of the months
/// - `hms`: Names for the units of time - hours, minutes, seconds in English
/// - `ampm`: AM and PM tokens
/// - `utczone`: Tokens indicating a UTC-timezone string
/// - `pertain`: Tokens indicating a "belongs to" relationship; in English this is just "of"
/// - `tzoffset`: Timezone names mapped to their offset in seconds
/// - `dayfirst`: Upon encountering an ambiguous date, treat the first value as the day
/// - `yearfirst`: Upon encountering an ambiguous date, treat the first value as the year
/// - `year`: The current year
/// - `century`: The first year in the current century
///
/// All token maps are looked up with lowercased keys, so keys must be stored
/// in lowercase (as `parse_info` does).
///
/// Please note that if both `dayfirst` and `yearfirst` are true, years take precedence
/// and will be parsed as "YDM"
#[derive(Debug, PartialEq)]
pub struct ParserInfo {
    /// Tokens that can be safely ignored
    pub jump: HashMap<String, usize>,
    /// Names of all seven weekdays
    pub weekday: HashMap<String, usize>,
    /// Names of all twelve months, mapped to a zero-based month index
    pub months: HashMap<String, usize>,
    /// Tokens to indicate a value is in units of hours, minutes, or seconds
    pub hms: HashMap<String, usize>,
    /// Tokens to indicate a value refers to AM or PM time; a value of 1 means PM
    pub ampm: HashMap<String, usize>,
    /// Tokens to indicate our timestamp is in the UTC timezone
    pub utczone: HashMap<String, usize>,
    /// Tokens to indicate values "belonging" to other tokens (e.g. 3rd *of* March)
    pub pertain: HashMap<String, usize>,
    /// Map of timezone names to their offset in seconds
    pub tzoffset: HashMap<String, usize>,
    /// For ambiguous year/month/day values, and `dayfirst` was not specified as
    /// an argument to `Parser`, treat the first observed value as the day.
    pub dayfirst: bool,
    /// For ambiguous year/month/day values, and `yearfirst` was not specified as
    /// an argument to `Parser`, treat the first observed value as the year.
    /// Takes priority over `dayfirst`
    pub yearfirst: bool,
    /// The current year we are parsing values for; used as the reference point
    /// when expanding two-digit years
    pub year: i32,
    /// The first year of the century containing `year` (`year` rounded down to a
    /// multiple of 100 in the `Default` implementation); added to two-digit years
    pub century: i32,
}
236
237 impl Default for ParserInfo {
238 /// Create a basic `ParserInfo` object suitable for parsing dates in English
default() -> Self239 fn default() -> Self {
240 let year = Local::now().year();
241 let century = year / 100 * 100;
242
243 ParserInfo {
244 jump: parse_info(vec![vec![
245 " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st",
246 "nd", "rd", "th",
247 ]]),
248 weekday: parse_info(vec![
249 vec!["Mon", "Monday"],
250 vec!["Tue", "Tues", "Tuesday"],
251 vec!["Wed", "Wednesday"],
252 vec!["Thu", "Thurs", "Thursday"],
253 vec!["Fri", "Friday"],
254 vec!["Sat", "Saturday"],
255 vec!["Sun", "Sunday"],
256 ]),
257 months: parse_info(vec![
258 vec!["Jan", "January"],
259 vec!["Feb", "February"],
260 vec!["Mar", "March"],
261 vec!["Apr", "April"],
262 vec!["May"],
263 vec!["Jun", "June"],
264 vec!["Jul", "July"],
265 vec!["Aug", "August"],
266 vec!["Sep", "Sept", "September"],
267 vec!["Oct", "October"],
268 vec!["Nov", "November"],
269 vec!["Dec", "December"],
270 ]),
271 hms: parse_info(vec![
272 vec!["h", "hour", "hours"],
273 vec!["m", "minute", "minutes"],
274 vec!["s", "second", "seconds"],
275 ]),
276 ampm: parse_info(vec![vec!["am", "a"], vec!["pm", "p"]]),
277 utczone: parse_info(vec![vec!["UTC", "GMT", "Z"]]),
278 pertain: parse_info(vec![vec!["of"]]),
279 tzoffset: parse_info(vec![vec![]]),
280 dayfirst: false,
281 yearfirst: false,
282 year,
283 century,
284 }
285 }
286 }
287
288 impl ParserInfo {
jump_index(&self, name: &str) -> bool289 fn jump_index(&self, name: &str) -> bool {
290 self.jump.contains_key(&name.to_lowercase())
291 }
292
weekday_index(&self, name: &str) -> Option<usize>293 fn weekday_index(&self, name: &str) -> Option<usize> {
294 self.weekday.get(&name.to_lowercase()).cloned()
295 }
296
month_index(&self, name: &str) -> Option<usize>297 fn month_index(&self, name: &str) -> Option<usize> {
298 self.months.get(&name.to_lowercase()).map(|u| u + 1)
299 }
300
hms_index(&self, name: &str) -> Option<usize>301 fn hms_index(&self, name: &str) -> Option<usize> {
302 self.hms.get(&name.to_lowercase()).cloned()
303 }
304
ampm_index(&self, name: &str) -> Option<bool>305 fn ampm_index(&self, name: &str) -> Option<bool> {
306 if let Some(v) = self.ampm.get(&name.to_lowercase()) {
307 // Python technically uses numbers here, but given that the numbers are
308 // only 0 and 1, it's easier to use booleans
309 Some(*v == 1)
310 } else {
311 None
312 }
313 }
314
pertain_index(&self, name: &str) -> bool315 fn pertain_index(&self, name: &str) -> bool {
316 self.pertain.contains_key(&name.to_lowercase())
317 }
318
utczone_index(&self, name: &str) -> bool319 fn utczone_index(&self, name: &str) -> bool {
320 self.utczone.contains_key(&name.to_lowercase())
321 }
322
tzoffset_index(&self, name: &str) -> Option<usize>323 fn tzoffset_index(&self, name: &str) -> Option<usize> {
324 if self.utczone.contains_key(&name.to_lowercase()) {
325 Some(0)
326 } else {
327 self.tzoffset.get(&name.to_lowercase()).cloned()
328 }
329 }
330
convertyear(&self, year: i32, century_specified: bool) -> i32331 fn convertyear(&self, year: i32, century_specified: bool) -> i32 {
332 let mut year = year;
333
334 if year < 100 && !century_specified {
335 year += self.century;
336 if year >= self.year + 50 {
337 year -= 100;
338 } else if year < self.year - 50 {
339 year += 100
340 }
341 }
342
343 year
344 }
345
346 // TODO: Should this be moved elsewhere?
validate(&self, res: &mut ParsingResult) -> bool347 fn validate(&self, res: &mut ParsingResult) -> bool {
348 if let Some(y) = res.year {
349 res.year = Some(self.convertyear(y, res.century_specified))
350 };
351
352 if (res.tzoffset == Some(0) && res.tzname.is_none())
353 || (res.tzname == Some("Z".to_owned()) || res.tzname == Some("z".to_owned()))
354 {
355 res.tzname = Some("UTC".to_owned());
356 res.tzoffset = Some(0);
357 } else if res.tzoffset != Some(0)
358 && res.tzname.is_some()
359 && self.utczone_index(res.tzname.as_ref().unwrap())
360 {
361 res.tzoffset = Some(0);
362 }
363
364 true
365 }
366 }
367
days_in_month(year: i32, month: i32) -> Result<u32, ParseError>368 fn days_in_month(year: i32, month: i32) -> Result<u32, ParseError> {
369 let leap_year = match year % 4 {
370 0 => year % 400 != 0,
371 _ => false,
372 };
373
374 match month {
375 2 => {
376 if leap_year {
377 Ok(29)
378 } else {
379 Ok(28)
380 }
381 }
382 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31),
383 4 | 6 | 9 | 11 => Ok(30),
384 _ => Err(ParseError::ImpossibleTimestamp("Invalid month")),
385 }
386 }
387
// Identifies which date component a collected value is known to be.
// Used as the key when mapping components to positions in `YMD::_ymd`.
#[derive(Debug, Hash, PartialEq, Eq)]
enum YMDLabel {
    Year,
    Month,
    Day,
}
394
// Accumulates the numeric date components found in a time string, in the order
// they were seen, together with what is known about which one is which.
#[derive(Debug, Default)]
struct YMD {
    // Collected values, in encounter order.
    _ymd: Vec<i32>, // TODO: This seems like a super weird way to store things
    // Set once a value is recognised as a full (non-abbreviated) year.
    century_specified: bool,
    // Position in `_ymd` of the value known to be the day, if any.
    dstridx: Option<usize>,
    // Position in `_ymd` of the value known to be the month, if any.
    mstridx: Option<usize>,
    // Position in `_ymd` of the value known to be the year, if any.
    ystridx: Option<usize>,
}
403
404 impl YMD {
len(&self) -> usize405 fn len(&self) -> usize {
406 self._ymd.len()
407 }
408
could_be_day(&self, val: i32) -> bool409 fn could_be_day(&self, val: i32) -> bool {
410 if self.dstridx.is_some() {
411 false
412 } else if self.mstridx.is_none() {
413 (1 <= val) && (val <= 31)
414 } else if self.ystridx.is_none() {
415 // UNWRAP: Earlier condition catches mstridx missing
416 let month = self._ymd[self.mstridx.unwrap()];
417 1 <= val && (val <= days_in_month(2000, month).unwrap() as i32)
418 } else {
419 // UNWRAP: Earlier conditions prevent us from unsafely unwrapping
420 let month = self._ymd[self.mstridx.unwrap()];
421 let year = self._ymd[self.ystridx.unwrap()];
422 1 <= val && (val <= days_in_month(year, month).unwrap() as i32)
423 }
424 }
425
append(&mut self, val: i32, token: &str, label: Option<YMDLabel>) -> ParseResult<()>426 fn append(&mut self, val: i32, token: &str, label: Option<YMDLabel>) -> ParseResult<()> {
427 let mut label = label;
428
429 // Python auto-detects strings using the '__len__' function here.
430 // We instead take in both and handle as necessary.
431 if Decimal::from_str(token).is_ok() && token.len() > 2 {
432 self.century_specified = true;
433 match label {
434 None | Some(YMDLabel::Year) => label = Some(YMDLabel::Year),
435 Some(YMDLabel::Month) => {
436 return Err(ParseError::ImpossibleTimestamp("Invalid month"))
437 }
438 Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
439 }
440 }
441
442 if val > 100 {
443 self.century_specified = true;
444 match label {
445 None => label = Some(YMDLabel::Year),
446 Some(YMDLabel::Year) => (),
447 Some(YMDLabel::Month) => {
448 return Err(ParseError::ImpossibleTimestamp("Invalid month"))
449 }
450 Some(YMDLabel::Day) => return Err(ParseError::ImpossibleTimestamp("Invalid day")),
451 }
452 }
453
454 self._ymd.push(val);
455
456 match label {
457 Some(YMDLabel::Month) => {
458 if self.mstridx.is_some() {
459 Err(ParseError::YearMonthDayError("Month already set"))
460 } else {
461 self.mstridx = Some(self._ymd.len() - 1);
462 Ok(())
463 }
464 }
465 Some(YMDLabel::Day) => {
466 if self.dstridx.is_some() {
467 Err(ParseError::YearMonthDayError("Day already set"))
468 } else {
469 self.dstridx = Some(self._ymd.len() - 1);
470 Ok(())
471 }
472 }
473 Some(YMDLabel::Year) => {
474 if self.ystridx.is_some() {
475 Err(ParseError::YearMonthDayError("Year already set"))
476 } else {
477 self.ystridx = Some(self._ymd.len() - 1);
478 Ok(())
479 }
480 }
481 None => Ok(()),
482 }
483 }
484
resolve_from_stridxs( &mut self, strids: &mut HashMap<YMDLabel, usize>, ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)>485 fn resolve_from_stridxs(
486 &mut self,
487 strids: &mut HashMap<YMDLabel, usize>,
488 ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)> {
489 if self._ymd.len() == 3 && strids.len() == 2 {
490 let missing_key = if !strids.contains_key(&YMDLabel::Year) {
491 YMDLabel::Year
492 } else if !strids.contains_key(&YMDLabel::Month) {
493 YMDLabel::Month
494 } else {
495 YMDLabel::Day
496 };
497
498 let strids_vals: Vec<usize> = strids.values().cloned().collect();
499 let missing_val = if !strids_vals.contains(&0) {
500 0
501 } else if !strids_vals.contains(&1) {
502 1
503 } else {
504 2
505 };
506
507 strids.insert(missing_key, missing_val);
508 }
509
510 if self._ymd.len() != strids.len() {
511 return Err(ParseError::YearMonthDayError(
512 "Tried to resolve year, month, and day without enough information",
513 ));
514 }
515
516 Ok((
517 strids.get(&YMDLabel::Year).map(|i| self._ymd[*i]),
518 strids.get(&YMDLabel::Month).map(|i| self._ymd[*i]),
519 strids.get(&YMDLabel::Day).map(|i| self._ymd[*i]),
520 ))
521 }
522
523 #[allow(clippy::needless_return)]
resolve_ymd( &mut self, yearfirst: bool, dayfirst: bool, ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)>524 fn resolve_ymd(
525 &mut self,
526 yearfirst: bool,
527 dayfirst: bool,
528 ) -> ParseResult<(Option<i32>, Option<i32>, Option<i32>)> {
529 let len_ymd = self._ymd.len();
530
531 let mut strids: HashMap<YMDLabel, usize> = HashMap::new();
532 self.ystridx.map(|u| strids.insert(YMDLabel::Year, u));
533 self.mstridx.map(|u| strids.insert(YMDLabel::Month, u));
534 self.dstridx.map(|u| strids.insert(YMDLabel::Day, u));
535
536 // TODO: More Rustiomatic way of doing this?
537 if len_ymd == strids.len() && !strids.is_empty() || (len_ymd == 3 && strids.len() == 2) {
538 return self.resolve_from_stridxs(&mut strids);
539 };
540
541 // Received year, month, day, and ???
542 if len_ymd > 3 {
543 return Err(ParseError::YearMonthDayError(
544 "Received extra tokens in resolving year, month, and day",
545 ));
546 }
547
548 match (len_ymd, self.mstridx) {
549 (1, Some(val)) | (2, Some(val)) => {
550 let other = if len_ymd == 1 {
551 self._ymd[0]
552 } else {
553 self._ymd[1 - val]
554 };
555 if other > 31 {
556 return Ok((Some(other), Some(self._ymd[val]), None));
557 }
558 return Ok((None, Some(self._ymd[val]), Some(other)));
559 }
560 (2, None) => {
561 if self._ymd[0] > 31 {
562 return Ok((Some(self._ymd[0]), Some(self._ymd[1]), None));
563 }
564 if self._ymd[1] > 31 {
565 return Ok((Some(self._ymd[1]), Some(self._ymd[0]), None));
566 }
567 if dayfirst && self._ymd[1] <= 12 {
568 return Ok((None, Some(self._ymd[1]), Some(self._ymd[0])));
569 }
570 return Ok((None, Some(self._ymd[0]), Some(self._ymd[1])));
571 }
572 (3, Some(0)) => {
573 if self._ymd[1] > 31 {
574 return Ok((Some(self._ymd[1]), Some(self._ymd[0]), Some(self._ymd[2])));
575 }
576 return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
577 }
578 (3, Some(1)) => {
579 if self._ymd[0] > 31 || (yearfirst && self._ymd[2] <= 31) {
580 return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
581 }
582 return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
583 }
584 (3, Some(2)) => {
585 // It was in the original docs, so: WTF!?
586 if self._ymd[1] > 31 {
587 return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
588 }
589 return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
590 }
591 (3, None) => {
592 if self._ymd[0] > 31
593 || self.ystridx == Some(0)
594 || (yearfirst && self._ymd[1] <= 12 && self._ymd[2] <= 31)
595 {
596 if dayfirst && self._ymd[2] <= 12 {
597 return Ok((Some(self._ymd[0]), Some(self._ymd[2]), Some(self._ymd[1])));
598 }
599 return Ok((Some(self._ymd[0]), Some(self._ymd[1]), Some(self._ymd[2])));
600 } else if self._ymd[0] > 12 || (dayfirst && self._ymd[1] <= 12) {
601 return Ok((Some(self._ymd[2]), Some(self._ymd[1]), Some(self._ymd[0])));
602 }
603 return Ok((Some(self._ymd[2]), Some(self._ymd[0]), Some(self._ymd[1])));
604 }
605 (_, _) => {
606 return Ok((None, None, None));
607 }
608 }
609 }
610 }
611
// Everything the parser managed to extract from a time string. Each field is
// `None` until a matching token is found.
#[derive(Default, Debug, PartialEq)]
struct ParsingResult {
    year: Option<i32>,
    month: Option<i32>,
    day: Option<i32>,
    // Index returned by `ParserInfo::weekday_index` (0 is Monday in the default tables).
    weekday: Option<usize>,
    hour: Option<i32>,
    minute: Option<i32>,
    second: Option<i32>,
    microsecond: Option<i32>,
    // Timezone name as it appeared in the input ("UTC" after normalisation of "Z").
    tzname: Option<String>,
    // Timezone offset in seconds; positive values are east of UTC.
    tzoffset: Option<i32>,
    // `Some(true)` for PM, `Some(false)` for AM.
    ampm: Option<bool>,
    // Copied from `YMD::century_specified` once year/month/day are resolved.
    century_specified: bool,
    // NOTE(review): not written by any code visible in this part of the file — confirm usage.
    any_unused_tokens: Vec<String>,
}
628
// Evaluates to 1 when the given `Option` holds a value and to 0 otherwise.
// The wildcard pattern inspects the option without moving out of it.
macro_rules! option_len {
    ($o:expr) => {{
        match $o {
            Some(_) => 1,
            None => 0,
        }
    }};
}
638
639 impl ParsingResult {
len(&self) -> usize640 fn len(&self) -> usize {
641 option_len!(self.year)
642 + option_len!(self.month)
643 + option_len!(self.day)
644 + option_len!(self.weekday)
645 + option_len!(self.hour)
646 + option_len!(self.minute)
647 + option_len!(self.second)
648 + option_len!(self.microsecond)
649 + option_len!(self.tzname)
650 + option_len!(self.ampm)
651 }
652 }
653
/// Parser is responsible for doing the actual work of understanding a time string.
/// The root level `parse` function is responsible for constructing a default `Parser`
/// and triggering its behavior.
///
/// A `Parser` only holds the `ParserInfo` token tables it was built with; the
/// derived `Default` uses `ParserInfo::default()`.
#[derive(Default)]
pub struct Parser {
    // Token tables and date-ordering preferences used by every parse call.
    info: ParserInfo,
}
661
662 impl Parser {
663 /// Create a new `Parser` instance using the provided `ParserInfo`.
664 ///
665 /// This method allows you to set up a parser to handle different
666 /// names for days of the week, months, etc., enabling customization
667 /// for different languages or extra values.
new(info: ParserInfo) -> Self668 pub fn new(info: ParserInfo) -> Self {
669 Parser { info }
670 }
671
672 /// Main method to trigger parsing of a string using the previously-provided
673 /// parser information. Returns a naive timestamp along with timezone and
674 /// unused tokens if available.
675 ///
676 /// `dayfirst` and `yearfirst` force parser behavior in the event of ambiguous
677 /// dates. Consider the following scenarios where we parse the string '01.02.03'
678 ///
679 /// - `dayfirst=Some(true)`, `yearfirst=None`: Results in `February 2, 2003`
680 /// - `dayfirst=None`, `yearfirst=Some(true)`: Results in `February 3, 2001`
681 /// - `dayfirst=Some(true)`, `yearfirst=Some(true)`: Results in `March 2, 2001`
682 ///
683 /// `fuzzy` enables fuzzy parsing mode, allowing the parser to skip tokens if
684 /// they are unrecognized. However, the unused tokens will not be returned
685 /// unless `fuzzy_with_tokens` is set as `true`.
686 ///
687 /// `default` is the timestamp used to infer missing values, and is midnight
688 /// of the current day by default. For example, when parsing the text '2003',
689 /// we will use the current month and day as a default value, leading to a
690 /// result of 'March 3, 2003' if the function was run using a default of
691 /// March 3rd.
692 ///
693 /// `ignoretz` forces the parser to ignore timezone information even if it
694 /// is recognized in the time string
695 ///
696 /// `tzinfos` is a map of timezone names to the offset seconds. For example,
697 /// the parser would ignore the 'EST' part of the string in '10 AM EST'
698 /// unless you added a `tzinfos` map of `{"EST": "14400"}`. Please note that
699 /// timezone name support (i.e. "EST", "BRST") is not available by default
700 /// at the moment, they must be added through `tzinfos` at the moment in
701 /// order to be resolved.
702 #[allow(clippy::too_many_arguments)]
parse( &self, timestr: &str, dayfirst: Option<bool>, yearfirst: Option<bool>, fuzzy: bool, fuzzy_with_tokens: bool, default: Option<&NaiveDateTime>, ignoretz: bool, tzinfos: &HashMap<String, i32>, ) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)>703 pub fn parse(
704 &self,
705 timestr: &str,
706 dayfirst: Option<bool>,
707 yearfirst: Option<bool>,
708 fuzzy: bool,
709 fuzzy_with_tokens: bool,
710 default: Option<&NaiveDateTime>,
711 ignoretz: bool,
712 tzinfos: &HashMap<String, i32>,
713 ) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)> {
714 let default_date = default.unwrap_or(&Local::now().naive_local()).date();
715
716 let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms(0, 0, 0));
717
718 let (res, tokens) =
719 self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;
720
721 if res.len() == 0 {
722 return Err(ParseError::NoDate);
723 }
724
725 let naive = self.build_naive(&res, &default_ts)?;
726
727 if !ignoretz {
728 let offset = self.build_tzaware(&naive, &res, tzinfos)?;
729 Ok((naive, offset, tokens))
730 } else {
731 Ok((naive, None, tokens))
732 }
733 }
734
735 #[allow(clippy::cognitive_complexity)] // Imitating Python API is priority
parse_with_tokens( &self, timestr: &str, dayfirst: Option<bool>, yearfirst: Option<bool>, fuzzy: bool, fuzzy_with_tokens: bool, ) -> Result<(ParsingResult, Option<Vec<String>>), ParseError>736 fn parse_with_tokens(
737 &self,
738 timestr: &str,
739 dayfirst: Option<bool>,
740 yearfirst: Option<bool>,
741 fuzzy: bool,
742 fuzzy_with_tokens: bool,
743 ) -> Result<(ParsingResult, Option<Vec<String>>), ParseError> {
744 let fuzzy = if fuzzy_with_tokens { true } else { fuzzy };
745 // This is probably a stylistic abomination
746 let dayfirst = if let Some(dayfirst) = dayfirst {
747 dayfirst
748 } else {
749 self.info.dayfirst
750 };
751 let yearfirst = if let Some(yearfirst) = yearfirst {
752 yearfirst
753 } else {
754 self.info.yearfirst
755 };
756
757 let mut res = ParsingResult::default();
758
759 let mut l = tokenize(×tr);
760 let mut skipped_idxs: Vec<usize> = Vec::new();
761
762 let mut ymd = YMD::default();
763
764 let len_l = l.len();
765 let mut i = 0;
766
767 while i < len_l {
768 let value_repr = l[i].clone();
769
770 if let Ok(_v) = Decimal::from_str(&value_repr) {
771 i = self.parse_numeric_token(&l, i, &self.info, &mut ymd, &mut res, fuzzy)?;
772 } else if let Some(value) = self.info.weekday_index(&l[i]) {
773 res.weekday = Some(value);
774 } else if let Some(value) = self.info.month_index(&l[i]) {
775 ymd.append(value as i32, &l[i], Some(YMDLabel::Month))?;
776
777 if i + 1 < len_l {
778 if l[i + 1] == "-" || l[i + 1] == "/" {
779 // Jan-01[-99]
780 let sep = &l[i + 1];
781 // TODO: This seems like a very unsafe unwrap
782 ymd.append(l[i + 2].parse::<i32>()?, &l[i + 2], None)?;
783
784 if i + 3 < len_l && &l[i + 3] == sep {
785 // Jan-01-99
786 ymd.append(l[i + 4].parse::<i32>()?, &l[i + 4], None)?;
787 i += 2;
788 }
789
790 i += 2;
791 } else if i + 4 < len_l
792 && l[i + 1] == l[i + 3]
793 && l[i + 3] == " "
794 && self.info.pertain_index(&l[i + 2])
795 {
796 // Jan of 01
797 if let Ok(value) = l[i + 4].parse::<i32>() {
798 let year = self.info.convertyear(value, false);
799 ymd.append(year, &l[i + 4], Some(YMDLabel::Year))?;
800 }
801
802 i += 4;
803 }
804 }
805 } else if let Some(value) = self.info.ampm_index(&l[i]) {
806 let is_ampm = self.ampm_valid(res.hour, res.ampm, fuzzy);
807
808 if is_ampm == Ok(true) {
809 res.hour = res.hour.map(|h| self.adjust_ampm(h, value));
810 res.ampm = Some(value);
811 } else if fuzzy {
812 skipped_idxs.push(i);
813 }
814 } else if self.could_be_tzname(res.hour, &res.tzname, res.tzoffset, &l[i]) {
815 res.tzname = Some(l[i].clone());
816
817 let tzname = res.tzname.clone().unwrap();
818 res.tzoffset = self.info.tzoffset_index(&tzname).map(|t| t as i32);
819
820 if i + 1 < len_l && (l[i + 1] == "+" || l[i + 1] == "-") {
821 // GMT+3
822 // According to dateutil docs - reverse the size, as GMT+3 means
823 // "my time +3 is GMT" not "GMT +3 is my time"
824
825 // TODO: Is there a better way of in-place modifying a vector?
826 let item = if l[i + 1] == "+" {
827 "-".to_owned()
828 } else {
829 "+".to_owned()
830 };
831 l[i + 1] = item;
832
833 res.tzoffset = None;
834
835 if self.info.utczone_index(&tzname) {
836 res.tzname = None;
837 }
838 }
839 } else if res.hour.is_some() && (l[i] == "+" || l[i] == "-") {
840 let signal = if l[i] == "+" { 1 } else { -1 };
841 let len_li = l[i].len();
842
843 let mut hour_offset: Option<i32> = None;
844 let mut min_offset: Option<i32> = None;
845
846 // TODO: check that l[i + 1] is integer?
847 if len_li == 4 {
848 // -0300
849 hour_offset = Some(l[i + 1][..2].parse::<i32>()?);
850 min_offset = Some(l[i + 1][2..4].parse::<i32>()?);
851 } else if i + 2 < len_l && l[i + 2] == ":" {
852 // -03:00
853 hour_offset = Some(l[i + 1].parse::<i32>()?);
854 min_offset = Some(l[i + 3].parse::<i32>()?);
855 i += 2;
856 } else if len_li <= 2 {
857 // -[0]3
858 let range_len = min(l[i + 1].len(), 2);
859 hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
860 min_offset = Some(0);
861 }
862
863 res.tzoffset =
864 Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));
865
866 let tzname = res.tzname.clone();
867 if i + 5 < len_l
868 && self.info.jump_index(&l[i + 2])
869 && l[i + 3] == "("
870 && l[i + 5] == ")"
871 && 3 <= l[i + 4].len()
872 && self.could_be_tzname(res.hour, &tzname, None, &l[i + 4])
873 {
874 // (GMT)
875 res.tzname = Some(l[i + 4].clone());
876 i += 4;
877 }
878
879 i += 1;
880 } else if !(self.info.jump_index(&l[i]) || fuzzy) {
881 return Err(ParseError::UnrecognizedToken(l[i].clone()));
882 } else {
883 skipped_idxs.push(i);
884 }
885
886 i += 1;
887 }
888
889 let (year, month, day) = ymd.resolve_ymd(yearfirst, dayfirst)?;
890
891 res.century_specified = ymd.century_specified;
892 res.year = year;
893 res.month = month;
894 res.day = day;
895
896 if !self.info.validate(&mut res) {
897 Err(ParseError::UnrecognizedFormat)
898 } else if fuzzy_with_tokens {
899 let skipped_tokens = self.recombine_skipped(skipped_idxs, l);
900 Ok((res, Some(skipped_tokens)))
901 } else {
902 Ok((res, None))
903 }
904 }
905
could_be_tzname( &self, hour: Option<i32>, tzname: &Option<String>, tzoffset: Option<i32>, token: &str, ) -> bool906 fn could_be_tzname(
907 &self,
908 hour: Option<i32>,
909 tzname: &Option<String>,
910 tzoffset: Option<i32>,
911 token: &str,
912 ) -> bool {
913 let all_ascii_upper = token
914 .chars()
915 .all(|c| 65u8 as char <= c && c <= 90u8 as char);
916
917 hour.is_some()
918 && tzname.is_none()
919 && tzoffset.is_none()
920 && token.len() <= 5
921 && (all_ascii_upper || self.info.utczone.contains_key(token))
922 }
923
924 #[allow(clippy::unnecessary_unwrap)]
ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool>925 fn ampm_valid(&self, hour: Option<i32>, ampm: Option<bool>, fuzzy: bool) -> ParseResult<bool> {
926 let mut val_is_ampm = !(fuzzy && ampm.is_some());
927
928 if hour.is_none() {
929 if fuzzy {
930 val_is_ampm = false;
931 } else {
932 return Err(ParseError::AmPmWithoutHour);
933 }
934 } else if !(0 <= hour.unwrap() && hour.unwrap() <= 12) {
935 if fuzzy {
936 val_is_ampm = false;
937 } else {
938 return Err(ParseError::ImpossibleTimestamp("Invalid hour"));
939 }
940 }
941
942 Ok(val_is_ampm)
943 }
944
build_naive( &self, res: &ParsingResult, default: &NaiveDateTime, ) -> ParseResult<NaiveDateTime>945 fn build_naive(
946 &self,
947 res: &ParsingResult,
948 default: &NaiveDateTime,
949 ) -> ParseResult<NaiveDateTime> {
950 let y = res.year.unwrap_or_else(|| default.year());
951 let m = res.month.unwrap_or_else(|| default.month() as i32) as u32;
952
953 let d_offset = if res.weekday.is_some() && res.day.is_none() {
954 let dow = day_of_week(y as u32, m, default.day())?;
955
956 // UNWRAP: We've already check res.weekday() is some
957 let actual_weekday = (res.weekday.unwrap() + 1) % 7;
958 let other = DayOfWeek::from_numeral(actual_weekday as u32);
959 Duration::days(i64::from(dow.difference(&other)))
960 } else {
961 Duration::days(0)
962 };
963
964 // TODO: Change month/day to u32
965 let d = NaiveDate::from_ymd_opt(
966 y,
967 m,
968 min(
969 res.day.unwrap_or(default.day() as i32) as u32,
970 days_in_month(y, m as i32)?,
971 ),
972 )
973 .ok_or_else(|| ParseError::ImpossibleTimestamp("Invalid date range given"))?;
974
975 let d = d + d_offset;
976
977 let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
978 let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
979 let second = res.second.unwrap_or(default.second() as i32) as u32;
980 let microsecond = res
981 .microsecond
982 .unwrap_or(default.timestamp_subsec_micros() as i32) as u32;
983 let t =
984 NaiveTime::from_hms_micro_opt(hour, minute, second, microsecond).ok_or_else(|| {
985 if hour >= 24 {
986 ParseError::ImpossibleTimestamp("Invalid hour")
987 } else if minute >= 60 {
988 ParseError::ImpossibleTimestamp("Invalid minute")
989 } else if second >= 60 {
990 ParseError::ImpossibleTimestamp("Invalid second")
991 } else if microsecond >= 2_000_000 {
992 ParseError::ImpossibleTimestamp("Invalid microsecond")
993 } else {
994 unreachable!();
995 }
996 })?;
997
998 Ok(NaiveDateTime::new(d, t))
999 }
1000
build_tzaware( &self, _dt: &NaiveDateTime, res: &ParsingResult, tzinfos: &HashMap<String, i32>, ) -> ParseResult<Option<FixedOffset>>1001 fn build_tzaware(
1002 &self,
1003 _dt: &NaiveDateTime,
1004 res: &ParsingResult,
1005 tzinfos: &HashMap<String, i32>,
1006 ) -> ParseResult<Option<FixedOffset>> {
1007 if let Some(offset) = res.tzoffset {
1008 Ok(Some(FixedOffset::east(offset)))
1009 } else if res.tzoffset == None
1010 && (res.tzname == Some(" ".to_owned())
1011 || res.tzname == Some(".".to_owned())
1012 || res.tzname == Some("-".to_owned())
1013 || res.tzname == None)
1014 {
1015 Ok(None)
1016 } else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
1017 Ok(Some(FixedOffset::east(
1018 *tzinfos.get(res.tzname.as_ref().unwrap()).unwrap(),
1019 )))
1020 } else if let Some(tzname) = res.tzname.as_ref() {
1021 println!("tzname {} identified but not understood.", tzname);
1022 Ok(None)
1023 } else {
1024 Err(ParseError::TimezoneUnsupported)
1025 }
1026 }
1027
1028 #[allow(clippy::unnecessary_unwrap)]
parse_numeric_token( &self, tokens: &[String], idx: usize, info: &ParserInfo, ymd: &mut YMD, res: &mut ParsingResult, fuzzy: bool, ) -> ParseResult<usize>1029 fn parse_numeric_token(
1030 &self,
1031 tokens: &[String],
1032 idx: usize,
1033 info: &ParserInfo,
1034 ymd: &mut YMD,
1035 res: &mut ParsingResult,
1036 fuzzy: bool,
1037 ) -> ParseResult<usize> {
1038 let mut idx = idx;
1039 let value_repr = &tokens[idx];
1040 let mut value = Decimal::from_str(&value_repr).unwrap();
1041
1042 let len_li = value_repr.len();
1043 let len_l = tokens.len();
1044
1045 // TODO: I miss the `x in y` syntax
1046 // TODO: Decompose this logic a bit
1047 if ymd.len() == 3
1048 && (len_li == 2 || len_li == 4)
1049 && res.hour.is_none()
1050 && (idx + 1 >= len_l
1051 || (tokens[idx + 1] != ":" && info.hms_index(&tokens[idx + 1]).is_none()))
1052 {
1053 // 1990101T32[59]
1054 let s = &tokens[idx];
1055 res.hour = s[0..2].parse::<i32>().ok();
1056
1057 if len_li == 4 {
1058 res.minute = Some(s[2..4].parse::<i32>()?)
1059 }
1060 } else if len_li == 6 || (len_li > 6 && tokens[idx].find('.') == Some(6)) {
1061 // YYMMDD or HHMMSS[.ss]
1062 let s = &tokens[idx];
1063
1064 if ymd.len() == 0 && tokens[idx].find('.') == None {
1065 ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?;
1066 ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?;
1067 ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
1068 } else {
1069 // 19990101T235959[.59]
1070 res.hour = s[0..2].parse::<i32>().ok();
1071 res.minute = s[2..4].parse::<i32>().ok();
1072
1073 let t = self.parsems(&s[4..])?;
1074 res.second = Some(t.0);
1075 res.microsecond = Some(t.1);
1076 }
1077 } else if vec![8, 12, 14].contains(&len_li) {
1078 // YYMMDD
1079 let s = &tokens[idx];
1080 ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?;
1081 ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
1082 ymd.append(s[6..8].parse::<i32>()?, &s[6..8], None)?;
1083
1084 if len_li > 8 {
1085 res.hour = Some(s[8..10].parse::<i32>()?);
1086 res.minute = Some(s[10..12].parse::<i32>()?);
1087
1088 if len_li > 12 {
1089 res.second = Some(s[12..].parse::<i32>()?);
1090 }
1091 }
1092 } else if let Some(hms_idx) = self.find_hms_index(idx, tokens, info, true) {
1093 // HH[ ]h or MM[ ]m or SS[.ss][ ]s
1094 let (new_idx, hms) = self.parse_hms(idx, tokens, info, Some(hms_idx));
1095 if hms.is_some() {
1096 // TODO: This unwrap is unjustified.
1097 self.assign_hms(res, value_repr, hms.unwrap());
1098 }
1099 idx = new_idx;
1100 } else if idx + 2 < len_l && tokens[idx + 1] == ":" {
1101 // HH:MM[:SS[.ss]]
1102 // TODO: Better story around Decimal handling
1103 res.hour = Some(value.floor().to_i64().unwrap() as i32);
1104 // TODO: Rescope `value` here?
1105 value = self.to_decimal(&tokens[idx + 2]);
1106 let min_sec = self.parse_min_sec(value);
1107 res.minute = Some(min_sec.0);
1108 res.second = min_sec.1;
1109
1110 if idx + 4 < len_l && tokens[idx + 3] == ":" {
1111 // TODO: (x, y) = (a, b) syntax?
1112 let ms = self.parsems(&tokens[idx + 4]).unwrap();
1113 res.second = Some(ms.0);
1114 res.microsecond = Some(ms.1);
1115
1116 idx += 2;
1117 }
1118 idx += 2;
1119 } else if idx + 1 < len_l
1120 && (tokens[idx + 1] == "-" || tokens[idx + 1] == "/" || tokens[idx + 1] == ".")
1121 {
1122 // TODO: There's got to be a better way of handling the condition above
1123 let sep = &tokens[idx + 1];
1124 ymd.append(value_repr.parse::<i32>()?, &value_repr, None)?;
1125
1126 if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) {
1127 if let Ok(val) = tokens[idx + 2].parse::<i32>() {
1128 ymd.append(val, &tokens[idx + 2], None)?;
1129 } else if let Some(val) = info.month_index(&tokens[idx + 2]) {
1130 ymd.append(val as i32, &tokens[idx + 2], Some(YMDLabel::Month))?;
1131 }
1132
1133 if idx + 3 < len_l && &tokens[idx + 3] == sep {
1134 if let Some(value) = info.month_index(&tokens[idx + 4]) {
1135 ymd.append(value as i32, &tokens[idx + 4], Some(YMDLabel::Month))?;
1136 } else if let Ok(val) = tokens[idx + 4].parse::<i32>() {
1137 ymd.append(val, &tokens[idx + 4], None)?;
1138 } else {
1139 return Err(ParseError::UnrecognizedFormat);
1140 }
1141
1142 idx += 2;
1143 }
1144
1145 idx += 1;
1146 }
1147
1148 idx += 1
1149 } else if idx + 1 >= len_l || info.jump_index(&tokens[idx + 1]) {
1150 if idx + 2 < len_l && info.ampm_index(&tokens[idx + 2]).is_some() {
1151 let hour = value.to_i64().unwrap() as i32;
1152 let ampm = info.ampm_index(&tokens[idx + 2]).unwrap();
1153 res.hour = Some(self.adjust_ampm(hour, ampm));
1154 idx += 1;
1155 } else {
1156 ymd.append(value.floor().to_i64().unwrap() as i32, &value_repr, None)?;
1157 }
1158
1159 idx += 1;
1160 } else if info.ampm_index(&tokens[idx + 1]).is_some()
1161 && (*ZERO <= value && value < *TWENTY_FOUR)
1162 {
1163 // 12am
1164 let hour = value.to_i64().unwrap() as i32;
1165 res.hour = Some(self.adjust_ampm(hour, info.ampm_index(&tokens[idx + 1]).unwrap()));
1166 idx += 1;
1167 } else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
1168 ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
1169 } else if !fuzzy {
1170 return Err(ParseError::UnrecognizedFormat);
1171 }
1172
1173 Ok(idx)
1174 }
1175
adjust_ampm(&self, hour: i32, ampm: bool) -> i321176 fn adjust_ampm(&self, hour: i32, ampm: bool) -> i32 {
1177 if hour < 12 && ampm {
1178 hour + 12
1179 } else if hour == 12 && !ampm {
1180 0
1181 } else {
1182 hour
1183 }
1184 }
1185
parsems(&self, seconds_str: &str) -> ParseResult<(i32, i32)>1186 fn parsems(&self, seconds_str: &str) -> ParseResult<(i32, i32)> {
1187 if seconds_str.contains('.') {
1188 let split: Vec<&str> = seconds_str.split('.').collect();
1189 let (i, f): (&str, &str) = (split[0], split[1]);
1190
1191 let i_parse = i.parse::<i32>()?;
1192 let f_parse = ljust(f, 6, '0').parse::<i32>()?;
1193 Ok((i_parse, f_parse))
1194 } else {
1195 Ok((seconds_str.parse::<i32>()?, 0))
1196 }
1197 }
1198
find_hms_index( &self, idx: usize, tokens: &[String], info: &ParserInfo, allow_jump: bool, ) -> Option<usize>1199 fn find_hms_index(
1200 &self,
1201 idx: usize,
1202 tokens: &[String],
1203 info: &ParserInfo,
1204 allow_jump: bool,
1205 ) -> Option<usize> {
1206 let len_l = tokens.len();
1207 let mut hms_idx = None;
1208
1209 // There's a super weird edge case that can happen
1210 // because Python safely handles negative array indices,
1211 // and Rust (because of usize) does not.
1212 let idx_minus_two = if idx == 1 && len_l > 0 {
1213 len_l - 1
1214 } else if idx == 0 && len_l > 1 {
1215 len_l - 2
1216 } else if idx > 1 {
1217 idx - 2
1218 } else if len_l == 0 {
1219 panic!("Attempting to find_hms_index() wih no tokens.");
1220 } else {
1221 0
1222 };
1223
1224 if idx + 1 < len_l && info.hms_index(&tokens[idx + 1]).is_some() {
1225 hms_idx = Some(idx + 1)
1226 } else if allow_jump
1227 && idx + 2 < len_l
1228 && tokens[idx + 1] == " "
1229 && info.hms_index(&tokens[idx + 2]).is_some()
1230 {
1231 hms_idx = Some(idx + 2)
1232 } else if idx > 0 && info.hms_index(&tokens[idx - 1]).is_some() {
1233 hms_idx = Some(idx - 1)
1234 } else if len_l > 0
1235 && idx > 0
1236 && idx == len_l - 1
1237 && tokens[idx - 1] == " "
1238 && info.hms_index(&tokens[idx_minus_two]).is_some()
1239 {
1240 hms_idx = Some(idx - 2)
1241 }
1242
1243 hms_idx
1244 }
1245
1246 #[allow(clippy::unnecessary_unwrap)]
parse_hms( &self, idx: usize, tokens: &[String], info: &ParserInfo, hms_index: Option<usize>, ) -> (usize, Option<usize>)1247 fn parse_hms(
1248 &self,
1249 idx: usize,
1250 tokens: &[String],
1251 info: &ParserInfo,
1252 hms_index: Option<usize>,
1253 ) -> (usize, Option<usize>) {
1254 if hms_index.is_none() {
1255 (idx, None)
1256 } else if hms_index.unwrap() > idx {
1257 (
1258 hms_index.unwrap(),
1259 info.hms_index(&tokens[hms_index.unwrap()]),
1260 )
1261 } else {
1262 (
1263 idx,
1264 info.hms_index(&tokens[hms_index.unwrap()]).map(|u| u + 1),
1265 )
1266 }
1267 }
1268
assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize)1269 fn assign_hms(&self, res: &mut ParsingResult, value_repr: &str, hms: usize) {
1270 let value = self.to_decimal(value_repr);
1271
1272 if hms == 0 {
1273 res.hour = Some(value.to_i64().unwrap() as i32);
1274 if !close_to_integer(&value) {
1275 res.minute = Some((*SIXTY * (value % *ONE)).to_i64().unwrap() as i32);
1276 }
1277 } else if hms == 1 {
1278 let (min, sec) = self.parse_min_sec(value);
1279 res.minute = Some(min);
1280 res.second = sec;
1281 } else if hms == 2 {
1282 let (sec, micro) = self.parsems(value_repr).unwrap();
1283 res.second = Some(sec);
1284 res.microsecond = Some(micro);
1285 }
1286 }
1287
to_decimal(&self, value: &str) -> Decimal1288 fn to_decimal(&self, value: &str) -> Decimal {
1289 // TODO: Justify unwrap
1290 Decimal::from_str(value).unwrap()
1291 }
1292
parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>)1293 fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) {
1294 // UNWRAP: i64 guaranteed to be fine because of preceding floor
1295 let minute = value.floor().to_i64().unwrap() as i32;
1296 let mut second = None;
1297
1298 let sec_remainder = value - value.floor();
1299 if sec_remainder != *ZERO {
1300 second = Some((*SIXTY * sec_remainder).floor().to_i64().unwrap() as i32);
1301 }
1302
1303 (minute, second)
1304 }
1305
recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String>1306 fn recombine_skipped(&self, skipped_idxs: Vec<usize>, tokens: Vec<String>) -> Vec<String> {
1307 let mut skipped_tokens: Vec<String> = vec![];
1308
1309 let mut sorted_idxs = skipped_idxs.clone();
1310 sorted_idxs.sort();
1311
1312 for (i, idx) in sorted_idxs.iter().enumerate() {
1313 if i > 0 && idx - 1 == skipped_idxs[i - 1] {
1314 // UNWRAP: Having an initial value and unconditional push at end guarantees value
1315 let mut t = skipped_tokens.pop().unwrap();
1316 t.push_str(tokens[*idx].as_ref());
1317 skipped_tokens.push(t);
1318 } else {
1319 skipped_tokens.push(tokens[*idx].to_owned());
1320 }
1321 }
1322
1323 skipped_tokens
1324 }
1325 }
1326
close_to_integer(value: &Decimal) -> bool1327 fn close_to_integer(value: &Decimal) -> bool {
1328 value % *ONE == *ZERO
1329 }
1330
/// Returns `s` adjusted to exactly `chars` characters: longer input is cut
/// off after `chars` characters, shorter input is padded on the right with
/// `replace`.
///
/// Lengths are counted in characters rather than bytes, so multi-byte input
/// is never sliced in the middle of a character (which would panic) and is
/// always padded to the full width.
fn ljust(s: &str, chars: usize, replace: char) -> String {
    let mut adjusted: String = s.chars().take(chars).collect();
    let kept = adjusted.chars().count();

    // `kept` can never exceed `chars` because of the `take` above.
    adjusted.extend(std::iter::repeat(replace).take(chars - kept));
    adjusted
}
1338
1339 /// Main entry point for using `dtparse`. The parse function is responsible for
1340 /// taking in a string representing some time value, and turning it into
1341 /// a timestamp with optional timezone information if it can be identified.
1342 ///
1343 /// The default implementation assumes English values for names of months,
1344 /// days of the week, etc. It is equivalent to Python's `dateutil.parser.parse()`
parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)>1345 pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
1346 let res = DEFAULT_PARSER.parse(
1347 timestr,
1348 None,
1349 None,
1350 false,
1351 false,
1352 None,
1353 false,
1354 &HashMap::new(),
1355 )?;
1356
1357 Ok((res.0, res.1))
1358 }
1359