##' Parse dates with **y**ear, **m**onth, and **d**ay components
##'
##' Transforms dates stored in character and numeric vectors to Date or POSIXct
##' objects (see `tz` argument). These functions recognize arbitrary
##' non-digit separators as well as no separator. As long as the order of
##' formats is correct, these functions will parse dates correctly even when the
##' input vectors contain differently formatted dates. See examples.
##'
##' In case of heterogeneous date formats, the `ymd()` family guesses formats based
##' on a subset of the input vector. If the input vector contains many missing
##' values or non-date strings, the subset might not contain meaningful dates
##' and the date-time format won't be guessed, resulting in an
##' `All formats failed to parse` error. In such cases please see
##' [parse_date_time()] for a more flexible parsing interface.
##'
##' If the `truncated` parameter is non-zero, the `ymd()` functions also check for
##' truncated formats. For example, `ymd()` with `truncated = 2` will also
##' parse incomplete dates like `2012-06` and `2012`.
##'
##' NOTE: The `ymd()` family of functions is based on `parse_date_time()` and thus
##' drops directly to the internal C parser for numeric months, but uses
##' [base::strptime()] for alphabetic months. This implies that some of
##' [base::strptime()]'s limitations are inherited by \pkg{lubridate}'s parser.
##' For example, truncated formats (like `%Y-%b`) will not be parsed. Numeric
##' truncated formats (like `%Y-%m`) are handled correctly by \pkg{lubridate}'s
##' C parser.
##'
##' As of version 1.3.0, \pkg{lubridate}'s parse functions no longer return a
##' message that displays which format they used to parse their input. You can
##' change this by setting the `lubridate.verbose` option to `TRUE` with
##' `options(lubridate.verbose = TRUE)`.
##'
##' @export
##' @param ... a character or numeric vector of suspected dates
##' @param quiet logical. If `TRUE`, function evaluates without displaying
##'   customary messages.
##' @param tz Time zone indicator. If `NULL` (default), a Date object is
##'   returned. Otherwise a POSIXct with time zone attribute set to `tz`.
##' @param locale locale to be used, see [locales]. On Linux systems you
##'   can use `system("locale -a")` to list all the installed locales.
##' @param truncated integer. Number of formats that can be truncated.
##' @return a vector of class POSIXct if `tz` argument is non-`NULL` or Date if tz
##'   is `NULL` (default)
##' @seealso [parse_date_time()] for an even more flexible low level
##'   mechanism.
##' @keywords chron
##' @examples
##' x <- c("09-01-01", "09-01-02", "09-01-03")
##' ymd(x)
##' x <- c("2009-01-01", "2009-01-02", "2009-01-03")
##' ymd(x)
##' ymd(090101, 90102)
##' now() > ymd(20090101)
##' ## TRUE
##' dmy(010210)
##' mdy(010210)
##'
##' yq('2014.2')
##'
##' ## heterogeneous formats in a single vector:
##' x <- c(20090101, "2009-01-02", "2009 01 03", "2009-1-4",
##'        "2009-1, 5", "Created on 2009 1 6", "200901 !!! 07")
##' ymd(x)
##'
##' ## What lubridate might not handle:
##'
##' ## Extremely weird cases when one of the separators is "" and some of the
##' ## formats are not in double digits might not be parsed correctly:
##' \dontrun{ymd("201002-01", "201002-1", "20102-1")
##' dmy("0312-2010", "312-2010")}
ymd <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  ## every member of the family forwards to `.parse_xxx()`; only `orders` differs
  .parse_xxx(..., orders = "ymd", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd
ydm <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx(..., orders = "ydm", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd
mdy <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx(..., orders = "mdy", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd
myd <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx(..., orders = "myd", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd
dmy <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx(..., orders = "dmy", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd
dym <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx(..., orders = "dym", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

## The partial-date parsers below take no `truncated` argument; they always
## pass `truncated = 0` to `.parse_xxx()`.

#' @export
#' @rdname ymd
yq <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME")) {
  .parse_xxx(..., orders = "yq", quiet = quiet, tz = tz, locale = locale, truncated = 0)
}

#' @export
#' @rdname ymd
ym <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME")) {
  .parse_xxx(..., orders = "ym", quiet = quiet, tz = tz, locale = locale, truncated = 0)
}

#' @export
#' @rdname ymd
my <- function(..., quiet = FALSE, tz = NULL, locale = Sys.getlocale("LC_TIME")) {
  .parse_xxx(..., orders = "my", quiet = quiet, tz = tz, locale = locale, truncated = 0)
}


##' Parse date-times with **y**ear, **m**onth, and **d**ay, **h**our,
##' **m**inute, and **s**econd components.
##'
##' Transform dates stored as character or numeric vectors to POSIXct
##' objects. The `ymd_hms()` family of functions recognizes all non-alphanumeric
##' separators (with the exception of "." if `frac = TRUE`) and correctly
##' handles heterogeneous date-time representations. For more flexibility in
##' treatment of heterogeneous formats, see low level parser
##' [parse_date_time()].
##'
##' The `ymd_hms()` functions automatically assign the Universal Coordinated Time
##' Zone (UTC) to the parsed date. This time zone can be changed with
##' [force_tz()].
##'
##' The most common type of irregularity in date-time data is the truncation
##' due to rounding or unavailability of the time stamp. If the `truncated`
##' parameter is non-zero, the `ymd_hms()` functions also check for truncated
##' formats. For example, `ymd_hms()` with `truncated = 3` will also parse
##' incomplete dates like `2012-06-01 12:23`, `2012-06-01 12` and
##' `2012-06-01`. NOTE: The `ymd()` family of functions is based on
##' [base::strptime()] which currently fails to parse `%y-%m` formats.
##'
##' In case of heterogeneous date formats the `ymd_hms()` family guesses formats
##' based on a subset of the input vector. If the input vector contains many
##' missing values or non-date strings, the subset might not contain meaningful
##' dates and the date-time format won't be guessed resulting in
##' `All formats failed to parse` error.
##' In such cases please see
##' [parse_date_time()] for a more flexible parsing interface.
##'
##' As of version 1.3.0, \pkg{lubridate}'s parse functions no longer return a
##' message that displays which format they used to parse their input. You can
##' change this by setting the `lubridate.verbose` option to `TRUE` with
##' `options(lubridate.verbose = TRUE)`.
##'
##' @export
##' @param ... a character vector of dates in year, month, day, hour, minute,
##'   second format
##' @param quiet logical. If `TRUE`, function evaluates without displaying customary messages.
##' @param tz a character string that specifies which time zone to parse the date with. The string
##'   must be a time zone that is recognized by the user's OS.
##' @param locale locale to be used, see \link{locales}. On Linux systems you
##'   can use `system("locale -a")` to list all the installed locales.
##' @param truncated integer, indicating how many formats can be missing. See details.
##' @return a vector of [POSIXct] date-time objects
##' @seealso
##' - [ymd()], [hms()]
##' - [parse_date_time()] for the underlying mechanism
##' @keywords POSIXt parse
##' @examples
##'
##' x <- c("2010-04-14-04-35-59", "2010-04-01-12-00-00")
##' ymd_hms(x)
##' x <- c("2011-12-31 12:59:59", "2010-01-01 12:00:00")
##' ymd_hms(x)
##'
##'
##' ## ** heterogeneous formats **
##' x <- c(20100101120101, "2009-01-02 12-01-02", "2009.01.03 12:01:03",
##'        "2009-1-4 12-1-4",
##'        "2009-1, 5 12:1, 5",
##'        "200901-08 1201-08",
##'        "2009 arbitrary 1 non-decimal 6 chars 12 in between 1 !!! 6",
##'        "OR collapsed formats: 20090107 120107 (as long as prefixed with zeros)",
##'        "Automatic wday, Thu, detection, 10-01-10 10:01:10 and p format: AM",
##'        "Created on 10-01-11 at 10:01:11 PM")
##' ymd_hms(x)
##'
##' ## ** fractional seconds **
##' op <- options(digits.secs=3)
##' dmy_hms("20/2/06 11:16:16.683")
##' options(op)
##'
##' ## ** different formats for ISO8601 timezone offset **
##' ymd_hms(c("2013-01-24 19:39:07.880-0600",
##' "2013-01-24 19:39:07.880", "2013-01-24 19:39:07.880-06:00",
##' "2013-01-24 19:39:07.880-06", "2013-01-24 19:39:07.880Z"))
##'
##' ## ** internationalization **
##' \dontrun{
##' x_RO <- "Ma 2012 august 14 11:28:30 "
##'   ymd_hms(x_RO, locale = "ro_RO.utf8")
##' }
##'
##' ## ** truncated time-dates **
##' x <- c("2011-12-31 12:59:59", "2010-01-01 12:11", "2010-01-01 12", "2010-01-01")
##' ymd_hms(x, truncated = 3)
##' x <- c("2011-12-31 12:59", "2010-01-01 12", "2010-01-01")
##' ymd_hm(x, truncated = 2)
##' ## ** What lubridate might not handle **
##' ## Extremely weird cases when one of the separators is "" and some of the
##' ## formats are not in double digits might not be parsed correctly:
##' \dontrun{
##' ymd_hm("20100201 07-01", "20100201 07-1", "20100201 7-01")}
##'
ymd_hms <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  ## the *_hms variants pass two orders: one with a trailing `z` and one without
  .parse_xxx_hms(..., orders = c("ymdTz", "ymdT"), quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
ymd_hm <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "ymdR", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
ymd_h <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "ymdr", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
dmy_hms <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = c("dmyTz", "dmyT"), quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
dmy_hm <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "dmyR", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
dmy_h <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "dmyr", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
mdy_hms <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = c("mdyTz", "mdyT"), quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
mdy_hm <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "mdyR", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
mdy_h <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "mdyr", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
ydm_hms <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = c("ydmTz", "ydmT"), quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
ydm_hm <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "ydmR", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}

#' @export
#' @rdname ymd_hms
ydm_h <- function(..., quiet = FALSE, tz = "UTC", locale = Sys.getlocale("LC_TIME"), truncated = 0) {
  .parse_xxx_hms(..., orders = "ydmr", quiet = quiet, tz = tz, locale = locale, truncated = truncated)
}


##' @rdname hms
##' @examples
##' ms(c("09:10", "09:02", "1:10"))
##' ms("7 6")
##' ms("6,5")
##' @export
ms <- function(..., quiet = FALSE, roll = FALSE) {
  ## rows "M" and "S" of the parsed matrix hold minutes and seconds
  parsed <- .parse_hms(..., order = "MS", quiet = quiet)
  if (roll) {
    ## carry overflowing seconds/minutes into the next larger unit
    rolled <- .roll_hms(min = parsed["M", ], sec = parsed["S", ])
    return(period(hour = rolled$hour, minute = rolled$min, second = rolled$sec))
  }
  period(minute = parsed["M", ], second = parsed["S", ])
}

##' @rdname hms
##' @examples
##' hm(c("09:10", "09:02", "1:10"))
##' hm("7 6")
##' hm("6,5")
##' @export
hm <- function(..., quiet = FALSE, roll = FALSE) {
  ## rows "H" and "M" of the parsed matrix hold hours and minutes
  parsed <- .parse_hms(..., order = "HM", quiet = quiet)
  if (roll) {
    ## carry overflowing minutes into hours
    rolled <- .roll_hms(hour = parsed["H", ], min = parsed["M", ])
    return(period(hour = rolled$hour, minute = rolled$min, second = rolled$sec))
  }
  period(hour = parsed["H", ], minute = parsed["M", ])
}

##' Parse periods with **h**our, **m**inute, and **s**econd components
##'
##' Transforms a character or numeric vector into a period object with the
##' specified number of hours, minutes, and seconds. `hms()` recognizes all
##' non-numeric characters except '-' as separators ('-' is used for negative
##' `durations`). After hours, minutes and seconds have been parsed, the
##' remaining input is ignored.
##'
##' @param ... a character vector of hour minute second triples
##' @param quiet logical. If `TRUE`, function evaluates without displaying
##'   customary messages.
##' @param roll logical. If `TRUE`, smaller units are rolled over to higher units
##'   if they exceed the conventional limit.
##'   For example,
##'   `hms("01:59:120", roll = TRUE)` produces period "2H 1M 0S".
##' @return a vector of period objects
##' @seealso [hm()], [ms()]
##' @keywords period
##' @examples
##'
##' x <- c("09:10:01", "09:10:02", "09:10:03")
##' hms(x)
##'
##' hms("7 6 5", "3:23:::2", "2 : 23 : 33", "Finished in 9 hours, 20 min and 4 seconds")
##' @export
hms <- function(..., quiet = FALSE, roll = FALSE) {
  ## rows "H", "M" and "S" of the parsed matrix hold the three components
  parsed <- .parse_hms(..., order = "HMS", quiet = quiet)
  if (roll) {
    ## carry overflowing seconds/minutes into the next larger unit
    rolled <- .roll_hms(hour = parsed["H", ], min = parsed["M", ], sec = parsed["S", ])
    return(period(hour = rolled$hour, minute = rolled$min, second = rolled$sec))
  }
  period(hour = parsed["H", ], minute = parsed["M", ], second = parsed["S", ])
}

## Roll seconds over into minutes and minutes over into hours.
## Returns a list with elements `hour`, `min` and `sec`; all arithmetic is
## vectorized, with 60 as the carry base at both levels.
.roll_hms <- function(hour = 0, min = 0, sec = 0) {
  total_min <- min + sec %/% 60
  list(hour = hour + total_min %/% 60,
       min = total_min %% 60,
       sec = sec %% 60)
}

## R-level wrapper for the C routine `C_parse_hms`.
## The flat result of the C call is reshaped into a 3-row matrix whose rows are
## named "H", "M" and "S" (one column per input element).
.parse_hms <- function(..., order, quiet = FALSE) {
  inputs <- unlist(lapply(list(...), .num_to_date), use.names = FALSE)
  parsed <- matrix(.Call(C_parse_hms, inputs, order),
                   nrow = 3L, dimnames = list(c("H", "M", "S"), NULL))
  if (!quiet) {
    ## fixme: this warning should be dropped to C and thrown only when there are
    ## real parsing errors #530
    ## the check looks at the row named by the last letter of `order`
    n_chars <- nchar(order)
    last_unit <- substr(order, n_chars, n_chars)
    if (anyNA(parsed[last_unit, ])) {
      warning("Some strings failed to parse, or all strings are NAs")
    }
  }
  parsed
}

##' User friendly date-time parsing functions
##'
##' `parse_date_time()` parses an input vector into POSIXct date-time
##' object. It differs from [base::strptime()] in two respects. First,
##' it allows specification of the order in which the formats occur without the
##' need to include separators and the `%` prefix. Such a formatting argument is
##' referred to as "order".
##' Second, it allows the user to specify several
##' format-orders to handle heterogeneous date-time character
##' representations.
##'
##' When several format-orders are specified, `parse_date_time()` selects
##' (guesses) format-orders based on a training subset of the input
##' strings. After guessing, the formats are ordered according to the performance
##' on the training set and applied recursively on the entire input vector. You
##' can disable training with `train = FALSE`.
##'
##' `parse_date_time()`, and all derived functions, such as `ymd_hms()`,
##' `ymd()`, etc., will drop into `fast_strptime()` instead of
##' [base::strptime()] whenever the formats guessed from the input data are all
##' numeric.
##'
##' The list below contains formats recognized by \pkg{lubridate}. For numeric
##' formats leading 0s are optional. As compared to [base::strptime()], some of
##' the formats are new or have been extended for efficiency reasons. These
##' formats are marked with "(*)" below. Fast parsers `parse_date_time2()` and
##' `fast_strptime()` accept only formats marked with "(!)".
##'
##'
##' \describe{ \item{`a`}{Abbreviated weekday name in the current
##' locale. (Also matches full name)}
##'
##' \item{`A`}{Full weekday name in the current locale. (Also matches
##' abbreviated name).
##'
##' You don't need to specify `a` and `A` formats explicitly. Wday is
##' automatically handled if `preproc_wday = TRUE`}
##'
##' \item{`b` (!)}{Abbreviated or full month name in the current locale.
##' The C parser currently understands only English month names.}
##'
##' \item{`B` (!)}{Same as b.}
##'
##' \item{`d` (!)}{Day of the month as decimal number (01--31 or 0--31)}
##'
##' \item{`H` (!)}{Hours as decimal number (00--24 or 0--24).}
##'
##' \item{`I` (!)}{Hours as decimal number (01--12 or 1--12).}
##'
##' \item{`j`}{Day of year as decimal number (001--366 or 1--366).}
##'
##' \item{`q` (!*)}{Quarter (1--4). The quarter month is added to the parsed
##' month if `m` element is present.}
##'
##' \item{`m` (!*)}{Month as decimal number (01--12 or 1--12). For
##' `parse_date_time` also matches abbreviated and full months names as `b`
##' and `B` formats. C parser understands only English month names.}
##'
##' \item{`M` (!)}{Minute as decimal number (00--59 or 0--59).}
##'
##' \item{`p` (!)}{AM/PM indicator in the locale. Commonly used in conjunction
##' with `I` and \bold{not} with `H`. But \pkg{lubridate}'s C parser accepts H
##' format as long as hour is not greater than 12. C parser understands only
##' English locale AM/PM indicator.}
##'
##' \item{`S` (!)}{Second as decimal number (00--61 or 0--61), allowing for up
##' to two leap-seconds (but POSIX-compliant implementations will ignore leap
##' seconds).}
##'
##' \item{`OS`}{Fractional second.}
##'
##' \item{`U`}{Week of the year as decimal number (00--53 or 0--53) using
##' Sunday as the first day of the week (and typically with the first Sunday
##' of the year as day 1 of week 1). The US convention.}
##'
##' \item{`w`}{Weekday as decimal number (0--6, Sunday is 0).}
##'
##' \item{`W`}{Week of the year as decimal number (00--53 or 0--53) using
##' Monday as the first day of week (and typically with the first Monday of the
##' year as day 1 of week 1). The UK convention.}
##'
##' \item{`y` (!*)}{Year without century (00--99 or 0--99).
##' In `parse_date_time()` also matches year with century (Y format).}
##'
##' \item{`Y` (!)}{Year with century.}
##'
##' \item{`z` (!*)}{ISO8601 signed offset in hours and minutes from UTC. For
##' example `-0800`, `-08:00` or `-08`, all represent 8 hours behind UTC. This
##' format also matches the Z (Zulu) UTC indicator. Because [base::strptime()]
##' doesn't fully support ISO8601 this format is implemented as a union of 4
##' formats: Ou (Z), Oz (-0800), OO (-08:00) and Oo (-08). You can use these
##' formats as any other but it is rarely necessary. `parse_date_time2()` and
##' `fast_strptime()` support all of these formats.}
##'
##' \item{`Om` (!*)}{Matches numeric month and English alphabetic months
##' (Both, long and abbreviated forms).}
##'
##' \item{`Op` (!*)}{Matches AM/PM English indicator.}
##'
##' \item{`r` (*)}{Matches `Ip` and `H` orders.}
##'
##' \item{`R` (*)}{Matches `HM` and `IMp` orders.}
##'
##' \item{`T` (*)}{Matches `IMSp`, `HMS`, and `HMOS` orders.}
##' }
##'
##'
##' @export
##' @param x a character or numeric vector of dates
##' @param orders a character vector of date-time formats. Each order string is
##'   a series of formatting characters as listed in [base::strptime()] but
##'   might not include the `"%"` prefix. For example, "ymd" will match all the
##'   possible dates in year, month, day order. Formatting orders might include
##'   arbitrary separators. These are discarded. See details for the implemented
##'   formats. If multiple order strings are supplied, they are applied in turn
##'   for `parse_date_time2()` and `fast_strptime()`. For `parse_date_time()`
##'   the order of applied formats is determined by `select_formats` parameter.
##' @param tz a character string that specifies the time zone with which to
##'   parse the dates
##' @param truncated integer, number of formats that can be missing.
##'   The most
##'   common type of irregularity in date-time data is the truncation due to
##'   rounding or unavailability of the time stamp. If the `truncated` parameter
##'   is non-zero `parse_date_time()` also checks for truncated formats. For
##'   example, if the format order is "ymdHMS" and `truncated = 3`,
##'   `parse_date_time()` will correctly parse incomplete date-times like
##'   `2012-06-01 12:23`, `2012-06-01 12` and `2012-06-01`. \bold{NOTE:} The
##'   `ymd()` family of functions is based on [base::strptime()] which currently
##'   fails to parse `%Y-%m` formats.
##' @param quiet logical. If `TRUE`, progress messages are not printed, and `No
##'   formats found` error is suppressed and the function simply returns a
##'   vector of NAs. This mirrors the behavior of base R functions
##'   [base::strptime()] and [base::as.POSIXct()].
##' @param locale locale to be used, see \link{locales}. On Linux systems you
##'   can use `system("locale -a")` to list all the installed locales.
##' @param select_formats A function to select actual formats for parsing from a
##'   set of formats which matched a training subset of `x`. It receives a named
##'   integer vector and returns a character vector of selected formats. Names
##'   of the input vector are formats (not orders) that matched the training
##'   set. Numeric values are the number of dates (in the training set) that
##'   matched the corresponding format. You should use this argument if the
##'   default selection method fails to select the formats in the right
##'   order. By default the formats with most formatting tokens (`%`) are
##'   selected and `%Y` counts as 2.5 tokens (so that it has a priority over
##'   `%y%m`). See examples.
##' @param exact logical. If `TRUE`, the `orders` parameter is interpreted as an
##'   exact [base::strptime()] format and no training or guessing are performed
##'   (i.e. `train`, `drop` parameters are irrelevant).
##' @param train logical, default `TRUE`. Whether to train formats on a subset
##'   of the input vector. The result of this is that supplied orders are sorted
##'   according to performance on this training set, which commonly results in
##'   increased performance. Please note that even when `train = FALSE` (and
##'   `exact = FALSE`) guessing of the actual formats is still performed on a
##'   pseudo-random subset of the original input vector. This might result in
##'   `All formats failed to parse` error. See notes below.
##' @param drop logical, default `FALSE`. Whether to drop formats that didn't
##'   match on the training set. If `FALSE`, unmatched on the training set
##'   formats are tried as a last resort at the end of the parsing
##'   queue. Applies only when `train = TRUE`. Setting this parameter to `TRUE`
##'   might slightly speed up parsing in situations involving many
##'   formats. Prior to v1.7.0 this parameter was implicitly `TRUE`, which
##'   resulted in occasional surprising behavior when rare patterns were not
##'   present in the training set.
##' @return a vector of POSIXct date-time objects
##' @seealso [base::strptime()], [ymd()], [ymd_hms()]
##' @keywords chron
##' @note `parse_date_time()` (and the derivatives `ymd()`, `ymd_hms()`, etc.)
##'   relies on a sparse guesser that takes at most 501 elements from the
##'   supplied character vector in order to identify appropriate formats from
##'   the supplied orders. If you get the error `All formats failed to parse`
##'   and you are confident that your vector contains valid dates, you should
##'   either set `exact` argument to `TRUE` or use functions that don't perform
##'   format guessing (`fast_strptime()`, `parse_date_time2()` or
##'   [base::strptime()]).
##' @note For performance reasons, when timezone is not UTC,
##'   `parse_date_time2()` and `fast_strptime()` perform no validity checks for
##'   daylight savings time.
##'   Thus, if your input string contains an invalid date
##'   time which falls into DST gap and `lt = TRUE` you will get a `POSIXlt`
##'   object with a non-existent time. If `lt = FALSE` your time instant will be
##'   adjusted to a valid time by adding an hour. See examples. If you want to
##'   get NA for invalid date-times use [fit_to_timeline()] explicitly.
##'
##' @examples
##'
##' ## ** orders are much easier to write **
##' x <- c("09-01-01", "09-01-02", "09-01-03")
##' parse_date_time(x, "ymd")
##' parse_date_time(x, "y m d")
##' parse_date_time(x, "%y%m%d")
##' #  "2009-01-01 UTC" "2009-01-02 UTC" "2009-01-03 UTC"
##'
##' ## ** heterogeneous date-times **
##' x <- c("09-01-01", "090102", "09-01 03", "09-01-03 12:02")
##' parse_date_time(x, c("ymd", "ymd HM"))
##'
##' ## ** different ymd orders **
##' x <- c("2009-01-01", "02022010", "02-02-2010")
##' parse_date_time(x, c("dmY", "ymd"))
##' ##  "2009-01-01 UTC" "2010-02-02 UTC" "2010-02-02 UTC"
##'
##' ## ** truncated time-dates **
##' x <- c("2011-12-31 12:59:59", "2010-01-01 12:11", "2010-01-01 12", "2010-01-01")
##' parse_date_time(x, "Ymd HMS", truncated = 3)
##'
##' ## ** specifying exact formats and avoiding training and guessing **
##' parse_date_time(x, c("%m-%d-%y", "%m%d%y", "%m-%d-%y %H:%M"), exact = TRUE)
##' parse_date_time(c('12/17/1996 04:00:00','4/18/1950 0130'),
##'                 c('%m/%d/%Y %I:%M:%S','%m/%d/%Y %H%M'), exact = TRUE)
##'
##' ## ** quarters and partial dates **
##' parse_date_time(c("2016.2", "2016-04"), orders = "Yq")
##' parse_date_time(c("2016", "2016-04"), orders = c("Y", "Ym"))
##'
##' ## ** fast parsing **
##' \dontrun{
##'   options(digits.secs = 3)
##'   ## random times between 1400 and 3000
##'   tt <- as.character(.POSIXct(runif(1000, -17987443200, 32503680000)))
##'   tt <- rep.int(tt, 1000)
##'
##'   system.time(out <- as.POSIXct(tt, tz = "UTC"))
##'   system.time(out1 <- ymd_hms(tt)) # constant overhead on long vectors
##'   system.time(out2 <- parse_date_time2(tt, "YmdHMOS"))
##'   system.time(out3 <- fast_strptime(tt, "%Y-%m-%d %H:%M:%OS"))
##'
##'   all.equal(out, out1)
##'   all.equal(out, out2)
##'   all.equal(out, out3)
##' }
##'
##' ## ** how to use `select_formats` argument **
##' ## By default %Y has precedence:
##' parse_date_time(c("27-09-13", "27-09-2013"), "dmy")
##'
##' ## to give priority to %y format, define your own select_format function:
##'
##' my_select <- function(trained, drop=FALSE, ...){
##'    n_fmts <- nchar(gsub("[^%]", "", names(trained))) + grepl("%y", names(trained))*1.5
##'    names(trained[ which.max(n_fmts) ])
##' }
##'
##' parse_date_time(c("27-09-13", "27-09-2013"), "dmy", select_formats = my_select)
##'
##' ## ** invalid times with "fast" parsing **
##' parse_date_time("2010-03-14 02:05:06",  "YmdHMS", tz = "America/New_York")
##' parse_date_time2("2010-03-14 02:05:06",  "YmdHMS", tz = "America/New_York")
##' parse_date_time2("2010-03-14 02:05:06",  "YmdHMS", tz = "America/New_York", lt = TRUE)
parse_date_time <- function(x, orders, tz = "UTC", truncated = 0, quiet = FALSE,
                            locale = Sys.getlocale("LC_TIME"), select_formats = .select_formats,
                            exact = FALSE, train = TRUE, drop = FALSE) {

  ## backward compatible hack: a NULL `tz` is replaced by the empty string
  if (is.null(tz)) {
    tz <- ""
  }
  if (length(tz) != 1 || is.na(tz)) {
    stop("`tz` argument must be a character of length one")
  }

  ## switch LC_TIME to `locale` for this call and restore it on exit
  orig_locale <- Sys.getlocale("LC_TIME")
  Sys.setlocale("LC_TIME", locale)
  on.exit(Sys.setlocale("LC_TIME", orig_locale))

  x <- as.character(.num_to_date(x))
  if (truncated != 0) {
    orders <- .add_truncated(orders, truncated)
  }

  ## bookkeeping shared with the closure below (updated there through `<<-`)
  failed <- 0L
  warned <- FALSE

  ## Parse `x`, then re-run on whatever is still NA. The recursion stops as
  ## soon as a pass parses nothing at all or no formats are found.
  parse_remaining <- function(x, first = FALSE) {
    formats <-
      if (exact) {
        orders
      } else {
        train_set <- .get_train_set(x)
        .best_formats(train_set, orders, locale = locale, select_formats, drop = drop)
      }

    if (!(length(formats) > 0)) {
      ## no usable formats: warn on the top-level call only
      if (first && !quiet) {
        warning("All formats failed to parse. No formats found.", call. = FALSE)
        warned <<- TRUE
      }
      failed <<- length(x)
      return(NA)
    }

    out <- .parse_date_time(x, formats, tz = tz, quiet = quiet, locale = locale)
    new_na <- is.na(out)
    if (any(new_na)) {
      x <- x[new_na]
      if (length(x) == length(out)) {
        # don't recur if failed for all
        failed <<- length(x)
      } else {
        out[new_na] <- parse_remaining(x)
      }
    }
    out
  }

  to_parse <- which(!is.na(x) & nzchar(x)) ## missing data might be ""
  ## start from an all-NA vector; only the parseable positions are filled in
  out <- .POSIXct(rep.int(NA_real_, length(x)), tz = tz)

  if (length(to_parse)) {
    out[to_parse] <- parse_remaining(x[to_parse], TRUE)
    if (failed > 0 && !quiet && !warned) {
      warning(" ", failed, " failed to parse.", call. = FALSE)
    }
  }

  out
}

## Thin wrapper around the C routine `C_parse_dt`; the flag arguments are
## coerced to logical and `cutoff_2000` to integer before the call.
parse_dt <- function(x, orders, is_format = FALSE, return_lt = FALSE, cutoff_2000 = 68L) {
  .Call(
    C_parse_dt, x, orders,
    as.logical(is_format),
    as.logical(return_lt),
    as.integer(cutoff_2000)
  )
}

##' @description `parse_date_time2()` is a fast C parser of numeric orders.
##'
##' @rdname parse_date_time
##' @export
##' @param lt logical. If `TRUE`, returned object is of class POSIXlt, and POSIXct
##'   otherwise. For compatibility with [base::strptime()] the default is `TRUE`
##'   for `fast_strptime()` and `FALSE` for `parse_date_time2()`.
##' @param cutoff_2000 integer. For `y` format, two-digit numbers smaller or equal
##'   to `cutoff_2000` are parsed as though starting with `20`, otherwise parsed
##'   as though starting with `19`. Available only for functions relying on
##'   `lubridate`s internal parser.
parse_date_time2 <- function(x, orders, tz = "UTC", exact = FALSE, lt = FALSE, cutoff_2000 = 68L) {
  if (length(tz) != 1 || is.na(tz)) {
    stop("`tz` argument must be a character of length one")
  }

  ## Parse `x` with a single order. Unless `exact`, everything that is not a
  ## letter is stripped from the order first.
  parse_one <- function(x, order) {
    if (!exact) {
      order <- gsub("[^[:alpha:]]+", "", as.character(order))
    }
    if (lt) {
      .mklt(parse_dt(x, order, exact, TRUE, cutoff_2000), tz)
    } else if (is_utc(tz)) {
      .POSIXct(parse_dt(x, order, exact, FALSE, cutoff_2000), tz = "UTC")
    } else {
      ## non-UTC zones go through a POSIXlt intermediate
      as.POSIXct(.mklt(parse_dt(x, order, exact, TRUE, cutoff_2000), tz))
    }
  }

  ## The first order is applied to everything; each later order is applied only
  ## to elements that were non-NA on input and are still unparsed.
  pending <- !is.na(x)
  res <- parse_one(x, orders[[1]])
  for (ord in orders[-1]) {
    pending <- pending & is.na(res)
    res[pending] <- parse_one(x[pending], ord)
  }
  res
}

##' @description `fast_strptime()` is a fast C parser of numeric formats only
##'   that accepts explicit format arguments, just like [base::strptime()].
##' @rdname parse_date_time
##' @export
##' @param format a vector of formats. If multiple formats supplied they are
##'   applied in turn till success. The formats should include all the
##'   separators and each format letter must be prefixed with %, just as in the
##'   format argument of [base::strptime()].
fast_strptime <- function(x, format, tz = "UTC", lt = TRUE, cutoff_2000 = 68L) {
  ## Parse `x` with a single explicit format.
  parse1 <- function(x, fmt) {
    if (lt) {
      .mklt(parse_dt(x, fmt, TRUE, TRUE, cutoff_2000), tz)
    } else {
      if (is_utc(tz)) {
        .POSIXct(parse_dt(x, fmt, TRUE, FALSE, cutoff_2000), "UTC")
      } else {
        as.POSIXct(.mklt(parse_dt(x, fmt, TRUE, TRUE, cutoff_2000), tz))
      }
    }
  }

  ## First format is tried on everything; each following format is tried only
  ## on non-NA inputs that are still unparsed.
  nnas <- !is.na(x)
  out <- parse1(x, format[[1]])
  for (fmt in format[-1]) {
    nnas <- nnas & is.na(out)
    out[nnas] <- parse1(x[nnas], fmt)
  }
  out
}



### INTERNAL

## Turn the component list `dtlist` into a POSIXlt object in time zone `tz`.
## `wday` and `yday` are filled with NA (one per element of `dtlist$sec`) and
## `isdst` is set to -1L before the list is wrapped with `.POSIXlt()`.
.mklt <- function(dtlist, tz) {
  na_fill <- rep_len(NA_integer_, length(dtlist$sec))
  dtlist[["wday"]] <- na_fill
  dtlist[["yday"]] <- na_fill
  dtlist[["isdst"]] <- -1L
  .POSIXlt(dtlist, tz = tz)
}

## Parse `x` with `formats[[1]]`, then recursively retry the elements that came
## back NA with the remaining formats.
.parse_date_time <- function(x, formats, tz, quiet, locale) {

  ## print(formats) # for debugging

  out <- .strptime(x, formats[[1]], tz = tz, quiet = quiet, locale = locale)
  na <- is.na(out)
  newx <- x[na]

  if (is_verbose())
    message(" ", sum(!na), " parsed with ", gsub("^@|@$", "", formats[[1]]))

  ## recursive parsing
  if (length(formats) > 1 && length(newx) > 0)
    out[na] <- .parse_date_time(newx, formats[-1], tz = tz, quiet = quiet, locale = locale)

  ## return POSIXlt
  out
}

## Parse `x` with a single format `fmt`. Formats matched by the `c_parser`
## regex below are handed to `fast_strptime()`; everything else goes through
## `strptime()`, optionally under a temporarily switched LC_TIME locale.
.strptime <- function(x, fmt, tz = "UTC", quiet = FALSE, locale = NULL) {

  ## Depending on fmt we might need to preprocess x.
  ## ISO8601 and internal parser are the only cases so far.
  ## %Ou: "2013-04-16T04:59:59Z"
  ## %Oo: "2013-04-16T04:59:59+01"
  ## %Oz: "2013-04-16T04:59:59+0100"
  ## %OO: "2013-04-16T04:59:59+01:00"

  ## is_posix <- 0 < regexpr("^[^%]*%Y[^%]+%m[^%]+%d[^%]+(%H[^%](%M[^%](%S)?)?)?[^%Z]*$", fmt)

  c_parser <- 0 < regexpr("^[^%0-9]*(%([YymdqIHMSz]|O[SzuoOpmb])[^%0-9Z]*)+$", fmt)
  zpos <- regexpr("%O((?<z>z)|(?<u>u)|(?<o>o)|(?<O>O))", fmt, perl = TRUE)

  if (c_parser) {
    ## C PARSER:
    out <- fast_strptime(x, fmt, tz = "UTC", lt = FALSE)

    if (!is_utc(tz)) {
      out <-
        if (zpos > 0) {
          if (!quiet)
            message("Date in ISO8601 format; converting timezone from UTC to \"", tz, "\".")
          with_tz(out, tzone = tz)
        } else {
          ## force_tz is very slow
          force_tz(out, tzone = tz)
        }
    }

    out

  } else {
    ## STRPTIME PARSER:

    ## strptime doesn't accept 'locale' argument; need a hard reset
    if (!is.null(locale)) {
      old_lc_time <- Sys.getlocale("LC_TIME")
      if (old_lc_time != locale) {
        Sys.setlocale("LC_TIME", locale)
        ## add = TRUE so the restore handler never replaces another exit handler
        on.exit(Sys.setlocale("LC_TIME", old_lc_time), add = TRUE)
      }
    }

    if (zpos > 0) {
      ## If ISO8601 -> pre-process x and fmt
      capt <- attr(zpos, "capture.names")[attr(zpos, "capture.start") > 0][[2]] ## <- second subexp
      repl <- switch(capt,
                     z = "%z",
                     u = "Z",
                     ## substitute +aa with +aa00
                     o = {
                       x <- sub("([+-]\\d{2}(?=\\D+)?$)", "\\100", x, perl = TRUE)
                       "%z"
                     },
                     ## substitute +aa:bb with +aabb
                     O = {
                       x <- sub("([+-]\\d{2}):(?=[^:]+$)", "\\1", x, perl = TRUE)
                       "%z"
                     },
                     stop("Unrecognised capture detected; please report this bug"))

      fmt <- .str_sub(fmt, zpos, zpos + attr(zpos, "match.length") - 1, repl)

      ## user has supplied tz argument -> convert to tz
      if (!is_utc(tz)) {
        if (!quiet)
          message("Date in ISO8601 format; converting timezone from UTC to \"", tz, "\".")
        return(with_tz(strptime(.enclose(x), .enclose(fmt), "UTC"), tzone = tz))
      }
    }

    strptime(.enclose(x), .enclose(fmt), tz)
  }
}


## Expand format strings to also include truncated formats.
## Get locations of letters as a vector.
## Choose the number at the n - truncated place in the vector and
## return the substrings created by 1 to that number.
## For `truncated` <= 0 the truncation is taken from the front instead:
## substrings run from a later letter to the end of the order.
.add_truncated <- function(orders, truncated) {
  out <- orders

  if (truncated > 0) {
    trunc_one <- function(order) {
      alphas <- gregexpr("[a-zA-Z]", order)[[1]]
      start <- max(0, length(alphas) - truncated)
      ## a zero index is silently dropped by `[`, so start == 0 is harmless
      cut_points <- alphas[start:(length(alphas) - 1)]

      truncs <- c()
      for (j in seq_along(cut_points))
        truncs <- c(truncs, substr(order, 1, cut_points[j]))
      truncs
    }

  } else {
    trunc_one <- function(order) {
      alphas <- gregexpr("[a-zA-Z]", order)[[1]][-1]
      end <- max(1, abs(truncated) - 1)
      cut_points <- alphas[1:end]

      truncs <- c()
      for (j in seq_along(cut_points))
        truncs <- c(truncs, substr(order, cut_points[j], nchar(order)))
      truncs
    }
  }

  for (i in seq_along(orders)) {
    out <- c(out, trunc_one(orders[i]))
  }

  out
}


## Suffixes that replace the 4th order letter (T, R or r) when building
## truncated variants in .parse_xxx_hms().
.xxx_hms_truncations <- list(T = c("R", "r", ""), R = c("r", ""), r = "")

## Convert the inputs in `...` with .num_to_date() and parse them with
## parse_date_time(), after extending `orders` with truncated variants derived
## from `orders[[1]]` when `truncated` > 0.
.parse_xxx_hms <- function(..., orders, truncated, quiet, tz, locale) {
  ## !!! NOTE: truncated operates on first element in ORDERS !
  if (truncated > 0) {
    ## Take first 3 formats and append formats from .xxx_hms_truncations
    ## corresponding to the 4th format letter in order[[1]] -- T, R or r.
    xxx <- substr(orders[[1]], 1, 3)
    add <- paste0(xxx, .xxx_hms_truncations[[substr(orders[[1]], 4, 4)]])
    rest <- length(add) - truncated
    if (rest < 0)
      orders <- c(orders, add, .add_truncated(xxx, abs(rest)))
    else
      orders <- c(orders, add[1:truncated]) ## truncated > 0 here, so 1:truncated is safe
  }
  dates <- unlist(lapply(list(...), .num_to_date), use.names = FALSE)
  parse_date_time(dates, orders, tz = tz, quiet = quiet, locale = locale)
}

## Convert the inputs in `...` with .num_to_date() and parse them with
## parse_date_time(). With a NULL `tz` parsing happens in UTC and the result is
## converted with as.Date.POSIXct(); otherwise the parse_date_time() result in
## `tz` is returned as is.
.parse_xxx <- function(..., orders, quiet, tz, locale, truncated) {
  dates <- unlist(lapply(list(...), .num_to_date), use.names = FALSE)
  if (is.null(tz)) {
    as.Date.POSIXct(parse_date_time(dates, orders, quiet = quiet, tz = "UTC",
                                    locale = locale, truncated = truncated))
  } else {
    parse_date_time(dates, orders, quiet = quiet, tz = tz,
                    locale = locale, truncated = truncated)
  }
}

## Coerce `x` to character. Numeric input is formatted without scientific
## notation and left-padded with one "0" whenever it has an odd number of
## characters (e.g. 90101 -> "090101"); NAs stay NA. Anything else goes
## through as.character().
.num_to_date <- function(x) {
  if (is.numeric(x)) {
    out <- rep.int(NA_character_, length(x))
    nnas <- !is.na(x)
    x <- format(x[nnas], scientific = FALSE, trim = TRUE)
    x <- paste0(ifelse(nchar(x) %% 2 == 1, "0", ""), x)
    out[nnas] <- x
    out
  } else {
    as.character(x)
  }
}

## Parse `x` with the fixed orders "ymdTz", "ymdT" and "ymd" (no training).
.parse_iso_dt <- function(x, tz) {
  parse_date_time(x, orders = c("ymdTz", "ymdT", "ymd"), tz = tz, train = FALSE)
}

## Coerce `x` to POSIXct: character input is parsed with .parse_iso_dt(), Date
## input is converted as UTC, POSIXct is returned untouched and everything else
## is passed to as.POSIXct().
##
## The default for `tz` is namespace-qualified on purpose. An unqualified
## `tz = tz(x)` default makes R look up `tz` in this function's own frame, where
## it finds the argument's promise while it is still being evaluated, and fails
## with "promise already under evaluation: recursive default argument
## reference".
as_POSIXct <- function(x, tz = lubridate::tz(x)) {
  if (is.character(x))
    .parse_iso_dt(x, tz = tz)
  else if (is.Date(x))
    ## as.POSIXct.Date assumes UTC in computation but returns POSIXct with system TZ
    ## same as as_datetime Date method
    .POSIXct(as.numeric(x) * 86400, tz = "UTC")
  else if (!is.POSIXct(x))
    as.POSIXct(x, tz = tz)
  else x
}
## parse.r ends here