1#  File src/library/stats/R/models.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2020 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19formula <- function(x, ...) UseMethod("formula")
20formula.default <- function (x = NULL, env = parent.frame(), ...)
21{
22    notAtomic <- !is.atomic(x)
23    notnull <- function(z) notAtomic && !is.null(z)
24
25    if (notnull(x$formula)) eval(x$formula)
26    else if (notnull(x$terms)) {z <- x$terms; oldClass(z) <- "formula"; z}
27    else if (notnull(x$call$formula))	eval(x$call$formula)
28    else if (!is.null(attr(x, "formula"))) attr(x, "formula")
29    else {
30        form <- switch(mode(x),
31                       NULL = structure(list(), class = "formula"),
32                       character = eval(str2expression(x)), # ever used?  formula.character!
33                       call = eval(x),
34                       stop("invalid formula"))
35        environment(form) <- env
36        form
37    }
38}
39formula.formula <- function(x, ...) x
40formula.terms <- function(x, ...) {
41    env <- environment(x)
42    attributes(x) <- list(class = "formula") # dropping all other attr.
43    environment(x) <- if(is.null(env)) globalenv() else env
44    x
45}
46
47DF2formula <- function(x, env = parent.frame()) {
48    nm <- lapply(names(x), as.name)
49    mkRHS <- function(nms) Reduce(function(x, y) call("+", x, y), nms)
50    ff <- if (length(nm) > 1L)
51              call("~", nm[[1L]], mkRHS(nm[-1L]))
52          else if (length(nm) == 1L)
53              call("~", nm[[1L]])
54          else stop("cannot create a formula from a zero-column data frame")
55    class(ff) <- "formula" # was ff <- eval(ff)
56    environment(ff) <- env
57    ff
58}
59
60formula.data.frame <- function (x, ...)
61{
62    if(length(tx <- attr(x, "terms")) && length(ff <- formula.terms(tx)))
63	ff
64    else DF2formula(x, parent.frame())
65}
66
67## Future version {w/o .Deprecated etc}:
68formula.character <- function(x, env = parent.frame(), ...)
69{
70    ff <- str2lang(x)
71    if(!(is.call(ff) && is.symbol(c. <- ff[[1L]]) && c. == quote(`~`)))
72        stop(gettextf("invalid formula: %s", deparse2(x)), domain=NA)
73    class(ff) <- "formula"
74    environment(ff) <- env
75    ff
76}
77
78## Active version helping to move towards future version:
79formula.character <- function(x, env = parent.frame(), ...)
80{
81    ff <- if(length(x) > 1L) {
82              .Deprecated(msg=
83 "Using formula(x) is deprecated when x is a character vector of length > 1.
84  Consider formula(paste(x, collapse = \" \")) instead.")
85              str2expression(x)[[1L]]
86          } else {
87              str2lang(x)
88          }
89    if(!is.call(ff))
90        stop(gettextf("invalid formula %s: not a call", deparse2(x)), domain=NA)
91    ## else
92    if(is.symbol(c. <- ff[[1L]]) && c. == quote(`~`)) {
93        ## perfect
94    } else {
95        if(is.symbol(c.)) { ## back compatibility
96            ff <- if(c. == quote(`=`)) {
97                      .Deprecated(msg = gettextf(
98				"invalid formula %s: assignment is deprecated",
99				deparse2(x)))
100                      ff[[3L]] # RHS of "v = <form>" (pkgs 'GeNetIt', 'KMgene')
101                  } else if(c. == quote(`(`) || c. == quote(`{`)) {
102                      .Deprecated(msg = gettextf(
103			"invalid formula %s: extraneous call to `%s` is deprecated",
104			deparse2(x), as.character(c.)))
105                      eval(ff)
106                  }
107        } else
108            stop(gettextf("invalid formula %s", deparse2(x)), domain=NA)
109    }
110    class(ff) <- "formula"
111    environment(ff) <- env
112    ff
113}
114
115print.formula <- function(x, showEnv = !identical(e, .GlobalEnv), ...)
116{
117    e <- environment(.x <- x) ## return(.) original x
118    attr(x, ".Environment") <- NULL
119    print.default(unclass(x), ...)
120    if (showEnv) print(e)
121    invisible(.x)
122}
123
124`[.formula` <- function(x,i) {
125    ans <- NextMethod("[")
126    if(!length(ans) || is.symbol(a1 <- ans[[1L]]) && as.character(a1) == "~") {
127        if(is.null(ans)) ans <- list()
128        class(ans) <- "formula"
129        environment(ans) <- environment(x)
130    }
131    ans
132}
133
134as.formula <- function(object, env = parent.frame())
135{
136    if(inherits(object, "formula"))
137        object
138    else {
139        rval <- formula(object, env = baseenv())
140        if (identical(environment(rval), baseenv()) || !missing(env))
141            environment(rval) <- env
142        rval
143    }
144}
145
146terms <- function(x, ...) UseMethod("terms")
147terms.default <- function(x, ...) {
148    x$terms %||% attr(x, "terms") %||% stop("no terms component nor attribute")
149}
150
151terms.terms <- function(x, ...) x
152print.terms <- function(x, ...) {
153    print.default(unclass(x), ...)
154    invisible(x)
155}
156
157## moved from base/R/labels.R
158labels.terms <- function(object, ...) attr(object, "term.labels")
159
160### do this `by hand' as previous approach was vulnerable to re-ordering.
161delete.response <- function (termobj)
162{
163    a <- attributes(termobj)
164    y <- a$response
165    if(!is.null(y) && y) {
166        termobj[[2L]] <- NULL
167        a$response <- 0
168        a$variables <- a$variables[-(1+y)]
169        a$predvars <- a$predvars[-(1+y)]
170        if(length(a$factors))
171            a$factors <- a$factors[-y, , drop = FALSE]
172        if(length(a$offset))
173            a$offset <- ifelse(a$offset > y, a$offset-1, a$offset)
174        if(length(a$specials))
175            for(i in seq_along(a$specials)) {
176                b <- a$specials[[i]]
177                a$specials[[i]] <- ifelse(b > y, b-1, b)
178            }
179        attributes(termobj) <- a
180    }
181    termobj
182}
183
184reformulate <- function (termlabels, response=NULL, intercept = TRUE, env = parent.frame())
185{
186    ## an extension of formula.character()
187    if(!is.character(termlabels) || !length(termlabels))
188        stop("'termlabels' must be a character vector of length at least one")
189    termtext <- paste(termlabels, collapse = "+")
190    if(!intercept) termtext <- paste(termtext, "- 1")
191    terms <- str2lang(termtext)
192    fexpr <-
193	if(is.null(response))
194	    call("~", terms)
195	else
196	    call("~",
197		 ## response can be a symbol or call as  Surv(ftime, case)
198		 if(is.character(response))
199                     tryCatch(str2lang(response),
200                              error = function(e) {
201                                  sc <- sys.calls()
202                                  sc1 <- lapply(sc, `[[`, 1L)
203                                  isF <- function(cl) is.symbol(cl) && cl == quote(reformulate)
204                                  reformCall <- sc[[match(TRUE, vapply(sc1, isF, NA))]]
205                                  warning(warningCondition(message = paste(sprintf(
206		"Unparseable 'response' \"%s\"; use is deprecated.  Use as.name(.) or `..`!",
207									response),
208						conditionMessage(e), sep="\n"),
209                                      class = c("reformulate", "deprecatedWarning"),
210                                      call = reformCall)) # , domain=NA
211                                  as.symbol(response)
212                              })
213                 else response,
214		 terms)
215    formula(fexpr, env)
216}
217
218drop.terms <- function(termobj, dropx = NULL, keep.response = FALSE)
219{
220    if (is.null(dropx))
221	termobj
222    else {
223        if(!inherits(termobj, "terms"))
224            stop(gettextf("'termobj' must be a object of class %s",
225                          dQuote("terms")),
226                 domain = NA)
227	newformula <-
228	    reformulate(attr(termobj, "term.labels")[-dropx],
229			response = if(keep.response) termobj[[2L]],
230			intercept = attr(termobj, "intercept"),
231			env = environment(termobj))
232	result <- terms(newformula, specials=names(attr(termobj, "specials")))
233
234	# Edit the optional attributes
235
236	response <- attr(termobj, "response")
237	dropOpt <- if(response && !keep.response) # we have a response in termobj, but not in the result
238		       c(response, dropx + length(response))
239		   else
240		       dropx + max(response)
241
242	if (!is.null(predvars <- attr(termobj, "predvars"))) {
243	    # predvars is a language expression giving a list of
244	    # values corresponding to terms in the model
245            # so add 1 for the name "list"
246	    attr(result, "predvars") <- predvars[-(dropOpt+1)]
247	}
248	if (!is.null(dataClasses <- attr(termobj, "dataClasses"))) {
249	    # dataClasses is a character vector of
250	    # values corresponding to terms in the model
251	    attr(result, "dataClasses") <- dataClasses[-dropOpt]
252	}
253	result
254    }
255}
256
257
258`[.terms` <- function (termobj, i)
259{
260    resp <- if (attr(termobj, "response")) termobj[[2L]]
261    newformula <- attr(termobj, "term.labels")[i]
262    if (length(newformula) == 0L) newformula <- "1"
263    newformula <- reformulate(newformula, resp, attr(termobj, "intercept"), environment(termobj))
264    result <- terms(newformula, specials = names(attr(termobj, "specials")))
265
266    # Edit the optional attributes
267
268    addindex <- function(index, offset)
269        # add a non-negative offset to a possibly negative index
270    	ifelse(index < 0, index - offset,
271    	       ifelse(index == 0, 0, index + offset))
272
273    if (is.logical(i))
274    	i <- which(rep_len(i, length.out = length(attr(termobj, "term.labels"))))
275
276    response <- attr(termobj, "response")
277    if (response)
278	iOpt <- c(if (max(i) > 0) response, # inclusive indexing
279	          addindex(i, max(response)))
280    else
281	iOpt <- i
282
283    if (!is.null(predvars <- attr(termobj, "predvars")))
284	attr(result, "predvars") <- predvars[c(if (max(iOpt) > 0) 1,
285	                                     addindex(iOpt, 1))]
286
287    if (!is.null(dataClasses <- attr(termobj, "dataClasses")))
288	attr(result, "dataClasses") <- dataClasses[iOpt]
289
290    result
291}
292
293
294## Arguments abb and neg.out are a legacy from S
295## simplify=TRUE was the default in R < 1.7.0
296terms.formula <- function(x, specials = NULL, abb = NULL, data = NULL,
297			  neg.out = TRUE, keep.order = FALSE,
298                          simplify = FALSE, ..., allowDotAsName = FALSE)
299{
300    if(simplify)
301    fixFormulaObject <- function(object) {
302        Terms <- terms(object)
303	tmp <- attr(Terms, "term.labels")
304        ## fix up terms involving | : PR#8462
305        ind <- grep("|", tmp, fixed = TRUE)
306        if(length(ind)) tmp[ind] <- paste("(", tmp[ind], ")")
307        ## need to add back any offsets
308        if(length(ind <- attr(Terms, "offset"))) {
309            ## can't look at rownames of factors, as not there for y ~ offset(x)
310            tmp2 <- as.character(attr(Terms, "variables"))[-1L]
311            tmp <- c(tmp, tmp2[ind])
312        }
313	rhs <- if(length(tmp)) paste(tmp, collapse = " + ") else "1"
314	if(!attr(Terms, "intercept")) rhs <- paste(rhs, "- 1")
315        if(length(form <- formula(object)) > 2L) {
316            res <- formula(paste("lhs ~", rhs))
317            res[[2L]] <- form[[2L]]
318            res
319        } else formula(paste("~", rhs))
320    }
321
322    if (!is.null(data) && !is.environment(data) && !is.data.frame(data))
323	data <- as.data.frame(data, optional = TRUE)
324    terms <-
325        .External(C_termsform, x, specials, data, keep.order, allowDotAsName)
326    if (simplify) {
327        a <- attributes(terms)
328        terms <- fixFormulaObject(terms)
329        attributes(terms) <- a
330    }
331    environment(terms) <- environment(x)
332    if(!inherits(terms, "formula"))
333        class(terms) <- c(oldClass(terms), "formula")
334    terms
335}
336
337coef <- function(object, ...) UseMethod("coef")
338## 'complete': be compatible with vcov()
339coef.default <- function(object, complete=TRUE, ...) {
340    cf <- object$coefficients
341    if(complete) cf else cf[!is.na(cf)]
342}
343coef.aov <- coef.default; formals(coef.aov)[["complete"]] <- FALSE
344coefficients <- coef
345
346residuals <- function(object, ...) UseMethod("residuals")
347residuals.default <- function(object, ...)
348    naresid(object$na.action, object$residuals)
349resid <- residuals
350
351deviance <- function(object, ...) UseMethod("deviance")
352deviance.default <- function(object, ...) object$deviance
353
354fitted <- function(object, ...) UseMethod("fitted")
355## we really do need partial matching here
356fitted.default <- function(object, ...)
357{
358    xx <- if("fitted.values" %in% names(object))
359        object$fitted.values else object$fitted
360    napredict(object$na.action, xx)
361}
362fitted.values <- fitted
363
364anova <- function(object, ...)UseMethod("anova")
365
366effects <- function(object, ...)UseMethod("effects")
367
368weights <- function(object, ...)UseMethod("weights")
369## used for class "lm", e.g. in drop1.
370weights.default <- function(object, ...)
371{
372    wts <-  object$weights
373    if (is.null(wts)) wts else napredict(object$na.action, wts)
374}
375
376df.residual <- function(object, ...)UseMethod("df.residual")
377df.residual.default <- function(object, ...) object$df.residual
378
379variable.names <- function(object, ...) UseMethod("variable.names")
380variable.names.default <- function(object, ...) colnames(object)
381
382case.names <- function(object, ...) UseMethod("case.names")
383case.names.default <- function(object, ...) rownames(object)
384
385simulate <- function(object, nsim = 1, seed = NULL, ...) UseMethod("simulate")
386
387offset <- function(object) object
388## ?
389
390.checkMFClasses <- function(cl, m, ordNotOK = FALSE)
391{
392    ## when called from predict.nls, vars not match.
393    new <- vapply(m, .MFclass, "")
394    new <- new[names(new) %in% names(cl)]
395    if(length(new) == 0L) return(invisible())
396    ## else
397    old <- cl[names(new)]
398    if(!ordNotOK) {
399        old[old == "ordered"] <- "factor"
400        new[new == "ordered"] <- "factor"
401    }
402    ## ordered is OK as a substitute for factor, but not v.v.
403    new[new == "ordered" & old == "factor"] <- "factor"
404    ## factor is OK as a substitute for character
405    ## This probably means the original character got auto-converted to
406    ## factor, setting xlevels and causing the conversion of the new
407    new[new == "factor" & old == "character"] <- "character"
408    if(!identical(old, new)) {
409        wrong <- old != new
410        if(sum(wrong) == 1)
411            stop(gettextf(
412    "variable '%s' was fitted with type \"%s\" but type \"%s\" was supplied",
413                          names(old)[wrong], old[wrong], new[wrong]),
414                 call. = FALSE, domain = NA)
415        else
416            stop(gettextf(
417    "variables %s were specified with different types from the fit",
418                 paste(sQuote(names(old)[wrong]), collapse=", ")),
419                 call. = FALSE, domain = NA)
420    }
421    else invisible()
422}
423
424##' Model Frame Class
425.MFclass <- function(x)
426{
427    ## the idea is to identify the relevant classes that model.matrix
428    ## will handle differently
429    ## logical, factor, ordered vs numeric, and other for future proofing
430    if(is.logical(x)) return("logical")
431    if(is.ordered(x)) return("ordered")
432    if(is.factor(x)) return("factor")
433    ## Character vectors may be auto-converted to factors, but keep them separate for now
434    if(is.character(x)) return("character")
435    if(is.matrix(x) && is.numeric(x))
436        return(paste0("nmatrix.", ncol(x)))
437    ## this is unclear.  Prior to 2.6.0 we assumed numeric with attributes
438    ## meant something, but at least for now model.matrix does not
439    ## treat it differently.
440##    if(is.vector(x) && is.numeric(x)) return("numeric")
441    if(is.numeric(x)) return("numeric")
442    return("other")
443}
444
445##' A complete deparse for "models", i.e. for formula and variable names (PR#15377)
446##' @param width.cutoff = 500L: Some people have generated longer variable names
447##' https://stat.ethz.ch/pipermail/r-devel/2010-October/058756.html
448deparse2 <- function(x)
449    paste(deparse(x, width.cutoff = 500L, backtick = !is.symbol(x) && is.language(x)),
450          collapse = " ")
451
452model.frame <- function(formula, ...) UseMethod("model.frame")
453model.frame.default <-
454    function(formula, data = NULL, subset = NULL, na.action = na.fail,
455	     drop.unused.levels = FALSE, xlev = NULL,...)
456{
457    ## first off, establish if we were passed a data frame 'newdata'
458    ## and note the number of rows.
459    possible_newdata <-
460        !missing(data) && is.data.frame(data) &&
461        identical(substitute(data), quote(newdata)) &&
462        (nr <- nrow(data)) > 0
463
464    ## were we passed just a fitted model object?
465    ## the fit might have a saved model object
466    if(!missing(formula) && nargs() == 1 && is.list(formula)
467       && !is.null(m <- formula$model)) return(m)
468    ## if not use the saved call (if there is one).
469    if(!missing(formula) && nargs() == 1 && is.list(formula)
470       && all(c("terms", "call") %in% names(formula))) {
471        fcall <- formula$call
472        m <- match(c("formula", "data", "subset", "weights", "na.action"),
473                   names(fcall), 0)
474        fcall <- fcall[c(1, m)]
475        ## need stats:: for non-standard evaluation
476        fcall[[1L]] <- quote(stats::model.frame)
477        env <- environment(formula$terms) %||% parent.frame()
478        return(eval(fcall, env)) # 2-arg form as env is an environment
479    }
480    if(missing(formula)) {
481	if(!missing(data) && inherits(data, "data.frame") && length(attr(data, "terms")))
482	    return(data)
483	formula <- as.formula(data)
484    }
485    else if(missing(data) && inherits(formula, "data.frame")) {
486	if(length(attr(formula, "terms")))
487	    return(formula)
488	data <- formula
489	formula <- as.formula(data)
490    } else
491        formula <- as.formula(formula)
492    if(missing(na.action)) {
493	if(!is.null(naa <- attr(data, "na.action")) && mode(naa)!="numeric")
494	    na.action <- naa
495	else if(!is.null(naa <- getOption("na.action")))
496	    na.action <- naa
497    }
498
499    ## The following logic is quite ancient and should possibly be revised
500    ## In particular it lets data=1 slip through and subsequent eval()
501    ## would interpret it as a sys.frame() index (PR#17879).
502    ## For now, insert explicit check below
503
504    if(missing(data))
505	data <- environment(formula)
506    else if (!is.data.frame(data) && !is.environment(data)
507             && !is.null(attr(data, "class")))
508        data <- as.data.frame(data)
509    else if (is.array(data))
510        stop("'data' must be a data.frame, not a matrix or an array")
511
512    ## Explicitly check "data"
513    if (!is.data.frame(data) && !is.environment(data) && !is.list(data)
514        && !is.null(data))
515        stop("'data' must be a data.frame, environment, or list")
516
517    if(!inherits(formula, "terms"))
518	formula <- terms(formula, data = data)
519    env <- environment(formula)
520    rownames <- .row_names_info(data, 0L) #attr(data, "row.names")
521    vars <- attr(formula, "variables")
522    predvars <- attr(formula, "predvars") %||% vars
523    varnames <- vapply(vars, deparse2, " ")[-1L]
524    variables <- eval(predvars, data, env)
525    resp <- attr(formula, "response")
526    if(is.null(rownames) && resp > 0L) {
527        ## see if we can get rownames from the response
528        lhs <- variables[[resp]]
529        rownames <- if(is.matrix(lhs)) rownames(lhs) else names(lhs)
530    }
531    if(possible_newdata && length(variables)) {
532        ## need to do this before subsetting and na.action
533        nr2 <- max(sapply(variables, NROW))
534        if(nr2 != nr)
535            warning(sprintf(paste0(ngettext(nr,
536                                            "'newdata' had %d row",
537                                            "'newdata' had %d rows"),
538                                   " ",
539                                  ngettext(nr2,
540                                           "but variable found had %d row",
541                                           "but variables found have %d rows")),
542                            nr, nr2),
543                    call. = FALSE, domain = NA)
544    }
545    if(is.null(attr(formula, "predvars"))) {
546        for (i in seq_along(varnames))
547            predvars[[i+1L]] <- makepredictcall(variables[[i]], vars[[i+1L]])
548        attr(formula, "predvars") <- predvars
549    }
550    extras <- substitute(list(...))
551    extranames <- names(extras[-1L])
552    extras <- eval(extras, data, env)
553    subset <- eval(substitute(subset), data, env)
554    data <- .External2(C_modelframe, formula, rownames, variables, varnames,
555                       extras, extranames, subset, na.action)
556    ## fix up the levels
557    if(length(xlev)) {
558	for(nm in names(xlev))
559	    if(!is.null(xl <- xlev[[nm]])) {
560		xi <- data[[nm]]
561                if(is.character(xi))
562                    xi <- as.factor(xi)
563		if(!is.factor(xi) || is.null(nxl <- levels(xi)))
564		    warning(gettextf("variable '%s' is not a factor", nm),
565                            domain = NA)
566		else {
567		    ctr <- attr(xi, "contrasts")
568		    xi <- xi[, drop = TRUE] # drop unused levels
569                    nxl <- levels(xi)
570		    if(any(m <- is.na(match(nxl, xl))))
571                        stop(sprintf(ngettext(length(m),
572                                              "factor %s has new level %s",
573                                              "factor %s has new levels %s"),
574                                     nm, paste(nxl[m], collapse=", ")),
575                             domain = NA)
576		    data[[nm]] <- factor(xi, levels=xl, exclude=NULL)
577		    if (!identical(attr(data[[nm]], "contrasts"), ctr))
578		    	warning(gettext(sprintf("contrasts dropped from factor %s",
579						nm), domain = NA),
580		    	        call. = FALSE)
581		}
582	    }
583    } else if(drop.unused.levels) {
584	for(nm in names(data)) {
585	    x <- data[[nm]]
586	    if(is.factor(x) &&
587	       length(unique(x[!is.na(x)])) < length(levels(x))) {
588	        ctr <- attr(x, "contrasts")
589		data[[nm]] <- x[, drop = TRUE]
590		if (!identical(attr(data[[nm]], "contrasts"), ctr))
591		    warning(gettext(sprintf(
592				"contrasts dropped from factor %s due to missing levels",
593					    nm), domain = NA), call. = FALSE)
594	    }
595	}
596    }
597    attr(formula, "dataClasses") <- vapply(data, .MFclass, "")
598    attr(data, "terms") <- formula
599    data
600}
601
602## we don't assume weights are numeric or a vector, leaving this to the
603## calling application
604model.weights <- function(x) x$"(weights)"
605
606## we do check that offsets are numeric.
607model.offset <- function(x) {
608    offsets <- attr(attr(x, "terms"),"offset")
609    if(length(offsets)) {
610	ans <- x$"(offset)" %||% 0
611	for(i in offsets) ans <- ans+x[[i]]
612    }
613    else ans <- x$"(offset)"
614    if(!is.null(ans) && !is.numeric(ans)) stop("'offset' must be numeric")
615    ans
616}
617
618model.matrix <- function(object, ...) UseMethod("model.matrix")
619
620model.matrix.default <- function(object, data = environment(object),
621				 contrasts.arg = NULL, xlev = NULL, ...)
622{
623    t <- if(missing(data)) terms(object) else terms(object, data=data)
624    if (is.null(attr(data, "terms")))
625	data <- model.frame(object, data, xlev=xlev)
626    else {
627	reorder <- match(vapply(attr(t, "variables"), deparse2, "")[-1L],
628                         names(data))
629	if (anyNA(reorder))
630	    stop("model frame and formula mismatch in model.matrix()")
631	if(!identical(reorder, seq_len(ncol(data))))
632	    data <- data[,reorder, drop=FALSE]
633    }
634    int <- attr(t, "response")
635    if(length(data)) {
636        contr.funs <- as.character(getOption("contrasts"))
637        namD <- names(data)
638        ## turn any character columns into factors
639        for(i in namD)
640            if(is.character(data[[i]]))
641                data[[i]] <- factor(data[[i]])
642        isF <- vapply(data, function(x) is.factor(x) || is.logical(x), NA)
643        isF[int] <- FALSE
644        isOF <- vapply(data, is.ordered, NA)
645        for(nn in namD[isF])            # drop response
646            if(is.null(attr(data[[nn]], "contrasts")))
647                contrasts(data[[nn]]) <- contr.funs[1 + isOF[nn]]
648        ## it might be safer to have numerical contrasts:
649        ##	  get(contr.funs[1 + isOF[nn]])(nlevels(data[[nn]]))
650        if (!is.null(contrasts.arg)) {
651          if (!is.list(contrasts.arg))
652              warning("non-list contrasts argument ignored")
653          else {  ## contrasts.arg is a list
654            if (is.null(namC <- names(contrasts.arg)))
655                stop("'contrasts.arg' argument must be named")
656            for (nn in namC) {
657                if (is.na(ni <- match(nn, namD)))
658                    warning(gettextf("variable '%s' is absent, its contrast will be ignored", nn),
659                            domain = NA)
660                else {
661                    ca <- contrasts.arg[[nn]]
662                    if(is.matrix(ca)) contrasts(data[[ni]], ncol(ca)) <- ca
663                    else contrasts(data[[ni]]) <- contrasts.arg[[nn]]
664                }
665            }
666          }
667        } ## non-null contrasts.arg
668    } else { #  no rhs terms ('~1', or '~0'): internal model.matrix needs some variable
669	isF <- FALSE
670	data[["x"]] <- raw(nrow(data))
671    }
672    ans <- .External2(C_modelmatrix, t, data) # modelmatrix() in ../src/model.c
673    if(any(isF))
674	attr(ans, "contrasts") <- lapply(data[isF], attr, "contrasts")
675    ans
676}
677
678model.response <- function (data, type = "any")
679{
680    if (attr(attr(data, "terms"), "response")) {
681	if (is.list(data) || is.data.frame(data)) {
682	    v <- data[[1L]]
683	    if (type == "numeric" && is.factor(v)) {
684		warning('using type = "numeric" with a factor response will be ignored')
685	    } else if (type == "numeric" || type == "double")
686		storage.mode(v) <- "double"
687	    else if (type != "any") stop("invalid response type")
688	    if (is.matrix(v) && ncol(v) == 1L) dim(v) <- NULL
689	    rows <- attr(data, "row.names")
690	    if (nrows <- length(rows)) {
691		if (length(v) == nrows) names(v) <- rows
692		else if (length(dd <- dim(v)) == 2L)
693		    if (dd[1L] == nrows && !length((dn <- dimnames(v))[[1L]]))
694			dimnames(v) <- list(rows, dn[[2L]])
695	    }
696	    return(v)
697	} else stop("invalid 'data' argument")
698    } else return(NULL)
699}
700
701model.extract <- function (frame, component)
702{
703    component <- as.character(substitute(component))
704    rval <- switch(component,
705		   response = model.response(frame),
706		   offset = model.offset(frame),
707                   frame[[paste0("(", component, ")")]]
708                   )
709    if(!is.null(rval)){
710	if (length(rval) == nrow(frame))
711	    names(rval) <- attr(frame, "row.names")
712	else if (is.matrix(rval) && nrow(rval) == nrow(frame)) {
713	    t1 <- dimnames(rval)
714	    dimnames(rval) <- list(attr(frame, "row.names"), t1[[2L]])
715	}
716    }
717    rval
718}
719
720preplot <- function(object, ...) UseMethod("preplot")
721update <- function(object, ...) UseMethod("update")
722
723is.empty.model <- function (x)
724{
725    tt <- terms(x)
726    (length(attr(tt, "factors")) == 0L) & (attr(tt, "intercept") == 0L)
727}
728
729makepredictcall <- function(var, call) UseMethod("makepredictcall")
730
731makepredictcall.default  <- function(var, call)
732{
733    if(as.character(call)[1L] != "scale") return(call)
734    if(!is.null(z <- attr(var, "scaled:center"))) call$center <- z
735    if(!is.null(z <- attr(var, "scaled:scale"))) call$scale <- z
736    call
737}
738
739.getXlevels <- function(Terms, m)
740{
741    xvars <- vapply(attr(Terms, "variables"), deparse2, "")[-1L]
742    if((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[-yvar]
743    if(length(xvars)) {
744	xlev <- lapply(m[xvars], function(x)
745	    if(is.factor(x)) levels(x)
746	    else if(is.character(x)) levels(as.factor(x))) # else NULL
747	xlev[!vapply(xlev, is.null, NA)]
748    }
749}
750
751get_all_vars <- function(formula, data = NULL, ...)
752{
753    if(missing(formula)) {
754	if(!missing(data) && inherits(data, "data.frame") &&
755	   length(attr(data, "terms")) )
756	    return(data)
757	formula <- as.formula(data)
758    }
759    else if(missing(data) && inherits(formula, "data.frame")) {
760	if(length(attr(formula, "terms")))
761	    return(formula)
762	data <- formula
763	formula <- as.formula(data)
764    }
765    formula <- as.formula(formula)
766    if(missing(data))
767	data <- environment(formula)
768    else if (!is.data.frame(data) && !is.environment(data)
769             && !is.null(attr(data, "class")))
770        data <- as.data.frame(data)
771    else if (is.array(data))
772        stop("'data' must be a data.frame, not a matrix or an array")
773
774    ## Explicitly check "data" -- see comment in model.frame.default
775    if (!is.data.frame(data) && !is.environment(data) && !is.list(data)
776        && !is.null(data))
777        stop("'data' must be a data.frame, environment, or list")
778
779    if(!inherits(formula, "terms"))
780	formula <- terms(formula, data = data)
781    env <- environment(formula)
782    rownames <- .row_names_info(data, 0L) #attr(data, "row.names")
783    varnames <- all.vars(formula)
784    variables <- lapply(lapply(varnames, as.name), eval, data, env)
785    if(is.null(rownames) && (resp <- attr(formula, "response")) > 0) {
786        ## see if we can get rownames from the response
787        lhs <- variables[[resp]]
788	rownames <- if(!is.null(d <- dim(lhs)) && length(d) == 2L) {
789			if(is.data.frame(lhs)) .row_names_info(lhs, 0L) else rownames(lhs)
790		    } else names(lhs)
791    }
792    extras <- substitute(list(...))
793    extranames <- names(extras[-1L])
794    extras <- eval(extras, data, env)
795    x <- c(variables, extras)
796    ## protect the unprotected matrices:
797    if(anyM <- any(isM <- vapply(x, function(o) is.matrix(o) && !inherits(o,"AsIs"), NA)))
798        x[isM] <- lapply(x[isM], I)
799    nms.x <- c(varnames, extranames)
800    if(any(vapply(x, is.data.frame, NA)))
801        nms.x <- unlist(lapply(seq_along(x), function(i)
802            if(is.list(x[[i]])) names(x[[i]]) else nms.x[[i]]))
803    x <- as.data.frame(x, optional=TRUE)
804    names(x) <- nms.x
805    if(anyM)
806        x[isM] <- lapply(x[isM], function(o) `class<-`(o, class(o)[class(o) != "AsIs"]))
807    attr(x, "row.names") <-
808        if(is.null(rownames)) .set_row_names(max(vapply(x, NROW, integer(1))))
809        else rownames # might be short form
810    x
811}
812