1lmrob.lar <- function(x, y, control = lmrob.control(), ...)
2{
3  ## LAR : Least Absolute Residuals -- i.e. L_1  M-estimate
4  ## this function is identical to lmRob.lar of the robust package
5
6  ## '...': to be called as  'init(**, mf)' from lmrob()
7  x <- as.matrix(x)
8  p <- ncol(x)
9  n <- nrow(x)
10  stopifnot(p > 0, n >= p, length(y) == n, is.numeric(control$rel.tol))
11  storage.mode(x) <- "double"
12  storage.mode(y) <- "double"
13  bet0 <- 0.773372647623  ## bet0 = pnorm(0.75); only for normalizing scale=SIGMA
14  tmpn <- double(n)
15  tmpp <- double(p)
16
17  z1 <- .Fortran(rllarsbi, ##-> ../src/rllarsbi.f
18                 x,
19                 y,
20                 as.integer(n),
21                 as.integer(p),
22                 as.integer(n),
23                 as.integer(n),
24                 as.double(control$rel.tol),
25                 NIT=integer(1),
26                 K=integer(1),
27                 KODE=integer(1),
28                 SIGMA=double(1),
29                 THETA=tmpn,
30                 RS=tmpn,
31                 SC1=tmpn,
32                 SC2=tmpp,
33                 SC3=tmpp,
34                 SC4=tmpp,
35                 BET0=as.double(bet0))[c("THETA","SIGMA","RS","NIT","KODE")]
36  if (z1[5] > 1)
37      stop("calculations stopped prematurely in rllarsbi\n",
38           "(probably because of rounding errors).")
39  names(z1) <- c("coefficients", "scale", "residuals", "iter", "status")
40  ##           c("THETA",        "SIGMA", "RS",        "NIT",  "KODE")
41  z1$converged <- TRUE
42  length(z1$coefficients) <- p
43  z1
44}
45
46splitFrame <- function(mf, x = model.matrix(mt, mf),
47                       type = c("f","fi", "fii"))
48{
49    mt <- attr(mf, "terms")
50    type <- match.arg(type)
51    x <- as.matrix(x)
52    p <- ncol(x)
53
54    ## --- split categorical and interactions of categorical vars.
55    ##     from continuous variables
56    factors <- attr(mt, "factors")
57    factor.idx <- attr(mt, "dataClasses") %in% c("factor", "character")
58    if (!any(factor.idx)) ## There are no factors
59        return(list(x1.idx = rep.int(FALSE, p), x1=matrix(NA_real_,nrow(x),0L), x2=x))
60    switch(type,
61           ## --- include interactions cat * cont in x1:
62           fi = { factor.asgn <- which(factor.idx %*% factors > 0) },
63           ## --- include also continuous variables that interact with factors in x1:
64           ##     make sure to include interactions of continuous variables
65           ##     interacting with categorical variables, too
66           fii = { factor.asgn <- numeric(0)
67                   factors.cat <- factors
68                   factors.cat[factors.cat > 0] <- 1L ## fix triple+ interactions
69                   factors.cat[, factor.idx %*% factors == 0] <- 0L
70                   for (i in 1:ncol(factors)) {
71                       comp <- factors[,i] > 0
72                       ## if any of the components is a factor: include in x1 and continue
73                       if (any(factor.idx[comp])) {
74                           factor.asgn <- c(factor.asgn, i)
75                       } else {
76                           ## if there is an interaction of this term with a categorical var.
77                           tmp <- colSums(factors.cat[comp,,drop=FALSE]) >= sum(comp)
78                           if (any(tmp)) {
79                               ## if no other continuous variables are involved
80                               ## include in x1 and continue
81                               ## if (identical(factors[!comp, tmp], factors.cat[!comp, tmp]))
82                               if (!all(colSums(factors[!factor.idx & !comp, tmp, drop=FALSE]) > 0))
83                                   factor.asgn <- c(factor.asgn, i)
84                           }
85                       }
86                   } },
87           ## --- do not include interactions cat * cont in x1:
88           f = { factor.asgn <- which(factor.idx %*% factors & !(!factor.idx) %*% factors) },
89           stop("unknown split type"))
90    x1.idx <- attr(x, "assign") %in% c(0, factor.asgn) ## also include intercept
91    names(x1.idx) <- colnames(x)
92
93    ## x1: factors and (depending on type) interactions of / with factors
94    ## x2: continuous variables
95    list(x1 = x[,  x1.idx, drop=FALSE],
96         x1.idx = x1.idx,
97         x2 = x[, !x1.idx, drop=FALSE])
98}
99
100##' Compute M-S-estimator for linear regression ---> ../man/lmrob.M.S.Rd
101lmrob.M.S <- function(x, y, control, mf, split = splitFrame(mf, x, control$split.type)) {
102    if (ncol(split$x1) == 0) {
103      warning("No categorical variables found in model. Reverting to S-estimator.")
104      return(lmrob.S(x, y, control))
105    }
106    if (ncol(split$x2) == 0) {
107        warning("No continuous variables found in model. Reverting to L1-estimator.")
108        return(lmrob.lar(x, y, control))
109    }
110    ## this is the same as in lmrob.S():
111    if (length(seed <- control$seed) > 0) {
112        if (exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) {
113            seed.keep <- get(".Random.seed", envir = .GlobalEnv,
114                             inherits = FALSE)
115            on.exit(assign(".Random.seed", seed.keep, envir = .GlobalEnv))
116        }
117        assign(".Random.seed", seed, envir = .GlobalEnv) ## why not set.seed(seed)
118    }
119    x1 <- split$x1
120    x2 <- split$x2
121    storage.mode(x1) <- "double"
122    storage.mode(x2) <- "double"
123    storage.mode(y) <- "double"
124    c.chi <- .psi.conv.cc(control$psi, control$tuning.chi)
125    traceLev <- as.integer(control$trace.lev)
126    z <- .C(R_lmrob_M_S, ## NB: If you change this, adapt ../inst/xtraR/m-s_fns.R
127	    x1,
128	    x2,
129	    y,
130            res=double(length(y)),
131            n=length(y),
132            p1=ncol(x1),
133            p2=ncol(x2),
134            nResample = as.integer(control$nResample),
135            max_it_scale=as.integer(control$maxit.scale),
136            scale=double(1),
137            b1=double(ncol(x1)),
138            b2=double(ncol(x2)),
139            tuning_chi=as.double(c.chi),
140	    ipsi = .psi2ipsi(control$psi),
141            bb=as.double(control$bb),
142            K_m_s=as.integer(control$k.m_s),
143            max_k=as.integer(control$k.max),
144            rel_tol=as.double(control$rel.tol),
145	    inv_tol=as.double(control$solve.tol),
146	    scale_tol=as.double(control$scale.tol),
147            converged = logical(1),
148            trace_lev = traceLev,
149            orthogonalize=TRUE,
150            subsample=TRUE,
151            descent=TRUE,
152            mts=as.integer(control$mts),
153            ss=.convSs(control$subsampling)
154            )[c("b1","b2", "res","scale", "converged")]
155
156    conv <- z$converged
157    ## FIXME? warning if 'conv' is not ok ??
158    ## coefficients :
159    idx <- split$x1.idx
160    cf <- numeric(length(idx))
161    cf[ idx] <- z$b1
162    cf[!idx] <- z$b2
163    ## set method argument in control
164    control$method <- 'M-S'
165    obj <- list(coefficients = cf, scale = z$scale, residuals = z$res,
166                rweights = lmrob.rweights(z$res, z$scale, control$tuning.chi, control$psi),
167                ## ../src/lmrob.c : m_s_descent() notes that convergence is *not* guaranteed
168                converged = TRUE, descent.conv = conv, control = control)
169    if (control$method %in% control$compute.outlier.stats)
170        obj$ostats <- outlierStats(obj, x, control)
171    obj
172}
173