1lmrob.lar <- function(x, y, control = lmrob.control(), ...) 2{ 3 ## LAR : Least Absolute Residuals -- i.e. L_1 M-estimate 4 ## this function is identical to lmRob.lar of the robust package 5 6 ## '...': to be called as 'init(**, mf)' from lmrob() 7 x <- as.matrix(x) 8 p <- ncol(x) 9 n <- nrow(x) 10 stopifnot(p > 0, n >= p, length(y) == n, is.numeric(control$rel.tol)) 11 storage.mode(x) <- "double" 12 storage.mode(y) <- "double" 13 bet0 <- 0.773372647623 ## bet0 = pnorm(0.75); only for normalizing scale=SIGMA 14 tmpn <- double(n) 15 tmpp <- double(p) 16 17 z1 <- .Fortran(rllarsbi, ##-> ../src/rllarsbi.f 18 x, 19 y, 20 as.integer(n), 21 as.integer(p), 22 as.integer(n), 23 as.integer(n), 24 as.double(control$rel.tol), 25 NIT=integer(1), 26 K=integer(1), 27 KODE=integer(1), 28 SIGMA=double(1), 29 THETA=tmpn, 30 RS=tmpn, 31 SC1=tmpn, 32 SC2=tmpp, 33 SC3=tmpp, 34 SC4=tmpp, 35 BET0=as.double(bet0))[c("THETA","SIGMA","RS","NIT","KODE")] 36 if (z1[5] > 1) 37 stop("calculations stopped prematurely in rllarsbi\n", 38 "(probably because of rounding errors).") 39 names(z1) <- c("coefficients", "scale", "residuals", "iter", "status") 40 ## c("THETA", "SIGMA", "RS", "NIT", "KODE") 41 z1$converged <- TRUE 42 length(z1$coefficients) <- p 43 z1 44} 45 46splitFrame <- function(mf, x = model.matrix(mt, mf), 47 type = c("f","fi", "fii")) 48{ 49 mt <- attr(mf, "terms") 50 type <- match.arg(type) 51 x <- as.matrix(x) 52 p <- ncol(x) 53 54 ## --- split categorical and interactions of categorical vars. 55 ## from continuous variables 56 factors <- attr(mt, "factors") 57 factor.idx <- attr(mt, "dataClasses") %in% c("factor", "character") 58 if (!any(factor.idx)) ## There are no factors 59 return(list(x1.idx = rep.int(FALSE, p), x1=matrix(NA_real_,nrow(x),0L), x2=x)) 60 switch(type, 61 ## --- include interactions cat * cont in x1: 62 fi = { factor.asgn <- which(factor.idx %*% factors > 0) }, 63 ## --- include also continuous variables that interact with factors in x1: 64 ## make sure to include interactions of continuous variables 65 ## interacting with categorical variables, too 66 fii = { factor.asgn <- numeric(0) 67 factors.cat <- factors 68 factors.cat[factors.cat > 0] <- 1L ## fix triple+ interactions 69 factors.cat[, factor.idx %*% factors == 0] <- 0L 70 for (i in 1:ncol(factors)) { 71 comp <- factors[,i] > 0 72 ## if any of the components is a factor: include in x1 and continue 73 if (any(factor.idx[comp])) { 74 factor.asgn <- c(factor.asgn, i) 75 } else { 76 ## if there is an interaction of this term with a categorical var. 77 tmp <- colSums(factors.cat[comp,,drop=FALSE]) >= sum(comp) 78 if (any(tmp)) { 79 ## if no other continuous variables are involved 80 ## include in x1 and continue 81 ## if (identical(factors[!comp, tmp], factors.cat[!comp, tmp])) 82 if (!all(colSums(factors[!factor.idx & !comp, tmp, drop=FALSE]) > 0)) 83 factor.asgn <- c(factor.asgn, i) 84 } 85 } 86 } }, 87 ## --- do not include interactions cat * cont in x1: 88 f = { factor.asgn <- which(factor.idx %*% factors & !(!factor.idx) %*% factors) }, 89 stop("unknown split type")) 90 x1.idx <- attr(x, "assign") %in% c(0, factor.asgn) ## also include intercept 91 names(x1.idx) <- colnames(x) 92 93 ## x1: factors and (depending on type) interactions of / with factors 94 ## x2: continuous variables 95 list(x1 = x[, x1.idx, drop=FALSE], 96 x1.idx = x1.idx, 97 x2 = x[, !x1.idx, drop=FALSE]) 98} 99 100##' Compute M-S-estimator for linear regression ---> ../man/lmrob.M.S.Rd 101lmrob.M.S <- function(x, y, control, mf, split = splitFrame(mf, x, control$split.type)) { 102 if (ncol(split$x1) == 0) { 103 warning("No categorical variables found in model. Reverting to S-estimator.") 104 return(lmrob.S(x, y, control)) 105 } 106 if (ncol(split$x2) == 0) { 107 warning("No continuous variables found in model. Reverting to L1-estimator.") 108 return(lmrob.lar(x, y, control)) 109 } 110 ## this is the same as in lmrob.S(): 111 if (length(seed <- control$seed) > 0) { 112 if (exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) { 113 seed.keep <- get(".Random.seed", envir = .GlobalEnv, 114 inherits = FALSE) 115 on.exit(assign(".Random.seed", seed.keep, envir = .GlobalEnv)) 116 } 117 assign(".Random.seed", seed, envir = .GlobalEnv) ## why not set.seed(seed) 118 } 119 x1 <- split$x1 120 x2 <- split$x2 121 storage.mode(x1) <- "double" 122 storage.mode(x2) <- "double" 123 storage.mode(y) <- "double" 124 c.chi <- .psi.conv.cc(control$psi, control$tuning.chi) 125 traceLev <- as.integer(control$trace.lev) 126 z <- .C(R_lmrob_M_S, ## NB: If you change this, adapt ../inst/xtraR/m-s_fns.R 127 x1, 128 x2, 129 y, 130 res=double(length(y)), 131 n=length(y), 132 p1=ncol(x1), 133 p2=ncol(x2), 134 nResample = as.integer(control$nResample), 135 max_it_scale=as.integer(control$maxit.scale), 136 scale=double(1), 137 b1=double(ncol(x1)), 138 b2=double(ncol(x2)), 139 tuning_chi=as.double(c.chi), 140 ipsi = .psi2ipsi(control$psi), 141 bb=as.double(control$bb), 142 K_m_s=as.integer(control$k.m_s), 143 max_k=as.integer(control$k.max), 144 rel_tol=as.double(control$rel.tol), 145 inv_tol=as.double(control$solve.tol), 146 scale_tol=as.double(control$scale.tol), 147 converged = logical(1), 148 trace_lev = traceLev, 149 orthogonalize=TRUE, 150 subsample=TRUE, 151 descent=TRUE, 152 mts=as.integer(control$mts), 153 ss=.convSs(control$subsampling) 154 )[c("b1","b2", "res","scale", "converged")] 155 156 conv <- z$converged 157 ## FIXME? warning if 'conv' is not ok ?? 158 ## coefficients : 159 idx <- split$x1.idx 160 cf <- numeric(length(idx)) 161 cf[ idx] <- z$b1 162 cf[!idx] <- z$b2 163 ## set method argument in control 164 control$method <- 'M-S' 165 obj <- list(coefficients = cf, scale = z$scale, residuals = z$res, 166 rweights = lmrob.rweights(z$res, z$scale, control$tuning.chi, control$psi), 167 ## ../src/lmrob.c : m_s_descent() notes that convergence is *not* guaranteed 168 converged = TRUE, descent.conv = conv, control = control) 169 if (control$method %in% control$compute.outlier.stats) 170 obj$ostats <- outlierStats(obj, x, control) 171 obj 172} 173