1#### Mallows quasi-likelihood estimator of E. Cantoni and E. Ronchetti (2001) 2#### based originally on Eva Cantoni's S-plus code "robGLM" 3 4## FIXME{MM}: All these expression()s and eval()s -- once were really slick and fast. 5## ----- Nowadays, with 'codetools' and the byte-compiler, they "just don't fit anymore" 6## including those globalVariables() {also in other places!}: 7globalVariables(c("residP", "residPS", "dmu.deta", "snu"), add=TRUE) 8 9##' @title 10##' @param wts a character string \dQuote{weights.on.x} specifying how weights should be computed 11##' *or* a numeric vector of final weights in which case nothing is computed. 12##' @param X n x p design matrix aka model.matrix() 13##' @param intercept logical, if true, X[,] has an intercept column which should 14##' not be used for rob.wts 15##' @return n-vector of non-negative weights 16##' @author Martin Maechler 17robXweights <- function(wts, X, intercept=TRUE) { 18 stopifnot(length(d <- dim(X)) == 2, is.logical(intercept)) 19 nobs <- d[1] 20 if(d[2]) { ## X has >= 1 column, and hence there *are* coefficients in the end 21 if(is.character(wts)){ 22 switch(wts, 23 "none" = rep.int(1, nobs), 24 "hat" = wts_HiiDist(X)^2, # = (1 - Hii)^2 25 "robCov" = wts_RobDist(X, intercept, covFun = MASS::cov.rob), 26 ## MCD is currently problematic: many singular subsamples 27 "covMcd" = wts_RobDist(X, intercept, covFun = covMcd), 28 stop("Weighting method", sQuote(wts), 29 " is not implemented")) 30 } 31 ## (new; 2013-07-05; -> robustbase 0.9-9) 32 else if(is.list(wts)) { 33 if(length(wts) == 1 && is.function(covF <- wts[[1]])) 34 wts_RobDist(X, intercept, covFun = covF) 35 else stop("if a list, weights.on.x must contain a covariance function such as covMcd()") 36 } 37 else if(is.function(wts)) { 38 wts(X, intercept) 39 } 40 else { 41 if(!is.numeric(wts) || length(wts) != nobs) 42 ## FIXME: "when not a string, a list, or a function, then ..." 43 stop(gettextf("weights.on.x needs %d none-negative values", 44 nobs), domain=NA) 45 if(any(wts) < 0) 46 stop("All weights.on.x must be none negative") 47 } 48 } 49 else ## p = ncoef == 0 {maybe intercept, but that's not relevant here} 50 rep.int(1,nobs) 51} 52 53 54##' @param intercept logical, if true, X[,] has an intercept column which should 55##' not be used for rob.wts 56glmrobMqle <- 57 function(X, y, weights = NULL, start = NULL, offset = NULL, 58 family, weights.on.x = "none", 59 control = glmrobMqle.control(), intercept = TRUE, 60 trace = FALSE) 61{ 62 ## To DO: 63 ## o weights are not really implemented as *extra* user weights; rather as "glm-weights" 64 ## o offset is not fully implemented (really? -- should have test case!) 65 66 if(!is.matrix(X)) X <- as.matrix(X) 67## never used: 68## xnames <- dimnames(X)[[2]] 69## ynames <- if (is.matrix(y)) rownames(y) else names(y) 70 nobs <- NROW(y) 71 stopifnot(nobs == nrow(X)) 72 if (is.null(weights)) 73 weights <- rep.int(1, nobs) 74 else if(any(weights <= 0)) 75 stop("All weights must be positive") 76 if (is.null(offset)) 77 offset <- rep.int(0, nobs) else if(!all(offset==0)) 78 warning("'offset' not fully implemented") 79 variance <- family$variance 80 linkinv <- family$linkinv 81 if (!is.function(variance) || !is.function(linkinv)) 82 stop("illegal 'family' argument") 83 mu.eta <- family$mu.eta 84 if (is.null(valideta <- family$valideta)) valideta <- function(eta) TRUE 85 if (is.null(validmu <- family$validmu)) validmu <- function(mu) TRUE 86 87 ncoef <- ncol(X) 88 w.x <- robXweights(weights.on.x, X=X, intercept=intercept) 89 90### Initializations 91 stopifnot(control$maxit >= 1, (tcc <- control$tcc) >= 0) 92 93 ## note that etastart and mustart are used to make 'family$initialize' run 94 etastart <- NULL; mustart <- NULL 95 ## note that 'weights' are used and set by binomial()$initialize ! 96 eval(family$initialize) ## --> n, mustart, y and weights (=ni) 97 ni <- as.vector(weights)# dropping attributes for computation 98 ## 99 if(is.null(start)) 100 start <- glm.fit(x = X, y = y, weights = weights, offset = offset, 101 family = family)$coefficients 102 if(any(ina <- is.na(start))) { 103 cat("initial start 'theta' has NA's; eliminating columns X[, j];", 104 "j = ", pasteK(which(ina)),"\n") 105 theta.na <- start 106 X <- X[, !ina, drop = FALSE] 107 start <- glm.fit(x = X, y = y, weights = weights, offset = offset, 108 family = family)$coefficients 109 if(any(is.na(start))) 110 stop("start 'theta' has still NA's .. badly singular x\n") 111 ## FIXME 112 ncoef <- length(start) 113 } 114 115 thetaOld <- theta <- as.vector(start) # as.v*(): dropping attributes 116 eta <- as.vector(X %*% theta) 117 mu <- linkinv(eta) # mu estimates pi (in [0,1]) at the binomial model 118 if (!(validmu(mu) && valideta(eta))) 119 stop("Cannot find valid starting values: You need help") 120 ## 121 switch(family$family, 122 "binomial" = { 123 Epsi.init <- EpsiBin.init 124 Epsi <- EpsiBin 125 EpsiS <- EpsiSBin 126 Epsi2 <- Epsi2Bin 127 phiEst <- phiEst.cl <- 1 128 }, 129 "poisson" = { 130 Epsi.init <- EpsiPois.init 131 Epsi <- EpsiPois 132 EpsiS <- EpsiSPois 133 Epsi2 <- Epsi2Pois 134 phiEst <- phiEst.cl <- expression({1}) 135 }, 136 "gaussian" = { 137 Epsi.init <- EpsiGaussian.init 138 Epsi <- EpsiGaussian 139 EpsiS <- EpsiSGaussian 140 Epsi2 <- Epsi2Gaussian 141 phiEst.cl <- phiGaussianEst.cl 142 phiEst <- phiGaussianEst 143 }, 144 "Gamma" = { ## added by ARu 145 Epsi.init <- EpsiGamma.init 146 Epsi <- EpsiGamma 147 EpsiS <- EpsiSGamma 148 Epsi2 <- Epsi2Gamma 149 phiEst.cl <- phiGammaEst.cl 150 phiEst <- phiGammaEst 151 }, 152 ## else 153 stop(gettextf("family '%s' not yet implemented", family$family), 154 domain=NA) 155 ) 156 157 sV <- NULL # FIXME workaround for codetools 158 159 comp.V.resid <- expression({ 160 Vmu <- variance(mu) 161 if (any(is.na(Vmu))) stop("NAs in V(mu)") 162 if (any(Vmu == 0)) stop("0s in V(mu)") 163 sVF <- sqrt(Vmu) # square root of variance function 164 residP <- (y - mu)* sni/sVF # Pearson residuals 165 }) 166 167 comp.scaling <- expression({ 168 sV <- sVF * sqrt(phi) 169 residPS <- residP/sqrt(phi) # scaled Pearson residuals 170 }) 171 172 comp.Epsi.init <- expression({ 173 ## d mu / d eta : 174 dmu.deta <- mu.eta(eta) 175 if (any(is.na(dmu.deta))) stop("NAs in d(mu)/d(eta)") 176 ## "Epsi init" : 177 H <- floor(mu*ni - tcc* sni*sV) 178 K <- floor(mu*ni + tcc* sni*sV) 179 eval(Epsi.init) 180 }) 181 182 183### Iterations 184 185 if(trace && ncoef) { 186 cat("Initial theta: \n") 187 local({names(theta) <- names(start); print(theta) }) 188 189 digits <- max(1, getOption("digits") - 5) 190 w.th.1 <- 6+digits # width of one number; need 8 for 2 digits: "-4.8e-11" 191 width.th <- ncoef*(w.th.1 + 1) - 1 192 cat(sprintf("%3s | %*s | %12s\n", 193 "it", width.th, "d{theta}", "rel.change")) 194 mFormat <- function(x, wid) { 195 r <- formatC(x, digits=digits, width=wid) 196 sprintf("%*s", wid, sub("e([+-])0","e\\1", r)) 197 } 198 } 199 200 sni <- sqrt(ni) 201 eval(comp.V.resid) #-> (Vmu, sVF, residP) 202 phi <- eval(phiEst.cl) 203 ## Determine the range of phi values based on the distribution of |residP| 204 Rphi <- c(1e-12, 3*median(abs(residP)))^2 205 conv <- FALSE 206 if(ncoef) for (nit in 1:control$maxit) { 207 eval(comp.scaling) #-> (sV, residPS) 208 eval(comp.Epsi.init) 209 ## Computation of alpha and (7) using matrix column means: 210 cpsi <- pmax.int(-tcc, pmin.int(residPS,tcc)) - eval(Epsi) 211 EEq <- colMeans(cpsi * w.x * sni/sV * dmu.deta * X) 212 ## 213 ## Solve 1/n (t(X) %*% B %*% X) %*% delta.coef = EEq 214 DiagB <- eval(EpsiS) /(sni*sV) * w.x * (ni*dmu.deta)^2 215 if(any(n0 <- ni == 0)) DiagB[n0] <- 0 # instead of NaN 216 Dtheta <- solve(crossprod(X, DiagB*X)/nobs, EEq) 217 if (any(!is.finite(Dtheta))) { 218 warning("Non-finite coefficients at iteration ", nit) 219 break 220 } 221 theta <- thetaOld + Dtheta 222 eta <- as.vector(X %*% theta) + offset 223 mu <- linkinv(eta) 224 225 ## estimation of the dispersion parameter 226 eval(comp.V.resid) 227 phi <- eval(phiEst) 228 229 ## Check convergence: relative error < tolerance 230 relE <- sqrt(sum(Dtheta^2)/max(1e-20, sum(thetaOld^2))) 231 conv <- relE <= control$acc 232 if(trace) { 233 cat(sprintf("%3d | %*s | %12g\n", nit, width.th, 234 paste(mFormat(Dtheta, w.th.1), 235 collapse=" "), relE)) 236 } 237 if(conv) 238 break 239 thetaOld <- theta 240 } ## end of iteration 241 else { ## ncoef == 0 242 conv <- TRUE 243 nit <- 0 244 } 245 if (!conv) 246 warning("Algorithm did not converge") 247 248 eps <- 10 * .Machine$double.eps 249 switch(family$family, 250 "binomial" = { 251 if (any(mu/weights > 1 - eps) || any(mu/weights < eps)) 252 warning("fitted probabilities numerically 0 or 1 occurred") 253 }, 254 "poisson" = { 255 if (any(mu < eps)) 256 warning("fitted rates numerically 0 occurred") 257 }) 258 259 eval(comp.V.resid) #-> (Vmu, sVF, residP) 260 eval(comp.scaling) #-> (sV, residPS) 261 262 ## Estimated asymptotic covariance of the robust estimator 263 if(ncoef) { 264 eval(comp.Epsi.init) 265 alpha <- colMeans(eval(Epsi) * w.x * sni/sV * dmu.deta * X) 266 DiagA <- eval(Epsi2) / (ni*sV^2)* w.x^2* (ni*dmu.deta)^2 267 matQ <- crossprod(X, DiagA*X)/nobs - tcrossprod(alpha, alpha) 268 269 DiagB <- eval(EpsiS) / (sni*sV)* w.x * (ni*dmu.deta)^2 270 if(any(n0 <- ni == 0)) DiagB[n0] <- 0 # instead of NaN 271 matM <- crossprod(X, DiagB*X)/nobs 272 matMinv <- solve(matM) 273 asCov <- matMinv %*% matQ %*% matMinv / nobs 274 } else { ## ncoef == 0 275 matM <- matQ <- asCov <- matrix(NA_real_, 0,0) 276 } 277 278 if(any(ina)) {# put NA's back, extending theta[] to "original length" 279 ok <- !ina 280 theta.na[ok] <- theta ; theta <- theta.na 281 ## also extend the "p x p" matrices with NA's -- 282 ##No : lm() and glm() also do *not* do this 283 ##No p <- length(theta) 284 ##No nm <- names(theta) 285 ##No M <- matrix(NA_real_, p, p, dimnames = list(nm,nm)) 286 ##No Mn <- M; Mn[ok, ok] <- asCov ; asCov <- Mn 287 ##No Mn <- M; Mn[ok, ok] <- matM ; matM <- Mn 288 ##No Mn <- M; Mn[ok, ok] <- matQ ; matQ <- Mn 289 } 290 291 w.r <- pmin(1, tcc/abs(residPS)) 292 names(mu) <- names(eta) <- names(residPS) # re-add after computation 293 list(coefficients = theta, residuals = residP, # s.resid = residPS, 294 fitted.values = mu, 295 w.r = w.r, w.x = w.x, ni = ni, dispersion = phi, cov = asCov, 296 matM = matM, matQ = matQ, tcc = tcc, family = family, 297 linear.predictors = eta, deviance = NULL, iter = nit, y = y, 298 converged = conv) 299} 300 301 302## NB: X is model.matrix() aka design matrix used; typically including an intercept 303wts_HiiDist <- function(X) { 304 ## Hii := diag( tcrossprod( qr.Q(qr(X)) ) ) == rowSums( qr.Q(qr(X)) ^2 ) : 305 x <- qr(X) 306 Hii <- rowSums(qr.qy(x, diag(1, nrow = NROW(X), ncol = x$rank))^2) 307 (1-Hii) 308} 309 310##' Compute robustness weights depending on the design 'X' only, 311##' using robust(ified) Mahalanobis distances. 312##' This is an auxiliary function for robXweights() activated typically by 313##' weights.on.x = "..." from regression functions 314##' @title Compute Robust Weights based on Robustified Mahalanobis - Distances 315##' @param X n x p numeric matrix 316##' @param intercept logical; should be true iff X[,1] is a column with the intercept 317##' @param covFun function for computing a \bold{robust} covariance matrix; 318##' e.g., MASS::cov.rob(), or covMcd(). 319##' @return n-vector of non-negative weights. 320##' @author Martin Maechler 321wts_RobDist <- function(X, intercept, covFun) 322{ 323 D2 <- if(intercept) { ## X[,] has intercept column which should not be used for rob.wts 324 X <- X[, -1, drop=FALSE] 325 Xrc <- covFun(X) 326 mahalanobis(X, center = Xrc$center, cov = Xrc$cov) 327 } 328 else { ## X[,] can be used directly 329 if(!is.matrix(X)) X <- as.matrix(X) 330 Xrc <- covFun(X) 331 S <- Xrc$cov + tcrossprod(Xrc$center) 332 mahalanobis(X, center = FALSE, cov = S) 333 } 334 p <- ncol(X) ## E[chi^2_p] = p 335 1/sqrt(1+ pmax.int(0, 8*(D2 - p)/sqrt(2*p))) 336} 337 338 339## MM: 'acc' seems a misnomer to me, but it's inherited from MASS::rlm 340glmrobMqle.control <- 341 function(acc = 1e-04, test.acc = "coef", maxit = 50, tcc = 1.345) 342{ 343 if (!is.numeric(acc) || acc <= 0) 344 stop("value of acc must be > 0") 345 if (test.acc != "coef") 346 stop("Only 'test.acc = \"coef\"' is currently implemented") 347 ## if (!(any(test.vec == c("coef", "resid")))) 348 ## stop("invalid argument for test.acc") 349 if (!is.numeric(maxit) || maxit <= 0) 350 stop("maximum number of iterations must be > 0") 351 if (!is.numeric(tcc) || tcc <= 0) 352 stop("value of the tuning constant c (tcc) must be > 0") 353 list(acc = acc, test.acc = test.acc, maxit = maxit, tcc = tcc) 354} 355 356 357### ----------------- E[ f(psi ( X ) ) ] ------------------------------- 358 359## MM: These are now expressions instead of functions 360## since 'Epsi*' and 'Epsi2*' are *always* called together 361## and 'EpsiS*' when called is always after the other two 362## ==> do common computations only once in Epsi*.init ==> more efficient! 363## 364## FIXME(2): Some of these fail when Huber's "c", 'tcc' is = +Inf 365## ----- --> ../../robGLM1/R/rglm.R 366 367## FIXME: Do use a "robFamily", a *list* of functions 368## ------ which all have the same environment 369## ===> can get same efficiency as expressions, but better OOP 370 371 372### --- Poisson -- family --- 373 374EpsiPois.init <- expression( 375{ 376 dpH <- dpois(H, mu); dpH1 <- dpois(H-1, mu) 377 dpK <- dpois(K, mu); dpK1 <- dpois(K-1, mu) 378 pHm1 <- ppois(H-1, mu) ; pH <- pHm1 + dpH # = ppois(H,*) 379 pKm1 <- ppois(K-1, mu) ; pK <- pKm1 + dpK # = ppois(K,*) 380 E2f <- mu*(dpH1 - dpH - dpK1 + dpK) + pKm1 - pHm1 381}) 382 383EpsiPois <- expression( 384{ 385 tcc*(1 - pK - pH) + mu*(dpH - dpK)/sV 386}) 387 388Epsi2Pois <- expression( 389{ 390 ## Calculation of E(psi^2) for the diagonal elements of A in matrix Q: 391 tcc^2 * (pH + 1 - pK) + E2f 392}) 393 394EpsiSPois <- expression( 395{ 396 ## Calculation of E(psi*s) for the diagonal elements of B in the 397 ## expression matrix M = 1/n t(X) %*% B %*% X: 398 tcc*(dpH + dpK) + E2f / sV 399}) 400 401 402### --- Binomial -- family --- 403 404EpsiBin.init <- expression({ 405 pK <- pbinom(K, ni, mu) 406 pH <- pbinom(H, ni, mu) 407 pKm1 <- pbinom(K-1, pmax.int(0, ni-1), mu) 408 pHm1 <- pbinom(H-1, pmax.int(0, ni-1), mu) 409 pKm2 <- pbinom(K-2, pmax.int(0, ni-2), mu) 410 pHm2 <- pbinom(H-2, pmax.int(0, ni-2), mu) 411 412 ## QlV = Q / V, where Q = Sum_j (j - mu_i)^2 * P[Y_i = j] 413 ## i.e. Q = Sum_j j(j-1)* P[.] + 414 ## (1- 2*mu_i) Sum_j j * P[.] + 415 ## mu_i^2 Sum_j P[.] 416 QlV <- mu/Vmu*(mu*ni*(pK-pH) + 417 (1 - 2*mu*ni) * ifelse(ni == 1, (H <= 0)*(K >= 1), pKm1 - pHm1) + 418 (ni - 1) * mu * ifelse(ni == 2, (H <= 1)*(K >= 2), pKm2 - pHm2)) 419}) 420 421EpsiBin <- expression( 422{ 423 tcc*(1 - pK - pH) + 424 ifelse(ni == 1, (- (H < 0) + (K >= 1) ) * sV, 425 (pKm1 - pHm1 - pK + pH) * mu * sni/sV) 426}) 427 428Epsi2Bin <- expression( 429{ 430 ## Calculation of E(psi^2) for the diagonal elements of A in matrix Q: 431 tcc^2*(pH + 1 - pK) + QlV 432}) 433 434EpsiSBin <- expression( 435{ 436 ## Calculation of E(psi*s) for the diagonal elements of B in the 437 ## expression matrix M = (X' B X)/n 438 mu/Vmu*(tcc*(pH - ifelse(ni == 1, H >= 1, pHm1)) + 439 tcc*(pK - ifelse(ni == 1, K > 0, pKm1))) + ifelse(ni == 0, 0, QlV / (sni*sV)) 440}) 441 442### --- Gaussian -- family --- 443 444EpsiGaussian.init <- expression({ 445 dc <- dnorm(tcc) 446 pc <- pnorm(tcc) 447}) 448 449EpsiGaussian <- expression( 0 ) 450 451EpsiSGaussian <- expression( 2*pc-1 ) 452 453Epsi2Gaussian <- expression( 2*tcc^2*(1-pc)-2*tcc*dc+2*pc-1 ) 454 455phiGaussianEst.cl <- expression( 456{ 457 ## Classical estimation of the dispersion paramter phi = sigma^2 458 sum(((y - mu)/mu)^2)/(nobs - ncoef) 459}) 460 461phiGaussianEst <- expression( 462{ 463 sphi <- mad(residP, center=0)^2 464}) 465 466### --- Gamma -- family --- 467 468Gmn <- function(t, nu) { 469 ## Gm corrresponds to G * nu^((nu-1)/2) / Gamma(nu) 470 snu <- sqrt(nu) 471 snut <- snu+t 472 r <- numeric(length(snut)) 473 ok <- snut > 0 474 r[ok] <- { 475 nu <- nu[ok]; snu <- snu[ok]; snut <- snut[ok] 476 exp((nu-1)/2*log(nu) - lgamma(nu) - snu*snut + nu*log(snut)) 477 } 478 r 479} 480 481EpsiGamma.init <- expression({ 482 483 nu <- 1/phi ## form parameter nu 484 snu <- 1/sqrt(phi) ## == sqrt (nu) 485 486 pPtc <- pgamma(snu + c(-tcc,tcc), shape=nu, rate=snu) 487 pMtc <- pPtc[1] 488 pPtc <- pPtc[2] 489 490 aux2 <- tcc*snu 491 GLtcc <- Gmn(-tcc,nu) 492 GUtcc <- Gmn( tcc,nu) 493}) 494 495EpsiGamma <- expression( tcc*(1-pPtc-pMtc) + GLtcc - GUtcc ) 496 497EpsiSGamma <- expression( ((GLtcc - GUtcc) + snu*(pPtc-pMtc))/mu ) 498 499Epsi2Gamma <- expression({ 500 (tcc^2*(pMtc+1-pPtc) + (pPtc-pMtc) + 501 (GLtcc*(1-aux2) - GUtcc*(1+aux2))/snu ) 502}) 503 504 505phiGammaEst.cl <- expression( 506{ 507 ## Classical moment estimation of the dispersion parameter phi 508 sum(((y - mu)/mu)^2)/(nobs-ncoef) 509}) 510 511phiGammaEst <- expression( 512{ 513 ## robust estimation of the dispersion parameter by 514 ## Huber's proposal 2 515 sphi <- uniroot(Huberprop2, interval=Rphi, 516 ns.resid=residP, mu=mu, Vmu=Vmu, tcc=tcc)$root 517}) 518 519Huberprop2 <- function(phi, ns.resid, mu, Vmu, tcc) 520{ 521 eval(EpsiGamma.init) 522 compEpsi2 <- eval(Epsi2Gamma) 523 nobs <- length(mu) 524 ## return h := 525 sum(pmax.int(-tcc, pmin.int(ns.resid*snu, tcc))^2) - nobs*compEpsi2 526} 527 528if(FALSE) ## no-eval version 529Huberprop2 <- function(phi, ns.resid, mu, Vmu, tcc) 530{ 531 nobs <- length(mu) 532 nu <- 1/phi ## form parameter nu 533 snu <- 1/sqrt(phi) ## sqrt (nu) 534 pPtc <- pgamma(snu + c(-tcc,tcc), shape=nu, rate=snu) 535 pMtc <- pPtc[1] 536 pPtc <- pPtc[2] 537 538 ts <- tcc*snu 539 GLtcc <- Gmn(-tcc,nu) *(1-ts)/snu 540 GUtcc <- Gmn( tcc,nu) *(1+ts)/snu 541 ## 542 compEpsi2 <- tcc^2 + (pPtc - pMtc)*(1-tcc^2) + GLtcc - GUtcc 543 ## return h := 544 sum(pmax.int(-tcc, pmin.int(ns.resid*snu, tcc))^2) - nobs*compEpsi2 545} 546