1#### Mallows quasi-likelihood estimator of E. Cantoni and E. Ronchetti (2001)
2#### based originally on Eva Cantoni's S-plus code "robGLM"
3
4## FIXME{MM}: All these expression()s  and  eval()s -- once were really slick and fast.
5## -----  Nowadays, with 'codetools' and the byte-compiler, they "just don't fit anymore"
6## including those globalVariables() {also in other places!}:
7globalVariables(c("residP", "residPS", "dmu.deta", "snu"), add=TRUE)
8
9##' @title
10##' @param wts a character string \dQuote{weights.on.x} specifying how weights should be computed
11##'            *or* a numeric vector of final weights in which case nothing is computed.
12##' @param X  n x p  design matrix aka model.matrix()
13##' @param intercept logical, if true, X[,] has an intercept column which should
14##'                  not be used for rob.wts
15##' @return n-vector of non-negative weights
16##' @author Martin Maechler
17robXweights <- function(wts, X, intercept=TRUE) {
18    stopifnot(length(d <- dim(X)) == 2, is.logical(intercept))
19    nobs <- d[1]
20    if(d[2]) { ## X has >= 1 column, and hence there *are* coefficients in the end
21        if(is.character(wts)){
22	    switch(wts,
23		   "none" = rep.int(1, nobs),
24		   "hat" = wts_HiiDist(X)^2, # = (1 - Hii)^2
25		   "robCov" = wts_RobDist(X, intercept, covFun = MASS::cov.rob),
26		   ## MCD is currently problematic: many singular subsamples
27		   "covMcd" = wts_RobDist(X, intercept, covFun = covMcd),
28		   stop("Weighting method", sQuote(wts),
29			" is not implemented"))
30	}
31	## (new; 2013-07-05; -> robustbase 0.9-9)
32	else if(is.list(wts)) {
33	    if(length(wts) == 1 && is.function(covF <- wts[[1]]))
34		wts_RobDist(X, intercept, covFun = covF)
35	    else stop("if a list, weights.on.x must contain a covariance function such as covMcd()")
36	}
37	else if(is.function(wts)) {
38	    wts(X, intercept)
39	}
40	else {
41	    if(!is.numeric(wts) || length(wts) != nobs)
42		## FIXME: "when not a string, a list, or a function, then ..."
43		stop(gettextf("weights.on.x needs %d none-negative values",
44			      nobs), domain=NA)
45            if(any(wts) < 0)
46                stop("All weights.on.x must be none negative")
47        }
48    }
49    else ## p = ncoef == 0 {maybe intercept, but that's not relevant here}
50        rep.int(1,nobs)
51}
52
53
54##' @param intercept logical, if true, X[,] has an intercept column which should
55##'                  not be used for rob.wts
56glmrobMqle <-
57    function(X, y, weights = NULL, start = NULL, offset = NULL,
58	     family, weights.on.x = "none",
59	     control = glmrobMqle.control(), intercept = TRUE,
60             trace = FALSE)
61{
62    ## To DO:
63    ## o weights are not really implemented as *extra* user weights; rather as "glm-weights"
64    ## o offset is not fully implemented (really? -- should have test case!)
65
66    if(!is.matrix(X)) X <- as.matrix(X)
67## never used:
68##     xnames <- dimnames(X)[[2]]
69##     ynames <- if (is.matrix(y)) rownames(y) else names(y)
70    nobs <- NROW(y)
71    stopifnot(nobs == nrow(X))
72    if (is.null(weights))
73	weights <- rep.int(1, nobs)
74    else if(any(weights <= 0))
75	stop("All weights must be positive")
76    if (is.null(offset))
77	offset <- rep.int(0, nobs) else if(!all(offset==0))
78	    warning("'offset' not fully implemented")
79    variance <- family$variance
80    linkinv <- family$linkinv
81    if (!is.function(variance) || !is.function(linkinv))
82	stop("illegal 'family' argument")
83    mu.eta <- family$mu.eta
84    if (is.null(valideta <- family$valideta)) valideta <- function(eta) TRUE
85    if (is.null(validmu	 <- family$validmu))  validmu <-  function(mu) TRUE
86
87    ncoef <- ncol(X)
88    w.x <- robXweights(weights.on.x, X=X, intercept=intercept)
89
90### Initializations
91    stopifnot(control$maxit >= 1, (tcc <- control$tcc) >= 0)
92
93    ## note that etastart and mustart are used to make 'family$initialize' run
94    etastart <- NULL;  mustart <- NULL
95    ## note that 'weights' are used and set by binomial()$initialize !
96    eval(family$initialize) ## --> n, mustart, y and weights (=ni)
97    ni <- as.vector(weights)# dropping attributes for computation
98    ##
99    if(is.null(start))
100	start <- glm.fit(x = X, y = y, weights = weights, offset = offset,
101			 family = family)$coefficients
102    if(any(ina <- is.na(start))) {
103	cat("initial start 'theta' has NA's; eliminating columns X[, j];",
104	    "j = ", pasteK(which(ina)),"\n")
105	theta.na <- start
106	X <- X[, !ina, drop = FALSE]
107	start <- glm.fit(x = X, y = y, weights = weights, offset = offset,
108			 family = family)$coefficients
109	if(any(is.na(start)))
110	    stop("start 'theta' has still NA's .. badly singular x\n")
111	## FIXME
112	ncoef <- length(start)
113    }
114
115    thetaOld <- theta <- as.vector(start) # as.v*(): dropping attributes
116    eta <- as.vector(X %*% theta)
117    mu <- linkinv(eta) # mu estimates pi (in [0,1]) at the binomial model
118    if (!(validmu(mu) && valideta(eta)))
119	stop("Cannot find valid starting values: You need help")
120    ##
121    switch(family$family,
122	   "binomial" = {
123	       Epsi.init <- EpsiBin.init
124	       Epsi <- EpsiBin
125	       EpsiS <- EpsiSBin
126	       Epsi2 <- Epsi2Bin
127               phiEst <- phiEst.cl <- 1
128	   },
129	   "poisson" = {
130	       Epsi.init <- EpsiPois.init
131	       Epsi <- EpsiPois
132	       EpsiS <- EpsiSPois
133	       Epsi2 <- Epsi2Pois
134               phiEst <- phiEst.cl <- expression({1})
135	   },
136           "gaussian" = {
137               Epsi.init <- EpsiGaussian.init
138               Epsi <- EpsiGaussian
139               EpsiS <- EpsiSGaussian
140               Epsi2 <- Epsi2Gaussian
141               phiEst.cl <- phiGaussianEst.cl
142               phiEst <- phiGaussianEst
143           },
144          "Gamma" = { ## added by ARu
145             Epsi.init <- EpsiGamma.init
146             Epsi <- EpsiGamma
147             EpsiS <- EpsiSGamma
148             Epsi2 <- Epsi2Gamma
149             phiEst.cl <- phiGammaEst.cl
150             phiEst <- phiGammaEst
151           },
152           ## else
153           stop(gettextf("family '%s' not yet implemented", family$family),
154                domain=NA)
155	   )
156
157    sV <- NULL # FIXME workaround for codetools
158
159    comp.V.resid <- expression({
160	Vmu <- variance(mu)
161	if (any(is.na(Vmu)))  stop("NAs in V(mu)")
162	if (any(Vmu == 0))    stop("0s in V(mu)")
163	sVF <- sqrt(Vmu)   # square root of variance function
164	residP <- (y - mu)* sni/sVF  # Pearson residuals
165    })
166
167    comp.scaling <- expression({
168      sV <- sVF * sqrt(phi)
169      residPS <- residP/sqrt(phi) # scaled Pearson residuals
170    })
171
172    comp.Epsi.init <- expression({
173	## d mu / d eta :
174	dmu.deta <- mu.eta(eta)
175	if (any(is.na(dmu.deta))) stop("NAs in d(mu)/d(eta)")
176	## "Epsi init" :
177	H <- floor(mu*ni - tcc* sni*sV)
178	K <- floor(mu*ni + tcc* sni*sV)
179	eval(Epsi.init)
180    })
181
182
183### Iterations
184
185    if(trace && ncoef) {
186        cat("Initial theta: \n")
187        local({names(theta) <- names(start); print(theta) })
188
189        digits <- max(1, getOption("digits") - 5)
190	w.th.1 <- 6+digits # width of one number; need 8 for 2 digits: "-4.8e-11"
191	width.th <- ncoef*(w.th.1 + 1) - 1
192	cat(sprintf("%3s | %*s | %12s\n",
193		    "it", width.th, "d{theta}", "rel.change"))
194	mFormat <- function(x, wid) {
195	    r <- formatC(x, digits=digits, width=wid)
196	    sprintf("%*s", wid, sub("e([+-])0","e\\1", r))
197	}
198    }
199
200    sni <- sqrt(ni)
201    eval(comp.V.resid) #-> (Vmu, sVF, residP)
202    phi <- eval(phiEst.cl)
203    ## Determine the range of phi values based on the distribution of |residP|
204    Rphi <- c(1e-12, 3*median(abs(residP)))^2
205    conv <- FALSE
206    if(ncoef) for (nit in 1:control$maxit) {
207        eval(comp.scaling) #-> (sV, residPS)
208        eval(comp.Epsi.init)
209	## Computation of alpha and (7) using matrix column means:
210	cpsi <- pmax.int(-tcc, pmin.int(residPS,tcc)) - eval(Epsi)
211	EEq <- colMeans(cpsi * w.x * sni/sV * dmu.deta * X)
212	##
213	## Solve  1/n (t(X) %*% B %*% X) %*% delta.coef	  = EEq
214	DiagB <- eval(EpsiS) /(sni*sV) * w.x * (ni*dmu.deta)^2
215        if(any(n0 <- ni == 0)) DiagB[n0] <- 0 # instead of NaN
216	Dtheta <- solve(crossprod(X, DiagB*X)/nobs, EEq)
217	if (any(!is.finite(Dtheta))) {
218	    warning("Non-finite coefficients at iteration ", nit)
219	    break
220	}
221	theta <- thetaOld + Dtheta
222	eta <- as.vector(X %*% theta) + offset
223	mu <- linkinv(eta)
224
225        ## estimation of the dispersion parameter
226        eval(comp.V.resid)
227        phi <- eval(phiEst)
228
229	## Check convergence: relative error < tolerance
230	relE <- sqrt(sum(Dtheta^2)/max(1e-20, sum(thetaOld^2)))
231	conv <- relE <= control$acc
232        if(trace) {
233            cat(sprintf("%3d | %*s | %12g\n", nit, width.th,
234                        paste(mFormat(Dtheta, w.th.1),
235                              collapse=" "), relE))
236        }
237	if(conv)
238	    break
239	thetaOld <- theta
240    } ## end of iteration
241    else { ## ncoef == 0
242	conv <- TRUE
243	nit <- 0
244    }
245    if (!conv)
246	warning("Algorithm did not converge")
247
248    eps <- 10 * .Machine$double.eps
249    switch(family$family,
250	   "binomial" = {
251	       if (any(mu/weights > 1 - eps) || any(mu/weights < eps))
252		   warning("fitted probabilities numerically 0 or 1 occurred")
253	   },
254	   "poisson" = {
255	       if (any(mu < eps))
256		   warning("fitted rates numerically 0 occurred")
257	   })
258
259    eval(comp.V.resid) #-> (Vmu, sVF, residP)
260    eval(comp.scaling) #-> (sV, residPS)
261
262    ## Estimated asymptotic covariance of the robust estimator
263    if(ncoef) {
264	eval(comp.Epsi.init)
265	alpha <- colMeans(eval(Epsi) * w.x * sni/sV * dmu.deta * X)
266	DiagA <- eval(Epsi2) / (ni*sV^2)* w.x^2* (ni*dmu.deta)^2
267	matQ  <- crossprod(X, DiagA*X)/nobs - tcrossprod(alpha, alpha)
268
269	DiagB <- eval(EpsiS) / (sni*sV)* w.x * (ni*dmu.deta)^2
270        if(any(n0 <- ni == 0)) DiagB[n0] <- 0 # instead of NaN
271	matM <- crossprod(X, DiagB*X)/nobs
272	matMinv <- solve(matM)
273	asCov <-  matMinv %*% matQ %*% matMinv / nobs
274    } else { ## ncoef == 0
275	matM <- matQ <- asCov <- matrix(NA_real_, 0,0)
276    }
277
278    if(any(ina)) {# put NA's back, extending theta[] to "original length"
279	ok <- !ina
280	theta.na[ok] <- theta ; theta <- theta.na
281	## also extend the "p x p" matrices with NA's --
282	##No : lm() and glm() also do *not* do this
283	##No  p <- length(theta)
284	##No  nm <- names(theta)
285	##No  M <- matrix(NA_real_, p, p, dimnames = list(nm,nm))
286	##No  Mn <- M; Mn[ok, ok] <- asCov ; asCov <- Mn
287	##No  Mn <- M; Mn[ok, ok] <- matM  ; matM  <- Mn
288	##No  Mn <- M; Mn[ok, ok] <- matQ  ; matQ  <- Mn
289    }
290
291    w.r <- pmin(1, tcc/abs(residPS))
292    names(mu) <- names(eta) <- names(residPS) # re-add after computation
293    list(coefficients = theta, residuals = residP, # s.resid = residPS,
294         fitted.values = mu,
295	 w.r = w.r, w.x = w.x, ni = ni, dispersion = phi, cov = asCov,
296         matM = matM, matQ = matQ, tcc = tcc, family = family,
297         linear.predictors = eta, deviance = NULL, iter = nit, y = y,
298         converged = conv)
299}
300
301
302## NB: X  is model.matrix() aka design matrix used; typically including an intercept
303wts_HiiDist <- function(X) {
304    ## Hii := diag( tcrossprod( qr.Q(qr(X)) ) ) == rowSums( qr.Q(qr(X)) ^2 ) :
305    x <- qr(X)
306    Hii <- rowSums(qr.qy(x, diag(1, nrow = NROW(X), ncol = x$rank))^2)
307    (1-Hii)
308}
309
310##' Compute robustness weights depending on the design 'X' only,
311##' using robust(ified) Mahalanobis distances.
312##' This is an auxiliary function for robXweights() activated typically by
313##' weights.on.x = "..." from regression functions
314##' @title Compute Robust Weights based on Robustified Mahalanobis - Distances
315##' @param X n x p  numeric matrix
316##' @param intercept logical; should be true iff  X[,1] is a column with the intercept
317##' @param covFun function for computing a \bold{robust} covariance matrix;
318##'        e.g., MASS::cov.rob(), or covMcd().
319##' @return n-vector of non-negative weights.
320##' @author Martin Maechler
321wts_RobDist <- function(X, intercept, covFun)
322{
323    D2 <- if(intercept) { ## X[,] has intercept column which should not be used for rob.wts
324	X <- X[, -1, drop=FALSE]
325	Xrc <- covFun(X)
326	mahalanobis(X, center = Xrc$center, cov = Xrc$cov)
327    }
328    else { ## X[,]  can be used directly
329	if(!is.matrix(X)) X <- as.matrix(X)
330	Xrc <- covFun(X)
331	S <- Xrc$cov + tcrossprod(Xrc$center)
332	mahalanobis(X, center = FALSE, cov = S)
333    }
334    p <- ncol(X) ## E[chi^2_p] = p
335    1/sqrt(1+ pmax.int(0, 8*(D2 - p)/sqrt(2*p)))
336}
337
338
339## MM: 'acc' seems a misnomer to me, but it's inherited from  MASS::rlm
340glmrobMqle.control <-
341    function(acc = 1e-04, test.acc = "coef", maxit = 50, tcc = 1.345)
342{
343    if (!is.numeric(acc) || acc <= 0)
344	stop("value of acc must be > 0")
345    if (test.acc != "coef")
346	stop("Only 'test.acc = \"coef\"' is currently implemented")
347    ## if (!(any(test.vec == c("coef", "resid"))))
348    ##	  stop("invalid argument for test.acc")
349    if (!is.numeric(maxit) || maxit <= 0)
350	stop("maximum number of iterations must be > 0")
351    if (!is.numeric(tcc) || tcc <= 0)
352	stop("value of the tuning constant c (tcc) must be > 0")
353    list(acc = acc, test.acc = test.acc, maxit = maxit, tcc = tcc)
354}
355
356
357### ----------------- E[ f(psi ( X ) ) ] -------------------------------
358
359## MM: These are now expressions instead of functions
360##   since 'Epsi*' and 'Epsi2*' are *always* called together
361##   and 'EpsiS*' when called is always after the other two
362## ==> do common computations only once in Epsi*.init  ==> more efficient!
363##
364##   FIXME(2): Some of these fail when Huber's "c", 'tcc' is = +Inf
365##   -----    --> ../../robGLM1/R/rglm.R
366
367## FIXME:  Do use a "robFamily", a  *list* of functions
368## ------  which all have the same environment
369##   ===> can get same efficiency as expressions, but better OOP
370
371
372### --- Poisson -- family ---
373
374EpsiPois.init <- expression(
375{
376    dpH <- dpois(H, mu); dpH1 <- dpois(H-1, mu)
377    dpK <- dpois(K, mu); dpK1 <- dpois(K-1, mu)
378    pHm1 <- ppois(H-1, mu) ; pH <- pHm1 + dpH # = ppois(H,*)
379    pKm1 <- ppois(K-1, mu) ; pK <- pKm1 + dpK # = ppois(K,*)
380    E2f <- mu*(dpH1 - dpH - dpK1 + dpK) + pKm1 - pHm1
381})
382
383EpsiPois <- expression(
384{
385    tcc*(1 - pK - pH) + mu*(dpH - dpK)/sV
386})
387
388Epsi2Pois <- expression(
389{
390    ## Calculation of E(psi^2) for the diagonal elements of A in matrix Q:
391    tcc^2 * (pH + 1 - pK) + E2f
392})
393
394EpsiSPois <- expression(
395{
396    ## Calculation of E(psi*s) for the diagonal elements of B in the
397    ## expression matrix M = 1/n t(X) %*% B %*% X:
398    tcc*(dpH + dpK) + E2f / sV
399})
400
401
402### --- Binomial -- family ---
403
404EpsiBin.init <- expression({
405    pK <- pbinom(K, ni, mu)
406    pH <- pbinom(H, ni, mu)
407    pKm1 <- pbinom(K-1, pmax.int(0, ni-1), mu)
408    pHm1 <- pbinom(H-1, pmax.int(0, ni-1), mu)
409    pKm2 <- pbinom(K-2, pmax.int(0, ni-2), mu)
410    pHm2 <- pbinom(H-2, pmax.int(0, ni-2), mu)
411
412    ## QlV = Q / V, where Q = Sum_j (j - mu_i)^2 * P[Y_i = j]
413    ## i.e.  Q =	     Sum_j j(j-1)* P[.] +
414    ##		 (1- 2*mu_i) Sum_j   j	 * P[.] +
415    ##		     mu_i^2  Sum_j	   P[.]
416    QlV <- mu/Vmu*(mu*ni*(pK-pH) +
417		   (1 - 2*mu*ni) * ifelse(ni == 1, (H <= 0)*(K >= 1), pKm1 - pHm1) +
418		   (ni - 1) * mu * ifelse(ni == 2, (H <= 1)*(K >= 2), pKm2 - pHm2))
419})
420
421EpsiBin <- expression(
422{
423    tcc*(1 - pK - pH) +
424	ifelse(ni == 1, (- (H < 0) + (K >= 1) ) * sV,
425	       (pKm1 - pHm1 - pK + pH) * mu * sni/sV)
426})
427
428Epsi2Bin <- expression(
429{
430    ## Calculation of E(psi^2) for the diagonal elements of A in matrix Q:
431    tcc^2*(pH + 1 - pK) + QlV
432})
433
434EpsiSBin <- expression(
435{
436    ## Calculation of E(psi*s) for the diagonal elements of B in the
437    ## expression matrix M = (X' B X)/n
438    mu/Vmu*(tcc*(pH - ifelse(ni == 1, H >= 1, pHm1)) +
439	    tcc*(pK - ifelse(ni == 1, K > 0,  pKm1))) + ifelse(ni == 0, 0, QlV / (sni*sV))
440})
441
442### --- Gaussian -- family ---
443
444EpsiGaussian.init <- expression({
445    dc <- dnorm(tcc)
446    pc <- pnorm(tcc)
447})
448
449EpsiGaussian <- expression( 0 )
450
451EpsiSGaussian <- expression( 2*pc-1 )
452
453Epsi2Gaussian <- expression( 2*tcc^2*(1-pc)-2*tcc*dc+2*pc-1 )
454
455phiGaussianEst.cl <- expression(
456{
457    ## Classical estimation of the dispersion paramter phi = sigma^2
458    sum(((y - mu)/mu)^2)/(nobs - ncoef)
459})
460
461phiGaussianEst <- expression(
462{
463    sphi <- mad(residP, center=0)^2
464})
465
466### --- Gamma -- family ---
467
468Gmn <- function(t, nu) {
469    ## Gm corrresponds to G * nu^((nu-1)/2) / Gamma(nu)
470    snu <- sqrt(nu)
471    snut <- snu+t
472    r <- numeric(length(snut))
473    ok <- snut > 0
474    r[ok] <- {
475	nu <- nu[ok]; snu <- snu[ok]; snut <- snut[ok]
476	exp((nu-1)/2*log(nu) - lgamma(nu) - snu*snut + nu*log(snut))
477    }
478    r
479}
480
481EpsiGamma.init <- expression({
482
483    nu <- 1/phi      ## form parameter nu
484    snu <- 1/sqrt(phi) ## == sqrt (nu)
485
486    pPtc <- pgamma(snu + c(-tcc,tcc), shape=nu, rate=snu)
487    pMtc <- pPtc[1]
488    pPtc <- pPtc[2]
489
490    aux2 <- tcc*snu
491    GLtcc <- Gmn(-tcc,nu)
492    GUtcc <- Gmn( tcc,nu)
493})
494
495EpsiGamma <- expression( tcc*(1-pPtc-pMtc) + GLtcc - GUtcc )
496
497EpsiSGamma <- expression( ((GLtcc - GUtcc) + snu*(pPtc-pMtc))/mu )
498
499Epsi2Gamma <- expression({
500    (tcc^2*(pMtc+1-pPtc) + (pPtc-pMtc) +
501     (GLtcc*(1-aux2) - GUtcc*(1+aux2))/snu )
502})
503
504
505phiGammaEst.cl <- expression(
506{
507    ## Classical moment estimation of the dispersion parameter phi
508    sum(((y - mu)/mu)^2)/(nobs-ncoef)
509})
510
511phiGammaEst <- expression(
512{
513    ## robust estimation of the dispersion parameter by
514    ## Huber's proposal 2
515    sphi <- uniroot(Huberprop2, interval=Rphi,
516                    ns.resid=residP, mu=mu, Vmu=Vmu, tcc=tcc)$root
517})
518
519Huberprop2 <- function(phi, ns.resid, mu, Vmu, tcc)
520{
521    eval(EpsiGamma.init)
522    compEpsi2 <- eval(Epsi2Gamma)
523    nobs <- length(mu)
524    ## return h :=
525    sum(pmax.int(-tcc, pmin.int(ns.resid*snu, tcc))^2) -  nobs*compEpsi2
526}
527
528if(FALSE) ## no-eval version
529Huberprop2 <- function(phi, ns.resid, mu, Vmu, tcc)
530{
531    nobs <- length(mu)
532    nu <- 1/phi         ## form parameter  nu
533    snu <- 1/sqrt(phi)  ## sqrt (nu)
534    pPtc <- pgamma(snu + c(-tcc,tcc), shape=nu, rate=snu)
535    pMtc <- pPtc[1]
536    pPtc <- pPtc[2]
537
538    ts <- tcc*snu
539    GLtcc <- Gmn(-tcc,nu) *(1-ts)/snu
540    GUtcc <- Gmn( tcc,nu) *(1+ts)/snu
541    ##
542    compEpsi2 <- tcc^2 + (pPtc - pMtc)*(1-tcc^2) + GLtcc - GUtcc
543    ## return h :=
544    sum(pmax.int(-tcc, pmin.int(ns.resid*snu, tcc))^2) -  nobs*compEpsi2
545}
546