1### -*- mode: R ; delete-old-versions: never -*-
2
3## This program is free software; you can redistribute it and/or modify
4## it under the terms of the GNU General Public License as published by
5## the Free Software Foundation; either version 2 of the License, or
6## (at your option) any later version.
7##
8## This program is distributed in the hope that it will be useful,
9## but WITHOUT ANY WARRANTY; without even the implied warranty of
10## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11## GNU General Public License for more details.
12##
13## You should have received a copy of the GNU General Public License
14## along with this program; if not, a copy is available at
15## http://www.r-project.org/Licenses/
16
17### From package 'Biobase' (has only rowMedians + rowQ) / 'matrixStats'
18### MM: all type checking now in C
19## ---  TODO: implement  hasNA=NA ==> do check maybe differently than = TRUE
20##  --> ../src/rowMedians.c +  ../src/rowMedians_TYPE-template.h
21colMedians <- function(x, na.rm=FALSE, hasNA=TRUE, keep.names=TRUE)
22  .Call(R_rowMedians, x, na.rm, hasNA, FALSE, keep.names)
23
24rowMedians <- function(x, na.rm=FALSE, hasNA=TRUE, keep.names=TRUE)
25  .Call(R_rowMedians, x, na.rm, hasNA, TRUE, keep.names)
26
27
28
29
30### Maria Anna di Palma, without consistency factor 15.11.2014
31### Fixes by Valentin Todorov
32### Martin Maechler: added mad() consistency factor, 27.11.2014
33###     new name, class; more compatible to 'covMcd'
34
35covComed <- function (X, n.iter = 2, reweight = FALSE,
36                      tolSolve = control$ tolSolve,# had 1e-10 hardwired {now 1e-14 default}
37                      trace = control$ trace,
38                      wgtFUN = control$ wgtFUN,
39                      control = rrcov.control())
40{
41    ## ATTENTION ##
42    ## Med(abs(X))^2=Med(X*X) only if the number of rows is odd
43    d <- dim(X <- as.matrix(X))
44    n <- d[1]
45    p <- d[2]
46
47    if(is.character(wgtFUN))
48	wgtFUN <- .wgtFUN.covComed[[wgtFUN]](p=p, n=n, control)
49    if(!is.function(wgtFUN))
50	stop("'wgtFUN' must be a function or a string specifying such a function")
51
52    madX <- apply(X, 2, mad)
53    I.mad <- 1/madX
54    rho <- I.mad * COM(X) * rep(I.mad, each = p)
55    ## better than
56    ## D <- diag(1/madX)
57    ## rho <- D %*% COM(X) %*% t(D)
58
59    U <- svd(rho, p, nv = 0L)$u
60    ## DD <- diag(madX)
61    ## Q <- DD %*% U
62    ## invQ <- solve(Q) ##  == t(U) %*% D -- since  U is orthogonal!
63    t.inv.Q <- I.mad * U # = t(solve(Q)) = t(t(U) * D) == t(D) U = D U
64    Z <- X %*% t.inv.Q ## much faster than for (i in 1:n) Z[i,] <- invQ %*% X[i,]
65    out <- comedian(rho, Z, X)
66
67    ## Mahalanobis distance
68    for(it in seq_len(n.iter))# allow n.iter = 0
69        out <- comedian(out$S., out$Z, X)
70
71    mm <- colMedians(out$Z)
72    mx <- drop(out$Q %*% mm)
73    ## MM: These are "raw" distances compared to covMcd()
74    mah <- mahalanobis(X, mx, out$S., tol = tolSolve)
75
76    ## compute weights
77    weights <- wgtFUN(mah)
78    covW <- cov.wt(X, wt=weights)[c("cov", "center", "n.obs")]
79    covW$weights <-
80	if(reweight) { ## above 'mah' = 'raw.mah' .. ==> allow another reweighting as in covMcd()
81	    covW$raw.weights <- weights
82	    covW$mah <- mahalanobis(X, covW$center, covW$cov, tol = tolSolve)
83	    wgtFUN(mah)
84	} else # no re-weighting
85	    weights
86    structure(class = "comed",
87	      c(list(Z = out$Z, raw.cov = out$S., raw.center = mx, raw.mah = mah,
88		     wgtFUN=wgtFUN),
89		covW))
90}
91
92
93##' Martin Maechler's simple proposal for an *adaptive* cutoff
94##' i.e., one which does *not* reject outliers in good samples asymptotically
95.COM.adaptWgt.c <- function(n,p, eps = 0.2 / n^0.3) {
96    ## default eps ==>  1-eps(n=100) ~= 0.95; 1-eps(n=10) ~= 0.90
97    ## using upper tail:
98    1.4826 * qchisq(eps, p, lower.tail=FALSE) / qchisq(0.5, p)
99}
100
101## Default wgtFUN() constructors for covComed():
102.wgtFUN.covComed <-
103    list("01.original" = function(p, ...) {
104	     cMah <- .COM.adaptWgt.c(p=p, eps = 0.05)# 1 - eps = 0.95
105	     function(d) as.numeric(d < median(d)*cMah) },
106	 "01.flex" = function(p, n, control) { ## 'beta' instead of 0.95
107	     stopifnot(is.1num(beta <- control$beta), 0 <= beta, beta <= 1)
108	     cMah <- 1.4826 * qchisq(beta, p) / qchisq(0.5, p)
109	     function(d) as.numeric(d < median(d)*cMah) },
110	 "01.adaptive" = function(p, n, ...) { ## 'beta_n' instead of 0.975
111	     cMah <- .COM.adaptWgt.c(n,p)
112	     function(d) as.numeric(d < cMah) },
113	 "sm1.flex" = function(p, n, control) { ## 'beta' / smooth weight
114	     stopifnot(is.1num(beta <- control$beta), 0 <= beta, beta <= 1)
115	     cMah <- 1.4826 * qchisq(beta, p) / qchisq(0.5, p)
116	     function(d) smoothWgt(d / median(d), c=cMah, h = 1) },
117	 "sm1.adaptive" = function(p, n, ...) {
118	     cMah <- .COM.adaptWgt.c(n=n, p=p)
119	     function(d) smoothWgt(d / median(d), c = cMah, h = 1) },
120	 "sm2.adaptive" = function(p, n, ...) {
121	     cMah <- .COM.adaptWgt.c(n=n, p=p)
122	     function(d) smoothWgt(d / median(d), c = cMah, h = 2) }
123	 )
124
125
126comedian <- function (rho, Z, X)
127{
128    p <- ncol(X)
129    U <- svd(rho, nv = 0L)$u
130    madX <- apply(X, 2, mad)
131    I.mad <- 1/madX
132    ## D <- diag(madX)
133    ## Q <- D %*% U
134    Q <- madX * U
135    ## invQ <- solve(Q)
136    t.inv.Q <- I.mad * U # = t(solve(Q)) = t(t(U) * D) == t(D) U = D U
137    Z <- X %*% t.inv.Q ## for (i in 1:n)  Z[i,] <- invQ %*% X[i,]
138    madZ <- apply(Z, 2, mad)
139    list(Q=Q, Z=Z, S. = tcrossprod(Q * rep(madZ, each=p))) ## better than
140    ##             S. = Q %*% diag(madZ)^2 %*% t(Q)
141}
142
143COM <- function(X)
144{
145    ## Comedian *with* consistency factor.  Falk(1997) was without it.
146
147    stopifnot(is.1num(p <- ncol(X)), p >= 1)
148    med <- colMedians(X)
149    Y <- sweep(X, 2L, med, `-`)
150    COM <- matrix(0., p,p)
151    madY <- numeric(p)
152    for(i in 1:p) {
153        madY[i] <- madYi <- mad(Yi <- Y[,i])
154        for(j in seq_len(i-1)) { # j <= i ==> madY[j] "exists"
155            COM[j,i] <- COM[i,j] <- median(Yi * Y[,j]) / (madYi * madY[j])
156            ## COM[i,j] <- median((Y[,i])*(Y[,j]))
157            ## COM[i,j] <- (1.4826^2)*median((Y[,i])*(Y[,j]))
158        }
159        ## j == i :
160        COM[i,i] <- median(Yi^2) / (madYi^2)
161    }
162    ## return [ 1.4826 = formals(mad)$constant = consistency factor of mad()]
163    1.4826^2 * COM
164}
165