1library(robustbase)
2
3source(system.file("xtraR/ex-funs.R", package = "robustbase"))
4source(system.file("test-tools-1.R",  package = "Matrix", mustWork=TRUE))# assert.EQ
5
6
7###>> 1 ------------------- family = poisson ------------------------------------
8
9### very simple model [with outliers]
10set.seed(113)
11y <- rpois(17, lambda = 4) ## -> target:  beta_0 = log(E[Y]) = log(4) = 1.386294
12
13y[1:2] <- 99:100 # outliers
14y
15rm1 <- glmrob(y ~ 1, family = poisson, trace = TRUE,
16              acc = 1e-13) # default is just 1e-4
17## and check the robustness weights:
18assert.EQ(c(0.0287933850640724, 0.0284930623638766,
19		      0.950239140568007, 0.874115394740014),
20		    local({w <- rm1$w.r; w[ w != 1 ] }), tol = 1e-14)
21assert.EQ(coef(rm1), c("(Intercept)" = 1.41710946076738),tol = 1e-14)
22
23cm1 <- glm   (y ~ 1, family = poisson, trace = TRUE)
24
25rmMT <- glmrob(y ~ 1, family = poisson, trace = TRUE, method="MT")
26(sMT <- summary(rmMT))
27
28if(FALSE) # for manual digging:
29debug(robustbase:::glmrobMqle)
30
31allresid <- function(obj, types = c("deviance", "pearson", "working", "response"))
32{
33    sapply(types, residuals, object = obj)
34}
35
36okFit <- function(obj, check.attr=FALSE, ...) {
37  all.equal(obj$y, obj$fitted.values + residuals(obj, "response"),
38            check.attributes=check.attr, ...)
39}
40
41## check validity of several methods simultaneously:
42y. <- model.response(model.frame(rm1))
43stopifnot(okFit(cm1), okFit(rm1), y. == y)
44
45alr.c <- allresid(cm1)
46alr.r <- allresid(rm1)
47
48## MM --- just for now --
49plot(resid(cm1),                resid(rm1), asp=1); abline(0,1, col=2)
50plot(resid(cm1,type="pearson"), resid(rm1, type="pearson"), asp=1); abline(0,1, col=2)
51plot(resid(cm1,type="working"), resid(rm1, type="working"), asp=1); abline(0,1, col=2)
52
53## leave away the outliers --
54cm0 <- glm   (y ~ 1, family = poisson, trace = TRUE, subset = -(1:2))
55plot(resid(cm0),                resid(rm1)[-(1:2)], asp=1); abline(0,1, col=2)
56plot(resid(cm0,type="pearson"), resid(rm1, type="pearson")[-(1:2)], asp=1); abline(0,1, col=2)
57plot(resid(cm0,type="working"), resid(rm1, type="working")[-(1:2)], asp=1); abline(0,1, col=2)
58plot(resid(cm0,type="response"), resid(rm1, type="response")[-(1:2)], asp=1); abline(0,1, col=2)
59
60
61## Use weights (slow convergence !)
62w2 <- c(rep(1,8), rep(10,9))
63rm2 <- glmrob(y ~ 1, family = poisson, trace = TRUE,
64              weights = w2, maxit = 500, acc = 1e-10) # default is just 1e-4
65## slow convergence
66stopifnot(okFit(rm2))
67
68
69###>> 2 ------------------- family = binomial -----------------------------------
70
71## Using  *factor*  y ...
72x <- seq(0,5, length = 120)
73summary(px <- plogis(-5 + 2*x))
74set.seed(7)
75(f <- factor(rbinom(length(x), 1, prob=px)))
76
77summary(m.c0 <- glm   (f ~ x, family = binomial))
78summary(m.r0 <- glmrob(f ~ x, family = binomial))
79
80## add outliers --- in y:
81f. <- f
82f.[i1 <- 2:3] <- 1
83f.[i0 <- 110+c(1,7)] <- 0
84        m.c1 <- glm   (f. ~ x, family = binomial)
85summary(m.r1 <- glmrob(f. ~ x, family = binomial)) ## hmm, not so robust?
86stopifnot(m.r1$w.r[c(i0,i1)] < 1/3, # well, at least down weighted
87	  ## and coefficients change less :
88	  (coef(m.r1) - coef(m.c0)) / (coef(m.c1) - coef(m.c0)) < 1)
89assert.EQ(c("(Intercept)" = -3.10817337603974, x = 1.31618564057790),
90	  coef(m.r1), tol= 1e-14, giveRE=TRUE)
91
92y <- as.numeric(as.character(f.))
93m.r2  <- BYlogreg(x0=x, y=y, trace=TRUE, maxhalf= 10)
94m.r2A <- BYlogreg(x0=x, y=y, trace= 2  , maxhalf= 15)
95## different.. but not so much:
96iB <- 1:5
97assert.EQ(m.r2A[iB], m.r2[iB], tol = .003, giveRE=TRUE)
98
99
100assert.EQ(c("Intercept" = -2.9554950286, x = 1.2574679132),
101          ## 32-bit{ada-5}  -2.95549502890363   1.25746791332613
102	  m.r2$coef, tol=8e-10, giveRE=TRUE)# seen 5.316e-10   for --disable-long-double
103assert.EQ( c(0.685919891749065, 0.256419206157062),
104          ## 32-bit{ada-5}:
105          ## 0.685919891858219, 0.256419206203016)
106	  m.r2$sterror, tol=4e-9)# seen 1.025e-9   for --disable-long-double
107
108data(foodstamp)
109str(foodstamp)
110## Model with 'income' instead of log(income+1)  is "interesting"
111## because BYlogreg() needs  maxhalf > 10 for convergence!
112m.fs0   <- glm   (participation ~ ., family=binomial, data=foodstamp)
113m.fs0QL <- glmrob(participation ~ ., family=binomial, data=foodstamp)
114y.fs <- foodstamp[,"participation"]
115X.fs0 <- model.matrix(m.fs0)
116head(X.fs0)
117## (former default) maxhalf = 10  leads to too early convergence:
118m.fsWBY. <- BYlogreg(x0=X.fs0, y=y.fs,
119                     addIntercept=FALSE, trace=TRUE, maxhalf=10)
120m.fs.BY. <- BYlogreg(x0=X.fs0, y=y.fs, initwml=FALSE,
121                     addIntercept=FALSE, trace=TRUE, maxhalf=10)
122m.fsWBY <- BYlogreg(x0=X.fs0, y=y.fs,
123		    addIntercept=FALSE, trace=TRUE, maxhalf=18)
124m.fs.BY <- BYlogreg(x0=X.fs0, y=y.fs, initwml=FALSE,
125		    addIntercept=FALSE, trace=TRUE, maxhalf=18)
126
127assert.EQ(m.fsWBY.[iB], m.fsWBY[iB], tol= 0.07)## almost 7% different
128assert.EQ(m.fs.BY.[iB], m.fs.BY[iB], tol= 0.08)
129
130foodSt <- within(foodstamp, { logInc <- log(1 + income) ; rm(income) })
131
132m.fsML <- glm   (participation ~ ., family=binomial, data=foodSt)
133m.fsQL <- glmrob(participation ~ ., family=binomial, data=foodSt)
134X.fs <- model.matrix(m.fsML)
135stopifnot(dim(X.fs) == c(150, 4)) # including intercept!
136try(## FIXME -- Mahalanobis fails with singular matrix, here:
137m.fsWBY <- BYlogreg(x0=X.fs, y=y.fs,
138		    addIntercept=FALSE, trace=TRUE, maxhalf=18)
139)
140## maxhalf=18 is too much --> no convergence (in 1000 steps)
141m.fs.BY <- BYlogreg(x0=X.fs, y=y.fs, initwml=FALSE,
142		    addIntercept=FALSE, trace=TRUE, maxhalf=18)
143signif(
144    rbind(ML = coef(m.fsML),   QL =coef(m.fsQL),
145          WBY0=coef(m.fsWBY.), BY0=coef(m.fs.BY.),
146          WBY =coef(m.fsWBY ), BY =coef(m.fs.BY)
147          )
148    , 4)
149
150
151if(FALSE) {
152## *scaling* of X (  ?? <==> ??   'sigma1' ) ------------------
153
154## no "W" (Mahalanobis fail because of *singular* X):
155m.fs.BY100 <- BYlogreg(x0=100*X.fs, initwml=FALSE,
156                       y=y.fs,
157                       addIntercept=FALSE, trace=TRUE, maxhalf=18)
158## ==> no convergence
159
160X1c <- cbind(1, 100*X.fs[,-1])
161m.fsWBY1c <- BYlogreg(x0=X1c, y=y.fs,
162                      addIntercept=FALSE, trace=TRUE, maxhalf=18)
163## ==> illegal singularity$kind
164
165}## not yet
166
167###-------- Gamma ------------
168
169## Realistic "data" {from help(glmrob)}:
170mu <- c(122.131, 53.0979, 39.9039, 33.9232, 28.007,
171        24.923, 21.5747, 19.6971, 18.4516)
172ns.resid <- c(-0.0338228, 0.0923228, 0.0525284, 0.0317426, -0.035954,
173              0.00308925, -0.026637, -0.0353932, -0.0244761)
174Vmu <- c(14915.9, 2819.38, 1592.32, 1150.78, 784.39,
175         621.156, 465.467, 387.978, 340.462)
176Hp2  <- robustbase:::Huberprop2
177## Hp2. <- robustbase:::Huberprop2.
178
179## was: phis <- 2^(-70:-1)  -- but that was *not* reliable (on 32-bit e.g.)
180phis <- 2^(-42:-1)
181H1 <- sapply(phis, function(phi)
182    Hp2(phi, ns.resid=ns.resid, mu=mu, Vmu=Vmu, tcc = 1.345))
183## H2 <- sapply(phis, function(phi)
184##     Hp2.(phi, ns.resid=ns.resid, mu=mu, Vmu=Vmu, tcc = 1.345))
185dput(signif(H1))
186H2 <- c(9.91741,
187        9.88674, 9.89438, 9.88674, 9.88961, 9.88961, 9.88961, 9.88984,
188        9.88973, 9.88964, 9.8897, 9.88975, 9.88976, 9.88975, 9.88974,
189        9.88974, 9.88974, 9.88974, 9.88974, 9.88974, 9.88974, 9.88974,
190        9.88975, 9.88975, 9.88975, 9.33161, 8.70618, 8.39347, 8.23714,
191        8.15902, 8.12006, 7.16275, 3.38703, -0.0879886, -2.3322, -4.16929,
192        -5.26821, -5.80526, -6.04822, -6.11538, -6.02613, -5.66718)
193          all.equal(H1,H2, tolerance = 0) # -> see 8.869e-7
194stopifnot(all.equal(H1,H2, tolerance = 1e-5))
195
196if(dev.interactive(TRUE)) # shows that phi < 1e-12 is doubtful
197  matplot(phis, cbind(H1,H2), log="x", ylim = rrange(H1), type="o")
198
199