1library(robustbase) 2 3source(system.file("xtraR/ex-funs.R", package = "robustbase")) 4source(system.file("test-tools-1.R", package = "Matrix", mustWork=TRUE))# assert.EQ 5 6 7###>> 1 ------------------- family = poisson ------------------------------------ 8 9### very simple model [with outliers] 10set.seed(113) 11y <- rpois(17, lambda = 4) ## -> target: beta_0 = log(E[Y]) = log(4) = 1.386294 12 13y[1:2] <- 99:100 # outliers 14y 15rm1 <- glmrob(y ~ 1, family = poisson, trace = TRUE, 16 acc = 1e-13) # default is just 1e-4 17## and check the robustness weights: 18assert.EQ(c(0.0287933850640724, 0.0284930623638766, 19 0.950239140568007, 0.874115394740014), 20 local({w <- rm1$w.r; w[ w != 1 ] }), tol = 1e-14) 21assert.EQ(coef(rm1), c("(Intercept)" = 1.41710946076738),tol = 1e-14) 22 23cm1 <- glm (y ~ 1, family = poisson, trace = TRUE) 24 25rmMT <- glmrob(y ~ 1, family = poisson, trace = TRUE, method="MT") 26(sMT <- summary(rmMT)) 27 28if(FALSE) # for manual digging: 29debug(robustbase:::glmrobMqle) 30 31allresid <- function(obj, types = c("deviance", "pearson", "working", "response")) 32{ 33 sapply(types, residuals, object = obj) 34} 35 36okFit <- function(obj, check.attr=FALSE, ...) { 37 all.equal(obj$y, obj$fitted.values + residuals(obj, "response"), 38 check.attributes=check.attr, ...) 39} 40 41## check validity of several methods simultaneously: 42y. <- model.response(model.frame(rm1)) 43stopifnot(okFit(cm1), okFit(rm1), y. == y) 44 45alr.c <- allresid(cm1) 46alr.r <- allresid(rm1) 47 48## MM --- just for now -- 49plot(resid(cm1), resid(rm1), asp=1); abline(0,1, col=2) 50plot(resid(cm1,type="pearson"), resid(rm1, type="pearson"), asp=1); abline(0,1, col=2) 51plot(resid(cm1,type="working"), resid(rm1, type="working"), asp=1); abline(0,1, col=2) 52 53## leave away the outliers -- 54cm0 <- glm (y ~ 1, family = poisson, trace = TRUE, subset = -(1:2)) 55plot(resid(cm0), resid(rm1)[-(1:2)], asp=1); abline(0,1, col=2) 56plot(resid(cm0,type="pearson"), resid(rm1, type="pearson")[-(1:2)], asp=1); abline(0,1, col=2) 57plot(resid(cm0,type="working"), resid(rm1, type="working")[-(1:2)], asp=1); abline(0,1, col=2) 58plot(resid(cm0,type="response"), resid(rm1, type="response")[-(1:2)], asp=1); abline(0,1, col=2) 59 60 61## Use weights (slow convergence !) 62w2 <- c(rep(1,8), rep(10,9)) 63rm2 <- glmrob(y ~ 1, family = poisson, trace = TRUE, 64 weights = w2, maxit = 500, acc = 1e-10) # default is just 1e-4 65## slow convergence 66stopifnot(okFit(rm2)) 67 68 69###>> 2 ------------------- family = binomial ----------------------------------- 70 71## Using *factor* y ... 72x <- seq(0,5, length = 120) 73summary(px <- plogis(-5 + 2*x)) 74set.seed(7) 75(f <- factor(rbinom(length(x), 1, prob=px))) 76 77summary(m.c0 <- glm (f ~ x, family = binomial)) 78summary(m.r0 <- glmrob(f ~ x, family = binomial)) 79 80## add outliers --- in y: 81f. <- f 82f.[i1 <- 2:3] <- 1 83f.[i0 <- 110+c(1,7)] <- 0 84 m.c1 <- glm (f. ~ x, family = binomial) 85summary(m.r1 <- glmrob(f. ~ x, family = binomial)) ## hmm, not so robust? 86stopifnot(m.r1$w.r[c(i0,i1)] < 1/3, # well, at least down weighted 87 ## and coefficients change less : 88 (coef(m.r1) - coef(m.c0)) / (coef(m.c1) - coef(m.c0)) < 1) 89assert.EQ(c("(Intercept)" = -3.10817337603974, x = 1.31618564057790), 90 coef(m.r1), tol= 1e-14, giveRE=TRUE) 91 92y <- as.numeric(as.character(f.)) 93m.r2 <- BYlogreg(x0=x, y=y, trace=TRUE, maxhalf= 10) 94m.r2A <- BYlogreg(x0=x, y=y, trace= 2 , maxhalf= 15) 95## different.. but not so much: 96iB <- 1:5 97assert.EQ(m.r2A[iB], m.r2[iB], tol = .003, giveRE=TRUE) 98 99 100assert.EQ(c("Intercept" = -2.9554950286, x = 1.2574679132), 101 ## 32-bit{ada-5} -2.95549502890363 1.25746791332613 102 m.r2$coef, tol=8e-10, giveRE=TRUE)# seen 5.316e-10 for --disable-long-double 103assert.EQ( c(0.685919891749065, 0.256419206157062), 104 ## 32-bit{ada-5}: 105 ## 0.685919891858219, 0.256419206203016) 106 m.r2$sterror, tol=4e-9)# seen 1.025e-9 for --disable-long-double 107 108data(foodstamp) 109str(foodstamp) 110## Model with 'income' instead of log(income+1) is "interesting" 111## because BYlogreg() needs maxhalf > 10 for convergence! 112m.fs0 <- glm (participation ~ ., family=binomial, data=foodstamp) 113m.fs0QL <- glmrob(participation ~ ., family=binomial, data=foodstamp) 114y.fs <- foodstamp[,"participation"] 115X.fs0 <- model.matrix(m.fs0) 116head(X.fs0) 117## (former default) maxhalf = 10 leads to too early convergence: 118m.fsWBY. <- BYlogreg(x0=X.fs0, y=y.fs, 119 addIntercept=FALSE, trace=TRUE, maxhalf=10) 120m.fs.BY. <- BYlogreg(x0=X.fs0, y=y.fs, initwml=FALSE, 121 addIntercept=FALSE, trace=TRUE, maxhalf=10) 122m.fsWBY <- BYlogreg(x0=X.fs0, y=y.fs, 123 addIntercept=FALSE, trace=TRUE, maxhalf=18) 124m.fs.BY <- BYlogreg(x0=X.fs0, y=y.fs, initwml=FALSE, 125 addIntercept=FALSE, trace=TRUE, maxhalf=18) 126 127assert.EQ(m.fsWBY.[iB], m.fsWBY[iB], tol= 0.07)## almost 7% different 128assert.EQ(m.fs.BY.[iB], m.fs.BY[iB], tol= 0.08) 129 130foodSt <- within(foodstamp, { logInc <- log(1 + income) ; rm(income) }) 131 132m.fsML <- glm (participation ~ ., family=binomial, data=foodSt) 133m.fsQL <- glmrob(participation ~ ., family=binomial, data=foodSt) 134X.fs <- model.matrix(m.fsML) 135stopifnot(dim(X.fs) == c(150, 4)) # including intercept! 136try(## FIXME -- Mahalanobis fails with singular matrix, here: 137m.fsWBY <- BYlogreg(x0=X.fs, y=y.fs, 138 addIntercept=FALSE, trace=TRUE, maxhalf=18) 139) 140## maxhalf=18 is too much --> no convergence (in 1000 steps) 141m.fs.BY <- BYlogreg(x0=X.fs, y=y.fs, initwml=FALSE, 142 addIntercept=FALSE, trace=TRUE, maxhalf=18) 143signif( 144 rbind(ML = coef(m.fsML), QL =coef(m.fsQL), 145 WBY0=coef(m.fsWBY.), BY0=coef(m.fs.BY.), 146 WBY =coef(m.fsWBY ), BY =coef(m.fs.BY) 147 ) 148 , 4) 149 150 151if(FALSE) { 152## *scaling* of X ( ?? <==> ?? 'sigma1' ) ------------------ 153 154## no "W" (Mahalanobis fail because of *singular* X): 155m.fs.BY100 <- BYlogreg(x0=100*X.fs, initwml=FALSE, 156 y=y.fs, 157 addIntercept=FALSE, trace=TRUE, maxhalf=18) 158## ==> no convergence 159 160X1c <- cbind(1, 100*X.fs[,-1]) 161m.fsWBY1c <- BYlogreg(x0=X1c, y=y.fs, 162 addIntercept=FALSE, trace=TRUE, maxhalf=18) 163## ==> illegal singularity$kind 164 165}## not yet 166 167###-------- Gamma ------------ 168 169## Realistic "data" {from help(glmrob)}: 170mu <- c(122.131, 53.0979, 39.9039, 33.9232, 28.007, 171 24.923, 21.5747, 19.6971, 18.4516) 172ns.resid <- c(-0.0338228, 0.0923228, 0.0525284, 0.0317426, -0.035954, 173 0.00308925, -0.026637, -0.0353932, -0.0244761) 174Vmu <- c(14915.9, 2819.38, 1592.32, 1150.78, 784.39, 175 621.156, 465.467, 387.978, 340.462) 176Hp2 <- robustbase:::Huberprop2 177## Hp2. <- robustbase:::Huberprop2. 178 179## was: phis <- 2^(-70:-1) -- but that was *not* reliable (on 32-bit e.g.) 180phis <- 2^(-42:-1) 181H1 <- sapply(phis, function(phi) 182 Hp2(phi, ns.resid=ns.resid, mu=mu, Vmu=Vmu, tcc = 1.345)) 183## H2 <- sapply(phis, function(phi) 184## Hp2.(phi, ns.resid=ns.resid, mu=mu, Vmu=Vmu, tcc = 1.345)) 185dput(signif(H1)) 186H2 <- c(9.91741, 187 9.88674, 9.89438, 9.88674, 9.88961, 9.88961, 9.88961, 9.88984, 188 9.88973, 9.88964, 9.8897, 9.88975, 9.88976, 9.88975, 9.88974, 189 9.88974, 9.88974, 9.88974, 9.88974, 9.88974, 9.88974, 9.88974, 190 9.88975, 9.88975, 9.88975, 9.33161, 8.70618, 8.39347, 8.23714, 191 8.15902, 8.12006, 7.16275, 3.38703, -0.0879886, -2.3322, -4.16929, 192 -5.26821, -5.80526, -6.04822, -6.11538, -6.02613, -5.66718) 193 all.equal(H1,H2, tolerance = 0) # -> see 8.869e-7 194stopifnot(all.equal(H1,H2, tolerance = 1e-5)) 195 196if(dev.interactive(TRUE)) # shows that phi < 1e-12 is doubtful 197 matplot(phis, cbind(H1,H2), log="x", ylim = rrange(H1), type="o") 198 199