1library(robustbase) 2 3source(system.file("test-tools-1.R", package = "Matrix", mustWork=TRUE)) 4## -> assert.EQ(), identical3(), .. 5 6DNase1 <- DNase[ DNase$Run == 1, ] 7Y <- DNase1[,"density"] # for convenience below 8 9## classical 10fm1 <- nls(density ~ Asym/(1 + exp(( xmid - log(conc) )/scal ) ), 11 data = DNase1, start = list(Asym = 3, xmid = 0, scal = 1), trace=TRUE) 12summary(fm1) 13wm1 <- update(fm1, weights = sqrt(conc)) # (weights as function of <var>) 14 15## robust 16rm1 <- nlrob(formula(fm1), data = DNase1, trace = TRUE, 17 start = list(Asym = 3, xmid = 0, scal = 1)) 18(sm1 <- summary(rm1)) 19stopifnot(all.equal(Y, fitted(fm1) + residuals(fm1), check.attributes=FALSE), 20 ## fitted(<nls>) has "label" attribute 21 identical3(c(fitted(fm1)), predict(fm1), predict(fm1, newdata=DNase1)), 22 ## robust fit : 23 identical3(fitted(rm1), predict(rm1), predict(rm1, newdata=DNase1)), 24 all.equal(Y, unname(fitted(rm1) + residuals(rm1)))) 25print(coef(rm1), digits=12) 26## 2.35963008460 1.49945088410 1.04506391722 F19 Lx 64b 27## 2.35963008460 1.49945088410 1.04506391722 Win(Serv.2003) 64b 28## 2.35963008613 1.49945088600 1.04506391793 F19 Lx 32b 29## 2.35963008613 1.49945088600 1.04506391793 Win(Serv.2003) 32b 30assert.EQ(coef(rm1), giveRE=TRUE, 31 c(Asym=2.35963008, xmid=1.49945088, scal=1.04506392), tol = 4e-8) 32assert.EQ(sqrt(diag(sm1$cov)), giveRE=TRUE, 33 ## 32b 0.08626872273, 0.0902194541, 0.03503833759 34 c(Asym=0.0862687305, xmid=0.0902194608, scal=0.0350383389), 35 tol = 7e-7) 36## examples with weights: 37rm. <- update(rm1, weights = NULL)# 'NULL' but not missing() 38ww <- sqrt(DNase1[,"conc"]) 39wr1 <- update(rm1, weights = sqrt(conc), trace=FALSE) 40wr1. <- update(rm1, weights = ww, trace=FALSE) 41ii <- names(rm1) != "call" 42stopifnot(all.equal(rm1[ii], rm.[ii], tol = 1e-15), 43 all.equal(wr1[ii],wr1.[ii], tol = 1e-15)) 44 45 46## From: "Pascal A. Niklaus" <pascal.niklaus@ieu.uzh.ch> 47## To: <maechler@stat.math.ethz.ch> 48## Subject: nlrob problem 49## Date: Tue, 20 Dec 2011 07:04:38 +0100 50 51## For psi-functions that can become zero (e.g. psi.bisquare), weights in 52## the internal call to nls can become zero. 53 54 55## Was 56## psiTuk <- robustbase:::psi.bisquare 57## psiHamp <- robustbase:::psi.hampel 58 59lmrob.control(psi="bisquare")$tuning.psi 60psiTuk <- function(x, der=0) { 61 ## cc: dput( lmrob.control(psi="bisquare")$tuning.psi ) 62 if(der == 0) 63 Mwgt(x, cc=4.685061, psi="Tukey") 64 else 65 Mpsi(x, cc=4.685061, psi="Tukey", deriv=1) 66} 67 68c.Ha <- lmrob.control(psi="hampel"); c.Ha$tuning.psi 69psiHamp <- function(x, der=0) { 70 ## cc: dput( lmrob.control(psi="hampel")$tuning.psi ) 71 if(der == 0) 72 Mwgt(x, cc=c(1.35241275, 3.15562975, 7.212868), psi="Hampel") 73 else 74 Mpsi(x, cc=c(1.35241275, 3.15562975, 7.212868), psi="Hampel", deriv=1) 75} 76 77d <- data.frame(x = -6:9, 78 y = 43 + c(7, 52, 21, 12, 10, -4, -5, -4, 0, -77, -8, -10, 22, 33, 38, 51)) 79nlr1 <- nlrob(y ~ a*(x + b*exp(-c*x)), start=list(a= 4, b= 1, c= 1.2), 80 data = d, 81 maxit = 50, # default 20 is *not* sufficient 82 model = TRUE, 83 trace=TRUE) 84## These failed in robustbase version 0.8-0 and earlier 85nlr2 <- update(nlr1, psi = psiTuk) # now *does* converge... 86## check 'model' and dataClasses 87stopifnot(is.list(mod <- nlr2$model), is.data.frame(mod), 88 inherits(attr(mod, "terms"), "terms"), 89 identical(dCl <- attr(attr(mod, "terms"),"dataClasses"), 90 nlr2$dataClasses), 91 identical(dCl, c(y = "numeric", x = "numeric"))) 92## 'port' ditto: 93nlr2. <- update(nlr2, algorithm= "port") 94nlr3 <- update(nlr1, psi = psiHamp) # *does* converge, too... 95nlr3. <- update(nlr3, algorithm= "port") 96summary(nlr2.) 97summary(nlr3.) 98i. <- -c(2, 15) # <- drop 'call' and 'iter' components 99stopifnot(all.equal(nlr2[i.], nlr2.[i.], tolerance = 2e-5, check.environment = FALSE), 100 all.equal(nlr3[i.], nlr3.[i.], tolerance = 1e-4, check.environment = FALSE), 101 ## The redescending psi() give some exact 0 weights : 102 identical(which(abs(nlr2$rweights) < 1e-9), c(1L, 10 :12)), 103 identical(which(abs(nlr3$rweights) < 1e-9), c(1L, 10L,12L)) 104 ) 105 106## Different example with more data: 107pp <- list(a=10, b=4, c=1/4) 108x <- seq(-6,9, by = 1/8) 109f.x <- with(pp, a*(x + b*exp(-c*x))) 110set.seed(6); y <- y0 <- f.x + 4*rnorm(x) 111iO <- c(2:3,20,70:71,90); y[iO] <- y[iO] + 32*c(-1,-1,1)*(2+rlnorm(iO)); y <- round(y) 112plot(x,y); lines(x, f.x, col="tomato", lty = 2) 113dd <- data.frame(x,y) 114 115nlc1 <- nls(formula(nlr1), start = coef(nlr1), data=dd, trace=TRUE) 116nlR1 <- update(nlr1, data = dd)# update the model with the new data 117summary(nlR1) 118lines(x, predict(nlc1), col=3) 119lines(x, predict(nlR1), col=4) 120legend("top", c("f(x)", "least squares", "robust"), 121 col=c("tomato", palette()[3:4]), lty=c(2,1,1)) 122 123## These both now *do* converge, but failed earlier 124(nlbi <- update(nlR1, psi = psiTuk)) 125(nlFH <- update(nlR1, psi = psiHamp)) 126lines(x, predict(nlbi), col=5) 127lines(x, predict(nlFH), col=6) 128 129stopifnot(nlR1$status == "converged", nlbi$status == "converged", 130 nlFH$status == "converged") 131assert.EQ(coef(nlR1), c(a=9.914874, b=3.98612416, c=0.250896252), tol = 4e-9) 132assert.EQ(coef(nlbi), c(a=9.947458207, b=3.954210623, c=0.2535835248), tol = 4e-9) 133## This is suddently quite different : ???!?!?? 134## assert.EQ(coef(nlFH), c(a=9.94242831, b=3.97370746, c=0.252907618)) 135assert.EQ(coef(nlFH), c(a=9.952893755,b=3.949047387,c=0.2536216541), tol = 1e-7) 136assert.EQ(1000*diag(vcov(nlR1)), 137 c(a=16.167493, b=10.0986644, c=0.0200814189), tol = 7e-7, giveRE=TRUE) 138assert.EQ(1000*local({V <- vcov(nlFH); V[lower.tri(V, diag=TRUE)]}), 139 c(16.33774615, -9.704702857, 0.3149189329, 140 10.03560556, -0.4079936961, 0.02039106329), tol = 7e-7) 141assert.EQ(predict(nlR1), predict(nlbi), tol = 0.05, giveRE=TRUE) 142assert.EQ(predict(nlR1), predict(nlFH), tol = 0.05, giveRE=TRUE) 143 144nlFH2 <- update(nlFH, psi = .Mwgt.psi1("Hampel", c(2,4,8))) 145## TODO: and compare 146## TODO: same with Tukey 147 148 149##----- *Vector* parameters indexed by factor levels ------------- 150##----- MM: ~/R/MM/Pkg-ex/robustbase/nlrob-vectorpar.R 151 152data(biomassTill)## see also smaller example in ../man/biomassTill.Rd 153 154if(!dev.interactive(orNone=TRUE)) pdf("nlrob-biomT.pdf") 155 156require(lattice) 157xyplot(Biomass ~ DVS | Tillage, data = biomassTill) 158xyplot(Biom.2 ~ DVS | Tillage, data = biomassTill) 159 160## starting values 161m0.st <- list(Wm = rep(200, 3), 162 a = rep( 1, 3), 163 b = rep( 2, 3)) 164##-> nls(), even with -expm1(.) fails to start properly (and hence nlrob() fails too): 165try( 166m.c0 <- nls(Biomass ~ Wm[Tillage] * (- expm1(-(DVS/a[Tillage])^b[Tillage])), 167 data = biomassTill, start = m0.st, trace=TRUE) 168) 169 170## several other versions of the above fail similarly. This works: 171m00st <- list(Wm = rep(300, 3), 172 a = rep( 1.5, 3), 173 b = rep( 2.2, 3)) 174m.c00 <- nls(Biomass ~ Wm[Tillage] * (- expm1(-(DVS/a[Tillage])^b[Tillage])), 175 data = biomassTill, start = m00st, trace=TRUE) 176 177## These were the "true" beta for simulating in creation of {Biomass, Biom.2}: 178m1.st <- list(Wm = c(219.8, 265.9, 343.4), 179 a = c(1.461, 1.493, 1.294), 180 b = c(2.889, 2.838, 4.046)) 181 182m.cl <- nls(Biomass ~ Wm[Tillage] * (1 - exp(-(DVS/a[Tillage])^b[Tillage])), 183 data = biomassTill, start = m00st, trace=TRUE) 184## this now fails to converge: 185try( # "singular gradient" 186m.c2 <- nls(Biom.2 ~ Wm[Tillage] * (1 - exp(-(DVS/a[Tillage])^b[Tillage])), 187 data = biomassTill, start = m00st, trace=TRUE) 188) 189 190str(C1 <- nls.control(minFactor=1e-6, warnOnly=TRUE, printEval=TRUE, maxiter=500)) 191 192try( 193m.c2 <- nls(Biom.2 ~ Wm[Tillage] * (1 - exp(-(DVS/a[Tillage])^b[Tillage])), 194 data = biomassTill, start = m00st, trace=TRUE, control=C1) 195) 196## fails (!) too {numericDeriv() in iteration 129} even though we have 197## 'warnOnly' ! ==> bug in nls() !!!!!!!!!!!!!!!!!!!!!!!!!!! 198 199## -expm1(u) is better than (1 - exp(u)) : 200m.c00 <- nls(Biom.2 ~ Wm[Tillage] * (- expm1(-(DVS/a[Tillage])^b[Tillage])), 201 data = biomassTill, start = m00st, trace=TRUE, control=C1) 202## "fails" but returns .. very bad.. 203m.c00 204## Use better starting values, as we have such problems: 205m.c2 <- nls(Biom.2 ~ Wm[Tillage] * (- expm1(-(DVS/a[Tillage])^b[Tillage])), 206 data = biomassTill, start = m1.st, trace=TRUE, control=C1) 207## "fails" but returns at least: singular gradient iteration 126 208m.c2 209 210## Robust: not converging in 20 steps (only warning) 211mrob <- nlrob(Biomass ~ Wm[Tillage] * (-expm1(-(DVS/a[Tillage])^b[Tillage])), 212 data = biomassTill, start = m00st, trace=TRUE) 213stopifnot(identical(mrob$dataClasses, 214 c(Biomass = "numeric", Tillage = "factor", DVS = "numeric"))) 215try(## now: singular gradient in nls 216mr.2 <- nlrob(Biom.2 ~ Wm[Tillage] * (-expm1(-(DVS/a[Tillage])^b[Tillage])), 217 data = biomassTill, start = m00st, trace=TRUE) 218) 219 220## Compare coeffs: 221rbind(c.true = unlist(m1.st), 222 cl0 = coef(m.c00), 223 cl = coef(m.cl), rob = coef(mrob), 224 c2 = coef(m.c2))#, r.2 = coef(mr.2)) 225## Compare fit 226 227## Now for plotting --- nice would be xyplot, but I don't easily see how: 228 229 230(yl2 <- range(biomassTill[,"Biom.2"])) 231(ylim <- range(biomassTill[,"Biomass"]))# --> *not* showing the two outliers! 232## or even a bit more robustly: 233## sfsmisc::rrange(biomassTill[,"Biom.2"]) ##-> -201.3064 394.0914 234 235## using global data + fits from above 236p.biomass.fits <- function(ylim = c(-200, 400), n = 257, f.DVS = 0.1, 237 leg.txt = 238 c(outer(c("nls() ", "nlrob()"), 239 c("", "[ + 2 outl.]"), paste)), 240 col = c("blue2","blue3","tomato","red3"), 241 lty = c(2,1,2,1), 242 lwd = 2) 243{ 244 ## more and equispaced DVS values for nice plot: 245 rr <- extendrange(biomassTill[,"DVS"], f=f.DVS) 246 bbDVS <- seq(rr[1], rr[2], length = n) 247 b.Till <- biomassTill[,"Tillage"] 248 nP <- nlevels(b.Till) # == 3 249 m <- length(leg.txt) 250 col <- rep_len(col, m) 251 lwd <- rep_len(lwd, m) 252 lty <- rep_len(lty, m) 253 ## Prefer xyplot() - this is ugly but works (and tests predict(*, <subset>)): 254 op <- par(mfrow = c(nP,1), mar = .1 + c(3, 3, 2, 1), mgp = c(1.25, 0.6, 0)) 255 on.exit(par(op)) 256 for(lev in levels(b.Till)) { 257 cat(lev,":\n--------\n") 258 dsub <- subset(biomassTill, Tillage == lev) 259 plot(Biom.2 ~ DVS, data = dsub, ylim=ylim, main = paste("Tillage = ", lev)) 260 grid() 261 dd <- data.frame(Tillage = factor(rep.int(lev, n), levels=levels(b.Till)), 262 DVS = bbDVS) 263 lines(predict(m.cl, dd) ~ DVS, data=dd, col=col[1], lty=lty[1], lwd=lwd[1]) 264 lines(predict(mrob, dd) ~ DVS, data=dd, col=col[2], lty=lty[2], lwd=lwd[2]) 265 lines(predict(m.c2, dd) ~ DVS, data=dd, col=col[3], lty=lty[3], lwd=lwd[3]) 266 ## lines(predict(mr.2, dd) ~ DVS, data=dd, col=col[4], lty=lty[4], lwd=lwd[4]) 267 if(lev == "CA-") 268 legend("top", leg.txt, col = col, lty=lty, lwd=lwd, 269 inset=.02, bg = "gray96") #, bty="n") 270 } 271} 272 273## showing all data points: 274p.biomass.fits(ylim = yl2) 275## more interesting: 276p.biomass.fits() 277 278 279cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons'' 280