1## Fit density model 2ssden <- function(formula,type=NULL,data=list(),alpha=1.4, 3 weights=NULL,subset,na.action=na.omit, 4 id.basis=NULL,nbasis=NULL,seed=NULL, 5 domain=as.list(NULL),quad=NULL, 6 qdsz.depth=NULL,bias=NULL, 7 prec=1e-7,maxiter=30,skip.iter=FALSE) 8{ 9 ## Obtain model frame and model terms 10 mf <- match.call() 11 mf$type <- mf$alpha <- NULL 12 mf$id.basis <- mf$nbasis <- mf$seed <- NULL 13 mf$domain <- mf$quad <- mf$qdsz.depth <- mf$bias <- NULL 14 mf$prec <- mf$maxiter <- mf$skip.iter <- NULL 15 mf[[1]] <- as.name("model.frame") 16 mf <- eval(mf,parent.frame()) 17 cnt <- model.weights(mf) 18 mf$"(weights)" <- NULL 19 ## Generate sub-basis 20 nobs <- dim(mf)[1] 21 if (is.null(id.basis)) { 22 if (is.null(nbasis)) nbasis <- max(30,ceiling(10*nobs^(2/9))) 23 if (nbasis>=nobs) nbasis <- nobs 24 if (!is.null(seed)) set.seed(seed) 25 id.basis <- sample(nobs,nbasis,prob=cnt) 26 } 27 else { 28 if (max(id.basis)>nobs|min(id.basis)<1) 29 stop("gss error in ssden: id.basis out of range") 30 nbasis <- length(id.basis) 31 } 32 ## Set domain and/or generate quadrature 33 if (is.null(quad)) { 34 ## Set domain and type 35 fac.list <- NULL 36 for (xlab in names(mf)) { 37 x <- mf[[xlab]] 38 if (is.factor(x)) { 39 fac.list <- c(fac.list,xlab) 40 domain[[xlab]] <- NULL 41 } 42 else { 43 if (!is.vector(x)) 44 stop("gss error in ssden: no default quadrature") 45 if (is.null(domain[[xlab]])) { 46 mn <- min(x) 47 mx <- max(x) 48 domain[[xlab]] <- c(mn,mx)+c(-1,1)*(mx-mn)*.05 49 } 50 else domain[[xlab]] <- c(min(domain[[xlab]]),max(domain[[xlab]])) 51 if (is.null(type[[xlab]])) 52 type[[xlab]] <- list("cubic",domain[[xlab]]) 53 else { 54 if (length(type[[xlab]])==1) 55 type[[xlab]] <- list(type[[xlab]][[1]],domain[[xlab]]) 56 } 57 } 58 } 59 ## Generate numerical quadrature 60 domain <- data.frame(domain) 61 mn <- domain[1,] 62 mx <- domain[2,] 63 dm <- ncol(domain) 64 if (dm==1) { 65 ## Gauss-Legendre or uniform quadrature 66 xlab <- names(domain) 67 if (type[[xlab]][[1]]%in%c("per","cubic.per","linear.per")) { 68 quad <- list(pt=mn+(1:200)/200*(mx-mn), 69 wt=rep((mx-mn)/200,200)) 70 } 71 else quad <- gauss.quad(200,c(mn,mx)) 72 quad$pt <- data.frame(quad$pt) 73 colnames(quad$pt) <- colnames(domain) 74 } 75 else { 76 ## Smolyak cubature 77 if (is.null(qdsz.depth)) qdsz.depth <- switch(min(dm,6)-1,18,14,12,11,10) 78 quad <- smolyak.quad(dm,qdsz.depth) 79 for (i in 1:ncol(domain)) { 80 xlab <- colnames(domain)[i] 81 form <- as.formula(paste("~",xlab)) 82 jk <- ssden(form,data=mf,domain=domain[i],alpha=2, 83 id.basis=id.basis,weights=cnt) 84 quad$pt[,i] <- qssden(jk,quad$pt[,i]) 85 quad$wt <- quad$wt/dssden(jk,quad$pt[,i]) 86 } 87 jk <- NULL 88 quad$pt <- data.frame(quad$pt) 89 colnames(quad$pt) <- colnames(domain) 90 } 91 ## Incorporate factors in quadrature 92 if (!is.null(fac.list)) { 93 for (i in 1:length(fac.list)) { 94 wk <- 95 expand.grid(levels(mf[[fac.list[i]]]),1:length(quad$wt)) 96 quad$wt <- quad$wt[wk[,2]] 97 col.names <- c(fac.list[i],colnames(quad$pt)) 98 quad$pt <- data.frame(wk[,1],quad$pt[wk[,2],],stringsAsFactors=TRUE) 99 colnames(quad$pt) <- col.names 100 } 101 } 102 quad <- list(pt=quad$pt,wt=quad$wt) 103 } 104 else { 105 for (xlab in names(mf)) { 106 x <- mf[[xlab]] 107 if (is.vector(x)&!is.factor(x)) { 108 if (is.null(range <- domain[[xlab]])) { 109 mn <- min(x) 110 mx <- max(x) 111 range <- c(mn,mx)+c(-1,1)*(mx-mn)*.05 112 range[1] <- min(c(range[1],quad$pt[[xlab]])) 113 range[2] <- max(c(range[2],quad$pt[[xlab]])) 114 } 115 if (is.null(type[[xlab]])) 116 type[[xlab]] <- list("cubic",range) 117 else { 118 if (length(type[[xlab]])==1) 119 type[[xlab]] <- list(type[[xlab]][[1]],range) 120 else { 121 mn0 <- min(type[[xlab]][[2]]) 122 mx0 <- max(type[[xlab]][[2]]) 123 if ((mn0>mn)|(mx0<mx)) 124 stop("gss error in ssden: range not covering domain") 125 } 126 } 127 } 128 } 129 } 130 ## Generate terms 131 term <- mkterm(mf,type) 132 term$labels <- term$labels[term$labels!="1"] 133 ## sampling bias 134 qd.pt <- quad$pt 135 qd.wt <- quad$wt 136 nmesh <- length(qd.wt) 137 if (is.null(bias)) { 138 nt <- b.wt <- 1 139 t.wt <- matrix(1,nmesh,1) 140 bias0 <- list(nt=nt,wt=b.wt,qd.wt=t.wt) 141 } 142 else { 143 if ((nt <- length(bias$t))-length(bias$wt)) 144 stop("gss error in ssden: bias$t and bias$wt mismatch in size") 145 b.wt <- abs(bias$wt)/sum(abs(bias$wt)) 146 t.wt <- NULL 147 for (i in 1:nt) t.wt <- cbind(t.wt,bias$fun(bias$t[i],qd.pt)) 148 bias0 <- list(nt=nt,wt=b.wt,qd.wt=t.wt) 149 } 150 ## Generate s and r 151 s <- qd.s <- r <- qd.r <- NULL 152 nq <- 0 153 for (label in term$labels) { 154 x <- mf[,term[[label]]$vlist] 155 x.basis <- mf[id.basis,term[[label]]$vlist] 156 qd.x <- qd.pt[,term[[label]]$vlist] 157 nphi <- term[[label]]$nphi 158 nrk <- term[[label]]$nrk 159 if (nphi) { 160 phi <- term[[label]]$phi 161 for (i in 1:nphi) { 162 s <- cbind(s,phi$fun(x,nu=i,env=phi$env)) 163 qd.s <- cbind(qd.s,phi$fun(qd.x,nu=i,env=phi$env)) 164 } 165 } 166 if (nrk) { 167 rk <- term[[label]]$rk 168 for (i in 1:nrk) { 169 nq <- nq+1 170 r <- array(c(r,rk$fun(x.basis,x,nu=i,env=rk$env,out=TRUE)),c(nbasis,nobs,nq)) 171 qd.r <- array(c(qd.r,rk$fun(x.basis,qd.x,nu=i,env=rk$env,out=TRUE)), 172 c(nbasis,nmesh,nq)) 173 } 174 } 175 } 176 if (!is.null(s)) { 177 nnull <- dim(s)[2] 178 ## Check s rank 179 if (qr(s)$rank<nnull) 180 stop("gss error in ssden: unpenalized terms are linearly dependent") 181 s <- t(s) 182 qd.s <- t(qd.s) 183 } 184 ## Fit the model 185 if (nq==1) { 186 r <- r[,,1] 187 qd.r <- qd.r[,,1] 188 z <- sspdsty(s,r,r[,id.basis],cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias0) 189 } 190 else z <- mspdsty(s,r,id.basis,cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias0,skip.iter) 191 ## Brief description of model terms 192 desc <- NULL 193 for (label in term$labels) 194 desc <- rbind(desc,as.numeric(c(term[[label]][c("nphi","nrk")]))) 195 desc <- rbind(desc,apply(desc,2,sum)) 196 rownames(desc) <- c(term$labels,"total") 197 colnames(desc) <- c("Unpenalized","Penalized") 198 ## Return the results 199 obj <- c(list(call=match.call(),mf=mf,cnt=cnt,terms=term,desc=desc, 200 alpha=alpha,domain=domain,quad=quad,id.basis=id.basis, 201 qdsz.depth=qdsz.depth,bias=bias0,skip.iter=skip.iter),z) 202 class(obj) <- "ssden" 203 obj 204} 205 206## Fit single smoothing parameter density 207sspdsty <- function(s,r,q,cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias) 208{ 209 nxi <- dim(r)[1] 210 nobs <- dim(r)[2] 211 nqd <- length(qd.wt) 212 if (!is.null(s)) nnull <- dim(s)[1] 213 else nnull <- 0 214 nxis <- nxi+nnull 215 if (is.null(cnt)) cnt <- 0 216 ## cv function 217 cv <- function(lambda) { 218 fit <- .Fortran("dnewton", 219 cd=as.double(cd), as.integer(nxis), 220 as.double(10^(lambda+theta)*q), as.integer(nxi), 221 as.double(rbind(10^theta*r,s)), as.integer(nobs), 222 as.integer(sum(cnt)), as.integer(cnt), 223 as.double(t(rbind(10^theta*qd.r,qd.s))), as.integer(nqd), 224 as.integer(bias$nt), as.double(bias$wt), 225 as.double(t(qd.wt*bias$qd.wt)), 226 as.double(prec), as.integer(maxiter), 227 as.double(.Machine$double.eps), integer(nxis), 228 wk=double(2*((nqd+1)*bias$nt+nobs)+nxis*(2*nxis+4)+max(nxis,3)), 229 info=integer(1),PACKAGE="gss") 230 if (fit$info==1) stop("gss error in ssden: Newton iteration diverges") 231 if (fit$info==2) warning("gss warning in ssden: Newton iteration fails to converge") 232 assign("cd",fit$cd,inherits=TRUE) 233 cv <- alpha*fit$wk[2]-fit$wk[1] 234 alpha.wk <- max(0,log.la0-lambda-5)*(3-alpha) + alpha 235 alpha.wk <- min(alpha.wk,3) 236 adj <- ifelse (alpha.wk>alpha,(alpha.wk-alpha)*fit$wk[2],0) 237 cv+adj 238 } 239 ## initialization 240 mu.r <- apply(qd.wt*t(qd.r),2,sum)/sum(qd.wt) 241 v.r <- apply(qd.wt*t(qd.r^2),2,sum)/sum(qd.wt) 242 if (nnull) { 243 mu.s <- apply(qd.wt*t(qd.s),2,sum)/sum(qd.wt) 244 v.s <- apply(qd.wt*t(qd.s^2),2,sum)/sum(qd.wt) 245 } 246 if (is.null(s)) theta <- 0 247 else theta <- log10(sum(v.s-mu.s^2)/nnull/sum(v.r-mu.r^2)*nxi) / 2 248 log.la0 <- log10(sum(v.r-mu.r^2)/sum(diag(q))) + theta 249 ## lambda search 250 cd <- rep(0,nxi+nnull) 251 la <- log.la0 252 mn0 <- log.la0-6 253 mx0 <- log.la0+6 254 repeat { 255 mn <- max(la-1,mn0) 256 mx <- min(la+1,mx0) 257 zz <- nlm0(cv,c(mn,mx)) 258 if ((min(zz$est-mn,mx-zz$est)>=1e-1)|| 259 (min(zz$est-mn0,mx0-zz$est)<1e-1)) break 260 else la <- zz$est 261 } 262 ## return 263 jk1 <- cv(zz$est) 264 int <- sum(qd.wt*exp(t(rbind(10^theta*qd.r,qd.s))%*%cd)) 265 c <- cd[1:nxi] 266 if (nnull) d <- cd[nxi+(1:nnull)] 267 else d <- NULL 268 list(lambda=zz$est,theta=theta,c=c,d=d,int=int,cv=jk1) 269} 270 271## Fit multiple smoothing parameter density 272mspdsty <- function(s,r,id.basis,cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias,skip.iter) 273{ 274 nxi <- dim(r)[1] 275 nobs <- dim(r)[2] 276 nqd <- length(qd.wt) 277 nq <- dim(r)[3] 278 if (!is.null(s)) nnull <- dim(s)[1] 279 else nnull <- 0 280 nxis <- nxi+nnull 281 if (is.null(cnt)) cnt <- 0 282 ## cv function 283 cv <- function(theta) { 284 ind.wk <- theta!=theta.old 285 if (sum(ind.wk)==nq) { 286 r.wk0 <- qd.r.wk0 <- 0 287 for (i in 1:nq) { 288 r.wk0 <- r.wk0 + 10^theta[i]*r[,,i] 289 qd.r.wk0 <- qd.r.wk0 + 10^theta[i]*qd.r[,,i] 290 } 291 assign("r.wk",r.wk0+0,inherits=TRUE) 292 assign("qd.r.wk",qd.r.wk0+0,inherits=TRUE) 293 assign("theta.old",theta+0,inherits=TRUE) 294 } 295 else { 296 r.wk0 <- r.wk 297 qd.r.wk0 <- qd.r.wk 298 for (i in (1:nq)[ind.wk]) { 299 theta.wk <- (10^(theta[i]-theta.old[i])-1)*10^theta.old[i] 300 r.wk0 <- r.wk0 + theta.wk*r[,,i] 301 qd.r.wk0 <- qd.r.wk0 + theta.wk*qd.r[,,i] 302 } 303 } 304 q.wk <- r.wk0[,id.basis] 305 fit <- .Fortran("dnewton", 306 cd=as.double(cd), as.integer(nxis), 307 as.double(10^lambda*q.wk), as.integer(nxi), 308 as.double(rbind(r.wk0,s)), as.integer(nobs), 309 as.integer(sum(cnt)), as.integer(cnt), 310 as.double(t(rbind(qd.r.wk0,qd.s))), as.integer(nqd), 311 as.integer(bias$nt), as.double(bias$wt), 312 as.double(t(qd.wt*bias$qd.wt)), 313 as.double(prec), as.integer(maxiter), 314 as.double(.Machine$double.eps), integer(nxis), 315 wk=double(2*((nqd+1)*bias$nt+nobs)+nxis*(2*nxis+4)+max(nxis,3)), 316 info=integer(1),PACKAGE="gss") 317 if (fit$info==1) stop("gss error in ssden: Newton iteration diverges") 318 if (fit$info==2) warning("gss warning in ssden: Newton iteration fails to converge") 319 assign("cd",fit$cd,inherits=TRUE) 320 cv <- alpha*fit$wk[2]-fit$wk[1] 321 alpha.wk <- max(0,theta-log.th0-5)*(3-alpha) + alpha 322 alpha.wk <- min(alpha.wk,3) 323 adj <- ifelse (alpha.wk>alpha,(alpha.wk-alpha)*fit$wk[2],0) 324 cv+adj 325 } 326 cv.wk <- function(theta) cv.scale*cv(theta)+cv.shift 327 ## initialization 328 theta <- -log10(apply(r[,id.basis,],3,function(x)sum(diag(x)))) 329 r.wk <- qd.r.wk <- 0 330 for (i in 1:nq) { 331 r.wk <- r.wk + 10^theta[i]*r[,,i] 332 qd.r.wk <- qd.r.wk + 10^theta[i]*qd.r[,,i] 333 } 334 ## theta adjustment 335 z <- sspdsty(s,r.wk,r.wk[,id.basis],cnt,qd.s,qd.r.wk,qd.wt,prec,maxiter,alpha,bias) 336 theta <- theta + z$theta 337 r.wk <- qd.r.wk <- 0 338 for (i in 1:nq) { 339 theta[i] <- 2*theta[i] + log10(t(z$c)%*%r[,id.basis,i]%*%z$c) 340 r.wk <- r.wk + 10^theta[i]*r[,,i] 341 qd.r.wk <- qd.r.wk + 10^theta[i]*qd.r[,,i] 342 } 343 mu <- apply(qd.wt*t(qd.r.wk),2,sum)/sum(qd.wt) 344 v <- apply(qd.wt*t(qd.r.wk^2),2,sum)/sum(qd.wt) 345 log.la0 <- log10(sum(v-mu^2)/sum(diag(r.wk[,id.basis]))) 346 log.th0 <- theta-log.la0 347 ## lambda search 348 z <- sspdsty(s,r.wk,r.wk[,id.basis],cnt,qd.s,qd.r.wk,qd.wt,prec,maxiter,alpha,bias) 349 lambda <- z$lambda 350 log.th0 <- log.th0 + z$lambda 351 theta <- theta + z$theta 352 cd <- c(z$c,z$d) 353 int <- z$int 354 ## early return 355 if (skip.iter) { 356 z$theta <- theta 357 return(z) 358 } 359 ## theta search 360 counter <- 0 361 r.wk <- qd.r.wk <- 0 362 for (i in 1:nq) { 363 r.wk <- r.wk + 10^theta[i]*r[,,i] 364 qd.r.wk <- qd.r.wk + 10^theta[i]*qd.r[,,i] 365 } 366 theta.old <- theta 367 ## scale and shift cv 368 tmp <- abs(cv(theta)) 369 cv.scale <- 1 370 cv.shift <- 0 371 if (tmp<1&tmp>10^(-4)) { 372 cv.scale <- 10/tmp 373 cv.shift <- 0 374 } 375 if (tmp<10^(-4)) { 376 cv.scale <- 10^2 377 cv.shift <- 10 378 } 379 repeat { 380 zz <- nlm(cv.wk,theta,stepmax=1,ndigit=7) 381 if (zz$code<=3) break 382 theta <- zz$est 383 counter <- counter + 1 384 if (counter>=5) { 385 warning("gss warning in ssden: CV iteration fails to converge") 386 break 387 } 388 } 389 ## return 390 jk1 <- cv(zz$est) 391 qd.r.wk <- 0 392 for (i in 1:nq) qd.r.wk <- qd.r.wk + 10^zz$est[i]*qd.r[,,i] 393 int <- sum(qd.wt*exp(t(rbind(qd.r.wk,qd.s))%*%cd)) 394 c <- cd[1:nxi] 395 if (nnull) d <- cd[nxi+(1:nnull)] 396 else d <- NULL 397 list(lambda=lambda,theta=zz$est,c=c,d=d,int=int,cv=jk1) 398} 399