1## Fit density model
2ssden <- function(formula,type=NULL,data=list(),alpha=1.4,
3                  weights=NULL,subset,na.action=na.omit,
4                  id.basis=NULL,nbasis=NULL,seed=NULL,
5                  domain=as.list(NULL),quad=NULL,
6                  qdsz.depth=NULL,bias=NULL,
7                  prec=1e-7,maxiter=30,skip.iter=FALSE)
8{
9    ## Obtain model frame and model terms
10    mf <- match.call()
11    mf$type <- mf$alpha <- NULL
12    mf$id.basis <- mf$nbasis <- mf$seed <- NULL
13    mf$domain <- mf$quad <- mf$qdsz.depth <- mf$bias <- NULL
14    mf$prec <- mf$maxiter <- mf$skip.iter <- NULL
15    mf[[1]] <- as.name("model.frame")
16    mf <- eval(mf,parent.frame())
17    cnt <- model.weights(mf)
18    mf$"(weights)" <- NULL
19    ## Generate sub-basis
20    nobs <- dim(mf)[1]
21    if (is.null(id.basis)) {
22        if (is.null(nbasis))  nbasis <- max(30,ceiling(10*nobs^(2/9)))
23        if (nbasis>=nobs)  nbasis <- nobs
24        if (!is.null(seed))  set.seed(seed)
25        id.basis <- sample(nobs,nbasis,prob=cnt)
26    }
27    else {
28        if (max(id.basis)>nobs|min(id.basis)<1)
29            stop("gss error in ssden: id.basis out of range")
30        nbasis <- length(id.basis)
31    }
32    ## Set domain and/or generate quadrature
33    if (is.null(quad)) {
34        ## Set domain and type
35        fac.list <- NULL
36        for (xlab in names(mf)) {
37            x <- mf[[xlab]]
38            if (is.factor(x)) {
39                fac.list <- c(fac.list,xlab)
40                domain[[xlab]] <- NULL
41            }
42            else {
43                if (!is.vector(x))
44                    stop("gss error in ssden: no default quadrature")
45                if (is.null(domain[[xlab]])) {
46                    mn <- min(x)
47                    mx <- max(x)
48                    domain[[xlab]] <- c(mn,mx)+c(-1,1)*(mx-mn)*.05
49                }
50                else domain[[xlab]] <- c(min(domain[[xlab]]),max(domain[[xlab]]))
51                if (is.null(type[[xlab]]))
52                    type[[xlab]] <- list("cubic",domain[[xlab]])
53                else {
54                    if (length(type[[xlab]])==1)
55                        type[[xlab]] <- list(type[[xlab]][[1]],domain[[xlab]])
56                }
57            }
58        }
59        ## Generate numerical quadrature
60        domain <- data.frame(domain)
61        mn <- domain[1,]
62        mx <- domain[2,]
63        dm <- ncol(domain)
64        if (dm==1) {
65            ## Gauss-Legendre or uniform quadrature
66            xlab <- names(domain)
67            if (type[[xlab]][[1]]%in%c("per","cubic.per","linear.per")) {
68                quad <- list(pt=mn+(1:200)/200*(mx-mn),
69                             wt=rep((mx-mn)/200,200))
70            }
71            else quad <- gauss.quad(200,c(mn,mx))
72            quad$pt <- data.frame(quad$pt)
73            colnames(quad$pt) <- colnames(domain)
74        }
75        else {
76            ## Smolyak cubature
77            if (is.null(qdsz.depth)) qdsz.depth <- switch(min(dm,6)-1,18,14,12,11,10)
78            quad <- smolyak.quad(dm,qdsz.depth)
79            for (i in 1:ncol(domain)) {
80                xlab <- colnames(domain)[i]
81                form <- as.formula(paste("~",xlab))
82                jk <- ssden(form,data=mf,domain=domain[i],alpha=2,
83                            id.basis=id.basis,weights=cnt)
84                quad$pt[,i] <- qssden(jk,quad$pt[,i])
85                quad$wt <- quad$wt/dssden(jk,quad$pt[,i])
86            }
87            jk <- NULL
88            quad$pt <- data.frame(quad$pt)
89            colnames(quad$pt) <- colnames(domain)
90        }
91        ## Incorporate factors in quadrature
92        if (!is.null(fac.list)) {
93            for (i in 1:length(fac.list)) {
94                wk <-
95                  expand.grid(levels(mf[[fac.list[i]]]),1:length(quad$wt))
96                quad$wt <- quad$wt[wk[,2]]
97                col.names <- c(fac.list[i],colnames(quad$pt))
98                quad$pt <- data.frame(wk[,1],quad$pt[wk[,2],],stringsAsFactors=TRUE)
99                colnames(quad$pt) <- col.names
100            }
101        }
102        quad <- list(pt=quad$pt,wt=quad$wt)
103    }
104    else {
105        for (xlab in names(mf)) {
106            x <- mf[[xlab]]
107            if (is.vector(x)&!is.factor(x)) {
108                if (is.null(range <- domain[[xlab]])) {
109                    mn <- min(x)
110                    mx <- max(x)
111                    range <- c(mn,mx)+c(-1,1)*(mx-mn)*.05
112                    range[1] <- min(c(range[1],quad$pt[[xlab]]))
113                    range[2] <- max(c(range[2],quad$pt[[xlab]]))
114                }
115                if (is.null(type[[xlab]]))
116                    type[[xlab]] <- list("cubic",range)
117                else {
118                    if (length(type[[xlab]])==1)
119                        type[[xlab]] <- list(type[[xlab]][[1]],range)
120                    else {
121                        mn0 <- min(type[[xlab]][[2]])
122                        mx0 <- max(type[[xlab]][[2]])
123                        if ((mn0>mn)|(mx0<mx))
124                            stop("gss error in ssden: range not covering domain")
125                    }
126                }
127            }
128        }
129    }
130    ## Generate terms
131    term <- mkterm(mf,type)
132    term$labels <- term$labels[term$labels!="1"]
133    ## sampling bias
134    qd.pt <- quad$pt
135    qd.wt <- quad$wt
136    nmesh <- length(qd.wt)
137    if (is.null(bias)) {
138        nt <- b.wt <- 1
139        t.wt <- matrix(1,nmesh,1)
140        bias0 <- list(nt=nt,wt=b.wt,qd.wt=t.wt)
141    }
142    else {
143        if ((nt <- length(bias$t))-length(bias$wt))
144            stop("gss error in ssden: bias$t and bias$wt mismatch in size")
145        b.wt <- abs(bias$wt)/sum(abs(bias$wt))
146        t.wt <- NULL
147        for (i in 1:nt) t.wt <- cbind(t.wt,bias$fun(bias$t[i],qd.pt))
148        bias0 <- list(nt=nt,wt=b.wt,qd.wt=t.wt)
149    }
150    ## Generate s and r
151    s <- qd.s <- r <- qd.r <- NULL
152    nq <- 0
153    for (label in term$labels) {
154        x <- mf[,term[[label]]$vlist]
155        x.basis <- mf[id.basis,term[[label]]$vlist]
156        qd.x <- qd.pt[,term[[label]]$vlist]
157        nphi <- term[[label]]$nphi
158        nrk <- term[[label]]$nrk
159        if (nphi) {
160            phi <- term[[label]]$phi
161            for (i in 1:nphi) {
162                s <- cbind(s,phi$fun(x,nu=i,env=phi$env))
163                qd.s <- cbind(qd.s,phi$fun(qd.x,nu=i,env=phi$env))
164            }
165        }
166        if (nrk) {
167            rk <- term[[label]]$rk
168            for (i in 1:nrk) {
169                nq <- nq+1
170                r <- array(c(r,rk$fun(x.basis,x,nu=i,env=rk$env,out=TRUE)),c(nbasis,nobs,nq))
171                qd.r <- array(c(qd.r,rk$fun(x.basis,qd.x,nu=i,env=rk$env,out=TRUE)),
172                              c(nbasis,nmesh,nq))
173            }
174        }
175    }
176    if (!is.null(s)) {
177        nnull <- dim(s)[2]
178        ## Check s rank
179        if (qr(s)$rank<nnull)
180            stop("gss error in ssden: unpenalized terms are linearly dependent")
181        s <- t(s)
182        qd.s <- t(qd.s)
183    }
184    ## Fit the model
185    if (nq==1) {
186        r <- r[,,1]
187        qd.r <- qd.r[,,1]
188        z <- sspdsty(s,r,r[,id.basis],cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias0)
189    }
190    else z <- mspdsty(s,r,id.basis,cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias0,skip.iter)
191    ## Brief description of model terms
192    desc <- NULL
193    for (label in term$labels)
194        desc <- rbind(desc,as.numeric(c(term[[label]][c("nphi","nrk")])))
195    desc <- rbind(desc,apply(desc,2,sum))
196    rownames(desc) <- c(term$labels,"total")
197    colnames(desc) <- c("Unpenalized","Penalized")
198    ## Return the results
199    obj <- c(list(call=match.call(),mf=mf,cnt=cnt,terms=term,desc=desc,
200                  alpha=alpha,domain=domain,quad=quad,id.basis=id.basis,
201                  qdsz.depth=qdsz.depth,bias=bias0,skip.iter=skip.iter),z)
202    class(obj) <- "ssden"
203    obj
204}
205
206## Fit single smoothing parameter density
207sspdsty <- function(s,r,q,cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias)
208{
209    nxi <- dim(r)[1]
210    nobs <- dim(r)[2]
211    nqd <- length(qd.wt)
212    if (!is.null(s)) nnull <- dim(s)[1]
213    else nnull <- 0
214    nxis <- nxi+nnull
215    if (is.null(cnt)) cnt <- 0
216    ## cv function
217    cv <- function(lambda) {
218        fit <- .Fortran("dnewton",
219                        cd=as.double(cd), as.integer(nxis),
220                        as.double(10^(lambda+theta)*q), as.integer(nxi),
221                        as.double(rbind(10^theta*r,s)), as.integer(nobs),
222                        as.integer(sum(cnt)), as.integer(cnt),
223                        as.double(t(rbind(10^theta*qd.r,qd.s))), as.integer(nqd),
224                        as.integer(bias$nt), as.double(bias$wt),
225                        as.double(t(qd.wt*bias$qd.wt)),
226                        as.double(prec), as.integer(maxiter),
227                        as.double(.Machine$double.eps), integer(nxis),
228                        wk=double(2*((nqd+1)*bias$nt+nobs)+nxis*(2*nxis+4)+max(nxis,3)),
229                        info=integer(1),PACKAGE="gss")
230        if (fit$info==1) stop("gss error in ssden: Newton iteration diverges")
231        if (fit$info==2) warning("gss warning in ssden: Newton iteration fails to converge")
232        assign("cd",fit$cd,inherits=TRUE)
233        cv <- alpha*fit$wk[2]-fit$wk[1]
234        alpha.wk <- max(0,log.la0-lambda-5)*(3-alpha) + alpha
235        alpha.wk <- min(alpha.wk,3)
236        adj <- ifelse (alpha.wk>alpha,(alpha.wk-alpha)*fit$wk[2],0)
237        cv+adj
238    }
239    ## initialization
240    mu.r <- apply(qd.wt*t(qd.r),2,sum)/sum(qd.wt)
241    v.r <- apply(qd.wt*t(qd.r^2),2,sum)/sum(qd.wt)
242    if (nnull) {
243        mu.s <- apply(qd.wt*t(qd.s),2,sum)/sum(qd.wt)
244        v.s <- apply(qd.wt*t(qd.s^2),2,sum)/sum(qd.wt)
245    }
246    if (is.null(s)) theta <- 0
247    else theta <- log10(sum(v.s-mu.s^2)/nnull/sum(v.r-mu.r^2)*nxi) / 2
248    log.la0 <- log10(sum(v.r-mu.r^2)/sum(diag(q))) + theta
249    ## lambda search
250    cd <- rep(0,nxi+nnull)
251    la <- log.la0
252    mn0 <- log.la0-6
253    mx0 <- log.la0+6
254    repeat {
255        mn <- max(la-1,mn0)
256        mx <- min(la+1,mx0)
257        zz <- nlm0(cv,c(mn,mx))
258        if ((min(zz$est-mn,mx-zz$est)>=1e-1)||
259            (min(zz$est-mn0,mx0-zz$est)<1e-1)) break
260        else la <- zz$est
261    }
262    ## return
263    jk1 <- cv(zz$est)
264    int <- sum(qd.wt*exp(t(rbind(10^theta*qd.r,qd.s))%*%cd))
265    c <- cd[1:nxi]
266    if (nnull) d <- cd[nxi+(1:nnull)]
267    else d <- NULL
268    list(lambda=zz$est,theta=theta,c=c,d=d,int=int,cv=jk1)
269}
270
271## Fit multiple smoothing parameter density
272mspdsty <- function(s,r,id.basis,cnt,qd.s,qd.r,qd.wt,prec,maxiter,alpha,bias,skip.iter)
273{
274    nxi <- dim(r)[1]
275    nobs <- dim(r)[2]
276    nqd <- length(qd.wt)
277    nq <- dim(r)[3]
278    if (!is.null(s)) nnull <- dim(s)[1]
279    else nnull <- 0
280    nxis <- nxi+nnull
281    if (is.null(cnt)) cnt <- 0
282    ## cv function
283    cv <- function(theta) {
284        ind.wk <- theta!=theta.old
285        if (sum(ind.wk)==nq) {
286            r.wk0 <- qd.r.wk0 <- 0
287            for (i in 1:nq) {
288                r.wk0 <- r.wk0 + 10^theta[i]*r[,,i]
289                qd.r.wk0 <- qd.r.wk0 + 10^theta[i]*qd.r[,,i]
290            }
291            assign("r.wk",r.wk0+0,inherits=TRUE)
292            assign("qd.r.wk",qd.r.wk0+0,inherits=TRUE)
293            assign("theta.old",theta+0,inherits=TRUE)
294        }
295        else {
296            r.wk0 <- r.wk
297            qd.r.wk0 <- qd.r.wk
298            for (i in (1:nq)[ind.wk]) {
299                theta.wk <- (10^(theta[i]-theta.old[i])-1)*10^theta.old[i]
300                r.wk0 <- r.wk0 + theta.wk*r[,,i]
301                qd.r.wk0 <- qd.r.wk0 + theta.wk*qd.r[,,i]
302            }
303        }
304        q.wk <- r.wk0[,id.basis]
305        fit <- .Fortran("dnewton",
306                        cd=as.double(cd), as.integer(nxis),
307                        as.double(10^lambda*q.wk), as.integer(nxi),
308                        as.double(rbind(r.wk0,s)), as.integer(nobs),
309                        as.integer(sum(cnt)), as.integer(cnt),
310                        as.double(t(rbind(qd.r.wk0,qd.s))), as.integer(nqd),
311                        as.integer(bias$nt), as.double(bias$wt),
312                        as.double(t(qd.wt*bias$qd.wt)),
313                        as.double(prec), as.integer(maxiter),
314                        as.double(.Machine$double.eps), integer(nxis),
315                        wk=double(2*((nqd+1)*bias$nt+nobs)+nxis*(2*nxis+4)+max(nxis,3)),
316                        info=integer(1),PACKAGE="gss")
317        if (fit$info==1) stop("gss error in ssden: Newton iteration diverges")
318        if (fit$info==2) warning("gss warning in ssden: Newton iteration fails to converge")
319        assign("cd",fit$cd,inherits=TRUE)
320        cv <- alpha*fit$wk[2]-fit$wk[1]
321        alpha.wk <- max(0,theta-log.th0-5)*(3-alpha) + alpha
322        alpha.wk <- min(alpha.wk,3)
323        adj <- ifelse (alpha.wk>alpha,(alpha.wk-alpha)*fit$wk[2],0)
324        cv+adj
325    }
326    cv.wk <- function(theta) cv.scale*cv(theta)+cv.shift
327    ## initialization
328    theta <- -log10(apply(r[,id.basis,],3,function(x)sum(diag(x))))
329    r.wk <- qd.r.wk <- 0
330    for (i in 1:nq) {
331        r.wk <- r.wk + 10^theta[i]*r[,,i]
332        qd.r.wk <- qd.r.wk + 10^theta[i]*qd.r[,,i]
333    }
334    ## theta adjustment
335    z <- sspdsty(s,r.wk,r.wk[,id.basis],cnt,qd.s,qd.r.wk,qd.wt,prec,maxiter,alpha,bias)
336    theta <- theta + z$theta
337    r.wk <- qd.r.wk <- 0
338    for (i in 1:nq) {
339        theta[i] <- 2*theta[i] + log10(t(z$c)%*%r[,id.basis,i]%*%z$c)
340        r.wk <- r.wk + 10^theta[i]*r[,,i]
341        qd.r.wk <- qd.r.wk + 10^theta[i]*qd.r[,,i]
342    }
343    mu <- apply(qd.wt*t(qd.r.wk),2,sum)/sum(qd.wt)
344    v <- apply(qd.wt*t(qd.r.wk^2),2,sum)/sum(qd.wt)
345    log.la0 <- log10(sum(v-mu^2)/sum(diag(r.wk[,id.basis])))
346    log.th0 <- theta-log.la0
347    ## lambda search
348    z <- sspdsty(s,r.wk,r.wk[,id.basis],cnt,qd.s,qd.r.wk,qd.wt,prec,maxiter,alpha,bias)
349    lambda <- z$lambda
350    log.th0 <- log.th0 + z$lambda
351    theta <- theta + z$theta
352    cd <- c(z$c,z$d)
353    int <- z$int
354    ## early return
355    if (skip.iter) {
356        z$theta <- theta
357        return(z)
358    }
359    ## theta search
360    counter <- 0
361    r.wk <- qd.r.wk <- 0
362    for (i in 1:nq) {
363        r.wk <- r.wk + 10^theta[i]*r[,,i]
364        qd.r.wk <- qd.r.wk + 10^theta[i]*qd.r[,,i]
365    }
366    theta.old <- theta
367    ## scale and shift cv
368    tmp <- abs(cv(theta))
369    cv.scale <- 1
370    cv.shift <- 0
371    if (tmp<1&tmp>10^(-4)) {
372        cv.scale <- 10/tmp
373        cv.shift <- 0
374    }
375    if (tmp<10^(-4)) {
376        cv.scale <- 10^2
377        cv.shift <- 10
378    }
379    repeat {
380        zz <- nlm(cv.wk,theta,stepmax=1,ndigit=7)
381        if (zz$code<=3)  break
382        theta <- zz$est
383        counter <- counter + 1
384        if (counter>=5) {
385            warning("gss warning in ssden: CV iteration fails to converge")
386            break
387        }
388    }
389    ## return
390    jk1 <- cv(zz$est)
391    qd.r.wk <- 0
392    for (i in 1:nq) qd.r.wk <- qd.r.wk + 10^zz$est[i]*qd.r[,,i]
393    int <- sum(qd.wt*exp(t(rbind(qd.r.wk,qd.s))%*%cd))
394    c <- cd[1:nxi]
395    if (nnull) d <- cd[nxi+(1:nnull)]
396    else d <- NULL
397    list(lambda=lambda,theta=zz$est,c=c,d=d,int=int,cv=jk1)
398}
399