1## Fit gssanova model
2gssanova1 <- function(formula,family,type=NULL,data=list(),weights,
3                      subset,offset,na.action=na.omit,partial=NULL,
4                      method=NULL,varht=1,alpha=1.4,nu=NULL,
5                      id.basis=NULL,nbasis=NULL,seed=NULL,random=NULL,
6                      skip.iter=FALSE)
7{
8    if (!(family%in%c("binomial","poisson","Gamma","nbinomial","inverse.gaussian",
9                      "polr","weibull","lognorm","loglogis")))
10        stop("gss error in gssanova1: family not implemented")
11    ## Obtain model frame and model terms
12    mf <- match.call()
13    mf$family <- mf$type <- mf$partial <- NULL
14    mf$method <- mf$varht <- mf$nu <- NULL
15    mf$alpha <- mf$id.basis <- mf$nbasis <- mf$seed <- NULL
16    mf$random <- mf$skip.iter <- NULL
17    mf[[1]] <- as.name("model.frame")
18    mf <- eval(mf,parent.frame())
19    wt <- model.weights(mf)
20    ## Generate sub-basis
21    nobs <- dim(mf)[1]
22    if (is.null(id.basis)) {
23        if (is.null(nbasis))  nbasis <- max(30,ceiling(10*nobs^(2/9)))
24        if (nbasis>=nobs)  nbasis <- nobs
25        if (!is.null(seed))  set.seed(seed)
26        id.basis <- sample(nobs,nbasis,prob=wt)
27    }
28    else {
29        if (max(id.basis)>nobs|min(id.basis)<1)
30            stop("gss error in gssanova: id.basis out of range")
31        nbasis <- length(id.basis)
32    }
33    ## Generate terms
34    term <- mkterm(mf,type)
35    ## Generate random
36    if (!is.null(random)) {
37        if (class(random)=="formula") random <- mkran(random,data)
38    }
39    ## Specify default method
40    if (is.null(method)) {
41        method <- switch(family,
42                         binomial="u",
43                         nbinomial="u",
44                         poisson="u",
45                         inverse.gaussian="v",
46                         Gamma="v",
47                         polr="u",
48                         weibull="u",
49                         lognorm="u",
50                         loglogis="u")
51    }
52    ## Generate s, r, and y
53    s <- r <- NULL
54    nq <- 0
55    for (label in term$labels) {
56        if (label=="1") {
57            s <- cbind(s,rep(1,len=nobs))
58            next
59        }
60        x <- mf[,term[[label]]$vlist]
61        x.basis <- mf[id.basis,term[[label]]$vlist]
62        nphi <- term[[label]]$nphi
63        nrk <- term[[label]]$nrk
64        if (nphi) {
65            phi <- term[[label]]$phi
66            for (i in 1:nphi)
67                s <- cbind(s,phi$fun(x,nu=i,env=phi$env))
68        }
69        if (nrk) {
70            rk <- term[[label]]$rk
71            for (i in 1:nrk) {
72                nq <- nq+1
73                r <- array(c(r,rk$fun(x,x.basis,nu=i,env=rk$env,out=TRUE)),c(nobs,nbasis,nq))
74            }
75        }
76    }
77    if (is.null(r))
78        stop("gss error in gssanova1: use glm for models with only unpenalized terms")
79    ## Add the partial term
80    if (!is.null(partial)) {
81        mf.p <- model.frame(partial,data)
82        for (lab in colnames(mf.p)) mf[,lab] <- mf.p[,lab]
83        mt.p <- attr(mf.p,"terms")
84        lab.p <- labels(mt.p)
85        matx.p <- model.matrix(mt.p,data)[,-1,drop=FALSE]
86        if (dim(matx.p)[1]!=dim(mf)[1])
87            stop("gss error in ssanova: partial data are of wrong size")
88        matx.p <- scale(matx.p)
89        center.p <- attr(matx.p,"scaled:center")
90        scale.p <- attr(matx.p,"scaled:scale")
91        s <- cbind(s,matx.p)
92        part <- list(mt=mt.p,center=center.p,scale=scale.p)
93    }
94    else part <- lab.p <- NULL
95    if (qr(s)$rank<dim(s)[2])
96        stop("gss error in gssanova: unpenalized terms are linearly dependent")
97    ## Prepare the data
98    if (family=="polr") {
99        y <- model.response(mf)
100        if (!is.factor(y))
101            stop("gss error in gssanova1: need factor response for polr family")
102        lvls <- levels(y)
103        if (nlvl <- length(lvls)<3)
104            stop("gss error in gssanova1: need at least 3 levels to fit polr family")
105        y <- outer(y,lvls,"==")
106    }
107    else y <- model.response(mf,"numeric")
108    offset <- model.offset(mf)
109    if (!is.null(offset)) {
110        term$labels <- c(term$labels,"offset")
111        term$offset <- list(nphi=0,nrk=0)
112    }
113    nu.wk <- list(NULL,FALSE)
114    if ((family=="nbinomial")&is.vector(y)) nu.wk <- list(NULL,TRUE)
115    if (family%in%c("weibull","lognorm","loglogis")) {
116        if (is.null(nu)) nu.wk <- list(nu,TRUE)
117        else nu.wk <- list(nu,FALSE)
118    }
119    ## Fit the model
120    z <- ngreg1(family,s,r,id.basis,y,wt,offset,method,varht,alpha,nu.wk,
121                random,skip.iter)
122    ## Brief description of model terms
123    desc <- NULL
124    for (label in term$labels)
125        desc <- rbind(desc,as.numeric(c(term[[label]][c("nphi","nrk")])))
126    if (!is.null(partial)) {
127        desc <- rbind(desc,matrix(c(1,0),length(lab.p),2,byrow=TRUE))
128    }
129    desc <- rbind(desc,apply(desc,2,sum))
130    if (is.null(partial)) rownames(desc) <- c(term$labels,"total")
131    else rownames(desc) <- c(term$labels,lab.p,"total")
132    colnames(desc) <- c("Unpenalized","Penalized")
133    ## Return the results
134    obj <- c(list(call=match.call(),family=family,mf=mf,terms=term,desc=desc,
135                  alpha=alpha,id.basis=id.basis,partial=part,lab.p=lab.p,
136                  random=random,skip.iter=skip.iter),z)
137    class(obj) <- c("gssanova","ssanova")
138    obj
139}
140
141## Fit Single Smoothing Parameter REGression by Performance-Oriented Iteration
142ngreg1 <- function(family,s,r,id.basis,y,wt,offset,method,varht,alpha,nu,
143                   random,skip.iter)
144{
145    nobs <- dim(s)[1]
146    nq <- dim(r)[3]
147    eta <- rep(0,nobs)
148    if (family%in%c("Gamma","inverse.gaussian")) {
149        yy <- log(y)
150        if (!is.null(offset)) yy <- yy-offset
151        if (nq==1) {
152            z <- sspreg1(s,r[,,1],r[id.basis,,1],yy,wt,"v",alpha,varht,random)
153            eta <- s%*%z$d+10^z$theta*r[,,1]%*%z$c
154            if (!is.null(random)) eta <- eta+random$z%*%z$b
155            eta <- as.vector(eta)
156        }
157        else {
158            z <- mspreg1(s,r,id.basis,yy,wt,"v",alpha,varht,random,skip.iter)
159            r.wk <- 0
160            for (i in 1:nq) r.wk <- r.wk+10^z$theta[i]*r[,,i]
161            eta <- s%*%z$d+r.wk%*%z$c
162            if (!is.null(random)) eta <- eta+random$z%*%z$b
163            eta <- as.vector(eta)
164        }
165    }
166    if (nu[[2]]&is.null(nu[[1]])) {
167        wk <- switch(family,
168                     nbinomial=mkdata.nbinomial(y,eta,wt,offset,NULL),
169                     weibull=mkdata.weibull(y,eta,wt,offset,nu),
170                     lognorm=mkdata.lognorm(y,eta,wt,offset,nu),
171                     loglogis=mkdata.loglogis(y,eta,wt,offset,nu))
172        nu <- wk$nu
173    }
174    if (family=="polr") {
175        if (is.null(wt)) P <- apply(y,2,sum)
176        else P <- apply(y*wt,2,sum)
177        P <- P/sum(P)
178        P <- cumsum(P)
179        nnu <- length(P)-2
180        eta <- rep(qlogis(P[1]),nobs)
181        nu <- diff(qlogis(P[-(nnu+2)]))
182    }
183    iter <- 0
184    repeat {
185        iter <- iter+1
186        dat <- switch(family,
187                      binomial=mkdata.binomial(y,eta,wt,offset),
188                      nbinomial=mkdata.nbinomial(y,eta,wt,offset,nu),
189                      polr=mkdata.polr(y,eta,wt,offset,nu),
190                      poisson=mkdata.poisson(y,eta,wt,offset),
191                      inverse.gaussian=mkdata.inverse.gaussian(y,eta,wt,offset),
192                      Gamma=mkdata.Gamma(y,eta,wt,offset),
193                      weibull=mkdata.weibull(y,eta,wt,offset,nu),
194                      lognorm=mkdata.lognorm(y,eta,wt,offset,nu),
195                      loglogis=mkdata.loglogis(y,eta,wt,offset,nu))
196        nu <- dat$nu
197        w <- as.vector(sqrt(dat$wt))
198        if (nq==1) {
199            z <- sspreg1(s,r[,,1],r[id.basis,,1],dat$ywk,w,method,alpha,varht,random)
200        }
201        else z <- mspreg1(s,r,id.basis,dat$ywk,w,method,alpha,varht,random,skip.iter)
202        r.wk <- 0
203        for (i in 1:nq) r.wk <- r.wk + 10^z$theta[i]*r[,,i]
204        if (!is.null(random)) r.wk <- cbind(r.wk,random$z)
205        eta.new <- as.vector(s%*%z$d+r.wk%*%c(z$c,z$b))
206        if (!is.null(offset)) eta.new <- eta.new + offset
207        disc <- sum(dat$wt*((eta-eta.new)/(1+abs(eta)))^2)/sum(dat$wt)
208        eta <- eta.new
209        if (disc<1e-7) break
210        if (iter>=30) {
211            warning("gss warning in gssanova1: performance-oriented iteration fails to converge")
212            break
213        }
214    }
215    ## Return the fit
216    if (is.list(nu)) nu <- nu[[1]]
217    c(z,list(nu=nu,eta=eta,w=dat$wt))
218}
219