1## Fit gssanova model 2gssanova1 <- function(formula,family,type=NULL,data=list(),weights, 3 subset,offset,na.action=na.omit,partial=NULL, 4 method=NULL,varht=1,alpha=1.4,nu=NULL, 5 id.basis=NULL,nbasis=NULL,seed=NULL,random=NULL, 6 skip.iter=FALSE) 7{ 8 if (!(family%in%c("binomial","poisson","Gamma","nbinomial","inverse.gaussian", 9 "polr","weibull","lognorm","loglogis"))) 10 stop("gss error in gssanova1: family not implemented") 11 ## Obtain model frame and model terms 12 mf <- match.call() 13 mf$family <- mf$type <- mf$partial <- NULL 14 mf$method <- mf$varht <- mf$nu <- NULL 15 mf$alpha <- mf$id.basis <- mf$nbasis <- mf$seed <- NULL 16 mf$random <- mf$skip.iter <- NULL 17 mf[[1]] <- as.name("model.frame") 18 mf <- eval(mf,parent.frame()) 19 wt <- model.weights(mf) 20 ## Generate sub-basis 21 nobs <- dim(mf)[1] 22 if (is.null(id.basis)) { 23 if (is.null(nbasis)) nbasis <- max(30,ceiling(10*nobs^(2/9))) 24 if (nbasis>=nobs) nbasis <- nobs 25 if (!is.null(seed)) set.seed(seed) 26 id.basis <- sample(nobs,nbasis,prob=wt) 27 } 28 else { 29 if (max(id.basis)>nobs|min(id.basis)<1) 30 stop("gss error in gssanova: id.basis out of range") 31 nbasis <- length(id.basis) 32 } 33 ## Generate terms 34 term <- mkterm(mf,type) 35 ## Generate random 36 if (!is.null(random)) { 37 if (class(random)=="formula") random <- mkran(random,data) 38 } 39 ## Specify default method 40 if (is.null(method)) { 41 method <- switch(family, 42 binomial="u", 43 nbinomial="u", 44 poisson="u", 45 inverse.gaussian="v", 46 Gamma="v", 47 polr="u", 48 weibull="u", 49 lognorm="u", 50 loglogis="u") 51 } 52 ## Generate s, r, and y 53 s <- r <- NULL 54 nq <- 0 55 for (label in term$labels) { 56 if (label=="1") { 57 s <- cbind(s,rep(1,len=nobs)) 58 next 59 } 60 x <- mf[,term[[label]]$vlist] 61 x.basis <- mf[id.basis,term[[label]]$vlist] 62 nphi <- term[[label]]$nphi 63 nrk <- term[[label]]$nrk 64 if (nphi) { 65 phi <- term[[label]]$phi 66 for (i in 1:nphi) 67 s <- cbind(s,phi$fun(x,nu=i,env=phi$env)) 68 } 69 if (nrk) { 70 rk <- term[[label]]$rk 71 for (i in 1:nrk) { 72 nq <- nq+1 73 r <- array(c(r,rk$fun(x,x.basis,nu=i,env=rk$env,out=TRUE)),c(nobs,nbasis,nq)) 74 } 75 } 76 } 77 if (is.null(r)) 78 stop("gss error in gssanova1: use glm for models with only unpenalized terms") 79 ## Add the partial term 80 if (!is.null(partial)) { 81 mf.p <- model.frame(partial,data) 82 for (lab in colnames(mf.p)) mf[,lab] <- mf.p[,lab] 83 mt.p <- attr(mf.p,"terms") 84 lab.p <- labels(mt.p) 85 matx.p <- model.matrix(mt.p,data)[,-1,drop=FALSE] 86 if (dim(matx.p)[1]!=dim(mf)[1]) 87 stop("gss error in ssanova: partial data are of wrong size") 88 matx.p <- scale(matx.p) 89 center.p <- attr(matx.p,"scaled:center") 90 scale.p <- attr(matx.p,"scaled:scale") 91 s <- cbind(s,matx.p) 92 part <- list(mt=mt.p,center=center.p,scale=scale.p) 93 } 94 else part <- lab.p <- NULL 95 if (qr(s)$rank<dim(s)[2]) 96 stop("gss error in gssanova: unpenalized terms are linearly dependent") 97 ## Prepare the data 98 if (family=="polr") { 99 y <- model.response(mf) 100 if (!is.factor(y)) 101 stop("gss error in gssanova1: need factor response for polr family") 102 lvls <- levels(y) 103 if (nlvl <- length(lvls)<3) 104 stop("gss error in gssanova1: need at least 3 levels to fit polr family") 105 y <- outer(y,lvls,"==") 106 } 107 else y <- model.response(mf,"numeric") 108 offset <- model.offset(mf) 109 if (!is.null(offset)) { 110 term$labels <- c(term$labels,"offset") 111 term$offset <- list(nphi=0,nrk=0) 112 } 113 nu.wk <- list(NULL,FALSE) 114 if ((family=="nbinomial")&is.vector(y)) nu.wk <- list(NULL,TRUE) 115 if (family%in%c("weibull","lognorm","loglogis")) { 116 if (is.null(nu)) nu.wk <- list(nu,TRUE) 117 else nu.wk <- list(nu,FALSE) 118 } 119 ## Fit the model 120 z <- ngreg1(family,s,r,id.basis,y,wt,offset,method,varht,alpha,nu.wk, 121 random,skip.iter) 122 ## Brief description of model terms 123 desc <- NULL 124 for (label in term$labels) 125 desc <- rbind(desc,as.numeric(c(term[[label]][c("nphi","nrk")]))) 126 if (!is.null(partial)) { 127 desc <- rbind(desc,matrix(c(1,0),length(lab.p),2,byrow=TRUE)) 128 } 129 desc <- rbind(desc,apply(desc,2,sum)) 130 if (is.null(partial)) rownames(desc) <- c(term$labels,"total") 131 else rownames(desc) <- c(term$labels,lab.p,"total") 132 colnames(desc) <- c("Unpenalized","Penalized") 133 ## Return the results 134 obj <- c(list(call=match.call(),family=family,mf=mf,terms=term,desc=desc, 135 alpha=alpha,id.basis=id.basis,partial=part,lab.p=lab.p, 136 random=random,skip.iter=skip.iter),z) 137 class(obj) <- c("gssanova","ssanova") 138 obj 139} 140 141## Fit Single Smoothing Parameter REGression by Performance-Oriented Iteration 142ngreg1 <- function(family,s,r,id.basis,y,wt,offset,method,varht,alpha,nu, 143 random,skip.iter) 144{ 145 nobs <- dim(s)[1] 146 nq <- dim(r)[3] 147 eta <- rep(0,nobs) 148 if (family%in%c("Gamma","inverse.gaussian")) { 149 yy <- log(y) 150 if (!is.null(offset)) yy <- yy-offset 151 if (nq==1) { 152 z <- sspreg1(s,r[,,1],r[id.basis,,1],yy,wt,"v",alpha,varht,random) 153 eta <- s%*%z$d+10^z$theta*r[,,1]%*%z$c 154 if (!is.null(random)) eta <- eta+random$z%*%z$b 155 eta <- as.vector(eta) 156 } 157 else { 158 z <- mspreg1(s,r,id.basis,yy,wt,"v",alpha,varht,random,skip.iter) 159 r.wk <- 0 160 for (i in 1:nq) r.wk <- r.wk+10^z$theta[i]*r[,,i] 161 eta <- s%*%z$d+r.wk%*%z$c 162 if (!is.null(random)) eta <- eta+random$z%*%z$b 163 eta <- as.vector(eta) 164 } 165 } 166 if (nu[[2]]&is.null(nu[[1]])) { 167 wk <- switch(family, 168 nbinomial=mkdata.nbinomial(y,eta,wt,offset,NULL), 169 weibull=mkdata.weibull(y,eta,wt,offset,nu), 170 lognorm=mkdata.lognorm(y,eta,wt,offset,nu), 171 loglogis=mkdata.loglogis(y,eta,wt,offset,nu)) 172 nu <- wk$nu 173 } 174 if (family=="polr") { 175 if (is.null(wt)) P <- apply(y,2,sum) 176 else P <- apply(y*wt,2,sum) 177 P <- P/sum(P) 178 P <- cumsum(P) 179 nnu <- length(P)-2 180 eta <- rep(qlogis(P[1]),nobs) 181 nu <- diff(qlogis(P[-(nnu+2)])) 182 } 183 iter <- 0 184 repeat { 185 iter <- iter+1 186 dat <- switch(family, 187 binomial=mkdata.binomial(y,eta,wt,offset), 188 nbinomial=mkdata.nbinomial(y,eta,wt,offset,nu), 189 polr=mkdata.polr(y,eta,wt,offset,nu), 190 poisson=mkdata.poisson(y,eta,wt,offset), 191 inverse.gaussian=mkdata.inverse.gaussian(y,eta,wt,offset), 192 Gamma=mkdata.Gamma(y,eta,wt,offset), 193 weibull=mkdata.weibull(y,eta,wt,offset,nu), 194 lognorm=mkdata.lognorm(y,eta,wt,offset,nu), 195 loglogis=mkdata.loglogis(y,eta,wt,offset,nu)) 196 nu <- dat$nu 197 w <- as.vector(sqrt(dat$wt)) 198 if (nq==1) { 199 z <- sspreg1(s,r[,,1],r[id.basis,,1],dat$ywk,w,method,alpha,varht,random) 200 } 201 else z <- mspreg1(s,r,id.basis,dat$ywk,w,method,alpha,varht,random,skip.iter) 202 r.wk <- 0 203 for (i in 1:nq) r.wk <- r.wk + 10^z$theta[i]*r[,,i] 204 if (!is.null(random)) r.wk <- cbind(r.wk,random$z) 205 eta.new <- as.vector(s%*%z$d+r.wk%*%c(z$c,z$b)) 206 if (!is.null(offset)) eta.new <- eta.new + offset 207 disc <- sum(dat$wt*((eta-eta.new)/(1+abs(eta)))^2)/sum(dat$wt) 208 eta <- eta.new 209 if (disc<1e-7) break 210 if (iter>=30) { 211 warning("gss warning in gssanova1: performance-oriented iteration fails to converge") 212 break 213 } 214 } 215 ## Return the fit 216 if (is.list(nu)) nu <- nu[[1]] 217 c(z,list(nu=nu,eta=eta,w=dat$wt)) 218} 219