1## Fit hazard model
2sshzd2d1 <- function(formula1,formula2,symmetry=FALSE,data,alpha=1.4,weights=NULL,
3                     subset=NULL,rho="marginal",id.basis=NULL,nbasis=NULL,
4                     seed=NULL,prec=1e-7,maxiter=30,skip.iter=FALSE)
5{
6    ## Prepare data
7    if (!is.null(subset)) {
8        data <- data[subset,]
9        if (!is.null(weights)) {
10            id.wk <- apply(!is.na(data),1,all)
11            cnt <- weights[id.wk]
12        }
13        else cnt <- NULL
14        data <- na.omit(data[subset,])
15    }
16    else {
17        if (!is.null(weights)) {
18            id.wk <- apply(!is.na(data),1,all)
19            cnt <- weights[id.wk]
20        }
21        else cnt <- NULL
22        data <- na.omit(data)
23    }
24    ## Extract formulas
25    if (class(formula1)=="formula") {
26        form1 <- formula1
27        part1 <- random1 <- type1 <- NULL
28    }
29    else {
30        if (class(formula1)!="list")
31            stop("gss error in sshzd2d1: models must be specified via formulas or lists")
32        form1 <- formula1[[1]]
33        part1 <- formula1$partial
34        random1 <- formula1$random
35        type1 <- formula1$type
36    }
37    if (class(formula2)=="formula") {
38        form2 <- formula2
39        part2 <- random2 <- type2 <- NULL
40    }
41    else {
42        if (class(formula2)!="list")
43            stop("gss error in sshzd2d1: models must be specified via formulas or lists")
44        form2 <- formula2[[1]]
45        part2 <- formula2$partial
46        random2 <- formula2$random
47        type2 <- formula2$type
48    }
49    ## Local function handling formula
50    Surv <- function(time,status,start=0) {
51        tname <- as.character(as.list(match.call())$time)
52        if (!is.numeric(time)|!is.vector(time))
53            stop("gss error in sshzd2d1: time should be a numerical vector")
54        if ((nobs <- length(time))-length(status))
55            stop("gss error in sshzd2d1: time and status mismatch in size")
56        if ((length(start)-nobs)&(length(start)-1))
57            stop("gss error in sshzd2d1: time and start mismatch in size")
58        if (any(start>time))
59            stop("gss error in sshzd2d1: start after follow-up time")
60        if (min(start)<0)
61            stop("gss error in sshzd2d1: start before time 0")
62        time <- cbind(start,time)
63        list(tname=tname,start=time[,1],end=time[,2],status=as.logical(status))
64    }
65    ## Obtain model terms
66    for (i in 1:2) {
67        ## Formula
68        form.wk <- switch(i,form1,form2)
69        term.wk <- terms.formula(form.wk)
70        resp <- attr(term.wk,"variable")[[2]]
71        ind.wk <- length(strsplit(deparse(resp),'')[[1]])
72        if ((substr(deparse(resp),1,5)!='Surv(')
73            |(substr(deparse(resp),ind.wk,ind.wk)!=')'))
74            stop("gss error in sshzd2d1: response should be Surv(...)")
75        yy <- with(data,eval(resp))
76        tname <- yy$tname
77        term.labels <- attr(term.wk,"term.labels")
78        if (!(tname%in%term.labels))
79            stop("gss error in sshzd2d1: time main effect missing in model")
80        form.wk <- eval(parse(text=paste("~",paste(term.labels,collapse="+"))))
81        mf.wk <- model.frame(form.wk,data)
82        ## Partial
83        part.wk <- switch(i,part1,part2)
84        if (!is.null(part.wk)) {
85            mf.p.wk <- model.frame(part.wk,data)
86            mt.p.wk <- attr(mf.p.wk,"terms")
87            matx.p.wk <- model.matrix(mt.p.wk,data)[,-1,drop=FALSE]
88            if (dim(matx.p.wk)[1]!=dim(mf.wk)[1])
89                stop("gss error in sshzd2d1: partial data are of wrong size")
90        }
91        else mf.p.wk <- mt.p.wk <- matx.p.wk <- NULL
92        ## Random
93        random.wk <- switch(i,random1,random2)
94        if (!is.null(random.wk)) {
95            if (class(random.wk)=="formula") random.wk <- mkran(random.wk,data)
96        }
97        else random.wk <- NULL
98        ## Set domain and type for time
99        type.wk <- switch(i,type1,type2)
100        mn <- min(yy$start)
101        mx <- max(yy$end)
102        tdomain <- c(max(mn-.05*(mx-mn),0),mx)
103        tname <- yy$tname
104        if (is.null(type.wk[[tname]])) type.wk[[tname]] <- list("cubic",tdomain)
105        if (length(type.wk[[tname]])==1) type.wk[[tname]] <- c(type.wk[[tname]],tdomain)
106        if (!(type.wk[[tname]][[1]]%in%c("cubic","linear")))
107            stop("gss error in sshzd2d1: wrong type")
108        if ((min(type.wk[[tname]][[2]])>min(tdomain))|
109            (max(type.wk[[tname]][[2]])<max(tdomain)))
110            stop("gss error in sshzd2d1: time range not covering domain")
111        ## Save
112        if (i==1) {
113            mf1 <- mf.wk
114            yy1 <- yy
115            mf.p1 <- mf.p.wk
116            mt.p1 <- mt.p.wk
117            matx.p1 <- matx.p.wk
118            random1 <- random.wk
119            type1 <- type.wk
120            tdomain1 <- tdomain
121        }
122        else {
123            mf2 <- mf.wk
124            yy2 <- yy
125            mf.p2 <- mf.p.wk
126            mt.p2 <- mt.p.wk
127            matx.p2 <- matx.p.wk
128            random2 <- random.wk
129            type2 <- type.wk
130            tdomain2 <- tdomain
131        }
132    }
133    ## Generate sub-basis
134    nobs <- dim(mf1)[1]
135    if (is.null(id.basis)) {
136        if (is.null(nbasis)) nbasis <- max(30,ceiling(10*nobs^(2/9)))
137        if (nbasis>=nobs) nbasis <- nobs
138        if (!is.null(seed)) set.seed(seed)
139        id.basis <- sample(nobs,nbasis,prob=cnt)
140    }
141    else {
142        if (max(id.basis)>nobs|min(id.basis)<1)
143            stop("gss error in sshzd2d1: id.basis out of range")
144        nbasis <- length(id.basis)
145    }
146    ## Generate terms
147    if (symmetry) {
148        nhzd <- 1
149        if (dim(mf2)[2]!=dim(mf1)[2])
150            stop("gss error in sshzd2d1: variables in parallel formulas must match")
151        mf1.wk <- mf1
152        mf2.wk <- mf2
153        names(mf1.wk) <- names(mf2)
154        names(mf2.wk) <- names(mf1)
155        tdomain1 <- c(min(tdomain1,tdomain2),max(tdomain1,tdomain2))
156        type1[[yy1$tname]][[2]] <- tdomain1
157        type2[[yy2$tname]][[2]] <- tdomain1
158        term1 <- mkterm(rbind(mf1,mf2.wk),type1)
159        term2 <- mkterm(rbind(mf2,mf1.wk),type2)
160        mf1 <- rbind(mf1,mf2.wk)
161        yy1.sv <- yy1
162        yy1$start <- c(yy1$start,yy2$start)
163        yy1$end <- c(yy1$end,yy2$end)
164        yy1$status <- c(yy1$status,yy2$status)
165        id.basis.wk <- c(id.basis,id.basis+nobs)
166        if (!is.null(mf.p1)) {
167            if (is.null(mf.p2)||(dim(mf.p2)[2]!=dim(mf.p1)[2]))
168                stop("gss error in sshzd2d1: variables in parallel formulas must match")
169            matx.p1 <- rbind(matx.p1,matx.p2)
170        }
171        if (!is.null(random1)) {
172            if (is.null(random2)||(dim(random2$z)[2]!=dim(random1$z)[2]))
173                stop("gss error in sshzd2d1: variables in parallel formulas must match")
174            random1$z <- rbind(random1$z,random2$z)
175        }
176        if (!is.null(cnt)) cnt.wk <- c(cnt,cnt)
177        else cnt.wk <- NULL
178    }
179    else {
180        nhzd <- 2
181        term1 <- mkterm(mf1,type1)
182        term2 <- mkterm(mf2,type2)
183        id.basis.wk <- id.basis
184        cnt.wk <- cnt
185    }
186    ## Fit marginal hazard models
187    for (ii in 1:nhzd) {
188        ## Extract model components
189        mf <- switch(ii,mf1,mf2)
190        yy <- switch(ii,yy1,yy2)
191        term <- switch(ii,term1,term2)
192        mf.p <- switch(ii,mf.p1,mf.p2)
193        mt.p <- switch(ii,mt.p1,mt.p2)
194        matx.p <- switch(ii,matx.p1,matx.p2)
195        random <- switch(ii,random1,random2)
196        tdomain <- switch(ii,tdomain1,tdomain2)
197        if (rho=="weibull") partt <- switch(ii,part1,part2)
198        ## Finalize id.basis
199        nobs <- length(yy$status)
200        id.basis.wk <- id.basis.wk[yy$status[id.basis.wk]]
201        nbasis <- length(id.basis.wk)
202        id.wk <- NULL
203        nT <- sum(yy$status)
204        for (i in 1:nbasis) {
205            id.wk <- c(id.wk,(1:nT)[(1:nobs)[yy$status]%in%id.basis.wk[i]])
206        }
207        ## Generate Gauss-Legendre quadrature
208        nmesh <- 200
209        quad <- gauss.quad(nmesh,tdomain)
210        ## set up partial terms
211        if (!is.null(mf.p)) {
212            for (lab in colnames(mf.p)) mf[,lab] <- mf.p[,lab]
213            lab.p <- labels(mt.p)
214            matx.p <- scale(matx.p)
215            center.p <- attr(matx.p,"scaled:center")
216            scale.p <- attr(matx.p,"scaled:scale")
217            part <- list(mt=mt.p,center=center.p,scale=scale.p)
218        }
219        else part <- lab.p <- NULL
220        ## Obtain unique covariate observations
221        tname <- yy$tname
222        xnames <- names(mf)
223        xnames <- xnames[!xnames%in%tname]
224        if (length(xnames)) {
225            xx <- mf[,xnames,drop=FALSE]
226            if (!is.null(part)) xx <- cbind(xx,matx.p)
227            if (!is.null(random)) xx <- cbind(xx,random$z)
228            xx <- apply(xx,1,function(x)paste(x,collapse="\r"))
229            ux <- unique(xx)
230            nx <- length(ux)
231            x.dup.ind <- duplicated(xx)
232            x.dup <- as.vector(xx[x.dup.ind])
233            x.pt <- mf[!x.dup.ind,xnames,drop=FALSE]
234            ## xx[i,]==x.pt[x.ind[i],]
235            x.ind <- 1:nobs
236            x.ind[!x.dup.ind] <- 1:nx
237            if (nobs-nx) {
238                x.ind.wk <- range <- 1:(nobs-nx)
239                for (i in 1:nx) {
240                    range.wk <- NULL
241                    for (j in range) {
242                        if (identical(ux[i],x.dup[j])) {
243                            x.ind.wk[j] <- i
244                            range.wk <- c(range.wk,j)
245                        }
246                    }
247                    if (!is.null(range.wk)) range <- range[!(range%in%range.wk)]
248                }
249                x.ind[x.dup.ind] <- x.ind.wk
250            }
251            if (!is.null(random)) {
252                qd.z <- random$z[!x.dup.ind,]
253                random$z <- random$z[yy$status,]
254            }
255        }
256        else stop("gss error in sshzd2d1: missing covariate")
257        ## calculate rho
258        if (is.null(cnt.wk)) yy$cnt <- rep(1,nobs)
259        else yy$cnt <- cnt.wk
260        if (rho=="marginal") {
261            rho.wk <- sshzd(Surv(end,status,start)~end,data=yy,
262                            id.basis=id.basis.wk,weights=cnt,alpha=2)
263            rho.qd <- hzdcurve.sshzd(rho.wk,quad$pt)
264            rhowk <- hzdcurve.sshzd(rho.wk,yy$end[yy$status])
265        }
266        if (rho=="weibull") {
267            y.wk <- cbind(yy$end,yy$status,yy$start)
268            form <- as.formula(paste("y.wk~",paste(xnames,collapse="+")))
269            rho.wk <- gssanova(form,family="weibull",partial=partt,data=mf,
270                               id.basis=id.basis.wk,weights=cnt,alpha=2)
271            yhat <- predict(rho.wk,rho.wk$mf)
272            rho.qd <- exp(rho.wk$nu*outer(log(quad$pt),yhat[!x.dup.ind],"-"))/quad$pt
273            rhowk <- (exp(rho.wk$nu*(log(yy$end)-yhat))/yy$end)[yy$status]
274        }
275        ## integration weights at x.pt[i,]
276        qd.wt <- matrix(0,nmesh,nx)
277        for (i in 1:nobs) {
278            wk <- (quad$pt<=yy$end[i])&(quad$pt>yy$start[i])
279            if (is.vector(rho.qd)) wk <- wk*rho.qd
280            else wk <- wk*rho.qd[,x.ind[i]]
281            if (is.null(cnt.wk)) qd.wt[,x.ind[i]] <- qd.wt[,x.ind[i]]+wk
282            else qd.wt[,x.ind[i]] <- qd.wt[,x.ind[i]]+cnt.wk[i]*wk
283        }
284        if (is.null(cnt.wk)) qd.wt <- quad$wt*qd.wt/nobs
285        else qd.wt <- quad$wt*qd.wt/sum(cnt.wk)
286        ## Generate s, r, int.s, and int.r
287        s <- r <- int.s <- int.r <- NULL
288        nq <- 0
289        for (label in term$labels) {
290            if (label=="1") {
291                s <- cbind(s,rep(1,len=nT))
292                int.s <- c(int.s,sum(qd.wt))
293                next
294            }
295            vlist <- term[[label]]$vlist
296            x.list <- xnames[xnames%in%vlist]
297            xy <- mf[yy$status,vlist]
298            xy.basis <- mf[id.basis.wk,vlist]
299            qd.xy <- data.frame(matrix(0,nmesh,length(vlist)))
300            names(qd.xy) <- vlist
301            if (tname%in%vlist) qd.xy[,tname] <- quad$pt
302            if (length(x.list)) xx <- x.pt[,x.list,drop=FALSE]
303            else xx <- NULL
304            nphi <- term[[label]]$nphi
305            nrk <- term[[label]]$nrk
306            if (nphi) {
307                phi <- term[[label]]$phi
308                for (i in 1:nphi) {
309                    s <- cbind(s,phi$fun(xy,nu=i,env=phi$env))
310                    if (is.null(xx)) {
311                        qd.wk <- phi$fun(qd.xy[,,drop=TRUE],nu=i,env=phi$env)
312                        int.s <- c(int.s,sum(qd.wk*apply(qd.wt,1,sum)))
313                    }
314                    else {
315                        int.s.wk <- 0
316                        for (j in 1:nx) {
317                            qd.xy[,x.list] <- xx[rep(j,nmesh),]
318                            qd.wk <- phi$fun(qd.xy[,,drop=TRUE],i,phi$env)
319                            int.s.wk <- int.s.wk + sum(qd.wk*qd.wt[,j])
320                        }
321                        int.s <- c(int.s,int.s.wk)
322                    }
323                }
324            }
325            if (nrk) {
326                rk <- term[[label]]$rk
327                for (i in 1:nrk) {
328                    nq <- nq+1
329                    r <- array(c(r,rk$fun(xy,xy.basis,nu=i,env=rk$env,out=TRUE)),c(nT,nbasis,nq))
330                    if (is.null(xx)) {
331                        qd.wk <- rk$fun(qd.xy[,,drop=TRUE],xy.basis,i,rk$env,out=TRUE)
332                        int.r <- cbind(int.r,apply(apply(qd.wt,1,sum)*qd.wk,2,sum))
333                    }
334                    else {
335                        int.r.wk <- 0
336                        for (j in 1:nx) {
337                            qd.xy[,x.list] <- xx[rep(j,nmesh),]
338                            qd.wk <- rk$fun(qd.xy[,,drop=TRUE],xy.basis,i,rk$env,TRUE)
339                            int.r.wk <- int.r.wk + apply(qd.wt[,j]*qd.wk,2,sum)
340                        }
341                        int.r <- cbind(int.r,int.r.wk)
342                    }
343                }
344            }
345        }
346        ## Add the partial term
347        if (!is.null(part)) {
348            s <- cbind(s,matx.p[yy$status,])
349            int.s <- c(int.s,t(matx.p[!x.dup.ind,])%*%apply(qd.wt,2,sum))
350        }
351        ## generate int.z
352        if (!is.null(random)) random$int.z <- t(qd.z)%*%apply(qd.wt,2,sum)
353        ## Check s rank
354        if (!is.null(s)) {
355            nnull <- dim(s)[2]
356            if (qr(s)$rank<nnull)
357                stop("gss error in sshzd2d1: unpenalized terms are linearly dependent")
358        }
359        ## Fit the model
360        Nobs <- ifelse(is.null(cnt.wk),nobs,sum(cnt.wk))
361        if (!is.null(cnt.wk)) cntt <- cnt.wk[yy$status]
362        else cntt <- NULL
363        z <- msphzd1(s,r,id.wk,Nobs,cntt,int.s,int.r,rhowk,random,prec,maxiter,alpha,skip.iter)
364        ## cfit
365        if (!is.null(random)) rhowk <- rhowk*exp(-random$z%*%z$b)
366        if (!is.null(cnt.wk)) cfit <- sum(cntt*rhowk)/Nobs/sum(qd.wt)
367        else cfit <- sum(rhowk)/Nobs/sum(qd.wt)
368        ## Brief description of model terms
369        desc <- NULL
370        for (label in term$labels)
371            desc <- rbind(desc,as.numeric(c(term[[label]][c("nphi","nrk")])))
372        if (!is.null(part)) {
373            desc <- rbind(desc,matrix(c(1,0),length(lab.p),2,byrow=TRUE))
374        }
375        desc <- rbind(desc,apply(desc,2,sum))
376        if (is.null(part)) rownames(desc) <- c(term$labels,"total")
377        else rownames(desc) <- c(term$labels,lab.p,"total")
378        colnames(desc) <- c("Unpenalized","Penalized")
379        ## Return the results
380        hzd <- c(list(call=match.call(),mf=mf,cnt=cnt.wk,terms=term,desc=desc,
381                      alpha=alpha,tname=tname,xnames=xnames,tdomain=tdomain,cfit=cfit,
382                      quad=quad,x.pt=x.pt,qd.wt=qd.wt,id.basis=id.basis.wk,partial=part,
383                      lab.p=lab.p,random=random,skip.iter=skip.iter),z)
384        hzd$se.aux$v <- sqrt(Nobs)*hzd$se.aux$v
385        class(hzd) <- c("sshzd1","sshzd")
386        if (ii==1) hzd1 <- hzd
387        else hzd2 <- hzd
388    }
389    ## Finalize hzd2
390    if (symmetry) {
391        hzd2 <- hzd1
392        hzd2$terms <- term2
393        names(hzd2$mf) <- hzd2$xnames <- c(names(mf2),names(mf.p2))
394        hzd2$tname <- yy2$tname
395        hzd2$xnames <- names(hzd2$x.pt) <- hzd2$xnames[!hzd2$xnames%in%hzd2$tname]
396        hzd2$lab.p <- labels(mt.p2)
397        if (!is.null(hzd2$partial)) hzd2$partial$mt <- mt.p2
398        desc <- NULL
399        for (label in term2$labels)
400            desc <- rbind(desc,as.numeric(c(term2[[label]][c("nphi","nrk")])))
401        if (!is.null(part)) {
402            desc <- rbind(desc,matrix(c(1,0),length(hzd2$lab.p),2,byrow=TRUE))
403        }
404        desc <- rbind(desc,apply(desc,2,sum))
405        if (is.null(part)) rownames(desc) <- c(term2$labels,"total")
406        else rownames(desc) <- c(term2$labels,hzd2$lab.p,"total")
407        colnames(desc) <- c("Unpenalized","Penalized")
408        hzd2$desc <- desc
409        yy1 <- yy1.sv
410    }
411    ## Estimate copula
412    nobs <- length(yy1$status)
413    s1 <- min(hzd1$tdomain)
414    s2 <- min(hzd2$tdomain)
415    u1 <- u2 <- NULL
416    for (i in 1:nobs) {
417        u1 <- c(u1,survexp.sshzd(hzd1,yy1$end[i],data[i,,drop=FALSE],s1))
418        u2 <- c(u2,survexp.sshzd(hzd2,yy2$end[i],data[i,,drop=FALSE],s2))
419    }
420    v1 <-v2 <- NULL
421    if (any(yy1$start&yy2$start)) {
422        for (i in 1:nobs) {
423            v1 <- c(v1,survexp.sshzd(hzd1,yy1$start[i],data[i,,drop=FALSE],s1))
424            v2 <- c(v2,survexp.sshzd(hzd2,yy2$start[i],data[i,,drop=FALSE],s2))
425        }
426    }
427    cens <- as.numeric(!yy1$status)+as.numeric(!yy2$status)*2
428    if (!is.null(v1)) trun <- cbind(v1,v2)
429    else trun <- NULL
430    copu <- sscopu2(cbind(u1,u2),cens,trun,symmetry,id.basis=id.basis)
431    ## Return the fits
432    obj <- list(call=match.call(),symmetry=symmetry,alpha=alpha,
433                id.basis=id.basis,hzd1=hzd1,hzd2=hzd2,copu=copu)
434    class(obj) <- "sshzd2d"
435    obj
436}
437