1## Fit hazard model 2sshzd2d1 <- function(formula1,formula2,symmetry=FALSE,data,alpha=1.4,weights=NULL, 3 subset=NULL,rho="marginal",id.basis=NULL,nbasis=NULL, 4 seed=NULL,prec=1e-7,maxiter=30,skip.iter=FALSE) 5{ 6 ## Prepare data 7 if (!is.null(subset)) { 8 data <- data[subset,] 9 if (!is.null(weights)) { 10 id.wk <- apply(!is.na(data),1,all) 11 cnt <- weights[id.wk] 12 } 13 else cnt <- NULL 14 data <- na.omit(data[subset,]) 15 } 16 else { 17 if (!is.null(weights)) { 18 id.wk <- apply(!is.na(data),1,all) 19 cnt <- weights[id.wk] 20 } 21 else cnt <- NULL 22 data <- na.omit(data) 23 } 24 ## Extract formulas 25 if (class(formula1)=="formula") { 26 form1 <- formula1 27 part1 <- random1 <- type1 <- NULL 28 } 29 else { 30 if (class(formula1)!="list") 31 stop("gss error in sshzd2d1: models must be specified via formulas or lists") 32 form1 <- formula1[[1]] 33 part1 <- formula1$partial 34 random1 <- formula1$random 35 type1 <- formula1$type 36 } 37 if (class(formula2)=="formula") { 38 form2 <- formula2 39 part2 <- random2 <- type2 <- NULL 40 } 41 else { 42 if (class(formula2)!="list") 43 stop("gss error in sshzd2d1: models must be specified via formulas or lists") 44 form2 <- formula2[[1]] 45 part2 <- formula2$partial 46 random2 <- formula2$random 47 type2 <- formula2$type 48 } 49 ## Local function handling formula 50 Surv <- function(time,status,start=0) { 51 tname <- as.character(as.list(match.call())$time) 52 if (!is.numeric(time)|!is.vector(time)) 53 stop("gss error in sshzd2d1: time should be a numerical vector") 54 if ((nobs <- length(time))-length(status)) 55 stop("gss error in sshzd2d1: time and status mismatch in size") 56 if ((length(start)-nobs)&(length(start)-1)) 57 stop("gss error in sshzd2d1: time and start mismatch in size") 58 if (any(start>time)) 59 stop("gss error in sshzd2d1: start after follow-up time") 60 if (min(start)<0) 61 stop("gss error in sshzd2d1: start before time 0") 62 time <- cbind(start,time) 63 list(tname=tname,start=time[,1],end=time[,2],status=as.logical(status)) 64 } 65 ## Obtain model terms 66 for (i in 1:2) { 67 ## Formula 68 form.wk <- switch(i,form1,form2) 69 term.wk <- terms.formula(form.wk) 70 resp <- attr(term.wk,"variable")[[2]] 71 ind.wk <- length(strsplit(deparse(resp),'')[[1]]) 72 if ((substr(deparse(resp),1,5)!='Surv(') 73 |(substr(deparse(resp),ind.wk,ind.wk)!=')')) 74 stop("gss error in sshzd2d1: response should be Surv(...)") 75 yy <- with(data,eval(resp)) 76 tname <- yy$tname 77 term.labels <- attr(term.wk,"term.labels") 78 if (!(tname%in%term.labels)) 79 stop("gss error in sshzd2d1: time main effect missing in model") 80 form.wk <- eval(parse(text=paste("~",paste(term.labels,collapse="+")))) 81 mf.wk <- model.frame(form.wk,data) 82 ## Partial 83 part.wk <- switch(i,part1,part2) 84 if (!is.null(part.wk)) { 85 mf.p.wk <- model.frame(part.wk,data) 86 mt.p.wk <- attr(mf.p.wk,"terms") 87 matx.p.wk <- model.matrix(mt.p.wk,data)[,-1,drop=FALSE] 88 if (dim(matx.p.wk)[1]!=dim(mf.wk)[1]) 89 stop("gss error in sshzd2d1: partial data are of wrong size") 90 } 91 else mf.p.wk <- mt.p.wk <- matx.p.wk <- NULL 92 ## Random 93 random.wk <- switch(i,random1,random2) 94 if (!is.null(random.wk)) { 95 if (class(random.wk)=="formula") random.wk <- mkran(random.wk,data) 96 } 97 else random.wk <- NULL 98 ## Set domain and type for time 99 type.wk <- switch(i,type1,type2) 100 mn <- min(yy$start) 101 mx <- max(yy$end) 102 tdomain <- c(max(mn-.05*(mx-mn),0),mx) 103 tname <- yy$tname 104 if (is.null(type.wk[[tname]])) type.wk[[tname]] <- list("cubic",tdomain) 105 if (length(type.wk[[tname]])==1) type.wk[[tname]] <- c(type.wk[[tname]],tdomain) 106 if (!(type.wk[[tname]][[1]]%in%c("cubic","linear"))) 107 stop("gss error in sshzd2d1: wrong type") 108 if ((min(type.wk[[tname]][[2]])>min(tdomain))| 109 (max(type.wk[[tname]][[2]])<max(tdomain))) 110 stop("gss error in sshzd2d1: time range not covering domain") 111 ## Save 112 if (i==1) { 113 mf1 <- mf.wk 114 yy1 <- yy 115 mf.p1 <- mf.p.wk 116 mt.p1 <- mt.p.wk 117 matx.p1 <- matx.p.wk 118 random1 <- random.wk 119 type1 <- type.wk 120 tdomain1 <- tdomain 121 } 122 else { 123 mf2 <- mf.wk 124 yy2 <- yy 125 mf.p2 <- mf.p.wk 126 mt.p2 <- mt.p.wk 127 matx.p2 <- matx.p.wk 128 random2 <- random.wk 129 type2 <- type.wk 130 tdomain2 <- tdomain 131 } 132 } 133 ## Generate sub-basis 134 nobs <- dim(mf1)[1] 135 if (is.null(id.basis)) { 136 if (is.null(nbasis)) nbasis <- max(30,ceiling(10*nobs^(2/9))) 137 if (nbasis>=nobs) nbasis <- nobs 138 if (!is.null(seed)) set.seed(seed) 139 id.basis <- sample(nobs,nbasis,prob=cnt) 140 } 141 else { 142 if (max(id.basis)>nobs|min(id.basis)<1) 143 stop("gss error in sshzd2d1: id.basis out of range") 144 nbasis <- length(id.basis) 145 } 146 ## Generate terms 147 if (symmetry) { 148 nhzd <- 1 149 if (dim(mf2)[2]!=dim(mf1)[2]) 150 stop("gss error in sshzd2d1: variables in parallel formulas must match") 151 mf1.wk <- mf1 152 mf2.wk <- mf2 153 names(mf1.wk) <- names(mf2) 154 names(mf2.wk) <- names(mf1) 155 tdomain1 <- c(min(tdomain1,tdomain2),max(tdomain1,tdomain2)) 156 type1[[yy1$tname]][[2]] <- tdomain1 157 type2[[yy2$tname]][[2]] <- tdomain1 158 term1 <- mkterm(rbind(mf1,mf2.wk),type1) 159 term2 <- mkterm(rbind(mf2,mf1.wk),type2) 160 mf1 <- rbind(mf1,mf2.wk) 161 yy1.sv <- yy1 162 yy1$start <- c(yy1$start,yy2$start) 163 yy1$end <- c(yy1$end,yy2$end) 164 yy1$status <- c(yy1$status,yy2$status) 165 id.basis.wk <- c(id.basis,id.basis+nobs) 166 if (!is.null(mf.p1)) { 167 if (is.null(mf.p2)||(dim(mf.p2)[2]!=dim(mf.p1)[2])) 168 stop("gss error in sshzd2d1: variables in parallel formulas must match") 169 matx.p1 <- rbind(matx.p1,matx.p2) 170 } 171 if (!is.null(random1)) { 172 if (is.null(random2)||(dim(random2$z)[2]!=dim(random1$z)[2])) 173 stop("gss error in sshzd2d1: variables in parallel formulas must match") 174 random1$z <- rbind(random1$z,random2$z) 175 } 176 if (!is.null(cnt)) cnt.wk <- c(cnt,cnt) 177 else cnt.wk <- NULL 178 } 179 else { 180 nhzd <- 2 181 term1 <- mkterm(mf1,type1) 182 term2 <- mkterm(mf2,type2) 183 id.basis.wk <- id.basis 184 cnt.wk <- cnt 185 } 186 ## Fit marginal hazard models 187 for (ii in 1:nhzd) { 188 ## Extract model components 189 mf <- switch(ii,mf1,mf2) 190 yy <- switch(ii,yy1,yy2) 191 term <- switch(ii,term1,term2) 192 mf.p <- switch(ii,mf.p1,mf.p2) 193 mt.p <- switch(ii,mt.p1,mt.p2) 194 matx.p <- switch(ii,matx.p1,matx.p2) 195 random <- switch(ii,random1,random2) 196 tdomain <- switch(ii,tdomain1,tdomain2) 197 if (rho=="weibull") partt <- switch(ii,part1,part2) 198 ## Finalize id.basis 199 nobs <- length(yy$status) 200 id.basis.wk <- id.basis.wk[yy$status[id.basis.wk]] 201 nbasis <- length(id.basis.wk) 202 id.wk <- NULL 203 nT <- sum(yy$status) 204 for (i in 1:nbasis) { 205 id.wk <- c(id.wk,(1:nT)[(1:nobs)[yy$status]%in%id.basis.wk[i]]) 206 } 207 ## Generate Gauss-Legendre quadrature 208 nmesh <- 200 209 quad <- gauss.quad(nmesh,tdomain) 210 ## set up partial terms 211 if (!is.null(mf.p)) { 212 for (lab in colnames(mf.p)) mf[,lab] <- mf.p[,lab] 213 lab.p <- labels(mt.p) 214 matx.p <- scale(matx.p) 215 center.p <- attr(matx.p,"scaled:center") 216 scale.p <- attr(matx.p,"scaled:scale") 217 part <- list(mt=mt.p,center=center.p,scale=scale.p) 218 } 219 else part <- lab.p <- NULL 220 ## Obtain unique covariate observations 221 tname <- yy$tname 222 xnames <- names(mf) 223 xnames <- xnames[!xnames%in%tname] 224 if (length(xnames)) { 225 xx <- mf[,xnames,drop=FALSE] 226 if (!is.null(part)) xx <- cbind(xx,matx.p) 227 if (!is.null(random)) xx <- cbind(xx,random$z) 228 xx <- apply(xx,1,function(x)paste(x,collapse="\r")) 229 ux <- unique(xx) 230 nx <- length(ux) 231 x.dup.ind <- duplicated(xx) 232 x.dup <- as.vector(xx[x.dup.ind]) 233 x.pt <- mf[!x.dup.ind,xnames,drop=FALSE] 234 ## xx[i,]==x.pt[x.ind[i],] 235 x.ind <- 1:nobs 236 x.ind[!x.dup.ind] <- 1:nx 237 if (nobs-nx) { 238 x.ind.wk <- range <- 1:(nobs-nx) 239 for (i in 1:nx) { 240 range.wk <- NULL 241 for (j in range) { 242 if (identical(ux[i],x.dup[j])) { 243 x.ind.wk[j] <- i 244 range.wk <- c(range.wk,j) 245 } 246 } 247 if (!is.null(range.wk)) range <- range[!(range%in%range.wk)] 248 } 249 x.ind[x.dup.ind] <- x.ind.wk 250 } 251 if (!is.null(random)) { 252 qd.z <- random$z[!x.dup.ind,] 253 random$z <- random$z[yy$status,] 254 } 255 } 256 else stop("gss error in sshzd2d1: missing covariate") 257 ## calculate rho 258 if (is.null(cnt.wk)) yy$cnt <- rep(1,nobs) 259 else yy$cnt <- cnt.wk 260 if (rho=="marginal") { 261 rho.wk <- sshzd(Surv(end,status,start)~end,data=yy, 262 id.basis=id.basis.wk,weights=cnt,alpha=2) 263 rho.qd <- hzdcurve.sshzd(rho.wk,quad$pt) 264 rhowk <- hzdcurve.sshzd(rho.wk,yy$end[yy$status]) 265 } 266 if (rho=="weibull") { 267 y.wk <- cbind(yy$end,yy$status,yy$start) 268 form <- as.formula(paste("y.wk~",paste(xnames,collapse="+"))) 269 rho.wk <- gssanova(form,family="weibull",partial=partt,data=mf, 270 id.basis=id.basis.wk,weights=cnt,alpha=2) 271 yhat <- predict(rho.wk,rho.wk$mf) 272 rho.qd <- exp(rho.wk$nu*outer(log(quad$pt),yhat[!x.dup.ind],"-"))/quad$pt 273 rhowk <- (exp(rho.wk$nu*(log(yy$end)-yhat))/yy$end)[yy$status] 274 } 275 ## integration weights at x.pt[i,] 276 qd.wt <- matrix(0,nmesh,nx) 277 for (i in 1:nobs) { 278 wk <- (quad$pt<=yy$end[i])&(quad$pt>yy$start[i]) 279 if (is.vector(rho.qd)) wk <- wk*rho.qd 280 else wk <- wk*rho.qd[,x.ind[i]] 281 if (is.null(cnt.wk)) qd.wt[,x.ind[i]] <- qd.wt[,x.ind[i]]+wk 282 else qd.wt[,x.ind[i]] <- qd.wt[,x.ind[i]]+cnt.wk[i]*wk 283 } 284 if (is.null(cnt.wk)) qd.wt <- quad$wt*qd.wt/nobs 285 else qd.wt <- quad$wt*qd.wt/sum(cnt.wk) 286 ## Generate s, r, int.s, and int.r 287 s <- r <- int.s <- int.r <- NULL 288 nq <- 0 289 for (label in term$labels) { 290 if (label=="1") { 291 s <- cbind(s,rep(1,len=nT)) 292 int.s <- c(int.s,sum(qd.wt)) 293 next 294 } 295 vlist <- term[[label]]$vlist 296 x.list <- xnames[xnames%in%vlist] 297 xy <- mf[yy$status,vlist] 298 xy.basis <- mf[id.basis.wk,vlist] 299 qd.xy <- data.frame(matrix(0,nmesh,length(vlist))) 300 names(qd.xy) <- vlist 301 if (tname%in%vlist) qd.xy[,tname] <- quad$pt 302 if (length(x.list)) xx <- x.pt[,x.list,drop=FALSE] 303 else xx <- NULL 304 nphi <- term[[label]]$nphi 305 nrk <- term[[label]]$nrk 306 if (nphi) { 307 phi <- term[[label]]$phi 308 for (i in 1:nphi) { 309 s <- cbind(s,phi$fun(xy,nu=i,env=phi$env)) 310 if (is.null(xx)) { 311 qd.wk <- phi$fun(qd.xy[,,drop=TRUE],nu=i,env=phi$env) 312 int.s <- c(int.s,sum(qd.wk*apply(qd.wt,1,sum))) 313 } 314 else { 315 int.s.wk <- 0 316 for (j in 1:nx) { 317 qd.xy[,x.list] <- xx[rep(j,nmesh),] 318 qd.wk <- phi$fun(qd.xy[,,drop=TRUE],i,phi$env) 319 int.s.wk <- int.s.wk + sum(qd.wk*qd.wt[,j]) 320 } 321 int.s <- c(int.s,int.s.wk) 322 } 323 } 324 } 325 if (nrk) { 326 rk <- term[[label]]$rk 327 for (i in 1:nrk) { 328 nq <- nq+1 329 r <- array(c(r,rk$fun(xy,xy.basis,nu=i,env=rk$env,out=TRUE)),c(nT,nbasis,nq)) 330 if (is.null(xx)) { 331 qd.wk <- rk$fun(qd.xy[,,drop=TRUE],xy.basis,i,rk$env,out=TRUE) 332 int.r <- cbind(int.r,apply(apply(qd.wt,1,sum)*qd.wk,2,sum)) 333 } 334 else { 335 int.r.wk <- 0 336 for (j in 1:nx) { 337 qd.xy[,x.list] <- xx[rep(j,nmesh),] 338 qd.wk <- rk$fun(qd.xy[,,drop=TRUE],xy.basis,i,rk$env,TRUE) 339 int.r.wk <- int.r.wk + apply(qd.wt[,j]*qd.wk,2,sum) 340 } 341 int.r <- cbind(int.r,int.r.wk) 342 } 343 } 344 } 345 } 346 ## Add the partial term 347 if (!is.null(part)) { 348 s <- cbind(s,matx.p[yy$status,]) 349 int.s <- c(int.s,t(matx.p[!x.dup.ind,])%*%apply(qd.wt,2,sum)) 350 } 351 ## generate int.z 352 if (!is.null(random)) random$int.z <- t(qd.z)%*%apply(qd.wt,2,sum) 353 ## Check s rank 354 if (!is.null(s)) { 355 nnull <- dim(s)[2] 356 if (qr(s)$rank<nnull) 357 stop("gss error in sshzd2d1: unpenalized terms are linearly dependent") 358 } 359 ## Fit the model 360 Nobs <- ifelse(is.null(cnt.wk),nobs,sum(cnt.wk)) 361 if (!is.null(cnt.wk)) cntt <- cnt.wk[yy$status] 362 else cntt <- NULL 363 z <- msphzd1(s,r,id.wk,Nobs,cntt,int.s,int.r,rhowk,random,prec,maxiter,alpha,skip.iter) 364 ## cfit 365 if (!is.null(random)) rhowk <- rhowk*exp(-random$z%*%z$b) 366 if (!is.null(cnt.wk)) cfit <- sum(cntt*rhowk)/Nobs/sum(qd.wt) 367 else cfit <- sum(rhowk)/Nobs/sum(qd.wt) 368 ## Brief description of model terms 369 desc <- NULL 370 for (label in term$labels) 371 desc <- rbind(desc,as.numeric(c(term[[label]][c("nphi","nrk")]))) 372 if (!is.null(part)) { 373 desc <- rbind(desc,matrix(c(1,0),length(lab.p),2,byrow=TRUE)) 374 } 375 desc <- rbind(desc,apply(desc,2,sum)) 376 if (is.null(part)) rownames(desc) <- c(term$labels,"total") 377 else rownames(desc) <- c(term$labels,lab.p,"total") 378 colnames(desc) <- c("Unpenalized","Penalized") 379 ## Return the results 380 hzd <- c(list(call=match.call(),mf=mf,cnt=cnt.wk,terms=term,desc=desc, 381 alpha=alpha,tname=tname,xnames=xnames,tdomain=tdomain,cfit=cfit, 382 quad=quad,x.pt=x.pt,qd.wt=qd.wt,id.basis=id.basis.wk,partial=part, 383 lab.p=lab.p,random=random,skip.iter=skip.iter),z) 384 hzd$se.aux$v <- sqrt(Nobs)*hzd$se.aux$v 385 class(hzd) <- c("sshzd1","sshzd") 386 if (ii==1) hzd1 <- hzd 387 else hzd2 <- hzd 388 } 389 ## Finalize hzd2 390 if (symmetry) { 391 hzd2 <- hzd1 392 hzd2$terms <- term2 393 names(hzd2$mf) <- hzd2$xnames <- c(names(mf2),names(mf.p2)) 394 hzd2$tname <- yy2$tname 395 hzd2$xnames <- names(hzd2$x.pt) <- hzd2$xnames[!hzd2$xnames%in%hzd2$tname] 396 hzd2$lab.p <- labels(mt.p2) 397 if (!is.null(hzd2$partial)) hzd2$partial$mt <- mt.p2 398 desc <- NULL 399 for (label in term2$labels) 400 desc <- rbind(desc,as.numeric(c(term2[[label]][c("nphi","nrk")]))) 401 if (!is.null(part)) { 402 desc <- rbind(desc,matrix(c(1,0),length(hzd2$lab.p),2,byrow=TRUE)) 403 } 404 desc <- rbind(desc,apply(desc,2,sum)) 405 if (is.null(part)) rownames(desc) <- c(term2$labels,"total") 406 else rownames(desc) <- c(term2$labels,hzd2$lab.p,"total") 407 colnames(desc) <- c("Unpenalized","Penalized") 408 hzd2$desc <- desc 409 yy1 <- yy1.sv 410 } 411 ## Estimate copula 412 nobs <- length(yy1$status) 413 s1 <- min(hzd1$tdomain) 414 s2 <- min(hzd2$tdomain) 415 u1 <- u2 <- NULL 416 for (i in 1:nobs) { 417 u1 <- c(u1,survexp.sshzd(hzd1,yy1$end[i],data[i,,drop=FALSE],s1)) 418 u2 <- c(u2,survexp.sshzd(hzd2,yy2$end[i],data[i,,drop=FALSE],s2)) 419 } 420 v1 <-v2 <- NULL 421 if (any(yy1$start&yy2$start)) { 422 for (i in 1:nobs) { 423 v1 <- c(v1,survexp.sshzd(hzd1,yy1$start[i],data[i,,drop=FALSE],s1)) 424 v2 <- c(v2,survexp.sshzd(hzd2,yy2$start[i],data[i,,drop=FALSE],s2)) 425 } 426 } 427 cens <- as.numeric(!yy1$status)+as.numeric(!yy2$status)*2 428 if (!is.null(v1)) trun <- cbind(v1,v2) 429 else trun <- NULL 430 copu <- sscopu2(cbind(u1,u2),cens,trun,symmetry,id.basis=id.basis) 431 ## Return the fits 432 obj <- list(call=match.call(),symmetry=symmetry,alpha=alpha, 433 id.basis=id.basis,hzd1=hzd1,hzd2=hzd2,copu=copu) 434 class(obj) <- "sshzd2d" 435 obj 436} 437