1 2make.formula<-function(names) formula(paste("~",paste(names,collapse="+"))) 3 4dimnames.survey.design<-function(x) dimnames(x$variables) 5dimnames.svyrep.design<-function(x) dimnames(x$variables) 6dimnames.twophase<-function(x) dimnames(x$phase1$sample$variables) 7 8oldsvydesign<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL, 9 data=NULL, nest=FALSE, check.strata=!nest,weights=NULL){ 10 11 .Deprecated("svydesign") 12 13 ## less memory-hungry version for sparse tables 14 interaction<-function (..., drop = TRUE) { 15 args <- list(...) 16 narg <- length(args) 17 if (narg == 1 && is.list(args[[1]])) { 18 args <- args[[1]] 19 narg <- length(args) 20 } 21 22 ls<-sapply(args,function(a) length(levels(a))) 23 ans<-do.call("paste",c(lapply(args,as.character),sep=".")) 24 ans<-factor(ans) 25 return(ans) 26 27 } 28 29 30 na.failsafe<-function(object,...){ 31 if (NCOL(object)==0) 32 object 33 else na.fail(object) 34 } 35 36 if(inherits(ids,"formula")) { 37 mf<-substitute(model.frame(ids,data=data,na.action=na.failsafe)) 38 ids<-eval.parent(mf) 39 } else if (!is.null(ids)) 40 ids<-na.fail(data.frame(ids)) 41 42 if(inherits(probs,"formula")){ 43 mf<-substitute(model.frame(probs,data=data,na.action=na.failsafe)) 44 probs<-eval.parent(mf) 45 } 46 47 if(inherits(weights,"formula")){ 48 mf<-substitute(model.frame(weights,data=data,na.action=na.failsafe)) 49 weights<-eval.parent(mf) 50 } else if (!is.null(weights)) 51 weights<-na.fail(data.frame(weights)) 52 53 if(!is.null(weights)){ 54 if (!is.null(probs)) 55 stop("Can't specify both sampling weights and probabilities") 56 else 57 probs<-1/weights 58 } 59 60 61 62 if (!is.null(strata)){ 63 if(inherits(strata,"formula")){ 64 mf<-substitute(model.frame(strata,data=data, na.action=na.failsafe)) 65 strata<-eval.parent(mf) 66 } 67 if(is.list(strata)) 68 strata<-na.fail(do.call("interaction", strata)) 69 if (!is.factor(strata)) 70 strata<-factor(strata) 71 has.strata<-TRUE 72 } else { 73 strata<-factor(rep(1,NROW(ids))) 74 has.strata <-FALSE 75 } 76 77 if (inherits(variables,"formula")){ 78 mf<-substitute(model.frame(variables,data=data,na.action=na.pass)) 79 variables <- eval.parent(mf) 80 } else if (is.null(variables)){ 81 variables<-data 82 } else 83 variables<-data.frame(variables) 84 85 86 if (inherits(fpc,"formula")){ 87 mf<-substitute(model.frame(fpc,data=data,na.action=na.failsafe)) 88 fpc<-eval.parent(mf) 89 if (length(fpc)) 90 fpc<-fpc[,1] 91 } 92 93 if (is.null(ids) || NCOL(ids)==0) 94 ids<-data.frame(.id=seq(length=NROW(variables))) 95 96 ## force subclusters nested in clusters 97 if (nest && NCOL(ids)>1){ 98 N<-ncol(ids) 99 for(i in 2:(N)){ 100 ids[,i]<-do.call("interaction", ids[,1:i,drop=TRUE]) 101 } 102 } 103 ## force clusters nested in strata 104 if (nest && has.strata && NCOL(ids)){ 105 N<-NCOL(ids) 106 for(i in 1:N) 107 ids[,i]<-do.call("interaction", list(strata, ids[,i])) 108 } 109 110 ## check if clusters nested in strata 111 if (check.strata && nest) 112 warning("No point in check.strata=TRUE if nest=TRUE") 113 if(check.strata && !is.null(strata) && NCOL(ids)){ 114 sc<-rowSums(table(ids[,1],strata)>0) 115 if(any(sc>1)) stop("Clusters not nested in strata") 116 } 117 118 ## Put degrees of freedom (# of PSUs in each stratum) in object, to 119 ## allow subpopulations 120 if (NCOL(ids)){ 121 nPSU<-table(strata[!duplicated(ids[,1])]) 122 } 123 124 125 if (!is.null(fpc)){ 126 127 if (NCOL(ids)>1){ 128 if (all(fpc<1)) 129 warning("FPC is not currently supported for multi-stage sampling") 130 else 131 stop("Can't compute FPC from population size for multi-stage sampling") 132 } 133 134 ## Finite population correction: specified per observation 135 if (is.numeric(fpc) && length(fpc)==NROW(variables)){ 136 tbl<-by(fpc,list(strata),unique) 137 if (any(sapply(tbl,length)!=1)) 138 stop("fpc not constant within strata") 139 fpc<-data.frame(strata=factor(rownames(tbl),levels=levels(strata)), 140 N=as.vector(tbl)) 141 } 142 ## Now reduced to fpc per stratum 143 nstr<-table(strata[!duplicated(ids[[1]])]) 144 145 if (all(fpc[,2]<=1)){ 146 fpc[,2]<- nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2] 147 } else if (any(fpc[,2]<nstr[match(as.character(fpc[,1]), names(nstr))])) 148 stop("Over 100% sampling in some strata") 149 150 } 151 152 ## if FPC specified, but no weights, use it for weights 153 if (is.null(probs) && is.null(weights) && !is.null(fpc)){ 154 pstr<-nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2] 155 probs<-pstr[match(as.character(strata),as.character(fpc[,1]))] 156 probs<-as.vector(probs) 157 } 158 159 160 certainty<-rep(FALSE,length(unique(strata))) 161 names(certainty)<-as.character(unique(strata)) 162 if (any(nPSU==1)){ 163 ## lonely PSUs: are they certainty PSUs? 164 if (!is.null(fpc)){ 165 certainty<- fpc$N < 1.01 166 names(certainty)<-as.character(fpc$strata) 167 } else if (all(as.vector(probs)<=1)){ 168 certainty<- !is.na(match(as.character(unique(strata)),as.character(strata)[probs > 0.99])) 169 names(certainty)<-as.character(unique(strata)) 170 } else { 171 warning("Some strata have only one PSU and I can't tell if they are certainty PSUs") 172 } 173 174 } 175 176 if (is.numeric(probs) && length(probs)==1) 177 probs<-rep(probs, NROW(variables)) 178 179 if (length(probs)==0) probs<-rep(1,NROW(variables)) 180 181 if (NCOL(probs)==1) probs<-data.frame(probs) 182 183 rval<-list(cluster=ids) 184 rval$strata<-strata 185 rval$has.strata<-has.strata 186 rval$prob<- apply(probs,1,prod) 187 rval$allprob<-probs 188 rval$call<-match.call() 189 rval$variables<-variables 190 rval$fpc<-fpc 191 rval$certainty<-certainty 192 rval$call<-sys.call() 193 rval$nPSU<-nPSU 194 class(rval)<-"survey.design" 195 rval 196 } 197 198print.survey.design<-function(x,varnames=FALSE,design.summaries=FALSE,...){ 199 .svycheck(x) 200 n<-NROW(x$cluster) 201 if (x$has.strata) cat("Stratified ") 202 un<-length(unique(x$cluster[,1])) 203 if(n==un){ 204 cat("Independent Sampling design\n") 205 is.independent<-TRUE 206 } else { 207 cat(NCOL(x$cluster),"- level Cluster Sampling design\n") 208 nn<-lapply(x$cluster,function(i) length(unique(i))) 209 cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep="")) 210 is.independent<-FALSE 211 } 212 print(x$call) 213 if (design.summaries){ 214 cat("Probabilities:\n") 215 print(summary(x$prob)) 216 if(x$has.strata){ 217 cat("Stratum sizes: \n") 218 a<-rbind(obs=table(x$strata), 219 design.PSU=x$nPSU, 220 actual.PSU=if(!is.independent || !is.null(x$fpc)) 221 table(x$strata[!duplicated(x$cluster[,1])])) 222 print(a) 223 } 224 if (!is.null(x$fpc)){ 225 if (x$has.strata) { 226 cat("Population stratum sizes (PSUs): \n") 227 print(x$fpc) 228 } else { 229 cat("Population size (PSUs):",x$fpc[,2],"\n") 230 } 231 } 232 } 233 if (varnames){ 234 cat("Data variables:\n") 235 print(names(x$variables)) 236 } 237 invisible(x) 238} 239 240"[.survey.design"<-function (x,i, ...){ 241 242 if (!missing(i)){ 243 if (is.calibrated(x)){ 244 tmp<-x$prob[i,] 245 x$prob<-rep(Inf, length(x$prob)) 246 x$prob[i,]<-tmp 247 } else { 248 x$variables<-"[.data.frame"(x$variables,i,...,drop=FALSE) 249 x$cluster<-x$cluster[i,,drop=FALSE] 250 x$prob<-x$prob[i] 251 x$allprob<-x$allprob[i,,drop=FALSE] 252 x$strata<-x$strata[i] 253 } 254 } else { 255 x$variables<-x$variables[,...,drop=FALSE] 256 } 257 258 x 259} 260 261"[<-.survey.design"<-function(x, ...,value){ 262 if (inherits(value, "survey.design")) 263 value<-value$variables 264 x$variables[...]<-value 265 x 266} 267 268dim.survey.design<-function(x){ 269 dim(x$variables) 270} 271 272na.fail.survey.design<-function(object,...){ 273 tmp<-na.fail(object$variables,...) 274 object 275} 276 277na.omit.survey.design<-function(object,...){ 278 tmp<-na.omit(object$variables,...) 279 omit<-attr(tmp,"na.action") 280 if (length(omit)){ 281 object<-object[-omit,] 282 object$variables<-tmp 283 attr(object,"na.action")<-omit 284 } 285 object 286} 287 288na.exclude.survey.design<-function(object,...){ 289 tmp<-na.exclude(object$variables,...) 290 exclude<-attr(tmp,"na.action") 291 if (length(exclude)){ 292 object<-object[-exclude,] 293 object$variables<-tmp 294 attr(object,"na.action")<-exclude 295 } 296 object 297} 298 299 300update.survey.design<-function(object,...){ 301 302 dots<-substitute(list(...))[-1] 303 newnames<-names(dots) 304 305 for(j in seq(along=dots)){ 306 object$variables[,newnames[j]]<-eval(dots[[j]],object$variables, parent.frame()) 307 } 308 309 object$call<-sys.call(-1) 310 object 311} 312 313 314subset.survey.design<-function(x,subset,...){ 315 e <- substitute(subset) 316 r <- eval(e, x$variables, parent.frame()) 317 r <- r & !is.na(r) 318 x<-x[r,] 319 x$call<-sys.call(-1) 320 x 321} 322 323summary.survey.design<-function(object,...){ 324 class(object)<-"summary.survey.design" 325 object 326} 327 328print.summary.survey.design<-function(x,...){ 329 y<-x 330 class(y)<-"survey.design" 331 print(y,varnames=TRUE,design.summaries=TRUE,...) 332} 333 334postStratify.survey.design<-function(design, strata, population, partial=FALSE,...){ 335 336 if(inherits(strata,"formula")){ 337 mf<-substitute(model.frame(strata, data=design$variables,na.action=na.fail)) 338 strata<-eval.parent(mf) 339 } 340 strata<-as.data.frame(strata) 341 342 sampletable<-xtabs(I(1/design$prob)~.,data=strata) 343 sampletable<-as.data.frame(sampletable) 344 345 if (inherits(population,"table")) 346 population<-as.data.frame(population) 347 else if (is.data.frame(population)) 348 population$Freq <- as.vector(population$Freq) ##allows Freq computed by tapply() 349 else 350 stop("population must be a table or dataframe") 351 352 if (!all(names(strata) %in% names(population))) 353 stop("Stratifying variables don't match") 354 nn<- names(population) %in% names(strata) 355 if (sum(!nn)!=1) 356 stop("stratifying variables don't match") 357 358 names(population)[which(!nn)]<-"Pop.Freq" 359 360 both<-merge(sampletable, population, by=names(strata), all=TRUE) 361 362 samplezero <- both$Freq %in% c(0, NA) 363 popzero <- both$Pop.Freq %in% c(0, NA) 364 both<-both[!(samplezero & popzero),] 365 366 if (any(onlysample<- popzero & !samplezero)){ 367 print(both[onlysample,]) 368 stop("Strata in sample absent from population. This Can't Happen") 369 } 370 if (any(onlypop <- samplezero & !popzero)){ 371 if (partial){ 372 both<-both[!onlypop,] 373 warning("Some strata absent from sample: ignored") 374 } else { 375 print(both[onlypop,]) 376 stop("Some strata absent from sample: use partial=TRUE to ignore them.") 377 } 378 } 379 380 reweight<-both$Pop.Freq/both$Freq 381 both$label <- do.call("interaction", list(both[,names(strata)])) 382 designlabel <- do.call("interaction", strata) 383 index<-match(designlabel, both$label) 384 385 attr(index,"oldweights")<-1/design$prob 386 design$prob<-design$prob/reweight[index] 387 attr(index,"weights")<-1/design$prob 388 design$postStrata<-c(design$postStrata,list(index)) 389 390 ## Do we need to iterate here a la raking to get design strata 391 ## and post-strata both balanced? 392 design$call<-sys.call(-1) 393 394 design 395} 396 397 398svyCprod<-function(x, strata, psu, fpc, nPSU, certainty=NULL, postStrata=NULL, 399 lonely.psu=getOption("survey.lonely.psu")){ 400 401 x<-as.matrix(x) 402 n<-NROW(x) 403 404 ## Remove post-stratum means, which may cut across PSUs 405 if(!is.null(postStrata)){ 406 for (psvar in postStrata){ 407 if (inherits(psvar, "greg_calibration") || inherits(psvar, "raking")) 408 stop("rake() and calibrate() not supported for old-style design objects") 409 psw<-attr(psvar,"weights") 410 psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar))) 411 x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw 412 } 413 } 414 415 ##First collapse over PSUs 416 417 if (is.null(strata)){ 418 strata<-rep("1",n) 419 if (!is.null(nPSU)) 420 names(nPSU)<-"1" 421 } 422 else 423 strata<-as.character(strata) ##can't use factors as indices in for()' 424 425 if (is.null(certainty)){ 426 certainty<-rep(FALSE,length(strata)) 427 names(certainty)<-strata 428 } 429 430 if (!is.null(psu)){ 431 x<-rowsum(x, psu, reorder=FALSE) 432 strata<-strata[!duplicated(psu)] 433 n<-NROW(x) 434 } 435 436 if (!is.null(nPSU)){ 437 obsn<-table(strata) 438 dropped<-nPSU[match(names(obsn),names(nPSU))]-obsn 439 if(sum(dropped)){ 440 xtra<-matrix(0,ncol=NCOL(x),nrow=sum(dropped)) 441 strata<-c(strata,rep(names(dropped),dropped)) 442 if(is.matrix(x)) 443 x<-rbind(x,xtra) 444 else 445 x<-c(x,xtra) 446 n<-NROW(x) 447 } 448 } else obsn<-table(strata) 449 450 if(is.null(strata)){ 451 x<-t(t(x)-colMeans(x)) 452 } else { 453 strata.means<-drop(rowsum(x,strata, reorder=FALSE))/drop(rowsum(rep(1,n),strata, reorder=FALSE)) 454 if (!is.matrix(strata.means)) 455 strata.means<-matrix(strata.means, ncol=NCOL(x)) 456 x<- x- strata.means[ match(strata, unique(strata)),,drop=FALSE] 457 } 458 459 p<-NCOL(x) 460 v<-matrix(0,p,p) 461 462 ss<-unique(strata) 463 for(s in ss){ 464 this.stratum <- strata %in% s 465 466 ## original number of PSUs in this stratum 467 ## before missing data/subsetting 468 this.n <-nPSU[match(s,names(nPSU))] 469 470 this.df <- this.n/(this.n-1) 471 472 if (is.null(fpc)) 473 this.fpc <- 1 474 else{ 475 this.fpc <- fpc[,2][ fpc[,1]==as.character(s)] 476 this.fpc <- (this.fpc - this.n)/this.fpc 477 } 478 479 xs<-x[this.stratum,,drop=FALSE] 480 481 this.certain<-certainty[names(certainty) %in% s] 482 483 ## stratum with only 1 design cluster leads to undefined variance 484 lonely.psu<-match.arg(lonely.psu, c("remove","adjust","fail", 485 "certainty","average")) 486 if (this.n==1 && !this.certain){ 487 this.df<-1 488 if (lonely.psu=="fail") 489 stop("Stratum ",s, " has only one sampling unit.") 490 else if (lonely.psu!="certainty") 491 warning("Stratum ",s, " has only one sampling unit.") 492 if (lonely.psu=="adjust") 493 xs<-strata.means[match(s,ss),,drop=FALSE] 494 } else if (obsn[match(s,names(obsn))]==1 && !this.certain){ 495 ## stratum with only 1 cluster left after subsetting 496 warning("Stratum ",s," has only one PSU in this subset.") 497 if (lonely.psu=="adjust") 498 xs<-strata.means[match(s,ss),,drop=FALSE] 499 } 500 ## add it up 501 if (!this.certain) 502 v<-v+crossprod(xs)*this.df*this.fpc 503 } 504 if (lonely.psu=="average"){ 505 v<- v/(1-mean(obsn==1 & !certainty)) 506 } 507 v 508} 509 510 511 512svymean<-function(x, design,na.rm=FALSE,...){ 513 .svycheck(design) 514 UseMethod("svymean",design) 515} 516 517svymean.survey.design<-function(x,design, na.rm=FALSE,deff=FALSE, influence=FALSE,...){ 518 519 if (!inherits(design,"survey.design")) 520 stop("design is not a survey design") 521 522 if (inherits(x,"formula")){ 523 ## do the right thing with factors 524 mf<-model.frame(x,design$variables,na.action=na.pass) 525 xx<-lapply(attr(terms(x),"variables")[-1], 526 function(tt) model.matrix(eval(bquote(~0+.(tt))),mf)) 527 cols<-sapply(xx,NCOL) 528 x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols)) 529 scols<-c(0,cumsum(cols)) 530 for(i in 1:length(xx)){ 531 x[,scols[i]+1:cols[i]]<-xx[[i]] 532 } 533 colnames(x)<-do.call("c",lapply(xx,colnames)) 534 } 535 else if(typeof(x) %in% c("expression","symbol")) 536 x<-eval(x, design$variables) 537 538 x<-as.matrix(x) 539 540 if (na.rm){ 541 nas<-rowSums(is.na(x)) 542 design<-design[nas==0,] 543 x<-x[nas==0,,drop=FALSE] 544 } 545 546 pweights<-1/design$prob 547 psum<-sum(pweights) 548 average<-colSums(x*pweights/psum) 549 x<-sweep(x,2,average) 550 v<-svyCprod(x*pweights/psum,design$strata,design$cluster[[1]], design$fpc, 551 design$nPSU,design$certainty, design$postStrata) 552 attr(average,"var")<-v 553 attr(average,"statistic")<-"mean" 554 if(influence) 555 attr(average, "influence")<-x*pweights/psum 556 class(average)<-"svystat" 557 if (is.character(deff) || deff){ 558 nobs<-NROW(design$cluster) 559 vsrs<-svyvar(x,design,na.rm=na.rm)/nobs 560 vsrs<-vsrs*(psum-nobs)/psum 561 attr(average, "deff")<-v/vsrs 562 } 563 564 return(average) 565} 566 567 568print.svystat<-function(x,...){ 569 vv<-attr(x,"var") 570 if (is.matrix(vv)) 571 m<-cbind(x,sqrt(diag(vv))) 572 else 573 m<-cbind(x,sqrt(vv)) 574 hasdeff<-!is.null(attr(x,"deff")) 575 if (hasdeff) { 576 m<-cbind(m,deff(x)) 577 colnames(m)<-c(attr(x,"statistic"),"SE","DEff") 578 } else { 579 colnames(m)<-c(attr(x,"statistic"),"SE") 580 } 581 printCoefmat(m) 582} 583 584as.data.frame.svystat<-function(x,...){ 585 rval<-data.frame(statistic=coef(x),SE=SE(x)) 586 names(rval)[1]<-attr(x,"statistic") 587 if (!is.null(attr(x,"deff"))) 588 rval<-cbind(rval,deff=deff(x)) 589 rval 590} 591 592coef.svystat<-function(object,...){ 593 attr(object,"statistic")<-NULL 594 attr(object,"deff")<-NULL 595 attr(object,"var")<-NULL 596 unclass(object) 597} 598 599vcov.svystat<-function(object,...){ 600 as.matrix(attr(object,"var")) 601} 602 603influence.svystat<-function(object,...){ 604 attr(object,"influence") 605} 606 607SE.svystat<-function(object,...){ 608 v<-vcov(object) 609 if (!is.matrix(v) || NCOL(v)==1) sqrt(v) else sqrt(diag(v)) 610} 611 612deff <- function(object,quietly=FALSE,...) UseMethod("deff") 613 614deff.default <- function(object, quietly=FALSE,...){ 615 rval<-attr(object,"deff") 616 if (is.null(rval)) { 617 if(!quietly) 618 warning("object has no design effect information") 619 } else rval<-diag(as.matrix(rval)) 620 rval 621} 622 623cv<-function(object,...) UseMethod("cv") 624 625cv.default<-function(object, warn=TRUE, ...){ 626 rval<-SE(object)/coef(object) 627 if (warn && any(coef(object)<0,na.rm=TRUE)) warning("CV may not be useful for negative statistics") 628 rval 629} 630 631 632svytotal<-function(x,design,na.rm=FALSE,...){ 633 .svycheck(design) 634 UseMethod("svytotal",design) 635} 636svytotal.survey.design<-function(x,design, na.rm=FALSE, deff=FALSE,influence=FALSE,...){ 637 638 if (!inherits(design,"survey.design")) 639 stop("design is not a survey design") 640 641 if (inherits(x,"formula")){ 642 ## do the right thing with factors 643 mf<-model.frame(x,design$variables,na.action=na.pass) 644 xx<-lapply(attr(terms(x),"variables")[-1], 645 function(tt) model.matrix(eval(bquote(~0+.(tt))),mf)) 646 cols<-sapply(xx,NCOL) 647 x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols)) 648 scols<-c(0,cumsum(cols)) 649 for(i in 1:length(xx)){ 650 x[,scols[i]+1:cols[i]]<-xx[[i]] 651 } 652 colnames(x)<-do.call("c",lapply(xx,colnames)) 653 } else if(typeof(x) %in% c("expression","symbol")) 654 x<-eval(x, design$variables) 655 656 x<-as.matrix(x) 657 658 if (na.rm){ 659 nas<-rowSums(is.na(x)) 660 design<-design[nas==0,] 661 x<-x[nas==0,,drop=FALSE] 662 } 663 664 N<-sum(1/design$prob) 665 m <- svymean(x, design, na.rm=na.rm) 666 total<-m*N 667 attr(total, "var")<-v<-svyCprod(x/design$prob,design$strata, 668 design$cluster[[1]], design$fpc, 669 design$nPSU,design$certainty,design$postStrata) 670 attr(total,"statistic")<-"total" 671 if (influence){ 672 attr(total,"influence")<-x/design$prob 673 } 674 if (is.character(deff) || deff){ 675 vsrs<-svyvar(x,design)*sum(weights(design)^2) 676 vsrs<-vsrs*(N-NROW(design$cluster))/N 677 attr(total,"deff")<-v/vsrs 678 } 679 return(total) 680} 681 682svyvar<-function(x, design, na.rm=FALSE,...){ 683 .svycheck(design) 684 UseMethod("svyvar",design) 685} 686svyvar.survey.design<-function(x, design, na.rm=FALSE,...){ 687 688 if (inherits(x,"formula")) 689 x<-model.frame(x,model.frame(design),na.action=na.pass) 690 else if(typeof(x) %in% c("expression","symbol")) 691 x<-eval(x, design$variables) 692 693 n<-sum(weights(design,"sampling")!=0) 694 xbar<-svymean(x,design, na.rm=na.rm) 695 if(NCOL(x)==1) { 696 x<-x-xbar 697 v<-svymean(x*x*n/(n-1),design, na.rm=na.rm) 698 attr(v,"statistic")<-"variance" 699 return(v) 700 } 701 x<-t(t(x)-xbar) 702 p<-NCOL(x) 703 a<-matrix(rep(x,p),ncol=p*p) 704 b<-x[,rep(1:p,each=p)] 705 ## Kish uses the n-1 divisor, so it affects design effects 706 v<-svymean(a*b*n/(n-1),design, na.rm=na.rm) 707 vv<-matrix(v,ncol=p) 708 dimnames(vv)<-list(names(xbar),names(xbar)) 709 attr(vv,"var")<-attr(v,"var") 710 attr(vv,"statistic")<-"variance" 711 class(vv)<-c("svyvar","svystat") 712 vv 713 } 714 715print.svyvar<-function (x, covariance=FALSE, ...) 716{ 717 if(!is.matrix(x)) NextMethod() 718 719 vv <- attr(x, "var") 720 if (covariance){ 721 nms<-outer(rownames(x),colnames(x),paste,sep=":") 722 m<-cbind(as.vector(x), sqrt(diag(vv))) 723 rownames(m)<-nms 724 } else{ 725 ii <- which(diag(sqrt(length(x)))>0) 726 m <- cbind(x[ii], sqrt(diag(vv))[ii]) 727 } 728 colnames(m) <- c(attr(x, "statistic"), "SE") 729 printCoefmat(m) 730} 731 732as.matrix.svyvar<-function(x,...) unclass(x) 733 734coef.svyratio<-function(object,...,drop=TRUE){ 735 if (!drop) return(object$ratio) 736 cf<-as.vector(object$ratio) 737 nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/")) 738 names(cf)<-nms 739 cf 740} 741 742SE.svyratio<-function(object,...,drop=TRUE){ 743 if(!drop) return(sqrt(object$var)) 744 se<-as.vector(sqrt(object$var)) 745 nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/")) 746 names(se)<-nms 747 se 748} 749 750svyratio<-function(numerator,denominator, design,...){ 751 .svycheck(design) 752 UseMethod("svyratio",design) 753} 754 755svyratio.survey.design<-function(numerator, denominator, design,...){ 756 757 if (inherits(numerator,"formula")) 758 numerator<-model.frame(numerator,design$variables) 759 else if(typeof(numerator) %in% c("expression","symbol")) 760 numerator<-eval(numerator, design$variables) 761 if (inherits(denominator,"formula")) 762 denominator<-model.frame(denominator,design$variables) 763 else if(typeof(denominator) %in% c("expression","symbol")) 764 denominator<-eval(denominator, design$variables) 765 766 nn<-NCOL(numerator) 767 nd<-NCOL(denominator) 768 769 all<-cbind(numerator,denominator) 770 allstats<-svytotal(all,design) 771 rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/")) 772 773 774 vars<-matrix(ncol=nd,nrow=nn) 775 for(i in 1:nn){ 776 for(j in 1:nd){ 777 r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob) 778 vars[i,j]<-svyCprod(r*1/design$prob, design$strata, design$cluster[[1]], design$fpc, 779 design$nPSU, design$certainty,design$postStrata) 780 } 781 } 782 colnames(vars)<-names(denominator) 783 rownames(vars)<-names(numerator) 784 rval$var<-vars 785 rval$call<-sys.call() 786 class(rval)<-"svyratio" 787 rval 788 789 } 790 791print.svyratio_separate<-function(x,...){ 792 cat("Stratified ratio estimate: ") 793 if (!is.null(x$call)) 794 print(x$call) 795 else if (!is.null(attr(x,"call"))) 796 print(attr(x$call)) 797 for(r in x$ratios) { 798 print(r) 799 } 800 invisible(x) 801} 802 803print.svyratio<-function(x,...){ 804 cat("Ratio estimator: ") 805 if (!is.null(x$call)) 806 print(x$call) 807 else if(!is.null(attr(x,"call"))) 808 print(attr(x,"call")) 809 cat("Ratios=\n") 810 print(x$ratio) 811 cat("SEs=\n") 812 print(sqrt(x$var)) 813 invisible(NULL) 814} 815 816predict.svyratio<-function(object, total, se=TRUE,...){ 817 if (se) 818 return(list(total=object$ratio*total,se=sqrt(object$var)*total)) 819 else 820 return(object$ratio*total) 821} 822 823predict.svyratio_separate<-function(object, total, se=TRUE,...){ 824 825 if (length(total)!=length(object$ratios)) 826 stop("Number of strata differ in ratio object and totals.") 827 if (!is.null(names(total)) && !is.null(levels(object$strata))){ 828 if (!setequal(names(total), levels(object$strata))) 829 warning("Names of strata differ in ratio object and totals") 830 else if (!all(names(total)==levels(object$strata))){ 831 warning("Reordering supplied totals to make their names match the ratio object") 832 total<-total[match(names(total),levels(object$strata))] 833 } 834 } 835 totals<-mapply(predict, object=object$ratios, total=total,se=se,...,SIMPLIFY=FALSE) 836 837 if(se){ 838 rval<-totals[[1]]$total 839 v<-totals[[1]]$se^2 840 for(ti in totals[-1]) { 841 rval<-rval+ti$total 842 v<-v+ti$se^2 843 } 844 list(total=rval,se=sqrt(v)) 845 } else { 846 rval<-totals[[1]] 847 for (ti in totals[-1]) rval<-rval+ti 848 rval 849 } 850 851} 852 853 854cv.svyratio<-function(object,...){ 855 sqrt(object$var)/object$ratio 856} 857 858svytable<-function(formula, design, ...){ 859 UseMethod("svytable",design) 860} 861 862svytable.survey.design<-function(formula, design, Ntotal=NULL, round=FALSE,...){ 863 864 if (!inherits(design,"survey.design")) stop("design must be a survey design") 865 weights<-1/design$prob 866 867 ## unstratified or unadjusted 868 if (length(Ntotal)<=1 || !design$has.strata){ 869 if (length(formula)==3) 870 tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~.(formula[[3]]), data=model.frame(design),...)) 871 else 872 tblcall<-bquote(xtabs(weights~.(formula[[2]]), data=model.frame(design),...)) 873 tbl<-eval(tblcall) 874 if (!is.null(Ntotal)) { 875 if(length(formula)==3) 876 tbl<-tbl/sum(Ntotal) 877 else 878 tbl<-tbl*sum(Ntotal)/sum(tbl) 879 } 880 if (round) 881 tbl<-round(tbl) 882 attr(tbl,"call")<-match.call() 883 class(tbl)<-c("svytable",class(tbl)) 884 return(tbl) 885 } 886 ## adjusted and stratified 887 if (length(formula)==3) 888 tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~design$strata[,1]+.(formula[[3]]), data=model.frame(design),...)) 889 else 890 tblcall<-bquote(xtabs(weights~design$strata[,1]+.(formula[[2]]), data=model.frame(design),...)) 891 892 tbl<-eval(tblcall) 893 894 ss<-match(sort(unique(design$strata[,1])), Ntotal[,1]) 895 dm<-dim(tbl) 896 layer<-prod(dm[-1]) 897 tbl<-sweep(tbl,1,Ntotal[ss, 2]/apply(tbl,1,sum),"*") 898 tbl<-apply(tbl, 2:length(dm), sum) 899 if (round) 900 tbl<-round(tbl) 901 class(tbl)<-c("svytable","xtabs", "table") 902 attr(tbl, "call")<-match.call() 903 tbl 904} 905 906svycoxph<-function(formula,design,subset=NULL,rescale=TRUE,...){ 907 .svycheck(design) 908 UseMethod("svycoxph",design) 909} 910 911svycoxph.survey.design<-function(formula,design, subset=NULL, rescale=TRUE, ...){ 912 subset<-substitute(subset) 913 subset<-eval(subset, model.frame(design),parent.frame()) 914 if (!is.null(subset)) 915 design<-design[subset,] 916 917 if(any(weights(design)<0)) stop("weights must be non-negative") 918 919 data<-model.frame(design) 920 921 g<-match.call() 922 g$formula<-eval.parent(g$formula) 923 g$design<-NULL 924 g$var<-NULL 925 g$rescale <- NULL 926 if (is.null(g$weights)) 927 g$weights<-quote(.survey.prob.weights) 928 else 929 g$weights<-bquote(.survey.prob.weights*.(g$weights)) 930 g[[1]]<-quote(coxph) 931 g$data<-quote(data) 932 g$subset<-quote(.survey.prob.weights>0) 933 g$model <- TRUE 934 935 ##need to rescale weights for stability 936 ## unless the user doesn't want to 937 if (rescale) 938 data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob) 939 if (!all(all.vars(formula) %in% names(data))) 940 stop("all variables must be in design= argument") 941 g<-with(list(data=data), eval(g)) 942 943 if (inherits(g, "coxph.penal")) 944 warning("svycoxph does not support penalised terms") 945 946 g$call<-match.call() 947 g$call[[1]]<-as.name(.Generic) 948 g$printcall<-sys.call(-1) 949 g$printcall[[1]]<-as.name(.Generic) 950 class(g)<-c("svycoxph", class(g)) 951 g$survey.design<-design 952 953 nas<-g$na.action 954 if (length(nas)) 955 design<-design[-nas,] 956 957 dbeta.subset<-resid(g,"dfbeta",weighted=TRUE) 958 if (nrow(design)==NROW(dbeta.subset)){ 959 dbeta<-as.matrix(dbeta.subset) 960 } else { 961 dbeta<-matrix(0,ncol=NCOL(dbeta.subset),nrow=nrow(design)) 962 dbeta[is.finite(design$prob),]<-dbeta.subset 963 } 964 g$inv.info<-g$var 965 966 if (inherits(design,"survey.design2")) 967 g$var<-svyrecvar(dbeta, design$cluster, 968 design$strata, design$fpc, 969 postStrata=design$postStrata) 970 else if (inherits(design, "twophase")) 971 g$var<-twophasevar(dbeta, design) 972 else if(inherits(design, "twophase2")) 973 g$var<-twophase2var(dbeta, design) 974 else if(inherits(design, "pps")) 975 g$var<-ppsvar(dbeta,design) 976 else 977 g$var<-svyCprod(dbeta, design$strata, 978 design$cluster[[1]], design$fpc,design$nPSU, 979 design$certainty,design$postStrata) 980 981 g$wald.test<-coef(g)%*%solve(g$var,coef(g)) 982 g$ll<-g$loglik 983 g$loglik<-NULL 984 g$rscore<-NULL 985 g$score<-NA 986 g$degf.resid<-degf(design)-length(coef(g)[!is.na(coef(g))])+1 987 988 g 989} 990 991 992model.frame.svycoxph<-function(formula,...){ 993 f<-formula$call 994 env <- environment(formula(formula)) 995 if (is.null(env)) 996 env <- parent.frame() 997 f[[1]]<-as.name("model.frame") 998 f$data<-quote(data) 999 f$design<-NULL 1000 f$method<-f$control<-f$singular.ok<-f$model<-f$x<-f$y<-f$iter<-NULL 1001 f$formula<-formula(formula) 1002 if (is.null(f$weights)) 1003 f$weights<-quote(.survey.prob.weights) 1004 else 1005 f$weights<-bquote(.survey.prob.weights*.(f$weights)) 1006 design<-formula$survey.design 1007 data<-model.frame(design) 1008 data$.survey.prob.weights<-(1/design$prob)/sum(1/design$prob) 1009 with(list(data=data), eval(f)) 1010} 1011 1012model.matrix.svycoxph<-function (object, data = NULL, contrast.arg = object$contrasts, 1013 ...) 1014{ 1015 if (!is.null(object[["x"]])) 1016 object[["x"]] 1017 else { 1018 if (is.null(data)) 1019 data <- model.frame(object, ...) 1020 else data <- model.frame(object, data = data, ...) 1021 Terms <- object$terms 1022 attr(Terms, "intercept") <- 1 1023 strats <- attr(Terms, "specials")$strata 1024 cluster <- attr(Terms, "specials")$cluster 1025 dropx <- NULL 1026 if (length(cluster)) { 1027 tempc <- untangle.specials(Terms, "cluster", 1:10) 1028 ord <- attr(Terms, "order")[tempc$terms] 1029 if (any(ord > 1)) 1030 stop("Cluster can not be used in an interaction") 1031 dropx <- tempc$terms 1032 } 1033 if (length(strats)) { 1034 temp <- untangle.specials(Terms, "strata", 1) 1035 dropx <- c(dropx, temp$terms) 1036 } 1037 if (length(dropx)) { 1038 newTerms <- Terms[-dropx] 1039 X <- model.matrix(newTerms, data, contrasts = contrast.arg) 1040 } 1041 else { 1042 newTerms <- Terms 1043 X <- model.matrix(Terms, data, contrasts = contrast.arg) 1044 } 1045 X 1046 } 1047} 1048 1049print.svycoxph<-function(x,...){ 1050 print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...) 1051## x$call<-x$printcall 1052 NextMethod() 1053} 1054 1055summary.svycoxph<-function(object,...){ 1056 print(object$survey.design,varnames=FALSE, design.summaries=FALSE,...) 1057## object$call<-object$printcall 1058 NextMethod() 1059} 1060 1061survfit.svycoxph<-function(object,...){ 1062 stop("No survfit method for survey models") 1063} 1064extractAIC.svycoxph<-function(fit,...){ 1065 stop("No AIC for survey models") 1066} 1067 1068anova.svycoxph<-function(object,...){ 1069 stop("No anova method for survey models") 1070} 1071 1072svyglm<-function(formula, design,subset=NULL,family=stats::gaussian(),start=NULL, ...){ 1073 .svycheck(design) 1074 UseMethod("svyglm",design) 1075} 1076 1077svyglm.survey.design<-function(formula,design,subset=NULL, family=stats::gaussian(),start=NULL, 1078 rescale=TRUE,..., deff=FALSE, influence=FALSE){ 1079 1080 subset<-substitute(subset) 1081 subset<-eval(subset, model.frame(design), parent.frame()) 1082 if (!is.null(subset)){ 1083 if (any(is.na(subset))) 1084 stop("subset must not contain NA values") 1085 design<-design[subset,] 1086 } 1087 data<-model.frame(design) 1088 1089 g<-match.call() 1090 g$formula<-eval.parent(g$formula) 1091 g$influence<-NULL 1092 g$design<-NULL 1093 g$var <- NULL 1094 g$rescale <- NULL 1095 g$deff<-NULL 1096 g$subset <- NULL ## done it already 1097 g$family<-family 1098 if (is.null(g$weights)) 1099 g$weights<-quote(.survey.prob.weights) 1100 else 1101 g$weights<-bquote(.survey.prob.weights*.(g$weights)) 1102 g$data<-quote(data) 1103 g[[1]]<-quote(glm) 1104 1105 ##need to rescale weights for stability in binomial 1106 ## (unless the user doesn't want to) 1107 if (rescale) 1108 data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob) 1109 else 1110 data$.survey.prob.weights<-(1/design$prob) 1111 1112 if(any(is.na(data$.survey.prob.weights))) 1113 stop("weights must not contain NA values") 1114 if (!all(all.vars(formula) %in% names(data))) 1115 stop("all variables must be in design= argument") 1116 g<-with(list(data=data), eval(g)) 1117 summ<-summary(g) 1118 g$naive.cov<-summ$cov.unscaled 1119 1120 nas<-g$na.action 1121 if (length(nas)) 1122 design<-design[-nas,] 1123 1124 g$cov.unscaled<-svy.varcoef(g,design) 1125 g$df.residual <- degf(design)+1-length(coef(g)[!is.na(coef(g))]) 1126 1127 class(g)<-c("svyglm",class(g)) 1128 g$call<-match.call() 1129 g$call[[1]]<-as.name(.Generic) 1130 if(!("formula" %in% names(g$call))) { 1131 if (is.null(names(g$call))) 1132 i<-1 1133 else 1134 i<-min(which(names(g$call)[-1]=="")) 1135 names(g$call)[i+1]<-"formula" 1136 } 1137 1138 if(deff){ 1139 vsrs<-summ$cov.scaled*mean(data$.survey.prob.weights) 1140 attr(g,"deff")<-g$cov.unscaled/vsrs 1141 } 1142 1143 if (influence){ 1144 estfun< model.matrix(g)*naa_shorter(nas, resid(g,"working"))*g$weights 1145 if (g$rank<NCOL(estfun)){ 1146 estfun<-estfun[,g$qr$pivot[1:g$rank]] 1147 } 1148 attr(g, "influence")<-estfun%*%g$naive.cov 1149 } 1150 1151 g$survey.design<-design 1152 g 1153} 1154 1155print.svyglm<-function(x,...){ 1156 print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...) 1157 NextMethod() 1158 1159} 1160 1161coef.svyglm<-function(object,complete=FALSE,...,na.rm=!complete) { 1162 beta<-object$coefficients 1163 if (!na.rm || length(beta)==object$rank) 1164 beta 1165 else 1166 beta[object$qr$pivot[1:object$rank]] 1167} 1168 1169vcov.svyglm<-function(object,...) { 1170 v<-object$cov.unscaled 1171 dimnames(v)<-list(names(coef(object)),names(coef(object))) 1172 v 1173} 1174 1175 1176svy.varcoef<-function(glm.object,design){ 1177 Ainv<-summary(glm.object)$cov.unscaled 1178 nas<-glm.object$na.action 1179 estfun<-model.matrix(glm.object)*naa_shorter(nas, resid(glm.object,"working"))*glm.object$weights 1180 if (glm.object$rank<NCOL(estfun)){ 1181 estfun<-estfun[,glm.object$qr$pivot[1:glm.object$rank]] 1182 } 1183 naa<-glm.object$na.action 1184 ## the design may still have rows with weight zero for missing values 1185 ## if there are weights or calibration. model.matrix will have removed them 1186 if (length(naa) && (NROW(estfun)!=nrow(design) )){ 1187 if ((length(naa)+NROW(estfun))!=nrow(design) ) 1188 stop("length mismatch: this can't happen.") 1189 n<-nrow(design) 1190 inx <- (1:n)[-naa] 1191 ee <- matrix(0,nrow=n,ncol=NCOL(estfun)) 1192 ee[inx,]<-estfun 1193 estfun<-ee 1194 } 1195 1196 if (inherits(design,"survey.design2")) 1197 svyrecvar(estfun%*%Ainv,design$cluster,design$strata,design$fpc,postStrata=design$postStrata) 1198 else if (inherits(design, "twophase")) 1199 twophasevar(estfun%*%Ainv, design) 1200 else if (inherits(design, "twophase2")) 1201 twophase2var(estfun%*%Ainv, design) 1202 else if (inherits(design, "pps")) 1203 ppsvar(estfun%*%Ainv, design) 1204 else 1205 svyCprod(estfun%*%Ainv,design$strata,design$cluster[[1]],design$fpc, design$nPSU, 1206 design$certainty,design$postStrata) 1207 } 1208 1209residuals.svyglm<-function(object,type = c("deviance", "pearson", "working", 1210 "response", "partial"),...){ 1211 type<-match.arg(type) 1212 if (type=="pearson"){ 1213 y <- object$y 1214 mu <- object$fitted.values 1215 wts <- object$prior.weights 1216 pwts<- 1/object$survey.design$prob 1217 pwts<- pwts/mean(pwts) 1218 ## missing values in calibrated/post-stratified designs 1219 ## the rows will still be in the design object but not in the model 1220 if (length(naa<-object$na.action) && (length(pwts)!=length(wts))){ 1221 if(length(naa)+length(wts) != length(pwts)) 1222 stop("length mismatch: this can't happen.") 1223 inx<-(1:length(pwts))[-naa] 1224 } else inx<-1:length(pwts) 1225 r<-numeric(length(pwts)) 1226 r[inx]<-(y - mu) * sqrt(wts/pwts[inx])/(sqrt(object$family$variance(mu))) 1227 if (is.null(object$na.action)) 1228 r 1229 else 1230 naresid(object$na.action, r) 1231 } else 1232 NextMethod() 1233 1234} 1235 1236summary.svyglm<-function (object, correlation = FALSE, df.resid=NULL,...) 1237{ 1238 Qr <- object$qr 1239 est.disp <- TRUE 1240 if (is.null(df.resid)) 1241 df.r <- object$df.residual 1242 else 1243 df.r<-df.resid 1244 1245 dispersion<-svyvar(resid(object,"pearson"), object$survey.design, 1246 na.rm=TRUE) 1247 1248 coef.p <- coef(object) 1249 covmat<-vcov(object) 1250 dimnames(covmat) <- list(names(coef.p), names(coef.p)) 1251 var.cf <- diag(covmat) 1252 s.err <- sqrt(var.cf) 1253 tvalue <- coef.p/s.err 1254 dn <- c("Estimate", "Std. Error") 1255 if (!est.disp) { 1256 pvalue <- 2 * pnorm(-abs(tvalue)) 1257 coef.table <- cbind(coef.p, s.err, tvalue, pvalue) 1258 dimnames(coef.table) <- list(names(coef.p), c(dn, "z value", 1259 "Pr(>|z|)")) 1260 } 1261 else if (df.r > 0) { 1262 pvalue <- 2 * pt(-abs(tvalue), df.r) 1263 coef.table <- cbind(coef.p, s.err, tvalue, pvalue) 1264 dimnames(coef.table) <- list(names(coef.p), c(dn, "t value", 1265 "Pr(>|t|)")) 1266 } 1267 else { 1268 coef.table <- cbind(coef.p, s.err, tvalue, NaN) 1269 dimnames(coef.table) <- list(names(coef.p), c(dn, "t value", 1270 "Pr(>|t|)")) 1271 } 1272 ans <- c(object[c("call", "terms", "family", "deviance", 1273 "aic", "contrasts", "df.residual", "null.deviance", "df.null", 1274 "iter")], list(deviance.resid = residuals(object, type = "deviance"), 1275 aic = object$aic, coefficients = coef.table, dispersion = dispersion, 1276 df = c(object$rank, df.r,NCOL(Qr$qr)), cov.unscaled = covmat, 1277 cov.scaled = covmat)) 1278 if (correlation) { 1279 dd <- sqrt(diag(covmat)) 1280 ans$correlation <- covmat/outer(dd, dd) 1281 } 1282 ans$aliased<-is.na(coef(object,na.rm=FALSE)) 1283 ans$survey.design<-list(call=object$survey.design$call) 1284 class(ans) <- c("summary.svyglm","summary.glm") 1285 return(ans) 1286} 1287 1288 1289confint.svyglm<-function(object,parm,level=0.95,method=c("Wald","likelihood"),ddf=NULL,...){ 1290 method<-match.arg(method) 1291 if(method=="Wald"){ 1292 if (is.null(ddf)) 1293 ddf <- object$df.residual 1294 if (ddf<=0) { 1295 ci<-confint.default(object,parm=parm,level=.95,...)*NaN 1296 } else { 1297 tlevel <- 1 - 2*pnorm(qt((1 - level)/2, df = ddf)) 1298 ci<-confint.default(object,parm=parm,level=tlevel,...) 1299 } 1300 a <- (1 - level)/2 1301 a <- c(a, 1 - a) 1302 pct <- format.perc(a, 3) 1303 colnames(ci)<-pct 1304 return(ci) 1305 } 1306 pnames <- names(coef(object)) 1307 if (missing(parm)) 1308 parm <- seq_along(pnames) 1309 else if (is.character(parm)) 1310 parm <- match(parm, pnames, nomatch = 0) 1311 lambda<-diag(object$cov.unscaled[parm,parm,drop=FALSE])/diag(object$naive.cov[parm,parm,drop=FALSE]) 1312 if(is.null(ddf)) ddf<-object$df.residual 1313 if (ddf==Inf){ 1314 alpha<-pnorm(qnorm((1-level)/2)*sqrt(lambda))/2 1315 } else if (ddf<=0) { 1316 stop("zero or negative denomintor df") 1317 } else { 1318 alpha<-pnorm(qt((1-level)/2,df=ddf)*sqrt(lambda))/2 1319 } 1320 rval<-vector("list",length(parm)) 1321 for(i in 1:length(parm)){ 1322 temp<-MASSprofile_glm(fitted=object,which=parm[i],alpha=alpha[i],...) 1323 rval[[i]]<-confint_profile(temp,parm=parm[i],level=level, unscaled_level=2*alpha[i],...) 1324 } 1325 1326 names(rval)<-pnames[parm] 1327 if (length(rval)==1) 1328 rval<-rval[[1]] 1329 else 1330 rval<-do.call(rbind,rval) 1331 attr(rval,"levels")<-level 1332 rval 1333} 1334 1335 1336## 1337## based on MASS:::confint.profile.glm 1338## which is GPL and (c) Bill Venables and Brian D Ripley. 1339## 1340confint_profile <- function (object, parm = seq_along(pnames), level = 0.95, unscaled_level, ...) 1341{ 1342 of <- attr(object, "original.fit") 1343 pnames <- names(coef(of)) 1344 if (is.character(parm)) 1345 parm <- match(parm, pnames, nomatch = 0L) 1346 a <- (1-level)/2 1347 a <- c(a, 1 - a) 1348 pct <- paste(round(100 * a, 1), "%") 1349 ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(pnames[parm], 1350 pct)) 1351 cutoff <- c(qnorm(unscaled_level),qnorm(unscaled_level, lower.tail=FALSE)) 1352 for (pm in parm) { 1353 pro <- object[[pnames[pm]]] 1354 if (is.null(pro)) 1355 next 1356 if (length(pnames) > 1L) 1357 sp <- spline(x = pro[, "par.vals"][, pm], y = pro[, 1358 1]) 1359 else sp <- spline(x = pro[, "par.vals"], y = pro[, 1]) 1360 ci[pnames[pm], ] <- approx(sp$y, sp$x, xout = cutoff)$y 1361 } 1362 drop(ci) 1363} 1364 1365## 1366## MASS:::profile.glm with very slight changes to avoid rounding error in 1-alpha 1367## original is GPL and (c) Bill Venables and Brian D Ripley. 1368## 1369 1370MASSprofile_glm<-function (fitted, which = 1:p, alpha = 0.01, maxsteps = 10, del = zmax/5, 1371 trace = FALSE, ...) 1372{ 1373 Pnames <- names(B0 <- coef(fitted)) 1374 nonA <- !is.na(B0) 1375 pv0 <- t(as.matrix(B0)) 1376 p <- length(Pnames) 1377 if (is.character(which)) 1378 which <- match(which, Pnames) 1379 summ <- summary(fitted) 1380 std.err <- summ$coefficients[, "Std. Error", drop = FALSE] 1381 mf <- model.frame(fitted) 1382 Y <- model.response(mf) 1383 n <- NROW(Y) 1384 O <- model.offset(mf) 1385 if (!length(O)) 1386 O <- rep(0, n) 1387 W <- model.weights(mf) 1388 if (length(W) == 0L) 1389 W <- rep(1, n) 1390 OriginalDeviance <- deviance(fitted) 1391 DispersionParameter <- summ$dispersion 1392 X <- model.matrix(fitted) 1393 fam <- family(fitted) 1394 switch(fam$family, binomial = , poisson = , `Negative Binomial` = { 1395 zmax <- sqrt(qchisq(alpha, 1,lower.tail=FALSE)) 1396 profName <- "z" 1397 }, gaussian = , quasi = , inverse.gaussian = , quasibinomial = , 1398 quasipoisson = , { 1399 zmax <- sqrt(qf(alpha, 1, n - p,lower.tail=FALSE)) 1400 profName <- "tau" 1401 }) 1402 prof <- vector("list", length = length(which)) 1403 names(prof) <- Pnames[which] 1404 for (i in which) { 1405 if (!nonA[i]) 1406 next 1407 zi <- 0 1408 pvi <- pv0 1409 a <- nonA 1410 a[i] <- FALSE 1411 Xi <- X[, a, drop = FALSE] 1412 pi <- Pnames[i] 1413 for (sgn in c(-1, 1)) { 1414 if (trace) 1415 message("\nParameter: ", pi, " ", c("down", "up")[(sgn + 1416 1)/2 + 1]) 1417 step <- 0 1418 z <- 0 1419 LP <- X[, nonA, drop = FALSE] %*% B0[nonA] + O 1420 while ((step <- step + 1) < maxsteps && abs(z) < 1421 zmax) { 1422 bi <- B0[i] + sgn * step * del * std.err[Pnames[i], 1423 1] 1424 o <- O + X[, i] * bi 1425 fm <- glm.fit(x = Xi, y = Y, weights = W, etastart = LP, 1426 offset = o, family = fam, control = fitted$control) 1427 LP <- Xi %*% fm$coefficients + o 1428 ri <- pv0 1429 ri[, names(coef(fm))] <- coef(fm) 1430 ri[, pi] <- bi 1431 pvi <- rbind(pvi, ri) 1432 zz <- (fm$deviance - OriginalDeviance)/DispersionParameter 1433 if (zz > -0.001) 1434 zz <- max(zz, 0) 1435 else stop("profiling has found a better solution, so original fit had not converged") 1436 z <- sgn * sqrt(zz) 1437 zi <- c(zi, z) 1438 } 1439 } 1440 si <- order(zi) 1441 prof[[pi]] <- structure(data.frame(zi[si]), names = profName) 1442 prof[[pi]]$par.vals <- pvi[si, , drop = FALSE] 1443 } 1444 val <- structure(prof, original.fit = fitted, summary = summ) 1445 class(val) <- c("profile.glm", "profile") 1446 val 1447} 1448 1449 1450### 1451 1452svymle<-function(loglike, gradient=NULL, design, formulas, 1453 start=NULL, control=list(), 1454 na.action="na.fail", method=NULL,lower=NULL,upper=NULL, 1455 influence=FALSE,...){ 1456 if(is.null(method)) 1457 method<-if(is.null(gradient)) "Nelder-Mead" else "newuoa" 1458 if (!inherits(design,"survey.design")) 1459 stop("design is not a survey.design") 1460 weights<-weights(design) 1461 wtotal<-sum(weights) 1462 1463 if (!(method %in% c("bobyqa","newuoa")) && is.null(control$fnscale)) 1464 control$fnscale <- -wtotal/length(weights) 1465 if (inherits(design, "twophase")) { 1466 data<-design$phase1$sample$variables 1467 } else { 1468 data<-design$variables 1469 } 1470 1471 ## Get the response variable 1472 nms<-names(formulas) 1473 if (nms[1]==""){ 1474 if (inherits(formulas[[1]],"formula")) { 1475 y<-eval.parent(model.frame(formulas[[1]],data=data,na.action=na.pass)) 1476 } else { 1477 y<-eval(y,data,parent.frame()) 1478 } 1479 formulas[1]<-NULL 1480 if (FALSE && NCOL(y)>1) stop("Y has more than one column") 1481 } else { 1482 ## one formula must have response 1483 has.response<-sapply(formulas,length)==3 1484 if (sum(has.response)!=1) stop("Need a response variable") 1485 ff<-formulas[[which(has.response)]] 1486 ff[[3]]<-1 1487 y<-eval.parent(model.frame(ff,data=data,na.action=na.pass)) 1488 formulas[[which(has.response)]]<-delete.response(terms(formulas[[which(has.response)]])) 1489 nms<-c("",nms) 1490 } 1491 1492 if(length(which(nms==""))>1) stop("Formulas must have names") 1493 1494 mf<-vector("list",length(formulas)) 1495 vnms <- unique(do.call(c, lapply(formulas, all.vars))) 1496 uformula <- make.formula(vnms) 1497 mf <- model.frame(uformula, data=data,na.action=na.pass) 1498 mf <- cbind(`(Response)`=y, mf) 1499 mf<-mf[,!duplicated(colnames(mf)),drop=FALSE] 1500 1501 mf<-get(na.action)(mf) 1502 nas<-attr(mf,"na.action") 1503 if (length(nas)) 1504 design<-design[-nas,] 1505 weights<-1/design$prob 1506 wtotal<-sum(weights) 1507 1508 Y<-mf[,1] 1509 1510# mm <- lapply(formulas,model.matrix, data=mf) 1511 1512 mmFrame <- lapply(formulas,model.frame, data=mf) 1513 mm = mapply(model.matrix, object = formulas, data=mmFrame, SIMPLIFY=FALSE) 1514 mmOffset <- lapply(mmFrame, model.offset) 1515 1516 # add a vector of zeros if there is no offset provided 1517 noOffset = which(unlist(lapply(mmOffset, length))==0) 1518 for(D in noOffset) { 1519 mmOffset[[D]] = rep(0, NROW(mm[[1]])) 1520 } 1521 1522 ## parameter names 1523 parnms<-lapply(mm,colnames) 1524 for(i in 1:length(parnms)) 1525 parnms[[i]]<-paste(nms[i+1],parnms[[i]],sep=".") 1526 parnms<-unlist(parnms) 1527 1528 # maps position in theta to model matrices 1529 np<-c(0,cumsum(sapply(mm,NCOL))) 1530 1531 1532 objectivefn<-function(theta,...){ 1533 args<-vector("list",length(nms)) 1534 args[[1]]<-Y 1535 for(i in 2:length(nms)) 1536 args[[i]]<-mm[[i-1]]%*%theta[(np[i-1]+1):np[i]] + 1537 mmOffset[[i-1]] 1538 names(args)<-nms 1539 args<-c(args, ...) 1540 sum(do.call("loglike",args)*weights) 1541 } 1542 1543 if (is.null(gradient)) { 1544 grad<-NULL 1545 } else { 1546 fnargs<-names(formals(loglike))[-1] 1547 grargs<-names(formals(gradient))[-1] 1548 if(!identical(fnargs,grargs)) 1549 stop("loglike and gradient have different arguments.") 1550 reorder<-na.omit(match(grargs,nms[-1])) 1551 grad<-function(theta,...){ 1552 args<-vector("list",length(nms)) 1553 args[[1]]<-Y 1554 for(i in 2:length(nms)) 1555 args[[i]]<-drop( 1556 mm[[i-1]]%*%theta[(np[i-1]+1):np[i]] + 1557 mmOffset[[i-1]]) 1558 names(args)<-nms 1559 args<-c(args,...) 1560 rval<-NULL 1561 tmp<-do.call("gradient",args) 1562 for(i in reorder){ 1563 rval<-c(rval, colSums(as.matrix(tmp[,i]*weights*mm[[i]]))) 1564 } 1565 drop(rval) 1566 } 1567 } 1568 1569 theta0<-numeric(np[length(np)]) 1570 if (is.list(start)) 1571 st<-do.call("c",start) 1572 else 1573 st<-start 1574 1575 if (length(st)==length(theta0)) { 1576 theta0<-st 1577 } else { 1578 stop("starting values wrong length") 1579 } 1580 1581 if (method=="nlm"){ 1582 ff<-function(theta){ 1583 rval<- -objectivefn(theta) 1584 if (is.na(rval)) rval<- -Inf 1585 attr(rval,"gradient")<- -grad(theta) 1586 rval 1587 } 1588 rval<-nlm(ff, theta0,hessian=TRUE) 1589 if (rval$code>3) warning("nlm did not converge") 1590 rval$par<-rval$estimate 1591 } else if (method == "newuoa"){ 1592 rval<-minqa::uobyqa(theta0, function(par,...) -objectivefn(par,...), control=control, ...) 1593 if(rval$ier>0) warning(rval$msg) 1594 rval$hessian <- numDeriv::hessian(objectivefn, rval$par) 1595 } else if (method == "bobyqa"){ 1596 if (is.list(lower)) lower<-do.call(c, lower) 1597 if (is.list(upper)) upper<-do.call(c,upper) 1598 rval<-minqa::bobyqa(theta0, function(par,...) -objectivefn(par,...), control=control,lower=lower,upper=upper, ...) 1599 if(rval$ier>0) warning(rval$msg) 1600 rval$hessian <- numDeriv::hessian(objectivefn,rval$par) 1601 }else { 1602 rval<-optim(theta0, objectivefn, grad,control=control, 1603 hessian=TRUE,method=method,...) 1604 if (rval$conv!=0) warning("optim did not converge") 1605 } 1606 1607 1608 1609 1610 names(rval$par)<-parnms 1611 dimnames(rval$hessian)<-list(parnms,parnms) 1612 1613 if (is.null(gradient)) { 1614 rval$invinf<-solve(-rval$hessian) 1615 rval$scores<-NULL 1616 rval$sandwich<-NULL 1617 } else { 1618 theta<-rval$par 1619 args<-vector("list",length(nms)) 1620 args[[1]]<-Y 1621 for(i in 2:length(nms)) 1622 args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]]+ mmOffset[[i-1]]) 1623 names(args)<-nms 1624 args<-c(args,...) 1625 deta<-do.call("gradient",args) 1626 rval$scores<-NULL 1627 for(i in reorder) 1628 rval$scores<-cbind(rval$scores,deta[,i]*weights*mm[[i]]) 1629 1630 rval$invinf<-solve(-rval$hessian) 1631 dimnames(rval$invinf)<-list(parnms,parnms) 1632 1633 db<-rval$scores%*%rval$invinf 1634 if (inherits(design,"survey.design2")) 1635 rval$sandwich<-svyrecvar(db,design$cluster,design$strata, design$fpc, 1636 postStrata=design$postStrata) 1637 else if (inherits(design, "twophase")) 1638 rval$sandwich<-twophasevar(db,design) 1639 else 1640 rval$sandwich<-svyCprod(db,design$strata,design$cluster[[1]], 1641 design$fpc, design$nPSU, 1642 design$certainty, design$postStrata) 1643 dimnames(rval$sandwich)<-list(parnms,parnms) 1644 } 1645 1646 if (influence) 1647 attr(rval,"influence")<-db 1648 rval$call<-match.call() 1649 rval$design<-design 1650 class(rval)<-"svymle" 1651 rval 1652 1653} 1654 1655 1656coef.svymle<-function(object,...) object$par 1657 1658vcov.svymle<-function(object,stderr=c("robust","model"),...) { 1659 stderr<-match.arg(stderr) 1660 if (stderr=="robust"){ 1661 rval<-object$sandwich 1662 if (is.null(rval)) { 1663 p<-length(coef(object)) 1664 rval<-matrix(NA,p,p) 1665 } 1666 } else { 1667 rval<-object$invinf*mean(1/object$design$prob) 1668 } 1669 rval 1670} 1671 1672 1673print.svymle<-function(x,...){ 1674 cat("Survey-sampled mle: \n") 1675 print(x$call) 1676 cat("Coef: \n") 1677 print(x$par) 1678} 1679 1680summary.svymle<-function(object,stderr=c("robust","model"),...){ 1681 cat("Survey-sampled mle: \n") 1682 print(object$call) 1683 stderr<-match.arg(stderr) 1684 tbl<-data.frame(Coef=coef(object),SE=sqrt(diag(vcov(object,stderr=stderr)))) 1685 tbl$p.value<-format.pval(2*(1-pnorm(abs(tbl$Coef/tbl$SE))), digits=3,eps=0.001) 1686 print(tbl) 1687 print(object$design) 1688} 1689 1690model.frame.survey.design<-function(formula,...,drop=TRUE){ 1691 formula$variables 1692} 1693model.frame.svyrep.design<-function(formula,...){ 1694 formula$variables 1695} 1696model.frame.survey.design2<-function(formula,...){ 1697 formula$variables 1698} 1699 1700.onLoad<-function(...){ 1701 if (is.null(getOption("survey.lonely.psu"))) 1702 options(survey.lonely.psu="fail") 1703 if (is.null(getOption("survey.ultimate.cluster"))) 1704 options(survey.ultimate.cluster=FALSE) 1705 if (is.null(getOption("survey.want.obsolete"))) 1706 options(survey.want.obsolete=FALSE) 1707 if (is.null(getOption("survey.adjust.domain.lonely"))) 1708 options(survey.adjust.domain.lonely=FALSE) 1709 if (is.null(getOption("survey.drop.replicates"))) 1710 options(survey.drop.replicates=TRUE) 1711 if (is.null(getOption("survey.multicore"))) 1712 options(survey.multicore=FALSE) 1713 if (is.null(getOption("survey.replicates.mse"))) 1714 options(survey.replicates.mse=FALSE) 1715} 1716 1717 1718predterms<-function(object,se=FALSE,terms=NULL){ 1719 tt<-terms(object) 1720 n <- length(object$residuals) 1721 p <- object$rank 1722 p1 <- seq_len(p) 1723 piv <- object$qr$pivot[p1] 1724 beta<-coef(object) 1725 X<-mm<-model.matrix(object) 1726 aa <- attr(mm, "assign") 1727 ll <- attr(tt, "term.labels") 1728 hasintercept <- attr(tt, "intercept") > 0L 1729 if (hasintercept) 1730 ll <- c("(Intercept)", ll) 1731 aaa <- factor(aa, labels = ll) 1732 asgn <- split(order(aa), aaa) 1733 if (hasintercept) { 1734 asgn$"(Intercept)" <- NULL 1735 } 1736 avx <- colMeans(mm) 1737 termsconst <- sum(avx[piv] * beta[piv]) 1738 nterms <- length(asgn) 1739 ip <- matrix(ncol = nterms, nrow = NROW(X)) 1740 if (nterms > 0) { 1741 predictor <- matrix(ncol = nterms, nrow = NROW(X)) 1742 dimnames(predictor) <- list(rownames(X), names(asgn)) 1743 1744 if (hasintercept) 1745 X <- sweep(X, 2L, avx, check.margin = FALSE) 1746 unpiv <- rep.int(0L, NCOL(X)) 1747 unpiv[piv] <- p1 1748 for (i in seq.int(1L, nterms, length.out = nterms)) { 1749 iipiv <- asgn[[i]] 1750 ii <- unpiv[iipiv] 1751 iipiv[ii == 0L] <- 0L 1752 predictor[, i] <- if (any(iipiv > 0L)) 1753 X[, iipiv, drop = FALSE] %*% beta[iipiv] 1754 else 0 1755 if (se){ 1756 ip[,i]<-if (any(iipiv > 0L)) 1757 rowSums(as.matrix(X[, iipiv, drop = FALSE] %*% vcov(object)[ii,ii]) * X[, iipiv, drop = FALSE]) else 0 1758 } 1759 } 1760 if (!is.null(terms)) { 1761 predictor <- predictor[, terms, drop = FALSE] 1762 if (se) 1763 ip <- ip[, terms, drop = FALSE] 1764 } 1765 } 1766 else { 1767 predictor <- ip <- matrix(0, n, 0) 1768 } 1769 attr(predictor, "constant") <- if (hasintercept) 1770 termsconst 1771 else 0 1772 if(se) 1773 dimnames(ip)<-dimnames(predictor) 1774 if (se) list(fit=predictor,se.fit=sqrt(ip)) else predictor 1775} 1776 1777 1778predict.svyglm <- function(object, newdata=NULL, total=NULL, 1779 type = c("link", "response","terms"), 1780 se.fit=(type!="terms"), 1781 vcov=FALSE,...){ 1782 if(is.null(newdata)) 1783 newdata<-model.frame(object$survey.design) 1784 type<-match.arg(type) 1785 if (type=="terms") 1786 return(predterms(object,se=se.fit,...)) 1787 tt<-delete.response(terms(formula(object))) 1788 mf<-model.frame(tt,data=newdata, xlev=object$xlevels) 1789 mm<-model.matrix(tt,mf,contrasts.arg = object$contrasts) 1790 if (!is.null(total) && attr(tt,"intercept")){ 1791 mm[,attr(tt,"intercept")]<-mm[,attr(tt,"intercept")]*total 1792 } 1793 eta<-drop(mm %*% coef(object)) 1794 d<-drop(object$family$mu.eta(eta)) 1795 eta<-switch(type, link=eta, response=object$family$linkinv(eta)) 1796 if(se.fit){ 1797 if(vcov){ 1798 vv<-mm %*% vcov(object) %*% t(mm) 1799 attr(eta,"var")<-switch(type, 1800 link=vv, 1801 response=d*(t(vv*d))) 1802 } else { 1803 ## FIXME make this more efficient 1804 vv<-drop(rowSums((mm %*% vcov(object)) * mm)) 1805 attr(eta,"var")<-switch(type, 1806 link=vv, 1807 response=drop(d*(t(vv*d)))) 1808 } 1809 } 1810 attr(eta,"statistic")<-type 1811 class(eta)<-"svystat" 1812 eta 1813 } 1814 1815