1## 2## Recursive estimation of linearisation variances 3## in multistage samples. 4## 5 6svydesign<-function(ids, probs = NULL, strata = NULL, variables = NULL, 7 fpc = NULL, data=NULL, nest = FALSE, check.strata = !nest, 8 weights = NULL,pps=FALSE,...){ 9 UseMethod("svydesign", data) 10 } 11 12detibble<-function(data) { 13 if ("tbl_df" %in% class(data)) 14 as.data.frame(data) 15 else 16 data 17} 18 19svydesign.default<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL, 20 data=NULL, nest=FALSE, check.strata=!nest,weights=NULL,pps=FALSE, 21 variance=c("HT","YG"),...){ 22 23 data<-detibble(data) 24 25 variance<-match.arg(variance) 26 if(is.character(pps)){ 27 a<-match.arg(pps,c("brewer","overton","other")) 28 if (!(pps %in% c("brewer","other"))) 29 return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc, 30 data=data,method=a,call=sys.call(-1),variance=variance,...)) 31 } else if (!is.logical(pps)){ 32 return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc, 33 data=data,method=pps,call=sys.call(-1),variance=variance,...)) 34 } 35 36 if (!is.character(pps) || pps!="other"){ 37 if (variance!="HT") 38 stop("Only variance='HT' supported for this design") 39 } 40 41 ## less memory-hungry version for sparse tables 42 interaction<-function (..., drop = TRUE) { 43 args <- list(...) 44 narg <- length(args) 45 if (narg == 1 && is.list(args[[1]])) { 46 args <- args[[1]] 47 narg <- length(args) 48 } 49 50 ls<-sapply(args,function(a) length(levels(a))) 51 ans<-do.call("paste",c(lapply(args,as.character),sep=".")) 52 ans<-factor(ans) 53 return(ans) 54 55 } 56 57 na.failsafe<-function(message="missing values in object"){ 58 function(object,...){ 59 if (NCOL(object)==0) 60 object 61 else { 62 ok <- complete.cases(object) 63 if (all(ok)) 64 object 65 else stop(message) 66 } 67 } 68 } 69 70 na.id<-na.failsafe("missing values in `id'") 71 if(inherits(ids,"formula")) { 72 mf<-substitute(model.frame(ids,data=data, na.action=na.id)) 73 ids<-eval.parent(mf) 74 if (ncol(ids)==0) ## formula was ~1 75 ids<-data.frame(id=1:nrow(ids)) 76 } else{ 77 if (is.null(ids)) 78 stop("Must provide ids= argument") 79 else 80 ids<-na.id(data.frame(ids)) 81 } 82 83 ## make ids factor if they are character 84 for(i in 1:ncol(ids)){ 85 if (is.character(ids[[i]])) 86 ids[[i]]<-factor(ids[[i]]) 87 } 88 89 na.prob<-na.failsafe("missing values in `prob'") 90 if(inherits(probs,"formula")){ 91 mf<-substitute(model.frame(probs,data=data,na.action=na.prob)) 92 probs<-eval.parent(mf) 93 } 94 95 na.weight<-na.failsafe("missing values in `weights'") 96 if(inherits(weights,"formula")){ 97 mf<-substitute(model.frame(weights,data=data,na.action=na.weight)) 98 weights<-eval.parent(mf) 99 } else if (!is.null(weights)) 100 weights<-na.weight(data.frame(weights)) 101 if(!is.null(weights)){ 102 if (!is.null(probs)) 103 stop("Can't specify both sampling weights and probabilities") 104 else 105 probs<-as.data.frame(1/as.matrix(weights)) 106 } 107 108 109 110 na.strata<-na.failsafe("missing values in `strata'") 111 if (!is.null(strata)){ 112 if(inherits(strata,"formula")){ 113 mf<-substitute(model.frame(strata,data=data, na.action=na.strata)) 114 strata<-eval.parent(mf) 115 } 116 if (!is.list(strata)) 117 strata<-data.frame(strata=strata) 118 has.strata<-TRUE 119 for(i in 1:NCOL(strata)){ ##drop empty strata 120 if (is.factor(strata[[i]])) 121 strata[[i]]<-as.factor(as.character(strata[[i]])) 122 } 123 } else { 124 has.strata <-FALSE 125 strata<-na.strata(as.data.frame(matrix(1, nrow=NROW(ids), ncol=NCOL(ids)))) 126 } 127 128 129 if (inherits(variables,"formula")){ 130 mf<-substitute(model.frame(variables,data=data,na.action=na.pass)) 131 variables <- eval.parent(mf) 132 } else if (is.null(variables)){ 133 variables<-data 134 } else 135 variables<-do.call("data.frame",variables) 136 137 138 na.fpc<-na.failsafe("missing values in `fpc'") 139 if (inherits(fpc,"formula")){ 140 mf<-substitute(model.frame(fpc,data=data,na.action=na.fpc)) 141 fpc<-eval.parent(mf) 142 } 143 144 ## check for only one PSU: probably a typo 145 if ((length(unique(ids[,1]))==1) && !(nest && has.strata)){ 146 stop("Design has only one primary sampling unit") 147 } 148 149 ## force subclusters nested in clusters 150 if (NCOL(ids)>1){ 151 N<-ncol(ids) 152 for(i in 2:N){ 153 ids[,i]<-do.call("interaction", ids[,1:i,drop=FALSE]) 154 } 155 } 156 ## force clusters nested in strata 157 if (nest && has.strata && NCOL(ids)){ 158 N<-NCOL(ids) 159 NS<-NCOL(strata) 160 for(i in 1:N) 161 ids[,i]<-do.call("interaction", 162 c(strata[,1:min(i,NS),drop=FALSE], ids[,i,drop=FALSE])) 163 } 164 165 ## check if clusters nested in strata 166 if (check.strata && nest) 167 warning("No point in check.strata=TRUE if nest=TRUE") 168 if(check.strata && !is.null(strata) && NCOL(ids)){ 169 sc<-(rowSums(table(ids[,1],strata[,1])>0)) 170 if(any(sc>1)) stop("Clusters not nested in strata at top level; you may want nest=TRUE.") 171 } 172 173 ## force substrata nested in clusters 174 N<-ncol(ids) 175 NS<-ncol(strata) 176 if (N>1){ 177 for(i in 2:N) 178 strata[,i]<-interaction(strata[,min(i,NS)], ids[,i-1]) 179 } 180 181 ## PPS: valid choices currently are FALSE and "brewer" 182 if (is.logical(pps) && pps) stop("'pps' must be FALSE or a character string") 183 if (is.character(pps)) { 184 pps<-TRUE 185 } 186 187 ## Finite population correction: specified per observation 188 ## Also incorporates design sample sizes formerly in nPSU 189 190 if (!is.null(fpc) && !is.numeric(fpc) && !is.data.frame(fpc)) 191 stop("fpc must be a matrix or dataframe or NULL") 192 193 fpc<-as.fpc(fpc,strata, ids, pps=pps) 194 195 ## if FPC specified, but no weights, use it for weights 196 if (is.null(probs) && is.null(weights)){ 197 if (is.null(fpc$popsize)){ 198 if (missing(probs) && missing(weights)) 199 warning("No weights or probabilities supplied, assuming equal probability") 200 probs<-rep(1,nrow(ids)) 201 } else { 202 probs<-1/weights(fpc, final=FALSE) 203 } 204 } 205 206 207 if (is.numeric(probs) && length(probs)==1) 208 probs<-rep(probs, NROW(variables)) 209 210 if (length(probs)==0) probs<-rep(1,NROW(variables)) 211 212 if (NCOL(probs)==1) probs<-data.frame(probs) 213 214 rval<-list(cluster=ids) 215 rval$strata<-strata 216 rval$has.strata<-has.strata 217 rval$prob<- apply(probs,1,prod) 218 rval$allprob<-probs 219 rval$call<-match.call() 220 rval$variables<-variables 221 rval$fpc<-fpc 222 rval$call<-sys.call(-1) 223 rval$pps<-pps 224 class(rval)<-c("survey.design2","survey.design") 225 rval 226} 227 228onestrat<-function(x,cluster,nPSU,fpc, lonely.psu,stratum=NULL,stage=1,cal=cal){ 229 230 if (is.null(fpc)) 231 f<-rep(1,NROW(x)) 232 else{ 233 f<-ifelse(fpc==Inf, 1, (fpc-nPSU)/fpc) 234 } 235 236 if (nPSU>1) 237 scale<-f*nPSU/(nPSU-1) 238 else 239 scale<-f 240 if (all(f<0.0000001))## self-representing stratum 241 return(matrix(0,NCOL(x),NCOL(x))) 242 243 scale<-scale[!duplicated(cluster)] 244 245 x<-rowsum(x,cluster) 246 nsubset<-nrow(x) 247 248 if (nsubset<nPSU) { 249 ##can't be PPS, so scale must be a constant 250 x<-rbind(x,matrix(0,ncol=ncol(x),nrow=nPSU-nrow(x))) 251 scale<-rep(scale[1],NROW(x)) 252 } 253 if (lonely.psu!="adjust" || nsubset>1 || 254 (nPSU>1 & !getOption("survey.adjust.domain.lonely"))) 255 x<-sweep(x, 2, colMeans(x), "-") 256 257 if (nsubset==1 && nPSU>1 && getOption("survey.adjust.domain.lonely")){ 258 warning("Stratum (",stratum,") has only one PSU at stage ",stage) 259 if (lonely.psu=="average" && getOption("survey.adjust.domain.lonely")) 260 scale<-NA 261 } 262 263 if (nPSU>1){ 264 return(crossprod(x*sqrt(scale))) 265 } else { 266 rval<-switch(lonely.psu, 267 certainty=crossprod(x*sqrt(scale)), 268 remove=crossprod(x*sqrt(scale)), 269 adjust=crossprod(x*sqrt(scale)), 270 average=NA*crossprod(x), 271 fail= stop("Stratum (",stratum,") has only one PSU at stage ",stage), 272 stop("Can't handle lonely.psu=",lonely.psu) 273 ) 274 rval 275 } 276} 277 278 279onestage<-function(x, strata, clusters, nPSU, fpc, lonely.psu=getOption("survey.lonely.psu"),stage=0, cal){ 280 if (NROW(x)==0) 281 return(matrix(0,NCOL(x),NCOL(x))) 282 stratvars<- tapply(1:NROW(x), list(factor(strata)), function(index){ 283 onestrat(x[index,,drop=FALSE], clusters[index], 284 nPSU[index][1], fpc[index], ##changed from fpc[index][1], to allow pps(brewer) 285 lonely.psu=lonely.psu,stratum=strata[index][1], stage=stage,cal=cal) 286 }) 287 p<-NCOL(x) 288 nstrat<-length(unique(strata)) 289 nokstrat<-sum(sapply(stratvars,function(m) !any(is.na(m)))) 290 apply(array(unlist(stratvars),c(p,p,length(stratvars))),1:2,sum,na.rm=TRUE)*nstrat/nokstrat 291} 292 293 294svyrecvar<-function(x, clusters, stratas, fpcs, postStrata=NULL, 295 lonely.psu=getOption("survey.lonely.psu"), 296 one.stage=getOption("survey.ultimate.cluster")){ 297 298 x<-as.matrix(x) 299 cal<-NULL 300 301 ## Remove post-stratum means, which may cut across clusters 302 ## Also center the data using any "g-calibration" models 303 if(!is.null(postStrata)){ 304 for (psvar in postStrata){ 305 if (inherits(psvar, "greg_calibration")) { 306 if (psvar$stage==0){ 307 ## G-calibration at population level 308 x<-as.matrix(qr.resid(psvar$qr,x/psvar$w)*psvar$w) 309 } else { 310 ## G-calibration within clusters 311 cal<-c(cal, list(psvar)) 312 } 313 } else if (inherits(psvar, "raking")){ 314 ## raking by iterative proportional fitting 315 for(iterations in 1:10){ 316 for(margin in psvar){ 317 psw<-attr(margin, "weights") 318 x<- x - psw*apply(x/psw, 2, ave, margin) 319 } 320 } 321 } else { 322 ## ordinary post-stratification 323 psw<-attr(psvar, "weights") 324 oldw<-attr(psvar, "oldweights") 325 if (is.null(oldw)) oldw<-rep(1,length(psw)) 326 zeroes<-which(psw==0 & oldw==0) 327 if (length(zeroes)) psw[zeroes]=1 328 psvar<-as.factor(psvar) 329 psmeans<-rowsum(x*oldw/psw,psvar,reorder=TRUE)/as.vector(by(oldw,psvar,sum)) 330 x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw 331 } 332 } 333 } 334 335 multistage(x, clusters,stratas,fpcs$sampsize, fpcs$popsize, 336 lonely.psu=getOption("survey.lonely.psu"), 337 one.stage=one.stage,stage=1,cal=cal) 338} 339 340multistage<-function(x, clusters, stratas, nPSUs, fpcs, 341 lonely.psu=getOption("survey.lonely.psu"), 342 one.stage=FALSE,stage,cal){ 343 344 n<-NROW(x) 345 346 347 v <- onestage(x,stratas[,1], clusters[,1], nPSUs[,1], 348 fpcs[,1], lonely.psu=lonely.psu,stage=stage,cal=cal) 349 350 if (one.stage!=TRUE && !is.null(fpcs) && NCOL(clusters)>1) { 351 v.sub<-by(1:n, list(as.numeric(clusters[,1])), function(index){ 352 ## residuals for G-calibration using population information 353 ## only on clusters at this stage. 354 for(cali in cal){ 355 if (cali$stage != stage) 356 next 357 j<-match(clusters[index,1],cali$index) 358 if (length(unique(j))!=1) 359 stop("Internal problem in g-calibration data: stage",stage, 360 ", cluster", j) 361 j<-j[[1]] 362 x[index,]<-as.matrix(qr.resid(cali$qr[[j]], x[index,,drop=FALSE]/cali$w[[j]])*cali$w[[j]]) 363 } 364 multistage(x[index,,drop=FALSE], clusters[index,-1,drop=FALSE], 365 stratas[index,-1,drop=FALSE], nPSUs[index,-1,drop=FALSE], 366 fpcs[index,-1,drop=FALSE], 367 lonely.psu=lonely.psu,one.stage=one.stage-1, 368 stage=stage+1,cal=cal)*nPSUs[index[1],1]/fpcs[index[1],1] 369 }) 370 371 for(i in 1:length(v.sub)) 372 v<-v+v.sub[[i]] 373 } 374 dimnames(v)<-list(colnames(x),colnames(x)) 375 v 376} 377 378 379## fpc not given are zero: full sampling. 380as.fpc<-function(df,strata,ids,pps=FALSE){ 381 382 count<-function(x) sum(!duplicated(x)) 383 384 sampsize<-matrix(ncol=ncol(ids),nrow=nrow(ids)) 385 for(i in 1:ncol(ids)) 386 split(sampsize[,i],strata[,i])<-lapply(split(ids[,i],strata[,i]),count) 387 388 if (is.null(df)){ 389 ## No fpc 390 rval<-list(popsize=NULL, sampsize=sampsize) 391 class(rval)="survey_fpc" 392 return(rval) 393 } 394 395 fpc<-as.matrix(df) 396 if (xor(ispopsize<-any(df>1), all(df>=1))){ 397 big<-which(fpc>=1,arr.ind=TRUE) 398 small<-which(fpc<1,arr.ind=TRUE) 399 cat("record",big[1,1]," stage",big[1,2],": fpc=", fpc[big[1,,drop=FALSE]],"\n") 400 cat("record",small[1,1]," stage ",small[1,2],": fpc=", fpc[small[1,,drop=FALSE]],"\n") 401 stop("Must have all fpc>=1 or all fpc<=1") 402 } 403 404 if (ispopsize){ 405 if(pps) stop("fpc must be specified as sampling fraction for PPS sampling") 406 popsize<-fpc 407 } else { 408 popsize<-sampsize/(fpc) 409 } 410 if (any(popsize<sampsize)){ 411 toobig<-which(popsize<sampsize,arr.ind=TRUE) 412 cat("record",toobig[1,1],"stage",toobig[1,2],": popsize=",popsize[toobig[1,,drop=FALSE]], 413 " sampsize=", sampsize[toobig[1,,drop=FALSE]],"\n") 414 stop("FPC implies >100% sampling in some strata") 415 } 416 if (!ispopsize && any(is.finite(popsize) & (popsize>1e10))){ 417 big<-which(popsize>1e10 & is.finite(popsize),arr.ind=TRUE) 418 warning("FPC implies population larger than ten billion (record",big[1,1]," stage ",big[1,2],")") 419 } 420 if(!pps){ 421 ## check that fpc is constant within strata. 422 for(i in 1:ncol(popsize)){ 423 diff<-by(popsize[,i], list(strata[,i]), count) 424 if (any(as.vector(diff)>1)){ 425 j<-which(as.vector(diff)>1)[1] 426 warning("`fpc' varies within strata: stratum ",names(diff)[j], " at stage ",i) 427 } 428 } 429 } else{ 430 ## check that fpc is constant with clusters 431 diff<-by(popsize[,i], list(ids[,i]), count) 432 if (any(as.vector(diff)>1)){ 433 j<-which(as.vector(diff)>1)[1] 434 warning("`fpc' varies within cluster: cluster ",names(diff)[j], " at stage ",i) 435 } 436 } 437 438 439 rval<-list(popsize=popsize, sampsize=sampsize) 440 class(rval)<-"survey_fpc" 441 rval 442} 443 444"weights.survey_fpc"<-function(object,final=TRUE,...){ 445 if (is.null(object$popsize) || any(object$popsize>1e12)) 446 stop("Weights not supplied and can't be computed from fpc.") 447 if (final) { 448 pop<-apply(object$popsize,1,prod) 449 samp<-apply(object$sampsize,1,prod) 450 pop/samp 451 } else { 452 object$popsize/object$sampsize 453 } 454} 455 456 457 458 459print.survey.design2<-function(x,varnames=FALSE,design.summaries=FALSE,...){ 460 n<-NROW(x$cluster) 461 if (x$has.strata) cat("Stratified ") 462 un<-length(unique(x$cluster[,1])) 463 if(n==un){ 464 cat("Independent Sampling design") 465 is.independent<-TRUE 466 if (is.null(x$fpc$popsize)) 467 cat(" (with replacement)\n") 468 else cat("\n") 469 } else { 470 cat(NCOL(x$cluster),"- level Cluster Sampling design") 471 if (is.null(x$fpc$popsize)) 472 cat(" (with replacement)\n") 473 else cat("\n") 474 nn<-lapply(x$cluster,function(i) length(unique(i))) 475 cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep="")) 476 is.independent<-FALSE 477 } 478 479 print(x$call) 480 if (design.summaries){ 481 cat("Probabilities:\n") 482 print(summary(x$prob)) 483 if(x$has.strata){ 484 if (NCOL(x$cluster)>1) 485 cat("First-level ") 486 cat("Stratum Sizes: \n") 487 oo<-order(unique(x$strata[,1])) 488 a<-rbind(obs=table(x$strata[,1]), 489 design.PSU=x$fpc$sampsize[!duplicated(x$strata[,1]),1][oo], 490 actual.PSU=table(x$strata[!duplicated(x$cluster[,1]),1])) 491 print(a) 492 } 493 if (!is.null(x$fpc$popsize)){ 494 if (x$has.strata) { 495 cat("Population stratum sizes (PSUs): \n") 496 s<-!duplicated(x$strata[,1]) 497 a<-x$fpc$popsize[s,1] 498 names(a)<-x$strata[s,1] 499 a<-a[order(names(a))] 500 print(a) 501 } else { 502 cat("Population size (PSUs):",x$fpc$popsize[1,1],"\n") 503 } 504 } 505 } 506 if (varnames){ 507 cat("Data variables:\n") 508 print(colnames(x)) 509 } 510 invisible(x) 511} 512 513 514summary.survey.design2<-function(object,...){ 515 class(object)<-c("summary.survey.design2",class(object)) 516 object 517} 518 519print.summary.survey.design2<-function(x,...){ 520 y<-x 521 class(y)<-c("survey.design2",class(x)) 522 print(y,varnames=TRUE,design.summaries=TRUE,...) 523} 524 525 526.svycheck<-function(object){ 527 if (inherits(object,"survey.design") && 528 !is.null(object$nPSU)) 529 warning("This is an old-style design object. Please use as.svydesign2 to update it.") 530} 531 532as.svydesign2<-function(object){ 533 if (inherits(object,"survey.design2")) 534 return(object) 535 if (!inherits(object,"survey.design")) 536 stop("This function is for updating old-style survey.design objects") 537 538 539 count<-function(x) length(unique(x)) 540 541 strata<-data.frame(one=object$strata) 542 if ((nc<-ncol(object$cluster))>1){ 543 for(i in 2:nc){ 544 strata<-cbind(strata,object$cluster[,i-1]) 545 } 546 } 547 548 sampsize<-matrix(ncol=nc,nrow=nrow(object$cluster)) 549 550 sampsize[,1]<-object$nPSU[match(object$strata, names(object$nPSU))] 551 if (nc>1){ 552 for(i in 2:nc){ 553 split(sampsize[,i],strata[,i])<-lapply(split(object$cluster[,i],strata[,i]),count) 554 } 555 } 556 557 if (!is.null(object$fpc)){ 558 popsize<-sampsize 559 popsize[,1]<-object$fpc$N[match(object$strata,object$fpc$strata)] 560 } else popsize<-NULL 561 if (nc>1 && !is.null(object$fpc)){ 562 warning("Assuming complete sampling at stages 2 -",nc) 563 } 564 565 fpc<-list(popsize=popsize,sampsize=sampsize) 566 class(fpc)<-"survey_fpc" 567 568 569 object$fpc<-fpc 570 object$strata<-strata 571 object$nPSU<-NULL 572 class(object)<-c("survey.design2","survey.design") 573 object 574 575} 576 577is.pps<-function(x) if(is.null(x$pps)) FALSE else (x$pps!=FALSE) 578 579"[.survey.design2"<-function (x,i, ..., drop=TRUE){ 580 if (!missing(i)){ 581 if (is.calibrated(x) || is.pps(x) || !drop){ 582 ## Set weights to zero: no memory saving possible 583 ## There should be an easier way to complement a subscript.. 584 if (is.logical(i)) 585 x$prob[!i]<-Inf 586 else if (is.numeric(i) && length(i)) 587 x$prob[-i]<-Inf 588 else { 589 tmp<-x$prob[i,] 590 x$prob<-rep(Inf, length(x$prob)) 591 x$prob[i,]<-tmp 592 } 593 index<-is.finite(x$prob) 594 psu<-!duplicated(x$cluster[index,1]) 595 tt<-table(x$strata[index,1][psu]) 596 if(any(tt==1) && getOption("survey.adjust.domain.lonely")){ 597 warning(sum(tt==1)," strata have only one PSU in this subset.") 598 } 599 } else { 600 ## subset everything. 601 if (!is.null(x$variables)) ## phase 2 of twophase design 602 x$variables<-"[.data.frame"(x$variables,i,..1,drop=FALSE) 603 x$cluster<-x$cluster[i,,drop=FALSE] 604 x$prob<-x$prob[i] 605 x$allprob<-x$allprob[i,,drop=FALSE] 606 x$strata<-x$strata[i,,drop=FALSE] 607 x$fpc$sampsize<-x$fpc$sampsize[i,,drop=FALSE] 608 x$fpc$popsize<-x$fpc$popsize[i,,drop=FALSE] 609 } 610 611 } else { 612 if(!is.null(x$variables)) 613 x$variables<-x$variables[,..1,drop=FALSE] 614 } 615 616 x 617} 618 619svytotal.survey.design2<-function(x,design, na.rm=FALSE, deff=FALSE,influence=FALSE,...){ 620 621 622 if (inherits(x,"formula")){ 623 ## do the right thing with factors 624 mf<-model.frame(x,design$variables,na.action=na.pass) 625 xx<-lapply(attr(terms(x),"variables")[-1], 626 function(tt) model.matrix(eval(bquote(~0+.(tt))),mf)) 627 cols<-sapply(xx,NCOL) 628 x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols)) 629 scols<-c(0,cumsum(cols)) 630 for(i in 1:length(xx)){ 631 x[,scols[i]+1:cols[i]]<-xx[[i]] 632 } 633 colnames(x)<-do.call("c",lapply(xx,colnames)) 634 } else{ 635 if(typeof(x) %in% c("expression","symbol")) 636 x<-eval(x, design$variables) 637 else { 638 if(is.data.frame(x) && any(sapply(x,is.factor))){ 639 xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi}) 640 cols<-sapply(xx,NCOL) 641 scols<-c(0,cumsum(cols)) 642 cn<-character(sum(cols)) 643 for(i in 1:length(xx)) 644 cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="") 645 x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols)) 646 for(i in 1:length(xx)){ 647 x[,scols[i]+1:cols[i]]<-xx[[i]] 648 } 649 colnames(x)<-cn 650 } 651 } 652 } 653 x<-as.matrix(x) 654 655 if (na.rm){ 656 nas<-rowSums(is.na(x)) 657 design<-design[nas==0,] 658 if (length(nas)>length(design$prob)) 659 x<-x[nas==0,,drop=FALSE] 660 else 661 x[nas>0,]<-0 662 } 663 664 N<-sum(1/design$prob) 665 total <- colSums(x/as.vector(design$prob),na.rm=na.rm) 666 class(total)<-"svystat" 667 attr(total, "var")<-v<-svyrecvar(x/design$prob,design$cluster, 668 design$strata, design$fpc, 669 postStrata=design$postStrata) 670 attr(total,"statistic")<-"total" 671 if (influence){ 672 attr(total, "influence")<-x/design$prob 673 } 674 675 if (is.character(deff) || deff){ 676 nobs<-sum(weights(design)!=0) 677 if (deff=="replace") 678 vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2/nobs 679 else 680 vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2*(N-nobs)/(N*nobs) 681 attr(total, "deff")<-v/vsrs 682 } 683 684 685 return(total) 686} 687 688 689svymean.survey.design2<-function(x,design, na.rm=FALSE,deff=FALSE,influence=FALSE,...){ 690 691 if (inherits(x,"formula")){ 692 ## do the right thing with factors 693 mf<-model.frame(x,design$variables,na.action=na.pass) 694 xx<-lapply(attr(terms(x),"variables")[-1], 695 function(tt) model.matrix(eval(bquote(~0+.(tt))),mf)) 696 cols<-sapply(xx,NCOL) 697 x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols)) 698 scols<-c(0,cumsum(cols)) 699 for(i in 1:length(xx)){ 700 x[,scols[i]+1:cols[i]]<-xx[[i]] 701 } 702 colnames(x)<-do.call("c",lapply(xx,colnames)) 703 } 704 else { 705 if(typeof(x) %in% c("expression","symbol")) 706 x<-eval(x, design$variables) 707 else if(is.data.frame(x) && any(sapply(x,is.factor))){ 708 xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi}) 709 cols<-sapply(xx,NCOL) 710 scols<-c(0,cumsum(cols)) 711 cn<-character(sum(cols)) 712 for(i in 1:length(xx)) 713 cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="") 714 x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols)) 715 for(i in 1:length(xx)){ 716 x[,scols[i]+1:cols[i]]<-xx[[i]] 717 } 718 colnames(x)<-cn 719 } 720 } 721 x<-as.matrix(x) 722 723 if (na.rm){ 724 nas<-rowSums(is.na(x)) 725 design<-design[nas==0,] 726 if (length(nas)>length(design$prob)) 727 x<-x[nas==0,,drop=FALSE] 728 else 729 x[nas>0,]<-0 730 } 731 732 pweights<-1/design$prob 733 psum<-sum(pweights) 734 average<-colSums(x*pweights/psum) 735 x<-sweep(x,2,average) 736 v<-svyrecvar(x*pweights/psum,design$cluster,design$strata, design$fpc, 737 postStrata=design$postStrata) 738 attr(average,"var")<-v 739 attr(average,"statistic")<-"mean" 740 if (influence){ 741 attr(average,"influence") <- x*pweights/psum 742 } 743 class(average)<-"svystat" 744 if (is.character(deff) || deff){ 745 nobs<-sum(weights(design)!=0) 746 if(deff=="replace"){ 747 vsrs<-svyvar(x,design,na.rm=na.rm)/(nobs) 748 } else { 749 if(psum<nobs) { 750 vsrs<-NA*v 751 warning("Sample size greater than population size: are weights correctly scaled?") 752 } else{ 753 vsrs<-svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs) 754 } 755 } 756 attr(average, "deff")<-v/vsrs 757 } 758 759 return(average) 760} 761 762svyratio.survey.design2<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE, 763 formula,covmat=FALSE,deff=FALSE,influence=FALSE,...){ 764 765 if (separate){ 766 strats<-sort(unique(design$strata[,1])) 767 if (!design$has.strata) 768 warning("Separate and combined ratio estimators are the same for unstratified designs") 769 if(influence) 770 warning("influence functions not available for separate ratio estimators") 771 rval<-list(ratios=lapply(strats, 772 function(s) { 773 tmp<-svyratio(numerator, denominator, 774 subset(design, design$strata[,1] %in% s), 775 separate=FALSE,...) 776 attr(tmp,"call")<-bquote(Stratum==.(s)) 777 tmp})) 778 names(rval$ratios)<-strats 779 780 class(rval)<-c("svyratio_separate") 781 rval$call<-sys.call() 782 rval$strata<-strats 783 return(rval) 784 } 785 786 if (inherits(numerator,"formula")) 787 numerator<-model.frame(numerator,design$variables,na.action=na.pass) 788 else if(typeof(numerator) %in% c("expression","symbol")) 789 numerator<-eval(numerator, design$variables) 790 if (inherits(denominator,"formula")) 791 denominator<-model.frame(denominator,design$variables,na.action=na.pass) 792 else if(typeof(denominator) %in% c("expression","symbol")) 793 denominator<-eval(denominator, design$variables) 794 795 numerator<-as.matrix(numerator) 796 denominator<-as.matrix(denominator) 797 nn<-NCOL(numerator) 798 nd<-NCOL(denominator) 799 800 all<-cbind(numerator,denominator) 801 nas<-!complete.cases(all) 802 if ((na.rm==TRUE) && any(nas)){ 803 design<-design[!nas,] 804 if (NROW(design$cluster) == NROW(all)){ 805 ## subset by zero weights 806 all[nas,]<-1 807 numerator[nas,]<-0 808 denominator[nas,]<-1 809 } else { 810 ## subset by actually dropping rows 811 all<-all[!nas,,drop=FALSE] 812 numerator<-numerator[!nas,,drop=FALSE] 813 denominator<-denominator[!nas,,drop=FALSE] 814 } 815 } 816 allstats<-svytotal(all,design) 817 rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/")) 818 819 820 vars<-matrix(ncol=nd,nrow=nn) 821 822 if (deff=="replace" || deff) deffs<-matrix(ncol=nd,nrow=nn) 823 824 for(i in 1:nn){ 825 for(j in 1:nd){ 826 r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob) 827 vars[i,j]<-svyrecvar(r*1/design$prob, design$cluster, design$strata, design$fpc, 828 postStrata=design$postStrata) 829 if (deff=="replace" || deff){ 830 deffs[i,j]<-deff(svytotal(r,design,deff=deff)) 831 } 832 } 833 } 834 if (covmat){ 835 ii<-rep(1:nn,nd) 836 jj<-rep(1:nd,each=nn) 837 allr<-sweep(numerator[,ii]-t(as.vector(rval$ratio)*t(denominator[,jj,drop=FALSE])), 838 2, colSums(denominator[,jj,drop=FALSE]/design$prob),"/") 839 vcovmat<-svyrecvar(allr*1/design$prob, design$cluster, design$strata, design$fpc, 840 postStrata=design$postStrata) 841 colnames(vcovmat)<-colnames(denominator)[ii] 842 rval$vcov<-vcovmat 843 } 844 colnames(vars)<-colnames(denominator) 845 rownames(vars)<-colnames(numerator) 846 rval$var<-vars 847 if (deff=="replace" || deff) 848 attr(rval,"deff")<-deffs 849 850 attr(rval,"call")<-sys.call() 851 if (influence) 852 attr(rval, "influence")<-r/design$prob 853 854 class(rval)<-"svyratio" 855 rval 856 857 } 858