1 2R version 3.4.0 (2017-04-21) -- "You Stupid Darkness" 3Copyright (C) 2017 The R Foundation for Statistical Computing 4Platform: x86_64-apple-darwin15.6.0 (64-bit) 5 6R is free software and comes with ABSOLUTELY NO WARRANTY. 7You are welcome to redistribute it under certain conditions. 8Type 'license()' or 'licence()' for distribution details. 9 10R is a collaborative project with many contributors. 11Type 'contributors()' for more information and 12'citation()' on how to cite R or R packages in publications. 13 14Type 'demo()' for some demos, 'help()' for on-line help, or 15'help.start()' for an HTML browser interface to help. 16Type 'q()' to quit R. 17 18> ## 19> ## Calibration examples 20> ## 21> 22> 23> ## Example of calibration to first-stage clusters 24> library(survey) 25Loading required package: grid 26Loading required package: Matrix 27Loading required package: survival 28 29Attaching package: 'survey' 30 31The following object is masked from 'package:graphics': 32 33 dotchart 34 35> data(api) 36> 37> clusters<-table(apiclus2$dnum) 38> clusters<-clusters[clusters>1 & names(clusters)!="639"] 39> apiclus2a<-subset(apiclus2, dnum %in% as.numeric(names(clusters))) 40> 41> dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2a) 42> 43> popclusters<-subset(apipop, dnum %in% as.numeric(names(clusters))) 44> 45> pop<-lapply(as.numeric(names(clusters)), function(cluster) { 46+ colSums(model.matrix(~api99, model.frame(~api99, subset(popclusters, dnum %in% cluster))))}) 47> 48> names(pop)<-names(clusters) 49> 50> dclus2g<-calibrate(dclus2, ~api99, pop,stage=1) 51> 52> svymean(~api99, dclus2) 53 mean SE 54api99 642.14 31.434 55> svymean(~api99, dclus2g) 56 mean SE 57api99 654.49 29.82 58> 59> round(svyby(~api99, ~dnum, design=dclus2, svymean),4) 60 dnum api99 se 6183 83 694.3333 0.0000 62132 132 505.0000 0.0000 63152 152 574.0000 0.0000 64173 173 894.7500 0.0000 65198 198 533.7500 0.0000 66200 200 589.8000 6.8335 67228 228 477.0000 0.0000 68295 295 646.4000 0.0000 69302 302 903.5000 0.0000 70403 403 852.4000 0.0000 71452 452 533.0000 0.0000 72480 480 614.2000 0.0000 73523 523 580.5000 0.0000 74534 534 564.6000 0.0000 75549 549 896.2000 0.0000 76552 552 730.0000 0.0000 77570 570 518.4000 7.5478 78575 575 800.8000 4.2513 79596 596 785.6000 2.4155 80620 620 591.6000 10.5869 81638 638 560.2000 4.0954 82674 674 760.0000 0.0000 83679 679 610.2500 0.0000 84687 687 718.6667 0.0000 85701 701 651.5000 0.0000 86711 711 690.5000 0.0000 87731 731 702.0000 2.1744 88768 768 562.5000 0.0000 89781 781 854.4000 0.7456 90> 91> round(svyby(~api99, ~dnum, design=dclus2g, svymean),4) 92 dnum api99 se 9383 83 694.3333 0 94132 132 505.0000 0 95152 152 574.0000 0 96173 173 894.7500 0 97198 198 533.7500 0 98200 200 567.5455 0 99228 228 477.0000 0 100295 295 646.4000 0 101302 302 903.5000 0 102403 403 852.4000 0 103452 452 533.0000 0 104480 480 614.2000 0 105523 523 580.5000 0 106534 534 564.6000 0 107549 549 896.2000 0 108552 552 730.0000 0 109570 570 548.9444 0 110575 575 824.5357 0 111596 596 787.5714 0 112620 620 609.3750 0 113638 638 585.6429 0 114674 674 760.0000 0 115679 679 610.2500 0 116687 687 718.6667 0 117701 701 651.5000 0 118711 711 690.5000 0 119731 731 700.6667 0 120768 768 562.5000 0 121781 781 851.0000 0 122> 123> ## Averaging to first stage 124> 125> dclus1<- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) 126> pop<-colSums(cbind(1,apipop$enroll),na.rm=TRUE) 127> 128> dclus1g<-calibrate(dclus1, ~enroll, pop, aggregate=1) 129> 130> svytotal(~enroll,dclus1g) 131 total SE 132enroll 3811472 0 133> svytotal(~api.stu,dclus1g) 134 total SE 135api.stu 3242857 38967 136> 137> #variation within clusters should be zero 138> all.equal(0, max(ave(weights(dclus1g),dclus1g$cluster,FUN=var),na.rm=TRUE)) 139[1] TRUE 140> 141> ##bounded weights 142> dclus1g<-calibrate(dclus1, ~enroll, pop) 143> range(weights(dclus1g)/weights(dclus1)) 144[1] 0.7906782 1.7891164 145> dclus1gb<-calibrate(dclus1, ~enroll, pop, bounds=c(.6,1.5)) 146> range(weights(dclus1gb)/weights(dclus1)) 147[1] 0.7198751 1.5000000 148> 149> ## Ratio estimators 150> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc) 151> svytotal(~api.stu,dstrat) 152 total SE 153api.stu 3086009 99477 154> common<-svyratio(~api.stu, ~enroll, dstrat, separate=FALSE) 155> total.enroll<-sum(apipop$enroll,na.rm=TRUE) 156> predict(common, total=total.enroll) 157$total 158 enroll 159api.stu 3190038 160 161$se 162 enroll 163api.stu 29565.98 164 165> dstratg<-calibrate(dstrat,~enroll-1, total.enroll, variance=1) 166> svytotal(~api.stu, dstratg) 167 total SE 168api.stu 3190038 29566 169> 170> ## postStratify vs calibrate in stratified sample (Ben French) 171> set.seed(17) 172> dat<-data.frame(y=rep(0:1,each=100),x=rnorm(200)+2*rep(0:1,each=100), 173+ z=rbinom(200,1,.2), fpc=rep(c(100,10000),each=100)) 174> dat$w<-ifelse(dat$y,dat$z,1-dat$z) 175> popw<-data.frame(w=c("0","1"), Freq=c(2000,8000)) 176> des<-svydesign(id=~1,fpc=~fpc, data=dat,strata=~y) 177> postStratify(des,~w,popw)->dps 178> dcal<-calibrate(des,~factor(w), pop=c(10000,8000)) 179> 180> all.equal(SE(svymean(~x,dcal)),SE(svymean(~x,dps))) 181[1] TRUE 182> 183> ## missing data in calibrated design 184> dps$variables$z[1]<-NA 185> summary(svyglm(y~z+x,design=dps,family=quasibinomial)) 186 187Call: 188svyglm(formula = y ~ z + x, design = dps, family = quasibinomial) 189 190Survey design: 191postStratify(des, ~w, popw) 192 193Coefficients: 194 Estimate Std. Error t value Pr(>|t|) 195(Intercept) -0.1203 0.3380 -0.356 0.722 196z 6.2118 0.6451 9.630 <2e-16 *** 197x 2.2602 0.2514 8.992 <2e-16 *** 198--- 199Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 200 201(Dispersion parameter for quasibinomial family taken to be 1.919987) 202 203Number of Fisher Scoring iterations: 9 204 205> 206> ## Ratio estimator using the heteroskedasticity parameter (Daniel Oehm) 207> # should match the ratio estmate above 208> dstratgh <- calibrate(dstrat,~enroll-1, total.enroll, variance=apistrat$enroll) 209> svytotal(~api.stu, dstratgh) 210 total SE 211api.stu 3190038 29566 212> 213> ## individual boundary constraints as multiplicative values (Daniel Oehm) 214> bnds <- list( 215+ lower = c(1, 1, rep(-Inf, nrow(apistrat)-2)), 216+ upper = c(1, 1, rep(Inf, nrow(apistrat)-2))) # the first two weights will remain unchanged the others are free to move 217> lapply(bnds, head) 218$lower 219[1] 1 1 -Inf -Inf -Inf -Inf 220 221$upper 222[1] 1 1 Inf Inf Inf Inf 223 224> dstratg1<-calibrate(dstrat, ~enroll-1, total.enroll, bounds = bnds, variance=apistrat$enroll) 225> svytotal(~api.stu, dstratg1) 226 total SE 227api.stu 3190133 29561 228> head(weights(dstrat)) 229 1 2 3 4 5 6 23044.21 44.21 44.21 44.21 44.21 44.21 231> head(weights(dstratg1)) 232 1 2 3 4 5 6 23344.21000 44.21000 45.72055 45.72055 45.72055 45.72055 234> all.equal(weights(dstrat)[1:2], weights(dstratg1)[1:2]) 235[1] TRUE 236> 237> ## individual boundary constraints as constant values (Daniel Oehm) 238> bnds <- list( 239+ lower = c(44.21, 44.21, rep(-Inf, nrow(apistrat)-2)), 240+ upper = c(44.21, 44.21, rep(Inf, nrow(apistrat)-2))) # the first two weights will remain unchanged 241> lapply(bnds, head) 242$lower 243[1] 44.21 44.21 -Inf -Inf -Inf -Inf 244 245$upper 246[1] 44.21 44.21 Inf Inf Inf Inf 247 248> dstratg2<-calibrate(dstrat, ~enroll-1, total.enroll, bounds = bnds, bounds.const = TRUE, variance=apistrat$enroll) 249> svytotal(~api.stu, dstratg2) 250 total SE 251api.stu 3190133 29561 252> head(weights(dstrat)) 253 1 2 3 4 5 6 25444.21 44.21 44.21 44.21 44.21 44.21 255> head(weights(dstratg2)) 256 1 2 3 4 5 6 25744.21000 44.21000 45.72055 45.72055 45.72055 45.72055 258> all.equal(round(weights(dstrat)[1:2], 8), round(weights(dstratg2)[1:2]), 8) # minor rounding error but all good 259[1] TRUE 260> 261> # sparse matrix support (Daniel Oehm) 262> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc) 263> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018) 264> dclus1g<-calibrate(dclus1, ~stype, pop.totals) 265> svymean(~api00, dclus1g) 266 mean SE 267api00 642.31 23.921 268> svytotal(~enroll, dclus1g) 269 total SE 270enroll 3680893 406293 271> 272> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018) 273> dclus1g<-calibrate(dclus1, ~stype, pop.totals, sparse = TRUE) 274> svymean(~api00, dclus1g) 275 mean SE 276api00 642.31 23.921 277> svytotal(~enroll, dclus1g) 278 total SE 279enroll 3680893 406293 280> 281> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018) 282> dclus1g<-calibrate(dclus1, ~stype, pop.totals, sparse = TRUE, calfun = "raking") 283> svymean(~api00, dclus1g) 284 mean SE 285api00 642.31 23.921 286> svytotal(~enroll, dclus1g) 287 total SE 288enroll 3680893 406293 289> 290> proc.time() 291 user system elapsed 292 2.591 0.084 2.712 293