1 2R version 3.1.0 (2014-04-10) -- "Spring Dance" 3Copyright (C) 2014 The R Foundation for Statistical Computing 4Platform: x86_64-apple-darwin13.1.0 (64-bit) 5 6R is free software and comes with ABSOLUTELY NO WARRANTY. 7You are welcome to redistribute it under certain conditions. 8Type 'license()' or 'licence()' for distribution details. 9 10R is a collaborative project with many contributors. 11Type 'contributors()' for more information and 12'citation()' on how to cite R or R packages in publications. 13 14Type 'demo()' for some demos, 'help()' for on-line help, or 15'help.start()' for an HTML browser interface to help. 16Type 'q()' to quit R. 17 18> ## 19> ## Domain means can be written as ratio estimators or as regression coefficients 20> ## 21> ## This code checks that subsetting the design object gives the same results as 22> ## these approaches. 23> ## 24> 25> 26> library(survey) 27 28Attaching package: 'survey' 29 30The following object is masked from 'package:graphics': 31 32 dotchart 33 34> data(fpc) 35> dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE) 36> dsub<-subset(dfpc,x>4) 37> (m1<-svymean(~x,design=dsub)) 38 mean SE 39x 6.195 0.7555 40> 41> ## These should give the same domain estimates and standard errors 42> (m2<-svyby(~x,~I(x>4),design=dfpc, svymean,keep.var=TRUE)) 43 I(x > 4) x se 44FALSE FALSE 3.314286 0.3117042 45TRUE TRUE 6.195000 0.7555129 46> m3<-svyglm(x~I(x>4)+0,design=dfpc) 47> summary(m3) 48 49Call: 50svyglm(formula = x ~ I(x > 4) + 0, design = dfpc) 51 52Survey design: 53svydesign(id = ~psuid, strat = ~stratid, weight = ~weight, data = fpc, 54 nest = TRUE) 55 56Coefficients: 57 Estimate Std. Error t value Pr(>|t|) 58I(x > 4)FALSE 3.3143 0.3117 10.63 0.000127 *** 59I(x > 4)TRUE 6.1950 0.7555 8.20 0.000439 *** 60--- 61Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 62 63(Dispersion parameter for gaussian family taken to be 2.557379) 64 65Number of Fisher Scoring iterations: 2 66 67> (m4<-svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc)) 68Ratio estimator: svyratio.survey.design2(~I(x * (x > 4)), ~as.numeric(x > 4), 69 dfpc) 70Ratios= 71 as.numeric(x > 4) 72I(x * (x > 4)) 6.195 73SEs= 74 as.numeric(x > 4) 75I(x * (x > 4)) 0.7555129 76> stopifnot(isTRUE(all.equal(SE(m2), as.vector(SE(m3))))) 77> stopifnot(isTRUE(all.equal(SE(m2)[2], as.vector(SE(m4))))) 78> 79> ## with strata 80> data(api) 81> dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc) 82> m1<-svymean(~enroll, subset(dstrat, comp.imp=="Yes")) 83> m2<-svyglm(enroll~comp.imp-1, dstrat) 84> m3<- svyratio(~I(enroll*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dstrat) 85> stopifnot(isTRUE(all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1))))) 86> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3)))))) 87> 88> ## with calibration 89> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc) 90> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018) 91> (dclus1g3 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069))) 921 - level Cluster Sampling design 93With (15) clusters. 94calibrate(dclus1, ~stype + api99, c(pop.totals, api99 = 3914069)) 95> 96> m1<-svymean(~api00, subset(dclus1g3, comp.imp=="Yes")) 97> m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1g3) 98> m2<-svyglm(api00~comp.imp-1, dclus1g3) 99> stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1))))) 100> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3)))))) 101> 102> ## with raking 103> pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018)) 104> pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122)) 105> dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide)) 106> m1<-svymean(~api00, subset(dclus1r, comp.imp=="Yes")) 107> m2<-svyglm(api00~comp.imp-1, dclus1r) 108> m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1r) 109> stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1))))) 110> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3)))))) 111> 112> 113> 114> ## 115> ## based on bug report from Takahiro Tsuchiya for version 3.4 116> ## 117> rei<-read.table(tmp<-textConnection( 118+ " id N n.a h n.ah n.h sub y 119+ 1 1 300 20 1 12 5 TRUE 1 120+ 2 2 300 20 1 12 5 TRUE 2 121+ 3 3 300 20 1 12 5 TRUE 3 122+ 4 4 300 20 1 12 5 TRUE 4 123+ 5 5 300 20 1 12 5 TRUE 5 124+ 6 6 300 20 1 12 5 FALSE NA 125+ 7 7 300 20 1 12 5 FALSE NA 126+ 8 8 300 20 1 12 5 FALSE NA 127+ 9 9 300 20 1 12 5 FALSE NA 128+ 10 10 300 20 1 12 5 FALSE NA 129+ 11 11 300 20 1 12 5 FALSE NA 130+ 12 12 300 20 1 12 5 FALSE NA 131+ 13 13 300 20 2 8 3 TRUE 6 132+ 14 14 300 20 2 8 3 TRUE 7 133+ 15 15 300 20 2 8 3 TRUE 8 134+ 16 16 300 20 2 8 3 FALSE NA 135+ 17 17 300 20 2 8 3 FALSE NA 136+ 18 18 300 20 2 8 3 FALSE NA 137+ 19 19 300 20 2 8 3 FALSE NA 138+ 20 20 300 20 2 8 3 FALSE NA 139+ "), header=TRUE) 140> close(tmp) 141> 142> 143> des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h), 144+ fpc=list(~N,NULL), subset=~sub, data=rei, method="full") 145> tot2<- svytotal(~y, subset(des.rei2, y>3)) 146> 147> rei$y<-rei$y*(rei$y>3) 148> ## based on Sarndal et al (9.4.14) 149> rei$w.ah <- rei$n.ah / rei$n.a 150> a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE) 151> a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE) 152> a.rei$y.u <- sum(a.rei$w.ah * a.rei$y) 153> V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h)) 154> V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1))) 155> 156> a.rei$f.h<-with(a.rei, n.h/n.ah) 157> Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh)) 158> 159> a.rei$f<-with(a.rei, n.a/N) 160> a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1)) 161> Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2))) 162> 163> V 164[1] 70761.47 165> Vphase1 166[1] 44325.47 167> Vphase2 168[1] 26436 169> vcov(tot2) 170 [,1] 171[1,] 70761.47 172attr(,"phases") 173attr(,"phases")$phase1 174 [,1] 175[1,] 44325.47 176 177attr(,"phases")$phase2 178 [,1] 179[1,] 26436 180 181> 182> ## comparing to regression 183> reg<-svyglm(y~I(y<4), design=des.rei2) 184> mn<-svymean(~y, subset(des.rei2,y>3)) 185> all.equal(as.vector(coef(reg))[1],as.vector(coef(mn))) 186[1] TRUE 187> all.equal(as.vector(SE(reg))[1],as.vector(SE(mn))) 188[1] TRUE 189> vcov(mn) 190 [,1] 191[1,] 0.3292258 192attr(,"phases") 193attr(,"phases")$phase1 194 [,1] 195[1,] 0.1599264 196 197attr(,"phases")$phase2 198 [,1] 199[1,] 0.1692994 200 201> vcov(reg) 202 (Intercept) I(y < 4)TRUE 203(Intercept) 0.3292258 -0.3292258 204I(y < 4)TRUE -0.3292258 0.5901907 205attr(,"phases") 206attr(,"phases")$phase1 207 (Intercept) I(y < 4)TRUE 208(Intercept) 0.1599264 -0.1599264 209I(y < 4)TRUE -0.1599264 0.2588542 210 211attr(,"phases")$phase2 212 (Intercept) I(y < 4)TRUE 213(Intercept) 0.1692994 -0.1692994 214I(y < 4)TRUE -0.1692994 0.3313365 215 216> 217> 218> proc.time() 219 user system elapsed 220 1.707 0.055 1.778 221