1
2R version 3.1.0 (2014-04-10) -- "Spring Dance"
3Copyright (C) 2014 The R Foundation for Statistical Computing
4Platform: x86_64-apple-darwin13.1.0 (64-bit)
5
6R is free software and comes with ABSOLUTELY NO WARRANTY.
7You are welcome to redistribute it under certain conditions.
8Type 'license()' or 'licence()' for distribution details.
9
10R is a collaborative project with many contributors.
11Type 'contributors()' for more information and
12'citation()' on how to cite R or R packages in publications.
13
14Type 'demo()' for some demos, 'help()' for on-line help, or
15'help.start()' for an HTML browser interface to help.
16Type 'q()' to quit R.
17
18> ##
19> ## Domain means can be written as ratio estimators or as regression coefficients
20> ##
21> ## This code checks that subsetting the design object gives the same results as
22> ## these approaches.
23> ##
24>
25>
26> library(survey)
27
28Attaching package: 'survey'
29
30The following object is masked from 'package:graphics':
31
32    dotchart
33
34> data(fpc)
35> dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
36> dsub<-subset(dfpc,x>4)
37> (m1<-svymean(~x,design=dsub))
38   mean     SE
39x 6.195 0.7555
40>
41> ## These should give the same domain estimates and standard errors
42> (m2<-svyby(~x,~I(x>4),design=dfpc, svymean,keep.var=TRUE))
43      I(x > 4)        x        se
44FALSE    FALSE 3.314286 0.3117042
45TRUE      TRUE 6.195000 0.7555129
46> m3<-svyglm(x~I(x>4)+0,design=dfpc)
47> summary(m3)
48
49Call:
50svyglm(formula = x ~ I(x > 4) + 0, design = dfpc)
51
52Survey design:
53svydesign(id = ~psuid, strat = ~stratid, weight = ~weight, data = fpc,
54    nest = TRUE)
55
56Coefficients:
57              Estimate Std. Error t value Pr(>|t|)
58I(x > 4)FALSE   3.3143     0.3117   10.63 0.000127 ***
59I(x > 4)TRUE    6.1950     0.7555    8.20 0.000439 ***
60---
61Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
62
63(Dispersion parameter for gaussian family taken to be 2.557379)
64
65Number of Fisher Scoring iterations: 2
66
67> (m4<-svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc))
68Ratio estimator: svyratio.survey.design2(~I(x * (x > 4)), ~as.numeric(x > 4),
69    dfpc)
70Ratios=
71               as.numeric(x > 4)
72I(x * (x > 4))             6.195
73SEs=
74               as.numeric(x > 4)
75I(x * (x > 4))         0.7555129
76> stopifnot(isTRUE(all.equal(SE(m2), as.vector(SE(m3)))))
77> stopifnot(isTRUE(all.equal(SE(m2)[2], as.vector(SE(m4)))))
78>
79> ## with strata
80> data(api)
81> dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
82> m1<-svymean(~enroll, subset(dstrat, comp.imp=="Yes"))
83> m2<-svyglm(enroll~comp.imp-1, dstrat)
84> m3<- svyratio(~I(enroll*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dstrat)
85> stopifnot(isTRUE(all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
86> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
87>
88> ## with calibration
89> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
90> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
91> (dclus1g3 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069)))
921 - level Cluster Sampling design
93With (15) clusters.
94calibrate(dclus1, ~stype + api99, c(pop.totals, api99 = 3914069))
95>
96> m1<-svymean(~api00, subset(dclus1g3, comp.imp=="Yes"))
97> m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1g3)
98> m2<-svyglm(api00~comp.imp-1, dclus1g3)
99> stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
100> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
101>
102> ## with raking
103> pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
104> pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
105> dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
106> m1<-svymean(~api00, subset(dclus1r, comp.imp=="Yes"))
107> m2<-svyglm(api00~comp.imp-1, dclus1r)
108> m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1r)
109> stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
110> stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
111>
112>
113>
114> ##
115> ## based on bug report from Takahiro Tsuchiya for version 3.4
116> ##
117> rei<-read.table(tmp<-textConnection(
118+ "  id   N n.a h n.ah n.h   sub  y
119+ 1   1 300  20 1   12   5  TRUE  1
120+ 2   2 300  20 1   12   5  TRUE  2
121+ 3   3 300  20 1   12   5  TRUE  3
122+ 4   4 300  20 1   12   5  TRUE  4
123+ 5   5 300  20 1   12   5  TRUE  5
124+ 6   6 300  20 1   12   5 FALSE NA
125+ 7   7 300  20 1   12   5 FALSE NA
126+ 8   8 300  20 1   12   5 FALSE NA
127+ 9   9 300  20 1   12   5 FALSE NA
128+ 10 10 300  20 1   12   5 FALSE NA
129+ 11 11 300  20 1   12   5 FALSE NA
130+ 12 12 300  20 1   12   5 FALSE NA
131+ 13 13 300  20 2    8   3  TRUE  6
132+ 14 14 300  20 2    8   3  TRUE  7
133+ 15 15 300  20 2    8   3  TRUE  8
134+ 16 16 300  20 2    8   3 FALSE NA
135+ 17 17 300  20 2    8   3 FALSE NA
136+ 18 18 300  20 2    8   3 FALSE NA
137+ 19 19 300  20 2    8   3 FALSE NA
138+ 20 20 300  20 2    8   3 FALSE NA
139+ "), header=TRUE)
140> close(tmp)
141>
142>
143> des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h),
144+                     fpc=list(~N,NULL), subset=~sub, data=rei, method="full")
145> tot2<- svytotal(~y, subset(des.rei2, y>3))
146>
147> rei$y<-rei$y*(rei$y>3)
148> ## based on Sarndal et al (9.4.14)
149> rei$w.ah <- rei$n.ah / rei$n.a
150> a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
151> a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
152> a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
153> V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h))
154> V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1)))
155>
156> a.rei$f.h<-with(a.rei, n.h/n.ah)
157> Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh))
158>
159> a.rei$f<-with(a.rei, n.a/N)
160> a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
161> Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
162>
163> V
164[1] 70761.47
165> Vphase1
166[1] 44325.47
167> Vphase2
168[1] 26436
169> vcov(tot2)
170         [,1]
171[1,] 70761.47
172attr(,"phases")
173attr(,"phases")$phase1
174         [,1]
175[1,] 44325.47
176
177attr(,"phases")$phase2
178      [,1]
179[1,] 26436
180
181>
182> ## comparing to regression
183> reg<-svyglm(y~I(y<4), design=des.rei2)
184> mn<-svymean(~y, subset(des.rei2,y>3))
185> all.equal(as.vector(coef(reg))[1],as.vector(coef(mn)))
186[1] TRUE
187> all.equal(as.vector(SE(reg))[1],as.vector(SE(mn)))
188[1] TRUE
189> vcov(mn)
190          [,1]
191[1,] 0.3292258
192attr(,"phases")
193attr(,"phases")$phase1
194          [,1]
195[1,] 0.1599264
196
197attr(,"phases")$phase2
198          [,1]
199[1,] 0.1692994
200
201> vcov(reg)
202             (Intercept) I(y < 4)TRUE
203(Intercept)    0.3292258   -0.3292258
204I(y < 4)TRUE  -0.3292258    0.5901907
205attr(,"phases")
206attr(,"phases")$phase1
207             (Intercept) I(y < 4)TRUE
208(Intercept)    0.1599264   -0.1599264
209I(y < 4)TRUE  -0.1599264    0.2588542
210
211attr(,"phases")$phase2
212             (Intercept) I(y < 4)TRUE
213(Intercept)    0.1692994   -0.1692994
214I(y < 4)TRUE  -0.1692994    0.3313365
215
216>
217>
218> proc.time()
219   user  system elapsed
220  1.707   0.055   1.778
221