1##
2## Domain means can be written as ratio estimators or as regression coefficients
3##
4## This code checks that subsetting the design object gives the same results as
5## these approaches.
6##
7
8
9library(survey)
10data(fpc)
11dfpc<-svydesign(id=~psuid,strat=~stratid,weight=~weight,data=fpc,nest=TRUE)
12dsub<-subset(dfpc,x>4)
13(m1<-svymean(~x,design=dsub))
14
15## These should give the same domain estimates and standard errors
16(m2<-svyby(~x,~I(x>4),design=dfpc, svymean,keep.var=TRUE))
17m3<-svyglm(x~I(x>4)+0,design=dfpc)
18summary(m3)
19(m4<-svyratio(~I(x*(x>4)),~as.numeric(x>4), dfpc))
20stopifnot(isTRUE(all.equal(SE(m2), as.vector(SE(m3)))))
21stopifnot(isTRUE(all.equal(SE(m2)[2], as.vector(SE(m4)))))
22
23## with strata
24data(api)
25dstrat<-svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
26m1<-svymean(~enroll, subset(dstrat, comp.imp=="Yes"))
27m2<-svyglm(enroll~comp.imp-1, dstrat)
28m3<- svyratio(~I(enroll*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dstrat)
29stopifnot(isTRUE(all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
30stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
31
32## with calibration
33dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
34pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
35(dclus1g3 <- calibrate(dclus1, ~stype+api99, c(pop.totals, api99=3914069)))
36
37m1<-svymean(~api00, subset(dclus1g3, comp.imp=="Yes"))
38m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1g3)
39m2<-svyglm(api00~comp.imp-1, dclus1g3)
40stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
41stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
42
43## with raking
44pop.types <- data.frame(stype=c("E","H","M"), Freq=c(4421,755,1018))
45pop.schwide <- data.frame(sch.wide=c("No","Yes"), Freq=c(1072,5122))
46dclus1r<-rake(dclus1, list(~stype,~sch.wide), list(pop.types, pop.schwide))
47m1<-svymean(~api00, subset(dclus1r, comp.imp=="Yes"))
48m2<-svyglm(api00~comp.imp-1, dclus1r)
49m3<-svyratio(~I(api00*(comp.imp=="Yes")), ~as.numeric(comp.imp=="Yes"), dclus1r)
50stopifnot(isTRUE( all.equal(as.vector(SE(m2)["comp.impYes"]), as.vector(SE(m1)))))
51stopifnot(isTRUE( all.equal(as.vector(SE(m1)), as.vector(drop(SE(m3))))))
52
53
54
55##
56## based on bug report from Takahiro Tsuchiya for version 3.4
57##
58rei<-read.table(tmp<-textConnection(
59"  id   N n.a h n.ah n.h   sub  y
601   1 300  20 1   12   5  TRUE  1
612   2 300  20 1   12   5  TRUE  2
623   3 300  20 1   12   5  TRUE  3
634   4 300  20 1   12   5  TRUE  4
645   5 300  20 1   12   5  TRUE  5
656   6 300  20 1   12   5 FALSE NA
667   7 300  20 1   12   5 FALSE NA
678   8 300  20 1   12   5 FALSE NA
689   9 300  20 1   12   5 FALSE NA
6910 10 300  20 1   12   5 FALSE NA
7011 11 300  20 1   12   5 FALSE NA
7112 12 300  20 1   12   5 FALSE NA
7213 13 300  20 2    8   3  TRUE  6
7314 14 300  20 2    8   3  TRUE  7
7415 15 300  20 2    8   3  TRUE  8
7516 16 300  20 2    8   3 FALSE NA
7617 17 300  20 2    8   3 FALSE NA
7718 18 300  20 2    8   3 FALSE NA
7819 19 300  20 2    8   3 FALSE NA
7920 20 300  20 2    8   3 FALSE NA
80"), header=TRUE)
81close(tmp)
82
83
84des.rei2 <- twophase(id=list(~id,~id), strata=list(NULL,~h),
85                    fpc=list(~N,NULL), subset=~sub, data=rei, method="full")
86tot2<- svytotal(~y, subset(des.rei2, y>3))
87
88rei$y<-rei$y*(rei$y>3)
89## based on Sarndal et al (9.4.14)
90rei$w.ah <- rei$n.ah / rei$n.a
91a.rei <- aggregate(rei, by=list(rei$h), mean, na.rm=TRUE)
92a.rei$S.ysh <- tapply(rei$y, rei$h, var, na.rm=TRUE)
93a.rei$y.u <- sum(a.rei$w.ah * a.rei$y)
94V <- with(a.rei, sum(N * (N-1) * ((n.ah-1)/(n.a-1) - (n.h-1)/(N-1)) * w.ah * S.ysh / n.h))
95V <- V + with(a.rei, sum(N * (N-n.a) * w.ah * (y - y.u)^2 / (n.a-1)))
96
97a.rei$f.h<-with(a.rei, n.h/n.ah)
98Vphase2<-with(a.rei, sum(N*N*w.ah^2* ((1-f.h)/n.h) *S.ysh))
99
100a.rei$f<-with(a.rei, n.a/N)
101a.rei$delta.h<-with(a.rei, (1/n.h)*(n.a-n.ah)/(n.a-1))
102Vphase1<-with(a.rei, sum(N*N*((1-f)/n.a)*( w.ah*(1-delta.h)*S.ysh+ ((n.a)/(n.a-1))*w.ah*(y-y.u)^2)))
103
104V
105Vphase1
106Vphase2
107vcov(tot2)
108
109## comparing to regression
110reg<-svyglm(y~I(y<4), design=des.rei2)
111mn<-svymean(~y, subset(des.rei2,y>3))
112all.equal(as.vector(coef(reg))[1],as.vector(coef(mn)))
113all.equal(as.vector(SE(reg))[1],as.vector(SE(mn)))
114vcov(mn)
115vcov(reg)
116
117