1
2R version 3.4.0 (2017-04-21) -- "You Stupid Darkness"
3Copyright (C) 2017 The R Foundation for Statistical Computing
4Platform: x86_64-apple-darwin15.6.0 (64-bit)
5
6R is free software and comes with ABSOLUTELY NO WARRANTY.
7You are welcome to redistribute it under certain conditions.
8Type 'license()' or 'licence()' for distribution details.
9
10R is a collaborative project with many contributors.
11Type 'contributors()' for more information and
12'citation()' on how to cite R or R packages in publications.
13
14Type 'demo()' for some demos, 'help()' for on-line help, or
15'help.start()' for an HTML browser interface to help.
16Type 'q()' to quit R.
17
18> ##
19> ## Calibration examples
20> ##
21>
22>
23> ## Example of calibration to first-stage clusters
24> library(survey)
25Loading required package: grid
26Loading required package: Matrix
27Loading required package: survival
28
29Attaching package: 'survey'
30
31The following object is masked from 'package:graphics':
32
33    dotchart
34
35> data(api)
36>
37> clusters<-table(apiclus2$dnum)
38> clusters<-clusters[clusters>1 & names(clusters)!="639"]
39> apiclus2a<-subset(apiclus2, dnum %in% as.numeric(names(clusters)))
40>
41> dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2a)
42>
43> popclusters<-subset(apipop, dnum %in% as.numeric(names(clusters)))
44>
45> pop<-lapply(as.numeric(names(clusters)), function(cluster) {
46+   colSums(model.matrix(~api99, model.frame(~api99, subset(popclusters, dnum %in% cluster))))})
47>
48> names(pop)<-names(clusters)
49>
50> dclus2g<-calibrate(dclus2, ~api99, pop,stage=1)
51>
52> svymean(~api99, dclus2)
53        mean     SE
54api99 642.14 31.434
55> svymean(~api99, dclus2g)
56        mean    SE
57api99 654.49 29.82
58>
59> round(svyby(~api99, ~dnum, design=dclus2, svymean),4)
60    dnum    api99      se
6183    83 694.3333  0.0000
62132  132 505.0000  0.0000
63152  152 574.0000  0.0000
64173  173 894.7500  0.0000
65198  198 533.7500  0.0000
66200  200 589.8000  6.8335
67228  228 477.0000  0.0000
68295  295 646.4000  0.0000
69302  302 903.5000  0.0000
70403  403 852.4000  0.0000
71452  452 533.0000  0.0000
72480  480 614.2000  0.0000
73523  523 580.5000  0.0000
74534  534 564.6000  0.0000
75549  549 896.2000  0.0000
76552  552 730.0000  0.0000
77570  570 518.4000  7.5478
78575  575 800.8000  4.2513
79596  596 785.6000  2.4155
80620  620 591.6000 10.5869
81638  638 560.2000  4.0954
82674  674 760.0000  0.0000
83679  679 610.2500  0.0000
84687  687 718.6667  0.0000
85701  701 651.5000  0.0000
86711  711 690.5000  0.0000
87731  731 702.0000  2.1744
88768  768 562.5000  0.0000
89781  781 854.4000  0.7456
90>
91> round(svyby(~api99, ~dnum, design=dclus2g, svymean),4)
92    dnum    api99 se
9383    83 694.3333  0
94132  132 505.0000  0
95152  152 574.0000  0
96173  173 894.7500  0
97198  198 533.7500  0
98200  200 567.5455  0
99228  228 477.0000  0
100295  295 646.4000  0
101302  302 903.5000  0
102403  403 852.4000  0
103452  452 533.0000  0
104480  480 614.2000  0
105523  523 580.5000  0
106534  534 564.6000  0
107549  549 896.2000  0
108552  552 730.0000  0
109570  570 548.9444  0
110575  575 824.5357  0
111596  596 787.5714  0
112620  620 609.3750  0
113638  638 585.6429  0
114674  674 760.0000  0
115679  679 610.2500  0
116687  687 718.6667  0
117701  701 651.5000  0
118711  711 690.5000  0
119731  731 700.6667  0
120768  768 562.5000  0
121781  781 851.0000  0
122>
123> ## Averaging to first stage
124>
125> dclus1<- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
126> pop<-colSums(cbind(1,apipop$enroll),na.rm=TRUE)
127>
128> dclus1g<-calibrate(dclus1, ~enroll, pop, aggregate=1)
129>
130> svytotal(~enroll,dclus1g)
131         total SE
132enroll 3811472  0
133> svytotal(~api.stu,dclus1g)
134          total    SE
135api.stu 3242857 38967
136>
137> #variation within clusters should be zero
138> all.equal(0, max(ave(weights(dclus1g),dclus1g$cluster,FUN=var),na.rm=TRUE))
139[1] TRUE
140>
141> ##bounded weights
142>  dclus1g<-calibrate(dclus1, ~enroll, pop)
143>  range(weights(dclus1g)/weights(dclus1))
144[1] 0.7906782 1.7891164
145>  dclus1gb<-calibrate(dclus1, ~enroll, pop, bounds=c(.6,1.5))
146>  range(weights(dclus1gb)/weights(dclus1))
147[1] 0.7198751 1.5000000
148>
149> ## Ratio estimators
150> dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
151> svytotal(~api.stu,dstrat)
152          total    SE
153api.stu 3086009 99477
154> common<-svyratio(~api.stu, ~enroll, dstrat, separate=FALSE)
155> total.enroll<-sum(apipop$enroll,na.rm=TRUE)
156> predict(common, total=total.enroll)
157$total
158         enroll
159api.stu 3190038
160
161$se
162          enroll
163api.stu 29565.98
164
165> dstratg<-calibrate(dstrat,~enroll-1, total.enroll, variance=1)
166> svytotal(~api.stu, dstratg)
167          total    SE
168api.stu 3190038 29566
169>
170> ## postStratify vs calibrate in stratified sample (Ben French)
171> set.seed(17)
172> dat<-data.frame(y=rep(0:1,each=100),x=rnorm(200)+2*rep(0:1,each=100),
173+                 z=rbinom(200,1,.2), fpc=rep(c(100,10000),each=100))
174> dat$w<-ifelse(dat$y,dat$z,1-dat$z)
175> popw<-data.frame(w=c("0","1"), Freq=c(2000,8000))
176>  des<-svydesign(id=~1,fpc=~fpc, data=dat,strata=~y)
177> postStratify(des,~w,popw)->dps
178> dcal<-calibrate(des,~factor(w), pop=c(10000,8000))
179>
180> all.equal(SE(svymean(~x,dcal)),SE(svymean(~x,dps)))
181[1] TRUE
182>
183> ## missing data in calibrated design
184> dps$variables$z[1]<-NA
185> summary(svyglm(y~z+x,design=dps,family=quasibinomial))
186
187Call:
188svyglm(formula = y ~ z + x, design = dps, family = quasibinomial)
189
190Survey design:
191postStratify(des, ~w, popw)
192
193Coefficients:
194            Estimate Std. Error t value Pr(>|t|)
195(Intercept)  -0.1203     0.3380  -0.356    0.722
196z             6.2118     0.6451   9.630   <2e-16 ***
197x             2.2602     0.2514   8.992   <2e-16 ***
198---
199Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
200
201(Dispersion parameter for quasibinomial family taken to be 1.919987)
202
203Number of Fisher Scoring iterations: 9
204
205>
206> ## Ratio estimator using the heteroskedasticity parameter (Daniel Oehm)
207> # should match the ratio estmate above
208> dstratgh <- calibrate(dstrat,~enroll-1, total.enroll, variance=apistrat$enroll)
209> svytotal(~api.stu, dstratgh)
210          total    SE
211api.stu 3190038 29566
212>
213> ## individual boundary constraints as multiplicative values (Daniel Oehm)
214> bnds <- list(
215+   lower = c(1, 1, rep(-Inf, nrow(apistrat)-2)),
216+   upper = c(1, 1, rep(Inf, nrow(apistrat)-2))) # the first two weights will remain unchanged the others are free to move
217> lapply(bnds, head)
218$lower
219[1]    1    1 -Inf -Inf -Inf -Inf
220
221$upper
222[1]   1   1 Inf Inf Inf Inf
223
224> dstratg1<-calibrate(dstrat, ~enroll-1, total.enroll, bounds = bnds, variance=apistrat$enroll)
225> svytotal(~api.stu, dstratg1)
226          total    SE
227api.stu 3190133 29561
228> head(weights(dstrat))
229    1     2     3     4     5     6
23044.21 44.21 44.21 44.21 44.21 44.21
231> head(weights(dstratg1))
232       1        2        3        4        5        6
23344.21000 44.21000 45.72055 45.72055 45.72055 45.72055
234> all.equal(weights(dstrat)[1:2], weights(dstratg1)[1:2])
235[1] TRUE
236>
237> ## individual boundary constraints as constant values (Daniel Oehm)
238> bnds <- list(
239+   lower = c(44.21, 44.21, rep(-Inf, nrow(apistrat)-2)),
240+   upper = c(44.21, 44.21, rep(Inf, nrow(apistrat)-2))) # the first two weights will remain unchanged
241> lapply(bnds, head)
242$lower
243[1] 44.21 44.21  -Inf  -Inf  -Inf  -Inf
244
245$upper
246[1] 44.21 44.21   Inf   Inf   Inf   Inf
247
248> dstratg2<-calibrate(dstrat, ~enroll-1, total.enroll, bounds = bnds, bounds.const = TRUE, variance=apistrat$enroll)
249> svytotal(~api.stu, dstratg2)
250          total    SE
251api.stu 3190133 29561
252> head(weights(dstrat))
253    1     2     3     4     5     6
25444.21 44.21 44.21 44.21 44.21 44.21
255> head(weights(dstratg2))
256       1        2        3        4        5        6
25744.21000 44.21000 45.72055 45.72055 45.72055 45.72055
258> all.equal(round(weights(dstrat)[1:2], 8), round(weights(dstratg2)[1:2]), 8) # minor rounding error but all good
259[1] TRUE
260>
261> # sparse matrix support (Daniel Oehm)
262> dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
263> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
264> dclus1g<-calibrate(dclus1, ~stype, pop.totals)
265> svymean(~api00, dclus1g)
266        mean     SE
267api00 642.31 23.921
268> svytotal(~enroll, dclus1g)
269         total     SE
270enroll 3680893 406293
271>
272> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
273> dclus1g<-calibrate(dclus1, ~stype, pop.totals, sparse = TRUE)
274> svymean(~api00, dclus1g)
275        mean     SE
276api00 642.31 23.921
277> svytotal(~enroll, dclus1g)
278         total     SE
279enroll 3680893 406293
280>
281> pop.totals<-c(`(Intercept)`=6194, stypeH=755, stypeM=1018)
282> dclus1g<-calibrate(dclus1, ~stype, pop.totals, sparse = TRUE, calfun = "raking")
283> svymean(~api00, dclus1g)
284        mean     SE
285api00 642.31 23.921
286> svytotal(~enroll, dclus1g)
287         total     SE
288enroll 3680893 406293
289>
290> proc.time()
291   user  system elapsed
292  2.591   0.084   2.712
293