1##
2##  Recursive estimation of linearisation variances
3##  in multistage samples.
4##
5
6svydesign<-function(ids, probs = NULL, strata = NULL, variables = NULL,
7    fpc = NULL, data=NULL, nest = FALSE, check.strata = !nest,
8    weights = NULL,pps=FALSE,...){
9	UseMethod("svydesign", data)
10	}
11
12detibble<-function(data) {
13    if ("tbl_df" %in% class(data))
14        as.data.frame(data)
15    else
16        data
17}
18
19svydesign.default<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL,
20                    data=NULL, nest=FALSE, check.strata=!nest,weights=NULL,pps=FALSE,
21                            variance=c("HT","YG"),...){
22
23  data<-detibble(data)
24
25  variance<-match.arg(variance)
26  if(is.character(pps)){
27    a<-match.arg(pps,c("brewer","overton","other"))
28    if (!(pps %in% c("brewer","other")))
29      return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc,
30                 data=data,method=a,call=sys.call(-1),variance=variance,...))
31  } else if (!is.logical(pps)){
32    return(pps_design(ids=ids,probs=probs, strata=strata,variables=variables, fpc=fpc,
33                      data=data,method=pps,call=sys.call(-1),variance=variance,...))
34  }
35
36  if (!is.character(pps) || pps!="other"){
37    if (variance!="HT")
38      stop("Only variance='HT' supported for this design")
39  }
40
41  ## less memory-hungry version for sparse tables
42    interaction<-function (..., drop = TRUE) {
43        args <- list(...)
44        narg <- length(args)
45        if (narg == 1 && is.list(args[[1]])) {
46            args <- args[[1]]
47            narg <- length(args)
48        }
49
50        ls<-sapply(args,function(a) length(levels(a)))
51        ans<-do.call("paste",c(lapply(args,as.character),sep="."))
52        ans<-factor(ans)
53        return(ans)
54
55    }
56
57    na.failsafe<-function(message="missing values in object"){
58      function(object,...){
59        if (NCOL(object)==0)
60          object
61        else {
62          ok <- complete.cases(object)
63          if (all(ok))
64            object
65          else stop(message)
66        }
67      }
68    }
69
70     na.id<-na.failsafe("missing values in `id'")
71     if(inherits(ids,"formula")) {
72	 mf<-substitute(model.frame(ids,data=data, na.action=na.id))
73	 ids<-eval.parent(mf)
74         if (ncol(ids)==0) ## formula was ~1
75           ids<-data.frame(id=1:nrow(ids))
76       } else{
77         if (is.null(ids))
78           stop("Must provide ids= argument")
79         else
80           ids<-na.id(data.frame(ids))
81       }
82
83    ## make ids factor if they are character
84    for(i in 1:ncol(ids)){
85        if (is.character(ids[[i]]))
86            ids[[i]]<-factor(ids[[i]])
87    }
88
89    na.prob<-na.failsafe("missing values in `prob'")
90    if(inherits(probs,"formula")){
91      mf<-substitute(model.frame(probs,data=data,na.action=na.prob))
92      probs<-eval.parent(mf)
93    }
94
95    na.weight<-na.failsafe("missing values in `weights'")
96    if(inherits(weights,"formula")){
97      mf<-substitute(model.frame(weights,data=data,na.action=na.weight))
98      weights<-eval.parent(mf)
99     } else if (!is.null(weights))
100         weights<-na.weight(data.frame(weights))
101    if(!is.null(weights)){
102      if (!is.null(probs))
103         stop("Can't specify both sampling weights and probabilities")
104       else
105         probs<-as.data.frame(1/as.matrix(weights))
106     }
107
108
109
110    na.strata<-na.failsafe("missing values in `strata'")
111    if (!is.null(strata)){
112      if(inherits(strata,"formula")){
113        mf<-substitute(model.frame(strata,data=data, na.action=na.strata))
114        strata<-eval.parent(mf)
115      }
116      if (!is.list(strata))
117        strata<-data.frame(strata=strata)
118      has.strata<-TRUE
119      for(i in 1:NCOL(strata)){ ##drop empty strata
120          if (is.factor(strata[[i]]))
121              strata[[i]]<-as.factor(as.character(strata[[i]]))
122          }
123    } else {
124      has.strata <-FALSE
125      strata<-na.strata(as.data.frame(matrix(1, nrow=NROW(ids), ncol=NCOL(ids))))
126    }
127
128
129    if (inherits(variables,"formula")){
130        mf<-substitute(model.frame(variables,data=data,na.action=na.pass))
131        variables <- eval.parent(mf)
132    } else if (is.null(variables)){
133        variables<-data
134    } else
135        variables<-do.call("data.frame",variables)
136
137
138    na.fpc<-na.failsafe("missing values in `fpc'")
139    if (inherits(fpc,"formula")){
140      mf<-substitute(model.frame(fpc,data=data,na.action=na.fpc))
141      fpc<-eval.parent(mf)
142    }
143
144  ## check for only one PSU: probably a typo
145  if ((length(unique(ids[,1]))==1) && !(nest && has.strata)){
146    stop("Design has only one primary sampling unit")
147  }
148
149      ## force subclusters nested in clusters
150      if (NCOL(ids)>1){
151        N<-ncol(ids)
152        for(i in 2:N){
153          ids[,i]<-do.call("interaction", ids[,1:i,drop=FALSE])
154        }
155      }
156      ## force clusters nested in strata
157      if (nest && has.strata && NCOL(ids)){
158        N<-NCOL(ids)
159        NS<-NCOL(strata)
160        for(i in 1:N)
161          ids[,i]<-do.call("interaction",
162                           c(strata[,1:min(i,NS),drop=FALSE], ids[,i,drop=FALSE]))
163      }
164
165    ## check if clusters nested in strata
166     if (check.strata && nest)
167      warning("No point in check.strata=TRUE if nest=TRUE")
168    if(check.strata && !is.null(strata) && NCOL(ids)){
169       sc<-(rowSums(table(ids[,1],strata[,1])>0))
170       if(any(sc>1)) stop("Clusters not nested in strata at top level; you may want nest=TRUE.")
171    }
172
173      ## force substrata nested in clusters
174      N<-ncol(ids)
175      NS<-ncol(strata)
176      if (N>1){
177        for(i in 2:N)
178          strata[,i]<-interaction(strata[,min(i,NS)], ids[,i-1])
179      }
180
181    ## PPS: valid choices currently are FALSE and "brewer"
182    if (is.logical(pps) && pps) stop("'pps' must be FALSE or a character string")
183    if (is.character(pps)) {
184      pps<-TRUE
185    }
186
187    ## Finite population correction: specified per observation
188    ## Also incorporates design sample sizes formerly in nPSU
189
190      if (!is.null(fpc) && !is.numeric(fpc) && !is.data.frame(fpc))
191        stop("fpc must be a matrix or dataframe or NULL")
192
193      fpc<-as.fpc(fpc,strata, ids, pps=pps)
194
195      ## if FPC specified, but no weights, use it for weights
196    if (is.null(probs) && is.null(weights)){
197      if (is.null(fpc$popsize)){
198        if (missing(probs) && missing(weights))
199          warning("No weights or probabilities supplied, assuming equal probability")
200        probs<-rep(1,nrow(ids))
201      } else {
202        probs<-1/weights(fpc, final=FALSE)
203      }
204    }
205
206
207    if (is.numeric(probs) && length(probs)==1)
208      probs<-rep(probs, NROW(variables))
209
210    if (length(probs)==0) probs<-rep(1,NROW(variables))
211
212    if (NCOL(probs)==1) probs<-data.frame(probs)
213
214    rval<-list(cluster=ids)
215    rval$strata<-strata
216    rval$has.strata<-has.strata
217    rval$prob<- apply(probs,1,prod)
218    rval$allprob<-probs
219    rval$call<-match.call()
220    rval$variables<-variables
221    rval$fpc<-fpc
222    rval$call<-sys.call(-1)
223    rval$pps<-pps
224    class(rval)<-c("survey.design2","survey.design")
225    rval
226}
227
228onestrat<-function(x,cluster,nPSU,fpc, lonely.psu,stratum=NULL,stage=1,cal=cal){
229
230  if (is.null(fpc))
231      f<-rep(1,NROW(x))
232  else{
233      f<-ifelse(fpc==Inf, 1, (fpc-nPSU)/fpc)
234  }
235
236  if (nPSU>1)
237      scale<-f*nPSU/(nPSU-1)
238  else
239      scale<-f
240  if (all(f<0.0000001))## self-representing stratum
241      return(matrix(0,NCOL(x),NCOL(x)))
242
243  scale<-scale[!duplicated(cluster)]
244
245  x<-rowsum(x,cluster)
246  nsubset<-nrow(x)
247
248  if (nsubset<nPSU) {
249    ##can't be PPS, so scale must be a constant
250    x<-rbind(x,matrix(0,ncol=ncol(x),nrow=nPSU-nrow(x)))
251    scale<-rep(scale[1],NROW(x))
252  }
253  if (lonely.psu!="adjust" || nsubset>1 ||
254      (nPSU>1 & !getOption("survey.adjust.domain.lonely")))
255      x<-sweep(x, 2, colMeans(x), "-")
256
257  if (nsubset==1 && nPSU>1 && getOption("survey.adjust.domain.lonely")){
258      warning("Stratum (",stratum,") has only one PSU at stage ",stage)
259      if (lonely.psu=="average" && getOption("survey.adjust.domain.lonely"))
260          scale<-NA
261    }
262
263  if (nPSU>1){
264      return(crossprod(x*sqrt(scale)))
265  } else {
266      rval<-switch(lonely.psu,
267                   certainty=crossprod(x*sqrt(scale)),
268                   remove=crossprod(x*sqrt(scale)),
269                   adjust=crossprod(x*sqrt(scale)),
270                   average=NA*crossprod(x),
271                   fail= stop("Stratum (",stratum,") has only one PSU at stage ",stage),
272                   stop("Can't handle lonely.psu=",lonely.psu)
273            )
274      rval
275  }
276}
277
278
279onestage<-function(x, strata, clusters, nPSU, fpc, lonely.psu=getOption("survey.lonely.psu"),stage=0, cal){
280   if (NROW(x)==0)
281        return(matrix(0,NCOL(x),NCOL(x)))
282  stratvars<- tapply(1:NROW(x), list(factor(strata)), function(index){
283             onestrat(x[index,,drop=FALSE], clusters[index],
284             nPSU[index][1], fpc[index], ##changed from fpc[index][1], to allow pps(brewer)
285             lonely.psu=lonely.psu,stratum=strata[index][1], stage=stage,cal=cal)
286  })
287  p<-NCOL(x)
288  nstrat<-length(unique(strata))
289  nokstrat<-sum(sapply(stratvars,function(m) !any(is.na(m))))
290  apply(array(unlist(stratvars),c(p,p,length(stratvars))),1:2,sum,na.rm=TRUE)*nstrat/nokstrat
291}
292
293
294svyrecvar<-function(x, clusters,  stratas, fpcs, postStrata=NULL,
295                    lonely.psu=getOption("survey.lonely.psu"),
296                    one.stage=getOption("survey.ultimate.cluster")){
297
298  x<-as.matrix(x)
299  cal<-NULL
300
301  ## Remove post-stratum means, which may cut across clusters
302  ## Also center the data using any "g-calibration" models
303  if(!is.null(postStrata)){
304    for (psvar in postStrata){
305      if (inherits(psvar, "greg_calibration")) {
306        if (psvar$stage==0){
307          ## G-calibration at population level
308          x<-as.matrix(qr.resid(psvar$qr,x/psvar$w)*psvar$w)
309        } else {
310          ## G-calibration within clusters
311          cal<-c(cal, list(psvar))
312        }
313      } else if (inherits(psvar, "raking")){
314        ## raking by iterative proportional fitting
315        for(iterations in 1:10){
316          for(margin in psvar){
317            psw<-attr(margin, "weights")
318            x<- x - psw*apply(x/psw, 2, ave, margin)
319          }
320        }
321      } else {
322        ## ordinary post-stratification
323        psw<-attr(psvar, "weights")
324        oldw<-attr(psvar, "oldweights")
325        if (is.null(oldw)) oldw<-rep(1,length(psw))
326        zeroes<-which(psw==0 & oldw==0)
327        if (length(zeroes)) psw[zeroes]=1
328        psvar<-as.factor(psvar)
329        psmeans<-rowsum(x*oldw/psw,psvar,reorder=TRUE)/as.vector(by(oldw,psvar,sum))
330        x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
331      }
332    }
333  }
334
335  multistage(x, clusters,stratas,fpcs$sampsize, fpcs$popsize,
336             lonely.psu=getOption("survey.lonely.psu"),
337             one.stage=one.stage,stage=1,cal=cal)
338}
339
340multistage<-function(x, clusters,  stratas, nPSUs, fpcs,
341                    lonely.psu=getOption("survey.lonely.psu"),
342                     one.stage=FALSE,stage,cal){
343
344  n<-NROW(x)
345
346
347  v <- onestage(x,stratas[,1], clusters[,1], nPSUs[,1],
348                fpcs[,1], lonely.psu=lonely.psu,stage=stage,cal=cal)
349
350  if (one.stage!=TRUE && !is.null(fpcs) && NCOL(clusters)>1) {
351    v.sub<-by(1:n, list(as.numeric(clusters[,1])), function(index){
352      ## residuals for G-calibration using population information
353      ## only on clusters at this stage.
354      for(cali in cal){
355        if (cali$stage != stage)
356          next
357        j<-match(clusters[index,1],cali$index)
358        if (length(unique(j))!=1)
359          stop("Internal problem in g-calibration data: stage",stage,
360               ", cluster", j)
361        j<-j[[1]]
362        x[index,]<-as.matrix(qr.resid(cali$qr[[j]], x[index,,drop=FALSE]/cali$w[[j]])*cali$w[[j]])
363      }
364      multistage(x[index,,drop=FALSE], clusters[index,-1,drop=FALSE],
365                 stratas[index,-1,drop=FALSE], nPSUs[index,-1,drop=FALSE],
366                 fpcs[index,-1,drop=FALSE],
367                 lonely.psu=lonely.psu,one.stage=one.stage-1,
368                 stage=stage+1,cal=cal)*nPSUs[index[1],1]/fpcs[index[1],1]
369    })
370
371    for(i in 1:length(v.sub))
372      v<-v+v.sub[[i]]
373  }
374  dimnames(v)<-list(colnames(x),colnames(x))
375  v
376}
377
378
379## fpc not given are zero: full sampling.
380as.fpc<-function(df,strata,ids,pps=FALSE){
381
382  count<-function(x) sum(!duplicated(x))
383
384  sampsize<-matrix(ncol=ncol(ids),nrow=nrow(ids))
385  for(i in 1:ncol(ids))
386    split(sampsize[,i],strata[,i])<-lapply(split(ids[,i],strata[,i]),count)
387
388  if (is.null(df)){
389    ## No fpc
390    rval<-list(popsize=NULL, sampsize=sampsize)
391    class(rval)="survey_fpc"
392    return(rval)
393  }
394
395  fpc<-as.matrix(df)
396  if (xor(ispopsize<-any(df>1), all(df>=1))){
397    big<-which(fpc>=1,arr.ind=TRUE)
398    small<-which(fpc<1,arr.ind=TRUE)
399    cat("record",big[1,1]," stage",big[1,2],": fpc=", fpc[big[1,,drop=FALSE]],"\n")
400    cat("record",small[1,1]," stage ",small[1,2],": fpc=", fpc[small[1,,drop=FALSE]],"\n")
401    stop("Must have all fpc>=1 or all fpc<=1")
402  }
403
404  if (ispopsize){
405    if(pps) stop("fpc must be specified as sampling fraction for PPS sampling")
406    popsize<-fpc
407  } else {
408    popsize<-sampsize/(fpc)
409  }
410  if (any(popsize<sampsize)){
411    toobig<-which(popsize<sampsize,arr.ind=TRUE)
412    cat("record",toobig[1,1],"stage",toobig[1,2],": popsize=",popsize[toobig[1,,drop=FALSE]],
413        " sampsize=", sampsize[toobig[1,,drop=FALSE]],"\n")
414    stop("FPC implies >100% sampling in some strata")
415  }
416  if (!ispopsize && any(is.finite(popsize) & (popsize>1e10))){
417    big<-which(popsize>1e10 & is.finite(popsize),arr.ind=TRUE)
418    warning("FPC implies population larger than ten billion (record",big[1,1]," stage ",big[1,2],")")
419  }
420  if(!pps){
421    ## check that fpc is constant within strata.
422    for(i in 1:ncol(popsize)){
423      diff<-by(popsize[,i], list(strata[,i]), count)
424      if (any(as.vector(diff)>1)){
425        j<-which(as.vector(diff)>1)[1]
426        warning("`fpc' varies within strata: stratum ",names(diff)[j], " at stage ",i)
427      }
428    }
429  } else{
430    ## check that fpc is constant with clusters
431     diff<-by(popsize[,i], list(ids[,i]), count)
432      if (any(as.vector(diff)>1)){
433        j<-which(as.vector(diff)>1)[1]
434        warning("`fpc' varies within cluster: cluster ",names(diff)[j], " at stage ",i)
435      }
436   }
437
438
439  rval<-list(popsize=popsize, sampsize=sampsize)
440  class(rval)<-"survey_fpc"
441  rval
442}
443
444"weights.survey_fpc"<-function(object,final=TRUE,...){
445  if (is.null(object$popsize) || any(object$popsize>1e12))
446    stop("Weights not supplied and can't be computed from fpc.")
447  if (final) {
448    pop<-apply(object$popsize,1,prod)
449    samp<-apply(object$sampsize,1,prod)
450    pop/samp
451  } else {
452    object$popsize/object$sampsize
453  }
454}
455
456
457
458
459print.survey.design2<-function(x,varnames=FALSE,design.summaries=FALSE,...){
460  n<-NROW(x$cluster)
461  if (x$has.strata) cat("Stratified ")
462  un<-length(unique(x$cluster[,1]))
463  if(n==un){
464    cat("Independent Sampling design")
465    is.independent<-TRUE
466    if (is.null(x$fpc$popsize))
467      cat(" (with replacement)\n")
468    else cat("\n")
469  } else {
470    cat(NCOL(x$cluster),"- level Cluster Sampling design")
471    if (is.null(x$fpc$popsize))
472      cat(" (with replacement)\n")
473    else cat("\n")
474    nn<-lapply(x$cluster,function(i) length(unique(i)))
475    cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep=""))
476    is.independent<-FALSE
477  }
478
479  print(x$call)
480  if (design.summaries){
481    cat("Probabilities:\n")
482    print(summary(x$prob))
483    if(x$has.strata){
484      if (NCOL(x$cluster)>1)
485        cat("First-level ")
486      cat("Stratum Sizes: \n")
487      oo<-order(unique(x$strata[,1]))
488      a<-rbind(obs=table(x$strata[,1]),
489	       design.PSU=x$fpc$sampsize[!duplicated(x$strata[,1]),1][oo],
490               actual.PSU=table(x$strata[!duplicated(x$cluster[,1]),1]))
491      print(a)
492    }
493    if (!is.null(x$fpc$popsize)){
494      if (x$has.strata) {
495        cat("Population stratum sizes (PSUs): \n")
496        s<-!duplicated(x$strata[,1])
497        a<-x$fpc$popsize[s,1]
498        names(a)<-x$strata[s,1]
499        a<-a[order(names(a))]
500        print(a)
501      } else {
502        cat("Population size (PSUs):",x$fpc$popsize[1,1],"\n")
503      }
504    }
505  }
506  if (varnames){
507    cat("Data variables:\n")
508    print(colnames(x))
509  }
510  invisible(x)
511}
512
513
514summary.survey.design2<-function(object,...){
515  class(object)<-c("summary.survey.design2",class(object))
516  object
517}
518
519print.summary.survey.design2<-function(x,...){
520  y<-x
521  class(y)<-c("survey.design2",class(x))
522  print(y,varnames=TRUE,design.summaries=TRUE,...)
523}
524
525
526.svycheck<-function(object){
527  if (inherits(object,"survey.design") &&
528      !is.null(object$nPSU))
529    warning("This is an old-style design object. Please use as.svydesign2 to update it.")
530}
531
532as.svydesign2<-function(object){
533  if (inherits(object,"survey.design2"))
534    return(object)
535  if (!inherits(object,"survey.design"))
536    stop("This function is for updating old-style survey.design objects")
537
538
539  count<-function(x) length(unique(x))
540
541  strata<-data.frame(one=object$strata)
542  if ((nc<-ncol(object$cluster))>1){
543    for(i in 2:nc){
544      strata<-cbind(strata,object$cluster[,i-1])
545    }
546  }
547
548  sampsize<-matrix(ncol=nc,nrow=nrow(object$cluster))
549
550  sampsize[,1]<-object$nPSU[match(object$strata, names(object$nPSU))]
551  if (nc>1){
552    for(i in 2:nc){
553      split(sampsize[,i],strata[,i])<-lapply(split(object$cluster[,i],strata[,i]),count)
554    }
555  }
556
557  if (!is.null(object$fpc)){
558    popsize<-sampsize
559    popsize[,1]<-object$fpc$N[match(object$strata,object$fpc$strata)]
560  } else popsize<-NULL
561  if (nc>1 && !is.null(object$fpc)){
562    warning("Assuming complete sampling at stages 2 -",nc)
563  }
564
565  fpc<-list(popsize=popsize,sampsize=sampsize)
566  class(fpc)<-"survey_fpc"
567
568
569  object$fpc<-fpc
570  object$strata<-strata
571  object$nPSU<-NULL
572  class(object)<-c("survey.design2","survey.design")
573  object
574
575}
576
577is.pps<-function(x) if(is.null(x$pps)) FALSE else (x$pps!=FALSE)
578
579"[.survey.design2"<-function (x,i, ..., drop=TRUE){
580  if (!missing(i)){
581      if (is.calibrated(x) || is.pps(x) || !drop){
582          ## Set weights to zero: no memory saving possible
583          ## There should be an easier way to complement a subscript..
584          if (is.logical(i))
585              x$prob[!i]<-Inf
586          else if (is.numeric(i) && length(i))
587              x$prob[-i]<-Inf
588          else {
589              tmp<-x$prob[i,]
590              x$prob<-rep(Inf, length(x$prob))
591              x$prob[i,]<-tmp
592          }
593          index<-is.finite(x$prob)
594          psu<-!duplicated(x$cluster[index,1])
595          tt<-table(x$strata[index,1][psu])
596          if(any(tt==1) && getOption("survey.adjust.domain.lonely")){
597              warning(sum(tt==1)," strata have only one PSU in this subset.")
598          }
599      } else {
600          ## subset everything.
601          if (!is.null(x$variables)) ## phase 2 of twophase design
602              x$variables<-"[.data.frame"(x$variables,i,..1,drop=FALSE)
603          x$cluster<-x$cluster[i,,drop=FALSE]
604          x$prob<-x$prob[i]
605          x$allprob<-x$allprob[i,,drop=FALSE]
606          x$strata<-x$strata[i,,drop=FALSE]
607          x$fpc$sampsize<-x$fpc$sampsize[i,,drop=FALSE]
608          x$fpc$popsize<-x$fpc$popsize[i,,drop=FALSE]
609      }
610
611  } else {
612      if(!is.null(x$variables))
613          x$variables<-x$variables[,..1,drop=FALSE]
614  }
615
616  x
617}
618
619svytotal.survey.design2<-function(x,design, na.rm=FALSE, deff=FALSE,influence=FALSE,...){
620
621
622    if (inherits(x,"formula")){
623        ## do the right thing with factors
624        mf<-model.frame(x,design$variables,na.action=na.pass)
625        xx<-lapply(attr(terms(x),"variables")[-1],
626                   function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
627        cols<-sapply(xx,NCOL)
628        x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
629        scols<-c(0,cumsum(cols))
630        for(i in 1:length(xx)){
631            x[,scols[i]+1:cols[i]]<-xx[[i]]
632        }
633        colnames(x)<-do.call("c",lapply(xx,colnames))
634    } else{
635        if(typeof(x) %in% c("expression","symbol"))
636            x<-eval(x, design$variables)
637        else {
638            if(is.data.frame(x) && any(sapply(x,is.factor))){
639                xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
640                cols<-sapply(xx,NCOL)
641                scols<-c(0,cumsum(cols))
642                cn<-character(sum(cols))
643                for(i in 1:length(xx))
644                    cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
645                x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
646                for(i in 1:length(xx)){
647                    x[,scols[i]+1:cols[i]]<-xx[[i]]
648                }
649                colnames(x)<-cn
650            }
651        }
652    }
653    x<-as.matrix(x)
654
655    if (na.rm){
656        nas<-rowSums(is.na(x))
657        design<-design[nas==0,]
658        if (length(nas)>length(design$prob))
659            x<-x[nas==0,,drop=FALSE]
660        else
661            x[nas>0,]<-0
662    }
663
664    N<-sum(1/design$prob)
665    total <- colSums(x/as.vector(design$prob),na.rm=na.rm)
666    class(total)<-"svystat"
667    attr(total, "var")<-v<-svyrecvar(x/design$prob,design$cluster,
668                                     design$strata, design$fpc,
669                                   postStrata=design$postStrata)
670    attr(total,"statistic")<-"total"
671    if (influence){
672        attr(total, "influence")<-x/design$prob
673        }
674
675    if (is.character(deff) || deff){
676      nobs<-sum(weights(design)!=0)
677      if (deff=="replace")
678        vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2/nobs
679      else
680        vsrs<-svyvar(x,design,na.rm=na.rm)*sum(weights(design))^2*(N-nobs)/(N*nobs)
681      attr(total, "deff")<-v/vsrs
682    }
683
684
685  return(total)
686}
687
688
689svymean.survey.design2<-function(x,design, na.rm=FALSE,deff=FALSE,influence=FALSE,...){
690
691  if (inherits(x,"formula")){
692    ## do the right thing with factors
693    mf<-model.frame(x,design$variables,na.action=na.pass)
694    xx<-lapply(attr(terms(x),"variables")[-1],
695               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
696    cols<-sapply(xx,NCOL)
697    x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
698    scols<-c(0,cumsum(cols))
699    for(i in 1:length(xx)){
700      x[,scols[i]+1:cols[i]]<-xx[[i]]
701    }
702    colnames(x)<-do.call("c",lapply(xx,colnames))
703  }
704  else {
705      if(typeof(x) %in% c("expression","symbol"))
706          x<-eval(x, design$variables)
707      else if(is.data.frame(x) && any(sapply(x,is.factor))){
708          xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
709          cols<-sapply(xx,NCOL)
710          scols<-c(0,cumsum(cols))
711          cn<-character(sum(cols))
712          for(i in 1:length(xx))
713              cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
714          x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
715          for(i in 1:length(xx)){
716              x[,scols[i]+1:cols[i]]<-xx[[i]]
717          }
718          colnames(x)<-cn
719      }
720    }
721  x<-as.matrix(x)
722
723  if (na.rm){
724    nas<-rowSums(is.na(x))
725    design<-design[nas==0,]
726    if (length(nas)>length(design$prob))
727        x<-x[nas==0,,drop=FALSE]
728    else
729        x[nas>0,]<-0
730  }
731
732  pweights<-1/design$prob
733  psum<-sum(pweights)
734  average<-colSums(x*pweights/psum)
735  x<-sweep(x,2,average)
736  v<-svyrecvar(x*pweights/psum,design$cluster,design$strata, design$fpc,
737              postStrata=design$postStrata)
738  attr(average,"var")<-v
739    attr(average,"statistic")<-"mean"
740    if (influence){
741        attr(average,"influence") <- x*pweights/psum
742    }
743  class(average)<-"svystat"
744  if (is.character(deff) || deff){
745      nobs<-sum(weights(design)!=0)
746      if(deff=="replace"){
747        vsrs<-svyvar(x,design,na.rm=na.rm)/(nobs)
748      } else {
749        if(psum<nobs) {
750          vsrs<-NA*v
751          warning("Sample size greater than population size: are weights correctly scaled?")
752        } else{
753          vsrs<-svyvar(x,design,na.rm=na.rm)*(psum-nobs)/(psum*nobs)
754        }
755      }
756      attr(average, "deff")<-v/vsrs
757  }
758
759  return(average)
760}
761
762svyratio.survey.design2<-function(numerator=formula, denominator, design, separate=FALSE,na.rm=FALSE,
763                                  formula,covmat=FALSE,deff=FALSE,influence=FALSE,...){
764
765    if (separate){
766      strats<-sort(unique(design$strata[,1]))
767      if (!design$has.strata)
768          warning("Separate and combined ratio estimators are the same for unstratified designs")
769      if(influence)
770          warning("influence functions not available for separate ratio estimators")
771      rval<-list(ratios=lapply(strats,
772                   function(s) {
773                     tmp<-svyratio(numerator, denominator,
774                                   subset(design, design$strata[,1] %in% s),
775                                   separate=FALSE,...)
776                     attr(tmp,"call")<-bquote(Stratum==.(s))
777                     tmp}))
778      names(rval$ratios)<-strats
779
780      class(rval)<-c("svyratio_separate")
781      rval$call<-sys.call()
782      rval$strata<-strats
783      return(rval)
784    }
785
786    if (inherits(numerator,"formula"))
787        numerator<-model.frame(numerator,design$variables,na.action=na.pass)
788    else if(typeof(numerator) %in% c("expression","symbol"))
789        numerator<-eval(numerator, design$variables)
790    if (inherits(denominator,"formula"))
791        denominator<-model.frame(denominator,design$variables,na.action=na.pass)
792    else if(typeof(denominator) %in% c("expression","symbol"))
793        denominator<-eval(denominator, design$variables)
794
795    numerator<-as.matrix(numerator)
796    denominator<-as.matrix(denominator)
797    nn<-NCOL(numerator)
798    nd<-NCOL(denominator)
799
800    all<-cbind(numerator,denominator)
801    nas<-!complete.cases(all)
802    if ((na.rm==TRUE) && any(nas)){
803      design<-design[!nas,]
804      if (NROW(design$cluster) == NROW(all)){
805        ## subset by zero weights
806        all[nas,]<-1
807        numerator[nas,]<-0
808        denominator[nas,]<-1
809      } else {
810        ## subset by actually dropping rows
811        all<-all[!nas,,drop=FALSE]
812        numerator<-numerator[!nas,,drop=FALSE]
813        denominator<-denominator[!nas,,drop=FALSE]
814      }
815    }
816    allstats<-svytotal(all,design)
817    rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
818
819
820    vars<-matrix(ncol=nd,nrow=nn)
821
822    if (deff=="replace" || deff) deffs<-matrix(ncol=nd,nrow=nn)
823
824    for(i in 1:nn){
825      for(j in 1:nd){
826        r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
827        vars[i,j]<-svyrecvar(r*1/design$prob, design$cluster, design$strata, design$fpc,
828                            postStrata=design$postStrata)
829        if (deff=="replace" || deff){
830          deffs[i,j]<-deff(svytotal(r,design,deff=deff))
831        }
832      }
833    }
834    if (covmat){
835        ii<-rep(1:nn,nd)
836        jj<-rep(1:nd,each=nn)
837        allr<-sweep(numerator[,ii]-t(as.vector(rval$ratio)*t(denominator[,jj,drop=FALSE])),
838                    2, colSums(denominator[,jj,drop=FALSE]/design$prob),"/")
839        vcovmat<-svyrecvar(allr*1/design$prob, design$cluster, design$strata, design$fpc,
840                           postStrata=design$postStrata)
841        colnames(vcovmat)<-colnames(denominator)[ii]
842        rval$vcov<-vcovmat
843    }
844    colnames(vars)<-colnames(denominator)
845    rownames(vars)<-colnames(numerator)
846    rval$var<-vars
847    if (deff=="replace" || deff)
848        attr(rval,"deff")<-deffs
849
850    attr(rval,"call")<-sys.call()
851    if (influence)
852        attr(rval, "influence")<-r/design$prob
853
854    class(rval)<-"svyratio"
855    rval
856
857  }
858