1
2make.formula<-function(names) formula(paste("~",paste(names,collapse="+")))
3
4dimnames.survey.design<-function(x) dimnames(x$variables)
5dimnames.svyrep.design<-function(x) dimnames(x$variables)
6dimnames.twophase<-function(x) dimnames(x$phase1$sample$variables)
7
8oldsvydesign<-function(ids,probs=NULL,strata=NULL,variables=NULL, fpc=NULL,
9                    data=NULL, nest=FALSE, check.strata=!nest,weights=NULL){
10
11    .Deprecated("svydesign")
12
13    ## less memory-hungry version for sparse tables
14    interaction<-function (..., drop = TRUE) {
15        args <- list(...)
16        narg <- length(args)
17        if (narg == 1 && is.list(args[[1]])) {
18            args <- args[[1]]
19            narg <- length(args)
20        }
21
22        ls<-sapply(args,function(a) length(levels(a)))
23        ans<-do.call("paste",c(lapply(args,as.character),sep="."))
24        ans<-factor(ans)
25        return(ans)
26
27    }
28
29
30    na.failsafe<-function(object,...){
31      if (NCOL(object)==0)
32        object
33      else na.fail(object)
34    }
35
36     if(inherits(ids,"formula")) {
37	 mf<-substitute(model.frame(ids,data=data,na.action=na.failsafe))
38	 ids<-eval.parent(mf)
39	} else if (!is.null(ids))
40            ids<-na.fail(data.frame(ids))
41
42     if(inherits(probs,"formula")){
43	mf<-substitute(model.frame(probs,data=data,na.action=na.failsafe))
44	probs<-eval.parent(mf)
45	}
46
47     if(inherits(weights,"formula")){
48       mf<-substitute(model.frame(weights,data=data,na.action=na.failsafe))
49       weights<-eval.parent(mf)
50     } else if (!is.null(weights))
51         weights<-na.fail(data.frame(weights))
52
53     if(!is.null(weights)){
54       if (!is.null(probs))
55         stop("Can't specify both sampling weights and probabilities")
56       else
57         probs<-1/weights
58     }
59
60
61
62    if (!is.null(strata)){
63      if(inherits(strata,"formula")){
64        mf<-substitute(model.frame(strata,data=data, na.action=na.failsafe))
65        strata<-eval.parent(mf)
66      }
67      if(is.list(strata))
68        strata<-na.fail(do.call("interaction", strata))
69      if (!is.factor(strata))
70        strata<-factor(strata)
71      has.strata<-TRUE
72    } else {
73      strata<-factor(rep(1,NROW(ids)))
74      has.strata <-FALSE
75    }
76
77    if (inherits(variables,"formula")){
78        mf<-substitute(model.frame(variables,data=data,na.action=na.pass))
79        variables <- eval.parent(mf)
80    } else if (is.null(variables)){
81        variables<-data
82    } else
83        variables<-data.frame(variables)
84
85
86     if (inherits(fpc,"formula")){
87       mf<-substitute(model.frame(fpc,data=data,na.action=na.failsafe))
88       fpc<-eval.parent(mf)
89       if (length(fpc))
90         fpc<-fpc[,1]
91     }
92
93    if (is.null(ids) || NCOL(ids)==0)
94	ids<-data.frame(.id=seq(length=NROW(variables)))
95
96     ## force subclusters nested in clusters
97     if (nest && NCOL(ids)>1){
98      N<-ncol(ids)
99      for(i in 2:(N)){
100          ids[,i]<-do.call("interaction", ids[,1:i,drop=TRUE])
101      }
102    }
103     ## force clusters nested in strata
104     if (nest && has.strata && NCOL(ids)){
105       N<-NCOL(ids)
106       for(i in 1:N)
107         ids[,i]<-do.call("interaction", list(strata, ids[,i]))
108     }
109
110    ## check if clusters nested in strata
111     if (check.strata && nest)
112      warning("No point in check.strata=TRUE if nest=TRUE")
113    if(check.strata && !is.null(strata) && NCOL(ids)){
114       sc<-rowSums(table(ids[,1],strata)>0)
115       if(any(sc>1)) stop("Clusters not nested in strata")
116    }
117
118    ## Put degrees of freedom (# of PSUs in each stratum) in object, to
119    ## allow subpopulations
120    if (NCOL(ids)){
121        nPSU<-table(strata[!duplicated(ids[,1])])
122    }
123
124
125    if (!is.null(fpc)){
126
127       if (NCOL(ids)>1){
128         if (all(fpc<1))
129           warning("FPC is not currently supported for multi-stage sampling")
130         else
131           stop("Can't compute FPC from population size for multi-stage sampling")
132       }
133
134       ## Finite population correction: specified per observation
135       if (is.numeric(fpc) && length(fpc)==NROW(variables)){
136         tbl<-by(fpc,list(strata),unique)
137         if (any(sapply(tbl,length)!=1))
138           stop("fpc not constant within strata")
139         fpc<-data.frame(strata=factor(rownames(tbl),levels=levels(strata)),
140                         N=as.vector(tbl))
141       }
142       ## Now reduced to fpc per stratum
143       nstr<-table(strata[!duplicated(ids[[1]])])
144
145       if (all(fpc[,2]<=1)){
146         fpc[,2]<- nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2]
147       } else if (any(fpc[,2]<nstr[match(as.character(fpc[,1]), names(nstr))]))
148         stop("Over 100% sampling in some strata")
149
150     }
151
152    ## if FPC specified, but no weights, use it for weights
153    if (is.null(probs) && is.null(weights) && !is.null(fpc)){
154      pstr<-nstr[match(as.character(fpc[,1]), names(nstr))]/fpc[,2]
155      probs<-pstr[match(as.character(strata),as.character(fpc[,1]))]
156      probs<-as.vector(probs)
157    }
158
159
160    certainty<-rep(FALSE,length(unique(strata)))
161    names(certainty)<-as.character(unique(strata))
162    if (any(nPSU==1)){
163      ## lonely PSUs: are they certainty PSUs?
164      if (!is.null(fpc)){
165        certainty<- fpc$N < 1.01
166        names(certainty)<-as.character(fpc$strata)
167      } else if (all(as.vector(probs)<=1)){
168        certainty<- !is.na(match(as.character(unique(strata)),as.character(strata)[probs > 0.99]))
169        names(certainty)<-as.character(unique(strata))
170      } else {
171        warning("Some strata have only one PSU and I can't tell if they are certainty PSUs")
172      }
173
174    }
175
176    if (is.numeric(probs) && length(probs)==1)
177        probs<-rep(probs, NROW(variables))
178
179    if (length(probs)==0) probs<-rep(1,NROW(variables))
180
181    if (NCOL(probs)==1) probs<-data.frame(probs)
182
183    rval<-list(cluster=ids)
184    rval$strata<-strata
185    rval$has.strata<-has.strata
186    rval$prob<- apply(probs,1,prod)
187    rval$allprob<-probs
188    rval$call<-match.call()
189    rval$variables<-variables
190    rval$fpc<-fpc
191    rval$certainty<-certainty
192    rval$call<-sys.call()
193    rval$nPSU<-nPSU
194    class(rval)<-"survey.design"
195    rval
196  }
197
198print.survey.design<-function(x,varnames=FALSE,design.summaries=FALSE,...){
199  .svycheck(x)
200  n<-NROW(x$cluster)
201  if (x$has.strata) cat("Stratified ")
202  un<-length(unique(x$cluster[,1]))
203  if(n==un){
204    cat("Independent Sampling design\n")
205    is.independent<-TRUE
206  } else {
207    cat(NCOL(x$cluster),"- level Cluster Sampling design\n")
208    nn<-lapply(x$cluster,function(i) length(unique(i)))
209    cat(paste("With (",paste(unlist(nn),collapse=", "),") clusters.\n",sep=""))
210    is.independent<-FALSE
211  }
212  print(x$call)
213  if (design.summaries){
214    cat("Probabilities:\n")
215    print(summary(x$prob))
216    if(x$has.strata){
217      cat("Stratum sizes: \n")
218      a<-rbind(obs=table(x$strata),
219	       design.PSU=x$nPSU,
220               actual.PSU=if(!is.independent || !is.null(x$fpc))
221               table(x$strata[!duplicated(x$cluster[,1])]))
222      print(a)
223    }
224    if (!is.null(x$fpc)){
225      if (x$has.strata) {
226        cat("Population stratum sizes (PSUs): \n")
227        print(x$fpc)
228      } else {
229        cat("Population size (PSUs):",x$fpc[,2],"\n")
230      }
231    }
232  }
233  if (varnames){
234    cat("Data variables:\n")
235    print(names(x$variables))
236  }
237  invisible(x)
238}
239
240"[.survey.design"<-function (x,i, ...){
241
242  if (!missing(i)){
243    if (is.calibrated(x)){
244      tmp<-x$prob[i,]
245      x$prob<-rep(Inf, length(x$prob))
246      x$prob[i,]<-tmp
247    } else {
248      x$variables<-"[.data.frame"(x$variables,i,...,drop=FALSE)
249      x$cluster<-x$cluster[i,,drop=FALSE]
250      x$prob<-x$prob[i]
251      x$allprob<-x$allprob[i,,drop=FALSE]
252      x$strata<-x$strata[i]
253    }
254  } else {
255    x$variables<-x$variables[,...,drop=FALSE]
256  }
257
258  x
259}
260
261"[<-.survey.design"<-function(x, ...,value){
262  if (inherits(value, "survey.design"))
263    value<-value$variables
264  x$variables[...]<-value
265  x
266}
267
268dim.survey.design<-function(x){
269	dim(x$variables)
270}
271
272na.fail.survey.design<-function(object,...){
273	tmp<-na.fail(object$variables,...)
274	object
275}
276
277na.omit.survey.design<-function(object,...){
278  tmp<-na.omit(object$variables,...)
279  omit<-attr(tmp,"na.action")
280  if (length(omit)){
281    object<-object[-omit,]
282    object$variables<-tmp
283    attr(object,"na.action")<-omit
284  }
285  object
286}
287
288na.exclude.survey.design<-function(object,...){
289	tmp<-na.exclude(object$variables,...)
290	exclude<-attr(tmp,"na.action")
291	if (length(exclude)){
292           object<-object[-exclude,]
293	   object$variables<-tmp
294	   attr(object,"na.action")<-exclude
295	}
296	object
297}
298
299
300update.survey.design<-function(object,...){
301
302  dots<-substitute(list(...))[-1]
303  newnames<-names(dots)
304
305  for(j in seq(along=dots)){
306    object$variables[,newnames[j]]<-eval(dots[[j]],object$variables, parent.frame())
307  }
308
309  object$call<-sys.call(-1)
310  object
311}
312
313
314subset.survey.design<-function(x,subset,...){
315        e <- substitute(subset)
316        r <- eval(e, x$variables, parent.frame())
317        r <- r & !is.na(r)
318        x<-x[r,]
319	x$call<-sys.call(-1)
320	x
321}
322
323summary.survey.design<-function(object,...){
324  class(object)<-"summary.survey.design"
325  object
326}
327
328print.summary.survey.design<-function(x,...){
329  y<-x
330  class(y)<-"survey.design"
331  print(y,varnames=TRUE,design.summaries=TRUE,...)
332}
333
334postStratify.survey.design<-function(design, strata, population, partial=FALSE,...){
335
336  if(inherits(strata,"formula")){
337    mf<-substitute(model.frame(strata, data=design$variables,na.action=na.fail))
338    strata<-eval.parent(mf)
339  }
340  strata<-as.data.frame(strata)
341
342  sampletable<-xtabs(I(1/design$prob)~.,data=strata)
343  sampletable<-as.data.frame(sampletable)
344
345  if (inherits(population,"table"))
346    population<-as.data.frame(population)
347  else if (is.data.frame(population))
348    population$Freq <- as.vector(population$Freq) ##allows Freq computed by tapply()
349  else
350    stop("population must be a table or dataframe")
351
352  if (!all(names(strata) %in% names(population)))
353    stop("Stratifying variables don't match")
354  nn<- names(population) %in% names(strata)
355  if (sum(!nn)!=1)
356    stop("stratifying variables don't match")
357
358  names(population)[which(!nn)]<-"Pop.Freq"
359
360  both<-merge(sampletable, population, by=names(strata), all=TRUE)
361
362  samplezero <- both$Freq %in% c(0, NA)
363  popzero <- both$Pop.Freq %in% c(0, NA)
364  both<-both[!(samplezero & popzero),]
365
366  if (any(onlysample<- popzero & !samplezero)){
367    print(both[onlysample,])
368    stop("Strata in sample absent from population. This Can't Happen")
369  }
370  if (any(onlypop <- samplezero & !popzero)){
371    if (partial){
372      both<-both[!onlypop,]
373      warning("Some strata absent from sample: ignored")
374    } else {
375      print(both[onlypop,])
376      stop("Some strata absent from sample: use partial=TRUE to ignore them.")
377    }
378  }
379
380  reweight<-both$Pop.Freq/both$Freq
381  both$label <- do.call("interaction", list(both[,names(strata)]))
382  designlabel <- do.call("interaction", strata)
383  index<-match(designlabel, both$label)
384
385  attr(index,"oldweights")<-1/design$prob
386  design$prob<-design$prob/reweight[index]
387  attr(index,"weights")<-1/design$prob
388  design$postStrata<-c(design$postStrata,list(index))
389
390  ## Do we need to iterate here a la raking to get design strata
391  ## and post-strata both balanced?
392  design$call<-sys.call(-1)
393
394  design
395}
396
397
398svyCprod<-function(x, strata, psu, fpc, nPSU, certainty=NULL, postStrata=NULL,
399                   lonely.psu=getOption("survey.lonely.psu")){
400
401  x<-as.matrix(x)
402  n<-NROW(x)
403
404  ## Remove post-stratum means, which may cut across PSUs
405  if(!is.null(postStrata)){
406    for (psvar in postStrata){
407      if (inherits(psvar, "greg_calibration") || inherits(psvar, "raking"))
408        stop("rake() and calibrate() not supported for old-style design objects")
409      psw<-attr(psvar,"weights")
410      psmeans<-rowsum(x/psw,psvar,reorder=TRUE)/as.vector(table(factor(psvar)))
411      x<- x-psmeans[match(psvar,sort(unique(psvar))),]*psw
412    }
413  }
414
415  ##First collapse over PSUs
416
417  if (is.null(strata)){
418    strata<-rep("1",n)
419    if (!is.null(nPSU))
420        names(nPSU)<-"1"
421  }
422  else
423    strata<-as.character(strata) ##can't use factors as indices in for()'
424
425  if (is.null(certainty)){
426    certainty<-rep(FALSE,length(strata))
427    names(certainty)<-strata
428  }
429
430  if (!is.null(psu)){
431    x<-rowsum(x, psu, reorder=FALSE)
432    strata<-strata[!duplicated(psu)]
433    n<-NROW(x)
434  }
435
436  if (!is.null(nPSU)){
437      obsn<-table(strata)
438      dropped<-nPSU[match(names(obsn),names(nPSU))]-obsn
439      if(sum(dropped)){
440        xtra<-matrix(0,ncol=NCOL(x),nrow=sum(dropped))
441        strata<-c(strata,rep(names(dropped),dropped))
442      	if(is.matrix(x))
443	   x<-rbind(x,xtra)
444        else
445	   x<-c(x,xtra)
446        n<-NROW(x)
447      }
448  } else obsn<-table(strata)
449
450  if(is.null(strata)){
451      x<-t(t(x)-colMeans(x))
452  } else {
453      strata.means<-drop(rowsum(x,strata, reorder=FALSE))/drop(rowsum(rep(1,n),strata, reorder=FALSE))
454      if (!is.matrix(strata.means))
455          strata.means<-matrix(strata.means, ncol=NCOL(x))
456      x<- x- strata.means[ match(strata, unique(strata)),,drop=FALSE]
457  }
458
459  p<-NCOL(x)
460  v<-matrix(0,p,p)
461
462  ss<-unique(strata)
463  for(s in ss){
464      this.stratum <- strata %in% s
465
466      ## original number of PSUs in this stratum
467      ## before missing data/subsetting
468      this.n <-nPSU[match(s,names(nPSU))]
469
470      this.df <- this.n/(this.n-1)
471
472      if (is.null(fpc))
473          this.fpc <- 1
474      else{
475          this.fpc <- fpc[,2][ fpc[,1]==as.character(s)]
476          this.fpc <- (this.fpc - this.n)/this.fpc
477      }
478
479      xs<-x[this.stratum,,drop=FALSE]
480
481      this.certain<-certainty[names(certainty) %in% s]
482
483      ## stratum with only 1 design cluster leads to undefined variance
484      lonely.psu<-match.arg(lonely.psu, c("remove","adjust","fail",
485                                          "certainty","average"))
486      if (this.n==1 && !this.certain){
487        this.df<-1
488        if (lonely.psu=="fail")
489          stop("Stratum ",s, " has only one sampling unit.")
490        else if (lonely.psu!="certainty")
491          warning("Stratum ",s, " has only one sampling unit.")
492        if (lonely.psu=="adjust")
493          xs<-strata.means[match(s,ss),,drop=FALSE]
494      } else if (obsn[match(s,names(obsn))]==1 && !this.certain){
495        ## stratum with only 1 cluster left after subsetting
496        warning("Stratum ",s," has only one PSU in this subset.")
497        if (lonely.psu=="adjust")
498          xs<-strata.means[match(s,ss),,drop=FALSE]
499      }
500      ## add it up
501      if (!this.certain)
502        v<-v+crossprod(xs)*this.df*this.fpc
503    }
504  if (lonely.psu=="average"){
505    v<- v/(1-mean(obsn==1 & !certainty))
506  }
507  v
508}
509
510
511
512svymean<-function(x, design,na.rm=FALSE,...){
513  .svycheck(design)
514  UseMethod("svymean",design)
515}
516
517svymean.survey.design<-function(x,design, na.rm=FALSE,deff=FALSE, influence=FALSE,...){
518
519  if (!inherits(design,"survey.design"))
520    stop("design is not a survey design")
521
522  if (inherits(x,"formula")){
523    ## do the right thing with factors
524    mf<-model.frame(x,design$variables,na.action=na.pass)
525    xx<-lapply(attr(terms(x),"variables")[-1],
526               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
527    cols<-sapply(xx,NCOL)
528    x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
529    scols<-c(0,cumsum(cols))
530    for(i in 1:length(xx)){
531      x[,scols[i]+1:cols[i]]<-xx[[i]]
532    }
533    colnames(x)<-do.call("c",lapply(xx,colnames))
534  }
535  else if(typeof(x) %in% c("expression","symbol"))
536    x<-eval(x, design$variables)
537
538  x<-as.matrix(x)
539
540  if (na.rm){
541    nas<-rowSums(is.na(x))
542            design<-design[nas==0,]
543    x<-x[nas==0,,drop=FALSE]
544  }
545
546  pweights<-1/design$prob
547  psum<-sum(pweights)
548  average<-colSums(x*pweights/psum)
549  x<-sweep(x,2,average)
550  v<-svyCprod(x*pweights/psum,design$strata,design$cluster[[1]], design$fpc,
551              design$nPSU,design$certainty, design$postStrata)
552  attr(average,"var")<-v
553    attr(average,"statistic")<-"mean"
554  if(influence)
555    attr(average, "influence")<-x*pweights/psum
556  class(average)<-"svystat"
557  if (is.character(deff) || deff){
558    nobs<-NROW(design$cluster)
559    vsrs<-svyvar(x,design,na.rm=na.rm)/nobs
560    vsrs<-vsrs*(psum-nobs)/psum
561    attr(average, "deff")<-v/vsrs
562  }
563
564  return(average)
565}
566
567
568print.svystat<-function(x,...){
569    vv<-attr(x,"var")
570    if (is.matrix(vv))
571        m<-cbind(x,sqrt(diag(vv)))
572    else
573        m<-cbind(x,sqrt(vv))
574    hasdeff<-!is.null(attr(x,"deff"))
575    if (hasdeff) {
576        m<-cbind(m,deff(x))
577        colnames(m)<-c(attr(x,"statistic"),"SE","DEff")
578    } else {
579        colnames(m)<-c(attr(x,"statistic"),"SE")
580    }
581    printCoefmat(m)
582}
583
584as.data.frame.svystat<-function(x,...){
585  rval<-data.frame(statistic=coef(x),SE=SE(x))
586  names(rval)[1]<-attr(x,"statistic")
587  if (!is.null(attr(x,"deff")))
588    rval<-cbind(rval,deff=deff(x))
589  rval
590}
591
592coef.svystat<-function(object,...){
593  attr(object,"statistic")<-NULL
594  attr(object,"deff")<-NULL
595  attr(object,"var")<-NULL
596  unclass(object)
597}
598
599vcov.svystat<-function(object,...){
600  as.matrix(attr(object,"var"))
601}
602
603influence.svystat<-function(object,...){
604  attr(object,"influence")
605}
606
607SE.svystat<-function(object,...){
608 v<-vcov(object)
609 if (!is.matrix(v) || NCOL(v)==1) sqrt(v) else sqrt(diag(v))
610}
611
612deff <- function(object,quietly=FALSE,...) UseMethod("deff")
613
614deff.default <- function(object, quietly=FALSE,...){
615  rval<-attr(object,"deff")
616  if (is.null(rval)) {
617    if(!quietly)
618      warning("object has no design effect information")
619  } else rval<-diag(as.matrix(rval))
620  rval
621}
622
623cv<-function(object,...) UseMethod("cv")
624
625cv.default<-function(object, warn=TRUE, ...){
626  rval<-SE(object)/coef(object)
627  if (warn && any(coef(object)<0,na.rm=TRUE)) warning("CV may not be useful for negative statistics")
628  rval
629}
630
631
632svytotal<-function(x,design,na.rm=FALSE,...){
633  .svycheck(design)
634  UseMethod("svytotal",design)
635}
636svytotal.survey.design<-function(x,design, na.rm=FALSE, deff=FALSE,influence=FALSE,...){
637
638  if (!inherits(design,"survey.design"))
639    stop("design is not a survey design")
640
641    if (inherits(x,"formula")){
642    ## do the right thing with factors
643    mf<-model.frame(x,design$variables,na.action=na.pass)
644    xx<-lapply(attr(terms(x),"variables")[-1],
645               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
646    cols<-sapply(xx,NCOL)
647    x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
648    scols<-c(0,cumsum(cols))
649    for(i in 1:length(xx)){
650      x[,scols[i]+1:cols[i]]<-xx[[i]]
651    }
652    colnames(x)<-do.call("c",lapply(xx,colnames))
653  } else if(typeof(x) %in% c("expression","symbol"))
654      x<-eval(x, design$variables)
655
656  x<-as.matrix(x)
657
658  if (na.rm){
659    nas<-rowSums(is.na(x))
660    design<-design[nas==0,]
661    x<-x[nas==0,,drop=FALSE]
662  }
663
664  N<-sum(1/design$prob)
665  m <- svymean(x, design, na.rm=na.rm)
666  total<-m*N
667  attr(total, "var")<-v<-svyCprod(x/design$prob,design$strata,
668                                  design$cluster[[1]], design$fpc,
669                                  design$nPSU,design$certainty,design$postStrata)
670    attr(total,"statistic")<-"total"
671    if (influence){
672        attr(total,"influence")<-x/design$prob
673        }
674  if (is.character(deff) || deff){
675    vsrs<-svyvar(x,design)*sum(weights(design)^2)
676    vsrs<-vsrs*(N-NROW(design$cluster))/N
677    attr(total,"deff")<-v/vsrs
678  }
679  return(total)
680}
681
682svyvar<-function(x, design, na.rm=FALSE,...){
683  .svycheck(design)
684  UseMethod("svyvar",design)
685}
686svyvar.survey.design<-function(x, design, na.rm=FALSE,...){
687
688	if (inherits(x,"formula"))
689            x<-model.frame(x,model.frame(design),na.action=na.pass)
690	else if(typeof(x) %in% c("expression","symbol"))
691            x<-eval(x, design$variables)
692
693	n<-sum(weights(design,"sampling")!=0)
694	xbar<-svymean(x,design, na.rm=na.rm)
695	if(NCOL(x)==1) {
696            x<-x-xbar
697            v<-svymean(x*x*n/(n-1),design, na.rm=na.rm)
698            attr(v,"statistic")<-"variance"
699            return(v)
700	}
701	x<-t(t(x)-xbar)
702	p<-NCOL(x)
703	a<-matrix(rep(x,p),ncol=p*p)
704	b<-x[,rep(1:p,each=p)]
705        ## Kish uses the n-1 divisor, so it affects design effects
706	v<-svymean(a*b*n/(n-1),design, na.rm=na.rm)
707	vv<-matrix(v,ncol=p)
708        dimnames(vv)<-list(names(xbar),names(xbar))
709        attr(vv,"var")<-attr(v,"var")
710        attr(vv,"statistic")<-"variance"
711        class(vv)<-c("svyvar","svystat")
712        vv
713    }
714
715print.svyvar<-function (x,  covariance=FALSE, ...)
716{
717    if(!is.matrix(x)) NextMethod()
718
719    vv <- attr(x, "var")
720    if (covariance){
721      nms<-outer(rownames(x),colnames(x),paste,sep=":")
722      m<-cbind(as.vector(x), sqrt(diag(vv)))
723      rownames(m)<-nms
724    } else{
725      ii <- which(diag(sqrt(length(x)))>0)
726      m <- cbind(x[ii], sqrt(diag(vv))[ii])
727    }
728    colnames(m) <- c(attr(x, "statistic"), "SE")
729    printCoefmat(m)
730}
731
732as.matrix.svyvar<-function(x,...) unclass(x)
733
734coef.svyratio<-function(object,...,drop=TRUE){
735  if (!drop) return(object$ratio)
736  cf<-as.vector(object$ratio)
737  nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
738  names(cf)<-nms
739  cf
740}
741
742SE.svyratio<-function(object,...,drop=TRUE){
743  if(!drop) return(sqrt(object$var))
744  se<-as.vector(sqrt(object$var))
745  nms<-as.vector(outer(rownames(object$ratio),colnames(object$ratio),paste,sep="/"))
746  names(se)<-nms
747  se
748}
749
750svyratio<-function(numerator,denominator, design,...){
751  .svycheck(design)
752  UseMethod("svyratio",design)
753}
754
755svyratio.survey.design<-function(numerator, denominator, design,...){
756
757    if (inherits(numerator,"formula"))
758		numerator<-model.frame(numerator,design$variables)
759    else if(typeof(numerator) %in% c("expression","symbol"))
760        numerator<-eval(numerator, design$variables)
761    if (inherits(denominator,"formula"))
762		denominator<-model.frame(denominator,design$variables)
763    else if(typeof(denominator) %in% c("expression","symbol"))
764        denominator<-eval(denominator, design$variables)
765
766    nn<-NCOL(numerator)
767    nd<-NCOL(denominator)
768
769    all<-cbind(numerator,denominator)
770    allstats<-svytotal(all,design)
771    rval<-list(ratio=outer(allstats[1:nn],allstats[nn+1:nd],"/"))
772
773
774    vars<-matrix(ncol=nd,nrow=nn)
775    for(i in 1:nn){
776      for(j in 1:nd){
777        r<-(numerator[,i]-rval$ratio[i,j]*denominator[,j])/sum(denominator[,j]/design$prob)
778        vars[i,j]<-svyCprod(r*1/design$prob, design$strata, design$cluster[[1]], design$fpc,
779                            design$nPSU, design$certainty,design$postStrata)
780      }
781    }
782    colnames(vars)<-names(denominator)
783    rownames(vars)<-names(numerator)
784    rval$var<-vars
785    rval$call<-sys.call()
786    class(rval)<-"svyratio"
787    rval
788
789  }
790
791print.svyratio_separate<-function(x,...){
792  cat("Stratified ratio estimate: ")
793  if (!is.null(x$call))
794    print(x$call)
795  else if (!is.null(attr(x,"call")))
796    print(attr(x$call))
797  for(r in x$ratios) {
798    print(r)
799  }
800  invisible(x)
801}
802
803print.svyratio<-function(x,...){
804  cat("Ratio estimator: ")
805  if (!is.null(x$call))
806    print(x$call)
807  else if(!is.null(attr(x,"call")))
808    print(attr(x,"call"))
809  cat("Ratios=\n")
810  print(x$ratio)
811  cat("SEs=\n")
812  print(sqrt(x$var))
813  invisible(NULL)
814}
815
816predict.svyratio<-function(object, total, se=TRUE,...){
817  if (se)
818    return(list(total=object$ratio*total,se=sqrt(object$var)*total))
819  else
820    return(object$ratio*total)
821}
822
823predict.svyratio_separate<-function(object, total, se=TRUE,...){
824
825  if (length(total)!=length(object$ratios))
826    stop("Number of strata differ in ratio object and totals.")
827  if (!is.null(names(total)) && !is.null(levels(object$strata))){
828    if (!setequal(names(total), levels(object$strata)))
829      warning("Names of strata differ in ratio object and totals")
830    else if (!all(names(total)==levels(object$strata))){
831      warning("Reordering supplied totals to make their names match the ratio object")
832      total<-total[match(names(total),levels(object$strata))]
833    }
834  }
835  totals<-mapply(predict, object=object$ratios, total=total,se=se,...,SIMPLIFY=FALSE)
836
837  if(se){
838    rval<-totals[[1]]$total
839    v<-totals[[1]]$se^2
840    for(ti in totals[-1]) {
841      rval<-rval+ti$total
842      v<-v+ti$se^2
843    }
844    list(total=rval,se=sqrt(v))
845  } else {
846    rval<-totals[[1]]
847    for (ti in totals[-1]) rval<-rval+ti
848    rval
849  }
850
851}
852
853
854cv.svyratio<-function(object,...){
855  sqrt(object$var)/object$ratio
856}
857
858svytable<-function(formula, design, ...){
859    UseMethod("svytable",design)
860}
861
862svytable.survey.design<-function(formula, design, Ntotal=NULL, round=FALSE,...){
863
864  if (!inherits(design,"survey.design")) stop("design must be a survey design")
865  weights<-1/design$prob
866
867  ## unstratified or unadjusted
868  if (length(Ntotal)<=1 || !design$has.strata){
869    if (length(formula)==3)
870      tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~.(formula[[3]]), data=model.frame(design),...))
871       else
872         tblcall<-bquote(xtabs(weights~.(formula[[2]]), data=model.frame(design),...))
873    tbl<-eval(tblcall)
874    if (!is.null(Ntotal)) {
875      if(length(formula)==3)
876        tbl<-tbl/sum(Ntotal)
877      else
878        tbl<-tbl*sum(Ntotal)/sum(tbl)
879    }
880    if (round)
881      tbl<-round(tbl)
882    attr(tbl,"call")<-match.call()
883    class(tbl)<-c("svytable",class(tbl))
884    return(tbl)
885  }
886  ## adjusted and stratified
887  if (length(formula)==3)
888    tblcall<-bquote(xtabs(I(weights*.(formula[[2]]))~design$strata[,1]+.(formula[[3]]), data=model.frame(design),...))
889  else
890    tblcall<-bquote(xtabs(weights~design$strata[,1]+.(formula[[2]]), data=model.frame(design),...))
891
892  tbl<-eval(tblcall)
893
894  ss<-match(sort(unique(design$strata[,1])), Ntotal[,1])
895  dm<-dim(tbl)
896  layer<-prod(dm[-1])
897  tbl<-sweep(tbl,1,Ntotal[ss, 2]/apply(tbl,1,sum),"*")
898  tbl<-apply(tbl, 2:length(dm), sum)
899  if (round)
900    tbl<-round(tbl)
901  class(tbl)<-c("svytable","xtabs", "table")
902  attr(tbl, "call")<-match.call()
903  tbl
904}
905
906svycoxph<-function(formula,design,subset=NULL,rescale=TRUE,...){
907  .svycheck(design)
908  UseMethod("svycoxph",design)
909}
910
911svycoxph.survey.design<-function(formula,design, subset=NULL, rescale=TRUE, ...){
912    subset<-substitute(subset)
913    subset<-eval(subset, model.frame(design),parent.frame())
914    if (!is.null(subset))
915        design<-design[subset,]
916
917    if(any(weights(design)<0)) stop("weights must be non-negative")
918
919    data<-model.frame(design)
920
921    g<-match.call()
922    g$formula<-eval.parent(g$formula)
923    g$design<-NULL
924    g$var<-NULL
925    g$rescale <- NULL
926    if (is.null(g$weights))
927      g$weights<-quote(.survey.prob.weights)
928    else
929      g$weights<-bquote(.survey.prob.weights*.(g$weights))
930    g[[1]]<-quote(coxph)
931    g$data<-quote(data)
932    g$subset<-quote(.survey.prob.weights>0)
933    g$model <- TRUE
934
935    ##need to rescale weights for stability
936    ## unless the user doesn't want to
937    if (rescale)
938        data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob)
939    if (!all(all.vars(formula) %in% names(data)))
940        stop("all variables must be in design= argument")
941    g<-with(list(data=data), eval(g))
942
943    if (inherits(g, "coxph.penal"))
944        warning("svycoxph does not support penalised terms")
945
946    g$call<-match.call()
947    g$call[[1]]<-as.name(.Generic)
948    g$printcall<-sys.call(-1)
949    g$printcall[[1]]<-as.name(.Generic)
950    class(g)<-c("svycoxph", class(g))
951    g$survey.design<-design
952
953    nas<-g$na.action
954    if (length(nas))
955        design<-design[-nas,]
956
957    dbeta.subset<-resid(g,"dfbeta",weighted=TRUE)
958    if (nrow(design)==NROW(dbeta.subset)){
959      dbeta<-as.matrix(dbeta.subset)
960    } else {
961      dbeta<-matrix(0,ncol=NCOL(dbeta.subset),nrow=nrow(design))
962      dbeta[is.finite(design$prob),]<-dbeta.subset
963    }
964    g$inv.info<-g$var
965
966    if (inherits(design,"survey.design2"))
967      g$var<-svyrecvar(dbeta, design$cluster,
968                    design$strata, design$fpc,
969                    postStrata=design$postStrata)
970    else if (inherits(design, "twophase"))
971      g$var<-twophasevar(dbeta, design)
972    else if(inherits(design, "twophase2"))
973      g$var<-twophase2var(dbeta, design)
974    else if(inherits(design, "pps"))
975      g$var<-ppsvar(dbeta,design)
976    else
977      g$var<-svyCprod(dbeta, design$strata,
978                      design$cluster[[1]], design$fpc,design$nPSU,
979                      design$certainty,design$postStrata)
980
981    g$wald.test<-coef(g)%*%solve(g$var,coef(g))
982    g$ll<-g$loglik
983    g$loglik<-NULL
984    g$rscore<-NULL
985    g$score<-NA
986    g$degf.resid<-degf(design)-length(coef(g)[!is.na(coef(g))])+1
987
988    g
989}
990
991
992model.frame.svycoxph<-function(formula,...){
993    f<-formula$call
994    env <- environment(formula(formula))
995    if (is.null(env))
996        env <- parent.frame()
997    f[[1]]<-as.name("model.frame")
998    f$data<-quote(data)
999    f$design<-NULL
1000    f$method<-f$control<-f$singular.ok<-f$model<-f$x<-f$y<-f$iter<-NULL
1001    f$formula<-formula(formula)
1002    if (is.null(f$weights))
1003        f$weights<-quote(.survey.prob.weights)
1004    else
1005        f$weights<-bquote(.survey.prob.weights*.(f$weights))
1006    design<-formula$survey.design
1007    data<-model.frame(design)
1008    data$.survey.prob.weights<-(1/design$prob)/sum(1/design$prob)
1009    with(list(data=data), eval(f))
1010}
1011
1012model.matrix.svycoxph<-function (object, data = NULL, contrast.arg = object$contrasts,
1013    ...)
1014{
1015    if (!is.null(object[["x"]]))
1016        object[["x"]]
1017    else {
1018        if (is.null(data))
1019            data <- model.frame(object, ...)
1020        else data <- model.frame(object, data = data, ...)
1021        Terms <- object$terms
1022        attr(Terms, "intercept") <- 1
1023        strats <- attr(Terms, "specials")$strata
1024        cluster <- attr(Terms, "specials")$cluster
1025        dropx <- NULL
1026        if (length(cluster)) {
1027            tempc <- untangle.specials(Terms, "cluster", 1:10)
1028            ord <- attr(Terms, "order")[tempc$terms]
1029            if (any(ord > 1))
1030                stop("Cluster can not be used in an interaction")
1031            dropx <- tempc$terms
1032        }
1033        if (length(strats)) {
1034            temp <- untangle.specials(Terms, "strata", 1)
1035            dropx <- c(dropx, temp$terms)
1036        }
1037        if (length(dropx)) {
1038            newTerms <- Terms[-dropx]
1039            X <- model.matrix(newTerms, data, contrasts = contrast.arg)
1040        }
1041        else {
1042            newTerms <- Terms
1043            X <- model.matrix(Terms, data, contrasts = contrast.arg)
1044        }
1045        X
1046    }
1047}
1048
1049print.svycoxph<-function(x,...){
1050    print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...)
1051##    x$call<-x$printcall
1052    NextMethod()
1053}
1054
1055summary.svycoxph<-function(object,...){
1056    print(object$survey.design,varnames=FALSE, design.summaries=FALSE,...)
1057##    object$call<-object$printcall
1058    NextMethod()
1059}
1060
1061survfit.svycoxph<-function(object,...){
1062    stop("No survfit method for survey models")
1063}
1064extractAIC.svycoxph<-function(fit,...){
1065    stop("No AIC for survey models")
1066}
1067
1068anova.svycoxph<-function(object,...){
1069    stop("No anova method for survey models")
1070}
1071
1072svyglm<-function(formula, design,subset=NULL,family=stats::gaussian(),start=NULL, ...){
1073  .svycheck(design)
1074  UseMethod("svyglm",design)
1075}
1076
1077svyglm.survey.design<-function(formula,design,subset=NULL, family=stats::gaussian(),start=NULL,
1078                               rescale=TRUE,..., deff=FALSE, influence=FALSE){
1079
1080      subset<-substitute(subset)
1081      subset<-eval(subset, model.frame(design), parent.frame())
1082    if (!is.null(subset)){
1083        if (any(is.na(subset)))
1084            stop("subset must not contain NA values")
1085        design<-design[subset,]
1086      }
1087      data<-model.frame(design)
1088
1089      g<-match.call()
1090    g$formula<-eval.parent(g$formula)
1091    g$influence<-NULL
1092      g$design<-NULL
1093      g$var <- NULL
1094    g$rescale <- NULL
1095    g$deff<-NULL
1096    g$subset <- NULL  ## done it already
1097      g$family<-family
1098      if (is.null(g$weights))
1099        g$weights<-quote(.survey.prob.weights)
1100      else
1101          g$weights<-bquote(.survey.prob.weights*.(g$weights))
1102      g$data<-quote(data)
1103      g[[1]]<-quote(glm)
1104
1105      ##need to rescale weights for stability in binomial
1106      ## (unless the user doesn't want to)
1107      if (rescale)
1108          data$.survey.prob.weights<-(1/design$prob)/mean(1/design$prob)
1109      else
1110          data$.survey.prob.weights<-(1/design$prob)
1111
1112      if(any(is.na(data$.survey.prob.weights)))
1113        stop("weights must not contain NA values")
1114      if (!all(all.vars(formula) %in% names(data)))
1115	stop("all variables must be in design= argument")
1116    g<-with(list(data=data), eval(g))
1117    summ<-summary(g)
1118      g$naive.cov<-summ$cov.unscaled
1119
1120      nas<-g$na.action
1121      if (length(nas))
1122         design<-design[-nas,]
1123
1124      g$cov.unscaled<-svy.varcoef(g,design)
1125      g$df.residual <- degf(design)+1-length(coef(g)[!is.na(coef(g))])
1126
1127      class(g)<-c("svyglm",class(g))
1128      g$call<-match.call()
1129      g$call[[1]]<-as.name(.Generic)
1130      if(!("formula" %in% names(g$call))) {
1131        if (is.null(names(g$call)))
1132          i<-1
1133        else
1134          i<-min(which(names(g$call)[-1]==""))
1135        names(g$call)[i+1]<-"formula"
1136      }
1137
1138    if(deff){
1139        vsrs<-summ$cov.scaled*mean(data$.survey.prob.weights)
1140        attr(g,"deff")<-g$cov.unscaled/vsrs
1141    }
1142
1143    if (influence){
1144        estfun< model.matrix(g)*naa_shorter(nas, resid(g,"working"))*g$weights
1145        if (g$rank<NCOL(estfun)){
1146            estfun<-estfun[,g$qr$pivot[1:g$rank]]
1147        }
1148        attr(g, "influence")<-estfun%*%g$naive.cov
1149    }
1150
1151    g$survey.design<-design
1152    g
1153}
1154
1155print.svyglm<-function(x,...){
1156  print(x$survey.design, varnames=FALSE, design.summaries=FALSE,...)
1157  NextMethod()
1158
1159}
1160
1161coef.svyglm<-function(object,complete=FALSE,...,na.rm=!complete) {
1162  beta<-object$coefficients
1163  if (!na.rm || length(beta)==object$rank)
1164    beta
1165  else
1166    beta[object$qr$pivot[1:object$rank]]
1167}
1168
1169vcov.svyglm<-function(object,...) {
1170  v<-object$cov.unscaled
1171  dimnames(v)<-list(names(coef(object)),names(coef(object)))
1172  v
1173}
1174
1175
1176svy.varcoef<-function(glm.object,design){
1177    Ainv<-summary(glm.object)$cov.unscaled
1178    nas<-glm.object$na.action
1179    estfun<-model.matrix(glm.object)*naa_shorter(nas, resid(glm.object,"working"))*glm.object$weights
1180    if (glm.object$rank<NCOL(estfun)){
1181      estfun<-estfun[,glm.object$qr$pivot[1:glm.object$rank]]
1182    }
1183    naa<-glm.object$na.action
1184    ## the design may still have rows with weight zero for missing values
1185    ## if there are weights or calibration. model.matrix will have removed them
1186    if (length(naa) && (NROW(estfun)!=nrow(design) )){
1187      if ((length(naa)+NROW(estfun))!=nrow(design) )
1188        stop("length mismatch: this can't happen.")
1189      n<-nrow(design)
1190      inx <- (1:n)[-naa]
1191      ee <- matrix(0,nrow=n,ncol=NCOL(estfun))
1192      ee[inx,]<-estfun
1193      estfun<-ee
1194    }
1195
1196    if (inherits(design,"survey.design2"))
1197      svyrecvar(estfun%*%Ainv,design$cluster,design$strata,design$fpc,postStrata=design$postStrata)
1198    else if (inherits(design, "twophase"))
1199      twophasevar(estfun%*%Ainv, design)
1200    else if (inherits(design, "twophase2"))
1201      twophase2var(estfun%*%Ainv, design)
1202    else if (inherits(design, "pps"))
1203      ppsvar(estfun%*%Ainv, design)
1204    else
1205      svyCprod(estfun%*%Ainv,design$strata,design$cluster[[1]],design$fpc, design$nPSU,
1206                  design$certainty,design$postStrata)
1207  }
1208
1209residuals.svyglm<-function(object,type = c("deviance", "pearson", "working",
1210    "response", "partial"),...){
1211	type<-match.arg(type)
1212	if (type=="pearson"){
1213   	   y <- object$y
1214	   mu <- object$fitted.values
1215    	   wts <- object$prior.weights
1216           pwts<- 1/object$survey.design$prob
1217           pwts<- pwts/mean(pwts)
1218           ## missing values in calibrated/post-stratified designs
1219           ## the rows will still be in the design object but not in the model
1220           if (length(naa<-object$na.action) && (length(pwts)!=length(wts))){
1221             if(length(naa)+length(wts) != length(pwts))
1222               stop("length mismatch: this can't happen.")
1223             inx<-(1:length(pwts))[-naa]
1224           } else inx<-1:length(pwts)
1225           r<-numeric(length(pwts))
1226	   r[inx]<-(y - mu) * sqrt(wts/pwts[inx])/(sqrt(object$family$variance(mu)))
1227	   if (is.null(object$na.action))
1228        	r
1229    	   else
1230	        naresid(object$na.action, r)
1231	} else
1232		NextMethod()
1233
1234}
1235
1236summary.svyglm<-function (object, correlation = FALSE, df.resid=NULL,...)
1237{
1238    Qr <- object$qr
1239    est.disp <- TRUE
1240    if (is.null(df.resid))
1241      df.r <- object$df.residual
1242    else
1243      df.r<-df.resid
1244
1245    dispersion<-svyvar(resid(object,"pearson"), object$survey.design,
1246                       na.rm=TRUE)
1247
1248    coef.p <- coef(object)
1249    covmat<-vcov(object)
1250    dimnames(covmat) <- list(names(coef.p), names(coef.p))
1251    var.cf <- diag(covmat)
1252    s.err <- sqrt(var.cf)
1253    tvalue <- coef.p/s.err
1254    dn <- c("Estimate", "Std. Error")
1255    if (!est.disp) {
1256        pvalue <- 2 * pnorm(-abs(tvalue))
1257        coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
1258        dimnames(coef.table) <- list(names(coef.p), c(dn, "z value",
1259            "Pr(>|z|)"))
1260    }
1261    else if (df.r > 0) {
1262        pvalue <- 2 * pt(-abs(tvalue), df.r)
1263        coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
1264        dimnames(coef.table) <- list(names(coef.p), c(dn, "t value",
1265            "Pr(>|t|)"))
1266    }
1267    else {
1268       coef.table <- cbind(coef.p, s.err, tvalue, NaN)
1269       dimnames(coef.table) <- list(names(coef.p), c(dn, "t value",
1270                                                      "Pr(>|t|)"))
1271    }
1272    ans <- c(object[c("call", "terms", "family", "deviance",
1273        "aic", "contrasts", "df.residual", "null.deviance", "df.null",
1274        "iter")], list(deviance.resid = residuals(object, type = "deviance"),
1275        aic = object$aic, coefficients = coef.table, dispersion = dispersion,
1276        df = c(object$rank, df.r,NCOL(Qr$qr)), cov.unscaled = covmat,
1277        cov.scaled = covmat))
1278    if (correlation) {
1279        dd <- sqrt(diag(covmat))
1280        ans$correlation <- covmat/outer(dd, dd)
1281    }
1282    ans$aliased<-is.na(coef(object,na.rm=FALSE))
1283    ans$survey.design<-list(call=object$survey.design$call)
1284    class(ans) <- c("summary.svyglm","summary.glm")
1285    return(ans)
1286}
1287
1288
1289confint.svyglm<-function(object,parm,level=0.95,method=c("Wald","likelihood"),ddf=NULL,...){
1290    method<-match.arg(method)
1291    if(method=="Wald"){
1292        if (is.null(ddf))
1293            ddf <- object$df.residual
1294        if (ddf<=0) {
1295            ci<-confint.default(object,parm=parm,level=.95,...)*NaN
1296        } else {
1297            tlevel <- 1 - 2*pnorm(qt((1 - level)/2, df = ddf))
1298            ci<-confint.default(object,parm=parm,level=tlevel,...)
1299        }
1300        a <- (1 - level)/2
1301        a <- c(a, 1 - a)
1302        pct <- format.perc(a, 3)
1303        colnames(ci)<-pct
1304        return(ci)
1305    }
1306  pnames <- names(coef(object))
1307  if (missing(parm))
1308    parm <- seq_along(pnames)
1309  else if (is.character(parm))
1310    parm <- match(parm, pnames, nomatch = 0)
1311  lambda<-diag(object$cov.unscaled[parm,parm,drop=FALSE])/diag(object$naive.cov[parm,parm,drop=FALSE])
1312  if(is.null(ddf)) ddf<-object$df.residual
1313  if (ddf==Inf){
1314      alpha<-pnorm(qnorm((1-level)/2)*sqrt(lambda))/2
1315  }  else if (ddf<=0) {
1316      stop("zero or negative denomintor df")
1317  } else {
1318      alpha<-pnorm(qt((1-level)/2,df=ddf)*sqrt(lambda))/2
1319  }
1320  rval<-vector("list",length(parm))
1321  for(i in 1:length(parm)){
1322    temp<-MASSprofile_glm(fitted=object,which=parm[i],alpha=alpha[i],...)
1323    rval[[i]]<-confint_profile(temp,parm=parm[i],level=level, unscaled_level=2*alpha[i],...)
1324  }
1325
1326  names(rval)<-pnames[parm]
1327  if (length(rval)==1)
1328    rval<-rval[[1]]
1329  else
1330    rval<-do.call(rbind,rval)
1331  attr(rval,"levels")<-level
1332  rval
1333}
1334
1335
1336##
1337## based on MASS:::confint.profile.glm
1338## which is GPL and (c) Bill Venables and Brian D Ripley.
1339##
1340confint_profile <- function (object, parm = seq_along(pnames), level = 0.95, unscaled_level, ...)
1341{
1342    of <- attr(object, "original.fit")
1343    pnames <- names(coef(of))
1344    if (is.character(parm))
1345        parm <- match(parm, pnames, nomatch = 0L)
1346    a <- (1-level)/2
1347    a <- c(a, 1 - a)
1348    pct <- paste(round(100 * a, 1), "%")
1349    ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(pnames[parm],
1350        pct))
1351    cutoff <- c(qnorm(unscaled_level),qnorm(unscaled_level, lower.tail=FALSE))
1352    for (pm in parm) {
1353        pro <- object[[pnames[pm]]]
1354        if (is.null(pro))
1355            next
1356        if (length(pnames) > 1L)
1357            sp <- spline(x = pro[, "par.vals"][, pm], y = pro[,
1358                1])
1359        else sp <- spline(x = pro[, "par.vals"], y = pro[, 1])
1360        ci[pnames[pm], ] <- approx(sp$y, sp$x, xout = cutoff)$y
1361    }
1362    drop(ci)
1363}
1364
1365##
1366## MASS:::profile.glm with very slight changes to avoid rounding error in 1-alpha
1367## original is GPL and (c) Bill Venables and Brian D Ripley.
1368##
1369
1370MASSprofile_glm<-function (fitted, which = 1:p, alpha = 0.01, maxsteps = 10, del = zmax/5,
1371    trace = FALSE, ...)
1372{
1373    Pnames <- names(B0 <- coef(fitted))
1374    nonA <- !is.na(B0)
1375    pv0 <- t(as.matrix(B0))
1376    p <- length(Pnames)
1377    if (is.character(which))
1378        which <- match(which, Pnames)
1379    summ <- summary(fitted)
1380    std.err <- summ$coefficients[, "Std. Error", drop = FALSE]
1381    mf <- model.frame(fitted)
1382    Y <- model.response(mf)
1383    n <- NROW(Y)
1384    O <- model.offset(mf)
1385    if (!length(O))
1386        O <- rep(0, n)
1387    W <- model.weights(mf)
1388    if (length(W) == 0L)
1389        W <- rep(1, n)
1390    OriginalDeviance <- deviance(fitted)
1391    DispersionParameter <- summ$dispersion
1392    X <- model.matrix(fitted)
1393    fam <- family(fitted)
1394    switch(fam$family, binomial = , poisson = , `Negative Binomial` = {
1395        zmax <- sqrt(qchisq(alpha, 1,lower.tail=FALSE))
1396        profName <- "z"
1397    }, gaussian = , quasi = , inverse.gaussian = , quasibinomial = ,
1398        quasipoisson = , {
1399            zmax <- sqrt(qf(alpha, 1, n - p,lower.tail=FALSE))
1400            profName <- "tau"
1401        })
1402    prof <- vector("list", length = length(which))
1403    names(prof) <- Pnames[which]
1404    for (i in which) {
1405        if (!nonA[i])
1406            next
1407        zi <- 0
1408        pvi <- pv0
1409        a <- nonA
1410        a[i] <- FALSE
1411        Xi <- X[, a, drop = FALSE]
1412        pi <- Pnames[i]
1413        for (sgn in c(-1, 1)) {
1414            if (trace)
1415                message("\nParameter: ", pi, " ", c("down", "up")[(sgn +
1416                  1)/2 + 1])
1417            step <- 0
1418            z <- 0
1419            LP <- X[, nonA, drop = FALSE] %*% B0[nonA] + O
1420            while ((step <- step + 1) < maxsteps && abs(z) <
1421                zmax) {
1422                bi <- B0[i] + sgn * step * del * std.err[Pnames[i],
1423                  1]
1424                o <- O + X[, i] * bi
1425                fm <- glm.fit(x = Xi, y = Y, weights = W, etastart = LP,
1426                  offset = o, family = fam, control = fitted$control)
1427                LP <- Xi %*% fm$coefficients + o
1428                ri <- pv0
1429                ri[, names(coef(fm))] <- coef(fm)
1430                ri[, pi] <- bi
1431                pvi <- rbind(pvi, ri)
1432                zz <- (fm$deviance - OriginalDeviance)/DispersionParameter
1433                if (zz > -0.001)
1434                  zz <- max(zz, 0)
1435                else stop("profiling has found a better solution, so original fit had not converged")
1436                z <- sgn * sqrt(zz)
1437                zi <- c(zi, z)
1438            }
1439        }
1440        si <- order(zi)
1441        prof[[pi]] <- structure(data.frame(zi[si]), names = profName)
1442        prof[[pi]]$par.vals <- pvi[si, , drop = FALSE]
1443    }
1444    val <- structure(prof, original.fit = fitted, summary = summ)
1445    class(val) <- c("profile.glm", "profile")
1446    val
1447}
1448
1449
1450###
1451
1452svymle<-function(loglike, gradient=NULL, design, formulas,
1453    start=NULL, control=list(),
1454    na.action="na.fail", method=NULL,lower=NULL,upper=NULL,
1455    influence=FALSE,...){
1456  if(is.null(method))
1457    method<-if(is.null(gradient)) "Nelder-Mead" else "newuoa"
1458  if (!inherits(design,"survey.design"))
1459    stop("design is not a survey.design")
1460  weights<-weights(design)
1461  wtotal<-sum(weights)
1462
1463  if (!(method %in% c("bobyqa","newuoa")) && is.null(control$fnscale))
1464    control$fnscale <- -wtotal/length(weights)
1465  if (inherits(design, "twophase")) {
1466    data<-design$phase1$sample$variables
1467  } else {
1468    data<-design$variables
1469  }
1470
1471  ## Get the response variable
1472  nms<-names(formulas)
1473  if (nms[1]==""){
1474    if (inherits(formulas[[1]],"formula")) {
1475      y<-eval.parent(model.frame(formulas[[1]],data=data,na.action=na.pass))
1476    } else {
1477      y<-eval(y,data,parent.frame())
1478    }
1479    formulas[1]<-NULL
1480    if (FALSE && NCOL(y)>1) stop("Y has more than one column")
1481  }   else {
1482    ## one formula must have response
1483    has.response<-sapply(formulas,length)==3
1484    if (sum(has.response)!=1) stop("Need a response variable")
1485    ff<-formulas[[which(has.response)]]
1486    ff[[3]]<-1
1487    y<-eval.parent(model.frame(ff,data=data,na.action=na.pass))
1488    formulas[[which(has.response)]]<-delete.response(terms(formulas[[which(has.response)]]))
1489    nms<-c("",nms)
1490  }
1491
1492  if(length(which(nms==""))>1) stop("Formulas must have names")
1493
1494  mf<-vector("list",length(formulas))
1495  vnms <- unique(do.call(c, lapply(formulas, all.vars)))
1496  uformula <- make.formula(vnms)
1497  mf <- model.frame(uformula, data=data,na.action=na.pass)
1498  mf <- cbind(`(Response)`=y, mf)
1499  mf<-mf[,!duplicated(colnames(mf)),drop=FALSE]
1500
1501  mf<-get(na.action)(mf)
1502  nas<-attr(mf,"na.action")
1503  if (length(nas))
1504    design<-design[-nas,]
1505  weights<-1/design$prob
1506  wtotal<-sum(weights)
1507
1508  Y<-mf[,1]
1509
1510#  mm <- lapply(formulas,model.matrix, data=mf)
1511
1512  mmFrame <- lapply(formulas,model.frame, data=mf)
1513  mm = mapply(model.matrix, object = formulas, data=mmFrame, SIMPLIFY=FALSE)
1514  mmOffset <- lapply(mmFrame, model.offset)
1515
1516  # add a vector of zeros if there is no offset provided
1517  noOffset = which(unlist(lapply(mmOffset, length))==0)
1518  for(D in noOffset) {
1519    mmOffset[[D]] =  rep(0, NROW(mm[[1]]))
1520  }
1521
1522  ## parameter names
1523  parnms<-lapply(mm,colnames)
1524  for(i in 1:length(parnms))
1525    parnms[[i]]<-paste(nms[i+1],parnms[[i]],sep=".")
1526  parnms<-unlist(parnms)
1527
1528  # maps position in theta to model matrices
1529  np<-c(0,cumsum(sapply(mm,NCOL)))
1530
1531
1532  objectivefn<-function(theta,...){
1533    args<-vector("list",length(nms))
1534    args[[1]]<-Y
1535    for(i in 2:length(nms))
1536      args[[i]]<-mm[[i-1]]%*%theta[(np[i-1]+1):np[i]] +
1537          mmOffset[[i-1]]
1538    names(args)<-nms
1539    args<-c(args, ...)
1540    sum(do.call("loglike",args)*weights)
1541  }
1542
1543  if (is.null(gradient)) {
1544    grad<-NULL
1545  } else {
1546    fnargs<-names(formals(loglike))[-1]
1547    grargs<-names(formals(gradient))[-1]
1548    if(!identical(fnargs,grargs))
1549      stop("loglike and gradient have different arguments.")
1550    reorder<-na.omit(match(grargs,nms[-1]))
1551    grad<-function(theta,...){
1552      args<-vector("list",length(nms))
1553      args[[1]]<-Y
1554      for(i in 2:length(nms))
1555        args[[i]]<-drop(
1556            mm[[i-1]]%*%theta[(np[i-1]+1):np[i]] +
1557                mmOffset[[i-1]])
1558      names(args)<-nms
1559      args<-c(args,...)
1560      rval<-NULL
1561      tmp<-do.call("gradient",args)
1562      for(i in reorder){
1563        rval<-c(rval, colSums(as.matrix(tmp[,i]*weights*mm[[i]])))
1564      }
1565      drop(rval)
1566    }
1567  }
1568
1569  theta0<-numeric(np[length(np)])
1570  if (is.list(start))
1571    st<-do.call("c",start)
1572  else
1573    st<-start
1574
1575  if (length(st)==length(theta0)) {
1576    theta0<-st
1577  } else {
1578    stop("starting values wrong length")
1579  }
1580
1581  if (method=="nlm"){
1582    ff<-function(theta){
1583      rval<- -objectivefn(theta)
1584      if (is.na(rval)) rval<- -Inf
1585      attr(rval,"gradient")<- -grad(theta)
1586      rval
1587    }
1588    rval<-nlm(ff, theta0,hessian=TRUE)
1589    if (rval$code>3) warning("nlm did not converge")
1590    rval$par<-rval$estimate
1591  } else if (method == "newuoa"){
1592      rval<-minqa::uobyqa(theta0, function(par,...) -objectivefn(par,...), control=control, ...)
1593      if(rval$ier>0) warning(rval$msg)
1594      rval$hessian <- numDeriv::hessian(objectivefn, rval$par)
1595  } else if (method == "bobyqa"){
1596      if (is.list(lower)) lower<-do.call(c, lower)
1597      if (is.list(upper)) upper<-do.call(c,upper)
1598      rval<-minqa::bobyqa(theta0, function(par,...) -objectivefn(par,...), control=control,lower=lower,upper=upper, ...)
1599      if(rval$ier>0) warning(rval$msg)
1600      rval$hessian <- numDeriv::hessian(objectivefn,rval$par)
1601  }else {
1602    rval<-optim(theta0, objectivefn, grad,control=control,
1603        hessian=TRUE,method=method,...)
1604    if (rval$conv!=0) warning("optim did not converge")
1605  }
1606
1607
1608
1609
1610  names(rval$par)<-parnms
1611  dimnames(rval$hessian)<-list(parnms,parnms)
1612
1613  if (is.null(gradient)) {
1614    rval$invinf<-solve(-rval$hessian)
1615    rval$scores<-NULL
1616    rval$sandwich<-NULL
1617  }  else {
1618    theta<-rval$par
1619    args<-vector("list",length(nms))
1620    args[[1]]<-Y
1621    for(i in 2:length(nms))
1622      args[[i]]<-drop(mm[[i-1]]%*%theta[(np[i-1]+1):np[i]]+ mmOffset[[i-1]])
1623    names(args)<-nms
1624    args<-c(args,...)
1625    deta<-do.call("gradient",args)
1626    rval$scores<-NULL
1627    for(i in reorder)
1628      rval$scores<-cbind(rval$scores,deta[,i]*weights*mm[[i]])
1629
1630    rval$invinf<-solve(-rval$hessian)
1631    dimnames(rval$invinf)<-list(parnms,parnms)
1632
1633    db<-rval$scores%*%rval$invinf
1634    if (inherits(design,"survey.design2"))
1635      rval$sandwich<-svyrecvar(db,design$cluster,design$strata, design$fpc,
1636          postStrata=design$postStrata)
1637    else if (inherits(design, "twophase"))
1638      rval$sandwich<-twophasevar(db,design)
1639    else
1640      rval$sandwich<-svyCprod(db,design$strata,design$cluster[[1]],
1641          design$fpc, design$nPSU,
1642          design$certainty, design$postStrata)
1643    dimnames(rval$sandwich)<-list(parnms,parnms)
1644  }
1645
1646  if (influence)
1647      attr(rval,"influence")<-db
1648  rval$call<-match.call()
1649  rval$design<-design
1650  class(rval)<-"svymle"
1651  rval
1652
1653}
1654
1655
1656coef.svymle<-function(object,...) object$par
1657
1658vcov.svymle<-function(object,stderr=c("robust","model"),...) {
1659    stderr<-match.arg(stderr)
1660    if (stderr=="robust"){
1661	rval<-object$sandwich
1662	if (is.null(rval)) {
1663		p<-length(coef(object))
1664		rval<-matrix(NA,p,p)
1665	}
1666    } else {
1667        rval<-object$invinf*mean(1/object$design$prob)
1668    }
1669    rval
1670}
1671
1672
1673print.svymle<-function(x,...){
1674  cat("Survey-sampled mle: \n")
1675  print(x$call)
1676  cat("Coef:  \n")
1677  print(x$par)
1678}
1679
1680summary.svymle<-function(object,stderr=c("robust","model"),...){
1681    cat("Survey-sampled mle: \n")
1682    print(object$call)
1683    stderr<-match.arg(stderr)
1684    tbl<-data.frame(Coef=coef(object),SE=sqrt(diag(vcov(object,stderr=stderr))))
1685    tbl$p.value<-format.pval(2*(1-pnorm(abs(tbl$Coef/tbl$SE))), digits=3,eps=0.001)
1686    print(tbl)
1687    print(object$design)
1688}
1689
1690model.frame.survey.design<-function(formula,...,drop=TRUE){
1691  formula$variables
1692}
1693model.frame.svyrep.design<-function(formula,...){
1694  formula$variables
1695}
1696model.frame.survey.design2<-function(formula,...){
1697  formula$variables
1698}
1699
1700.onLoad<-function(...){
1701  if (is.null(getOption("survey.lonely.psu")))
1702    options(survey.lonely.psu="fail")
1703  if (is.null(getOption("survey.ultimate.cluster")))
1704    options(survey.ultimate.cluster=FALSE)
1705  if (is.null(getOption("survey.want.obsolete")))
1706    options(survey.want.obsolete=FALSE)
1707  if (is.null(getOption("survey.adjust.domain.lonely")))
1708    options(survey.adjust.domain.lonely=FALSE)
1709  if (is.null(getOption("survey.drop.replicates")))
1710      options(survey.drop.replicates=TRUE)
1711  if (is.null(getOption("survey.multicore")))
1712    options(survey.multicore=FALSE)
1713  if (is.null(getOption("survey.replicates.mse")))
1714    options(survey.replicates.mse=FALSE)
1715}
1716
1717
1718predterms<-function(object,se=FALSE,terms=NULL){
1719  tt<-terms(object)
1720  n <- length(object$residuals)
1721  p <- object$rank
1722  p1 <- seq_len(p)
1723  piv <- object$qr$pivot[p1]
1724  beta<-coef(object)
1725  X<-mm<-model.matrix(object)
1726  aa <- attr(mm, "assign")
1727  ll <- attr(tt, "term.labels")
1728  hasintercept <- attr(tt, "intercept") > 0L
1729  if (hasintercept)
1730    ll <- c("(Intercept)", ll)
1731  aaa <- factor(aa, labels = ll)
1732  asgn <- split(order(aa), aaa)
1733  if (hasintercept) {
1734    asgn$"(Intercept)" <- NULL
1735    }
1736  avx <- colMeans(mm)
1737  termsconst <- sum(avx[piv] * beta[piv])
1738  nterms <- length(asgn)
1739  ip <- matrix(ncol = nterms, nrow = NROW(X))
1740  if (nterms > 0) {
1741    predictor <- matrix(ncol = nterms, nrow = NROW(X))
1742    dimnames(predictor) <- list(rownames(X), names(asgn))
1743
1744    if (hasintercept)
1745      X <- sweep(X, 2L, avx, check.margin = FALSE)
1746    unpiv <- rep.int(0L, NCOL(X))
1747    unpiv[piv] <- p1
1748    for (i in seq.int(1L, nterms, length.out = nterms)) {
1749      iipiv <- asgn[[i]]
1750      ii <- unpiv[iipiv]
1751      iipiv[ii == 0L] <- 0L
1752      predictor[, i] <- if (any(iipiv > 0L))
1753        X[, iipiv, drop = FALSE] %*% beta[iipiv]
1754      else 0
1755      if (se){
1756        ip[,i]<-if (any(iipiv > 0L))
1757          rowSums(as.matrix(X[, iipiv, drop = FALSE] %*% vcov(object)[ii,ii]) * X[, iipiv, drop = FALSE]) else 0
1758      }
1759    }
1760    if (!is.null(terms)) {
1761      predictor <- predictor[, terms, drop = FALSE]
1762      if (se)
1763        ip <- ip[, terms, drop = FALSE]
1764    }
1765  }
1766  else {
1767    predictor <- ip <- matrix(0, n, 0)
1768  }
1769  attr(predictor, "constant") <- if (hasintercept)
1770    termsconst
1771  else 0
1772  if(se)
1773          dimnames(ip)<-dimnames(predictor)
1774  if (se) list(fit=predictor,se.fit=sqrt(ip)) else predictor
1775}
1776
1777
1778predict.svyglm <- function(object, newdata=NULL, total=NULL,
1779                           type = c("link", "response","terms"),
1780                           se.fit=(type!="terms"),
1781                           vcov=FALSE,...){
1782    if(is.null(newdata))
1783      newdata<-model.frame(object$survey.design)
1784    type<-match.arg(type)
1785    if (type=="terms")
1786      return(predterms(object,se=se.fit,...))
1787    tt<-delete.response(terms(formula(object)))
1788    mf<-model.frame(tt,data=newdata, xlev=object$xlevels)
1789    mm<-model.matrix(tt,mf,contrasts.arg = object$contrasts)
1790    if (!is.null(total) && attr(tt,"intercept")){
1791        mm[,attr(tt,"intercept")]<-mm[,attr(tt,"intercept")]*total
1792    }
1793    eta<-drop(mm %*% coef(object))
1794    d<-drop(object$family$mu.eta(eta))
1795    eta<-switch(type, link=eta, response=object$family$linkinv(eta))
1796    if(se.fit){
1797        if(vcov){
1798            vv<-mm %*% vcov(object) %*% t(mm)
1799            attr(eta,"var")<-switch(type,
1800                                    link=vv,
1801                                    response=d*(t(vv*d)))
1802        } else {
1803            ## FIXME make this more efficient
1804            vv<-drop(rowSums((mm %*% vcov(object)) * mm))
1805            attr(eta,"var")<-switch(type,
1806                                    link=vv,
1807                                    response=drop(d*(t(vv*d))))
1808        }
1809    }
1810    attr(eta,"statistic")<-type
1811    class(eta)<-"svystat"
1812    eta
1813    }
1814
1815