1find.matches <- function(x, y, tol=rep(0,ncol(y)), scale=tol, 2 maxmatch=10) 3{ 4 if(.R.) rep.int <- rep 5 6 ##if(length(dim(x))==0) x <- matrix(x, nrow=1) 10may02 7 if(!is.matrix(x)) 8 x <- as.matrix(x) 9 10 n <- nrow(x) 11 p <- ncol(x) 12 if(!is.matrix(y)) 13 y <- as.matrix(y) ## 10may02 14 15 if(p != ncol(y)) 16 stop("number of columns of x and y must match") 17 18 ny <- nrow(y) 19 rown <- dimnames(x)[[1]] 20 ry <- dimnames(y)[[1]] 21 matches <- matrix(if(length(ry)) 22 "" 23 else 24 0, 25 n, maxmatch, 26 dimnames=list(rown, 27 paste("Match #",1:maxmatch,sep=""))) 28 29 distance <- matrix(NA, n, maxmatch, 30 dimnames=list(rown, 31 paste("Distance #",1:maxmatch,sep=""))) 32 33 if(length(ry)==0) 34 ry <- 1:ny 35 36 scale <- ifelse(scale==0,1,tol) 37 ones <- rep(1,p) 38 mx <- 0 39 for(i in 1:n) { 40 dif <- abs(y - rep(x[i,], rep.int(ny,p))) 41 toll <- rep(tol, rep.int(nrow(dif),p)) 42 which <- (1:ny)[((dif > toll) %*% ones)==0] 43 lw <- length(which) 44 if(lw) { 45 scaled <- dif[which,,drop=FALSE]/rep(scale, rep.int(lw,p)) 46 dist <- (scaled^2) %*% ones 47 lw <- min(lw,maxmatch) 48 mx <- max(mx,lw) 49 d <- order(dist)[1:lw] 50 matches[i,1:lw] <- ry[which[d]] 51 distance[i,1:lw] <- dist[d] 52 } 53 } 54 55 structure(list(matches=matches[,1:mx], distance=distance[,1:mx]), 56 class="find.matches") 57} 58 59 60print.find.matches <- function(x, digits=.Options$digits, ...) 61{ 62 cat("\nMatches:\n\n") 63 print(x$matches, quote=FALSE) 64 cat("\nDistances:\n\n") 65 print(x$distance, digits=digits) 66 invisible() 67} 68 69 70summary.find.matches <- function(object, ...) 71{ 72 mat <- object$matches 73 dist <- object$distance 74 cat("Frequency table of number of matches found per observation\n\n") 75 m <- (!is.na(dist)) %*% rep(1,ncol(mat)) 76 print(table(m)) 77 cat("\nMedian minimum distance by number of matches\n\n") 78 print(tapply(dist[m>0,1], m[m>0], median)) 79 ta <- table(mat[m>0,1]) 80 ta <- ta[ta>1] 81 if(length(ta)) { 82 cat("\nObservations selected first more than once (with frequencies)\n\n") 83 print(ta) 84 } else cat("\nNo observations selected first more than once\n\n") 85 86 invisible() 87} 88 89 90matchCases <- function(xcase, ycase, idcase=names(ycase), 91 xcontrol, ycontrol, idcontrol=names(ycontrol), 92 tol=NULL, 93 maxobs=max(length(ycase),length(ycontrol))*10, 94 maxmatch=20, which=c('closest','random')) 95{ 96 if(!length(tol)) 97 stop('must specify tol') 98 99 if((length(xcase)!=length(ycase)) || (length(xcontrol)!=length(ycontrol))) 100 stop('lengths of xcase, ycase and of xcontrol, ycontrol must be same') 101 102 which <- match.arg(which) 103 104 ycase <- as.matrix(ycase) 105 ycontrol <- as.matrix(ycontrol) 106 if(!length(idcase)) 107 idcase <- 1:length(ycase) 108 109 if(!length(idcontrol)) 110 idcontrol <- 1:length(ycontrol) 111 112 idcase <- as.character(idcase) 113 idcontrol <- as.character(idcontrol) 114 115 j <- is.na(ycase %*% rep(1,ncol(ycase))) | is.na(xcase) 116 if(any(j)) { 117 warning(paste(sum(j),'cases removed due to NAs')) 118 ycase <- ycase[!j,,drop=FALSE] 119 xcase <- xcase[!j] 120 idcase <- idcase[!j] 121 } 122 123 j <- is.na(ycontrol %*% rep(1,ncol(ycontrol))) | is.na(xcontrol) 124 if(any(j)) { 125 warning(paste(sum(j),'controls removed due to NAs')) 126 ycontrol <- ycontrol[!j,,drop=FALSE] 127 xcontrol <- xcontrol[!j] 128 idcontrol <- idcontrol[!j] 129 } 130 131 idCase <- id <- character(maxobs) 132 type <- factor(rep(NA,maxobs), c('case','control')) 133 x <- numeric(maxobs) 134 y <- matrix(NA, ncol=ncol(ycase), nrow=maxobs) 135 136 last <- 0 137 ncase <- length(ycase) 138 ncontrol <- length(ycontrol) 139 matches <- integer(ncase) 140 for(i in 1:ncase) { 141 s <- abs(xcontrol-xcase[i]) <= tol 142 nmatch <- sum(s) 143 if(nmatch > maxmatch) { 144 s <- (1:ncontrol)[s] ## next line was sample(j,...) 4jun02 145 if(which=="random") 146 s <- sample(s, maxmatch, replace=FALSE) 147 else { 148 errors <- abs(xcontrol[s]-xcase[i]) 149 serrors <- order(errors) 150 s <- (s[serrors])[1:maxmatch] 151 } 152 153 nmatch <- maxmatch 154 } 155 156 matches[i] <- nmatch 157 if(!nmatch) 158 next 159 160 end <- last + nmatch + 1 161 if(end > maxobs) 162 stop(paste('needed maxobs >',maxobs)) 163 164 start <- last+1 165 last <- end 166 idCase[start:end] <- rep(idcase[i], nmatch+1) 167 type[start:end] <- c('case',rep('control',nmatch)) 168 id[start:end] <- c(idcase[i], idcontrol[s]) 169 x[start:end] <- c(xcase[i], xcontrol[s]) 170 y[start:end,] <- rbind(ycase[i,,drop=FALSE], ycontrol[s,,drop=FALSE]) 171 } 172 173 cat('\nFrequencies of Number of Matched Controls per Case:\n\n') 174 print(table(matches)) 175 cat('\n') 176 structure(list(idcase=idCase[1:end], type=type[1:end], 177 id=id[1:end], x=x[1:end], y=drop(y[1:end,])), 178 row.names=as.character(1:end), 179 class='data.frame') 180} 181