\name{colMedians}
\title{Fast Row or Column-wise Medians of a Matrix}
\alias{colMedians}
\alias{rowMedians}
\description{
  Calculates the median for each row (column) of a matrix \code{x}.
  This is the same as, but more efficient than, \code{apply(x, MM, median)}
  for MM=1 or MM=2, respectively.
}
\usage{
colMedians(x, na.rm = FALSE, hasNA = TRUE, keep.names=TRUE)
rowMedians(x, na.rm = FALSE, hasNA = TRUE, keep.names=TRUE)
}
\arguments{
  \item{x}{a \code{\link{numeric}} \eqn{n \times p}{n x p} \code{\link{matrix}}.}
  \item{na.rm}{if \code{\link{TRUE}}, \code{\link{NA}}s are excluded
    first, otherwise not.}
  \item{hasNA}{logical indicating if \code{x} may contain \code{\link{NA}}s.
    If set to \code{FALSE}, no internal NA handling is performed which
    typically is faster.}
  \item{keep.names}{logical indicating if row or column names of \code{x}
    should become \code{\link{names}} of the result - as is the case for
    \code{\link{apply}(x, MM, median)}.}
}
\value{
  a \code{\link{numeric}} vector of length \eqn{n} or \eqn{p}, respectively.
}
\section{Missing values}{
  If \code{na.rm} is true, missing values are excluded before calculating
  the medians \emph{unless} \code{hasNA} is false.  Note that \code{na.rm} has no
  effect and is automatically false when \code{hasNA} is false, i.e.,
  internally, before computations start, the following is executed:
\preformatted{if (!hasNA) ## If there are no NAs, don't try to remove them
    narm <- FALSE}
}
\details{
  The implementation of \code{rowMedians()} and \code{colMedians()}
  is optimized for both speed and memory.
  To avoid coercing to \code{\link{double}}s (and hence memory allocation), there
  is a special implementation for \code{\link{integer}} matrices.
  That is, if \code{x} is an \code{\link{integer}} \code{\link{matrix}}, then
  \code{rowMedians(as.double(x))} (\code{colMedians(as.double(x))})
  would require three times the memory of \code{rowMedians(x)}
  (\code{colMedians(x)}), but all this is avoided.
}
\author{Henrik Bengtsson, Harris Jaffee, Martin Maechler}

\seealso{
  See \code{\link{wgt.himedian}()} for a weighted hi-median, and
  \code{\link[matrixStats]{colWeightedMedians}()} etc. from package
  \CRANpkg{matrixStats} for \emph{weighted} medians.\cr
  For mean estimates, see \code{rowMeans()} in \code{\link{colSums}()}.
}

\examples{
set.seed(1); n <- 234; p <- 543 # n*p = 127'062
x <- matrix(rnorm(n*p), n, p)
x[sample(seq_along(x), size= n*p / 256)] <- NA
R1 <- system.time(r1 <- rowMedians(x, na.rm=TRUE))
C1 <- system.time(y1 <- colMedians(x, na.rm=TRUE))
R2 <- system.time(r2 <- apply(x, 1, median, na.rm=TRUE))
C2 <- system.time(y2 <- apply(x, 2, median, na.rm=TRUE))
R2 / R1 # speedup factor: ~= 4 {platform dependent}
C2 / C1 # speedup factor: ~= 5.8 {platform dependent}
stopifnot(all.equal(y1, y2, tol=1e-15),
          all.equal(r1, r2, tol=1e-15))

(m <- cbind(x1=3, x2=c(4:1, 3:4,4)))
stopifnot(colMedians(m) == 3,
          all.equal(colMeans(m), colMedians(m)),# <- including names !
          all.equal(rowMeans(m), rowMedians(m)))
}
\keyword{array}
\keyword{robust}
\keyword{univar}