1\name{colMedians}
2\title{Fast Row or Column-wise Medians of a Matrix}
3\alias{colMedians}
4\alias{rowMedians}
5\description{
6  Calculates the median for each column (row) of a matrix \code{x}.
7  This is the same as but more efficient than \code{apply(x, MM, median)}
8  for MM=2 (MM=1), respectively.
9}
10\usage{
11colMedians(x, na.rm = FALSE, hasNA = TRUE, keep.names=TRUE)
12rowMedians(x, na.rm = FALSE, hasNA = TRUE, keep.names=TRUE)
13}
14\arguments{
15 \item{x}{a \code{\link{numeric}} \eqn{n \times p}{n x p} \code{\link{matrix}}.}
16 \item{na.rm}{if \code{\link{TRUE}}, \code{\link{NA}}s are excluded
17   first, otherwise not.}
18 \item{hasNA}{logical indicating if \code{x} may contain \code{\link{NA}}s.
19   If set to \code{FALSE}, no internal NA handling is performed which
20   typically is faster.}
21\item{keep.names}{logical indicating if row or column names of \code{x}
22  should become \code{\link{names}} of the result - as is the case for
23  \code{\link{apply}(x, MM, median)}.}
24}
25\value{
26  a \code{\link{numeric}} vector of length \eqn{p} (for \code{colMedians()}) or \eqn{n} (for \code{rowMedians()}).
27}
28\section{Missing values}{
29  When \code{na.rm} is true, missing values are excluded before calculating
30  the medians \emph{unless} \code{hasNA} is false.  Note that \code{na.rm} has no
31  effect and is automatically false when \code{hasNA} is false, i.e.,
32  internally, before computations start, the following is executed:
33\preformatted{if (!hasNA)        ## If there are no NAs, don't try to remove them
34     narm <- FALSE}
35}
36\details{
37  The implementation of \code{rowMedians()} and \code{colMedians()}
38  is optimized for both speed and memory.
39  To avoid coercing to \code{\link{double}}s (and hence memory allocation), there
40  is a special implementation for \code{\link{integer}} matrices.
41  That is, if \code{x} is an \code{\link{integer}} \code{\link{matrix}}, then
42  \code{rowMedians(as.double(x))} (\code{colMedians(as.double(x))})
43  would require three times the memory of \code{rowMedians(x)}
44  (\code{colMedians(x)}), but all this is avoided.
45}
46\author{Henrik Bengtsson, Harris Jaffee, Martin Maechler}
47
48\seealso{
49  See \code{\link{wgt.himedian}()} for a weighted hi-median, and
50  \code{\link[matrixStats]{colWeightedMedians}()} etc from package
51  \CRANpkg{matrixStats} for \emph{weighted} medians.\cr
52  For mean estimates, see \code{rowMeans()} in \code{\link{colSums}()}.
53}
54
55\examples{
56set.seed(1); n <- 234; p <- 543 # n*p = 127'062
57x <- matrix(rnorm(n*p), n, p)
58x[sample(seq_along(x), size= n*p / 256)] <- NA
59R1 <- system.time(r1 <- rowMedians(x, na.rm=TRUE))
60C1 <- system.time(y1 <- colMedians(x, na.rm=TRUE))
61R2 <- system.time(r2 <- apply(x, 1, median, na.rm=TRUE))
62C2 <- system.time(y2 <- apply(x, 2, median, na.rm=TRUE))
63R2 / R1 # speedup factor: ~= 4   {platform dependent}
64C2 / C1 # speedup factor: ~= 5.8 {platform dependent}
65stopifnot(all.equal(y1, y2, tol=1e-15),
66          all.equal(r1, r2, tol=1e-15))
67
68(m <- cbind(x1=3, x2=c(4:1, 3:4,4)))
69stopifnot(colMedians(m) == 3,
70          all.equal(colMeans(m), colMedians(m)),# <- including names !
71          all.equal(rowMeans(m), rowMedians(m)))
72}
73\keyword{array}
74\keyword{robust}
75\keyword{univar}
76