\name{diagDA}
\title{Diagonal Discriminant Analysis}
\alias{diagDA}
\alias{dDA}
\alias{print.dDA}
\alias{predict.dDA}
\concept{naive Bayes classifier}
\description{
  This function implements a simple Gaussian maximum likelihood
  discriminant rule for diagonal class covariance matrices; the rule
  itself is sketched in the \sQuote{Details} section.

  In machine learning lingo, this is called \dQuote{Naive Bayes} (for
  continuous predictors).  Note that naive Bayes is more general, as it
  also models discrete predictors as multinomial, e.g., binary predictor
  variables as Binomial / Bernoulli.
}
\usage{
dDA(x, cll, pool = TRUE)
\method{predict}{dDA}(object, newdata, pool = object$pool, \dots)
\method{print}{dDA}(x, \dots)

diagDA(ls, cll, ts, pool = TRUE)
}
\arguments{
  \item{x,ls}{learning set data matrix, with rows corresponding to
    cases (e.g., mRNA samples) and columns to predictor variables
    (e.g., genes).}
  \item{cll}{class labels for the learning set; these must be consecutive
    integers.}
  \item{object}{object of class \code{dDA}.}
  \item{ts,newdata}{test set (prediction) data matrix, with rows
    corresponding to cases and columns to predictor variables.}
  \item{pool}{logical flag.  If true (the default), the covariance
    matrices are assumed to be constant across classes and the
    discriminant rule is linear in the data.  Otherwise
    (\code{pool = FALSE}), the covariance matrices may vary across
    classes and the discriminant rule is quadratic in the data.}
  \item{\dots}{further arguments passed to and from methods.}
}
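\details{
  The following is only a sketch of the classification rule, in the
  spirit of Dudoit et al. (2000); the actual implementation additionally
  has to deal with variables of zero variance, see the last example below.

  Let \eqn{\bar{x}_{kj}}{xbar_kj} and \eqn{s^2_{kj}}{s_kj^2} denote the
  sample mean and variance of variable \eqn{j} in class \eqn{k} of the
  learning set, and \eqn{s^2_j}{s_j^2} the corresponding pooled
  (within-class) variance.  For \code{pool = TRUE}, a test case
  \eqn{x = (x_1,\ldots,x_p)}{x = (x_1, ..., x_p)} is assigned to the
  class \eqn{k} minimizing
  \deqn{\sum_{j=1}^p \frac{(x_j - \bar{x}_{kj})^2}{s^2_j},
  }{sum_j (x_j - xbar_kj)^2 / s_j^2,}
  whereas for \code{pool = FALSE} it is assigned to the class \eqn{k}
  minimizing
  \deqn{\sum_{j=1}^p \left( \frac{(x_j - \bar{x}_{kj})^2}{s^2_{kj}} + \log s^2_{kj} \right).
  }{sum_j [ (x_j - xbar_kj)^2 / s_kj^2 + log(s_kj^2) ].}
}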
\value{
  \code{dDA()} returns an object of class \code{dDA} for which there are
  \code{\link{print}} and \code{\link{predict}} methods.  The latter
  returns the same as \code{diagDA()}:

  \code{diagDA()} returns an integer vector of class predictions for the
  test set.
}
\references{
  Dudoit, S., Fridlyand, J. and Speed, T. P. (2000)
  Comparison of Discrimination Methods for the Classification of Tumors
  Using Gene Expression Data.
  Technical Report \#576, Department of Statistics, University of
  California, Berkeley, June 2000.
}
\author{% grep:   Sandrine Dudoit, Jane Fridlyand, and Martin Maechler
  Sandrine Dudoit, \email{sandrine@stat.berkeley.edu}, and\cr
  Jane Fridlyand, \email{janef@stat.berkeley.edu}, originally wrote
  \code{stat.diag.da()} in CRAN package \CRANpkg{sma}, which was modified
  for speed-up by Martin Maechler \email{maechler@R-project.org},
  who also introduced \code{dDA} etc.
}
\seealso{\code{\link[MASS]{lda}} and \code{\link[MASS]{qda}} from the
  \CRANpkg{MASS} package;
  \code{\link[e1071]{naiveBayes}} from \CRANpkg{e1071}.
}
\examples{
## two artificial examples by Andreas Greutert:
d1 <- data.frame(x = c(1, 5, 5, 5, 10, 25, 25, 25, 25, 29),
                 y = c(4, 1, 2, 4,  4,  4,     6:8,     7))
n.plot(d1)
library(cluster)
(cl1P <- pam(d1, k = 4)$cluster) # 4 surprising clusters
with(d1, points(x + 0.5, y, col = cl1P, pch = cl1P))

i1 <- c(1,3,5,6)        # indices of the test cases
tr1 <- d1[-i1,]         # learning set: the remaining six cases
cl1. <- c(1,2,1,2,1,3)  # two different labelings of the learning set:
cl1  <- c(2,2,1,1,1,3)
plot(tr1, cex = 2, col = cl1, pch = 20 + cl1)
(dd. <- diagDA(tr1, cl1., ts = d1[ i1,])) # ok
(dd  <- diagDA(tr1, cl1 , ts = d1[ i1,])) # ok, too!
points(d1[ i1,], pch = 10, cex = 3, col = dd)

## use new fit + predict instead :
(r1  <- dDA(tr1, cl1))
(r1. <- dDA(tr1, cl1.))
stopifnot(dd  == predict(r1,  new = d1[ i1,]),
          dd. == predict(r1., new = d1[ i1,]))

plot(tr1, cex = 2, col = cl1, bg = cl1, pch = 20 + cl1,
     xlim = c(1,30), ylim = c(0,10))
xy <- cbind(x = runif(500, min = 1, max = 30),
            y = runif(500, min = 0, max = 10))
points(xy, cex = 0.5, col = predict(r1, new = xy))
abline(v = c(mean(c(5,25)), mean(c(25,29))))

## example where one variable xj has Var(xj) = 0:
x4 <- matrix(c(2:4,7, 6,8,5,6,  7,2,3,1, 7,7,7,7), ncol = 4)
y <- c(2,2, 1,1)
m4.1 <- dDA(x4, y, pool = FALSE) # quadratic rule (class-wise variances)
m4.2 <- dDA(x4, y, pool = TRUE)  # linear rule (pooled variances)
xx <- matrix(c(3,7,5,7), ncol = 4)
predict(m4.1, xx) ## gave integer(0) previously
predict(m4.2, xx)
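
## Purely illustrative: a manual re-computation of the pooled ("linear")
## rule on a fresh toy data set, assuming the diagonal-LDA rule of
## Dudoit et al. (2000) as sketched in the 'Details' section; all data
## and object names below are made up for this example only.
set.seed(7)
X  <- rbind(matrix(rnorm(40, mean = 0), ncol = 2),
            matrix(rnorm(40, mean = 3), ncol = 2))
cl <- rep(1:2, each = 20)
xnew <- rbind(c(0, 0), c(3, 3), c(1.5, 1.5))
K  <- max(cl)
## class means (rows = classes) :
mu <- t(sapply(1:K, function(k) colMeans(X[cl == k, , drop = FALSE])))
## pooled per-variable variances (the denominator does not affect the argmin):
s2 <- colSums((X - mu[cl, ])^2) / (nrow(X) - K)
man <- apply(xnew, 1, function(x) which.min(colSums((t(mu) - x)^2 / s2)))
cbind(manual = man, diagDA = diagDA(X, cl, xnew))  # should typically agree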
}
\keyword{models}
\keyword{classif}