\name{diagDA}
\title{Diagonal Discriminant Analysis}
\alias{diagDA}
\alias{dDA}
\alias{print.dDA}
\alias{predict.dDA}
\keyword{naive Bayes classifier}
\description{
  This function implements a simple Gaussian maximum likelihood
  discriminant rule, for diagonal class covariance matrices.

  In machine learning lingo, this is called \dQuote{Naive Bayes} (for
  continuous predictors).  Note that naive Bayes is more general, as it
  models discrete predictors as multinomial, i.e., binary predictor
  variables as binomial / Bernoulli.
}
\usage{
dDA(x, cll, pool = TRUE)
\method{predict}{dDA}(object, newdata, pool = object$pool, \dots)
\method{print}{dDA}(x, \dots)

diagDA(ls, cll, ts, pool = TRUE)
}
\arguments{
  \item{x,ls}{learning set data matrix, with rows corresponding to
    cases (e.g., mRNA samples) and columns to predictor variables
    (e.g., genes).}
  \item{cll}{class labels for the learning set; must be consecutive integers.}
  \item{object}{object of class \code{dDA}.}
  \item{ts, newdata}{test set (prediction) data matrix, with rows
    corresponding to cases and columns to predictor variables.}
  \item{pool}{logical flag.  If \code{TRUE} (the default), the covariance
    matrices are assumed to be constant across classes and the
    discriminant rule is linear in the data.  Otherwise
    (\code{pool = FALSE}), the covariance matrices may vary across
    classes and the discriminant rule is quadratic in the data.}
  \item{\dots}{further arguments passed to and from methods.}
}
\value{
  \code{dDA()} returns an object of class \code{dDA} for which there are
  \code{\link{print}} and \code{\link{predict}} methods.  The latter
  returns the same as \code{diagDA()}:

  \code{diagDA()} returns an integer vector of class predictions for the
  test set.
}
\references{
  S. Dudoit, J. Fridlyand, and T. P. Speed (2000)
  Comparison of Discrimination Methods for the Classification of Tumors
  Using Gene Expression Data.
  (Statistics, UC Berkeley, June 2000, Tech Report \#576)
}
\author{% grep: Sandrine Dudoit, Jane Fridlyand, and Martin Maechler
  Sandrine Dudoit, \email{sandrine@stat.berkeley.edu}, and\cr
  Jane Fridlyand, \email{janef@stat.berkeley.edu} originally wrote
  \code{stat.diag.da()} in CRAN package \CRANpkg{sma}, which was
  modified for speed-up by Martin Maechler \email{maechler@R-project.org},
  who also introduced \code{dDA()} and its methods.
}
\seealso{\code{\link[MASS]{lda}} and \code{\link[MASS]{qda}} from the
  \CRANpkg{MASS} package;
  \code{\link[e1071]{naiveBayes}} from \CRANpkg{e1071}.
}
\examples{
## two artificial examples by Andreas Greutert:
d1 <- data.frame(x = c(1, 5, 5, 5, 10, 25, 25, 25, 25, 29),
                 y = c(4, 1, 2, 4, 4, 4, 6:8, 7))
n.plot(d1)
library(cluster)
(cl1P <- pam(d1, k = 4)$cluster) # 4 surprising clusters
with(d1, points(x + 0.5, y, col = cl1P, pch = cl1P))

i1 <- c(1, 3, 5, 6)
tr1 <- d1[-i1, ]
cl1. <- c(1, 2, 1, 2, 1, 3)
cl1  <- c(2, 2, 1, 1, 1, 3)
plot(tr1, cex = 2, col = cl1, pch = 20 + cl1)
(dd. <- diagDA(tr1, cl1., ts = d1[i1, ])) # ok
(dd  <- diagDA(tr1, cl1 , ts = d1[i1, ])) # ok, too!
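## A by-hand check of the pooled rule (a sketch, assuming the "DLDA" rule
## of Dudoit et al.): predict the class k minimizing
##    sum_j (z_j - mean_kj)^2 / s_j^2 ,
## where s_j^2 is the pooled within-class variance of variable j.
## The names (X, Z, mu, s2, byHand) are ad hoc, not part of the package:
X <- data.matrix(tr1); Z <- data.matrix(d1[i1, ])
mu <- apply(X, 2, function(v) tapply(v, cl1., mean)) # class means (K x p)
s2 <- apply(X, 2, function(v) # pooled within-class variances, df = n - K
            sum(tapply(v, cl1., function(u) sum((u - mean(u))^2))) /
            (nrow(X) - length(unique(cl1.))))
byHand <- apply(Z, 1, function(z) which.min(colSums((t(mu) - z)^2 / s2)))
cbind(byHand, diagDA = dd.) # the two columns should coincide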
points(d1[i1, ], pch = 10, cex = 3, col = dd)

## use new fit + predict instead:
(r1  <- dDA(tr1, cl1))
(r1. <- dDA(tr1, cl1.))
stopifnot(dd  == predict(r1,  new = d1[i1, ]),
          dd. == predict(r1., new = d1[i1, ]))

plot(tr1, cex = 2, col = cl1, bg = cl1, pch = 20 + cl1,
     xlim = c(1, 30), ylim = c(0, 10))
xy <- cbind(x = runif(500, min = 1, max = 30),
            y = runif(500, min = 0, max = 10))
points(xy, cex = 0.5, col = predict(r1, new = xy))
abline(v = c(mean(c(5, 25)), mean(c(25, 29))))

## example where one variable xj has Var(xj) = 0:
x4 <- matrix(c(2:4,7,  6,8,5,6,  7,2,3,1,  7,7,7,7), ncol = 4)
y  <- c(2, 2, 1, 1)
m4.1 <- dDA(x4, y, pool = FALSE)
m4.2 <- dDA(x4, y, pool = TRUE)
xx <- matrix(c(3, 7, 5, 7), ncol = 4)
predict(m4.1, xx) ## gave integer(0) previously
predict(m4.2, xx)
}
\keyword{models}
\keyword{classif}