library(sfsmisc)

###--------------- "Iris Example for ever" ----------------------------
## Checks of diagDA(), dDA() and predict(<dDA>) on the iris data.
data(iris)
cl.true <- as.integer(iris[, "Species"])   # class labels coded 1, 2, 3
n <- length(cl.true)
stopifnot(cl.true == rep(1:3, each = 50))
m.iris <- data.matrix(iris[, 1:4])         # the four numeric measurements

.proctime00 <- proc.time()                 # start of timing, reported at the end

## Self Prediction: Not too good (2+4 and 3+3 misclass.)
table(diagDA(m.iris, cl.true, m.iris), cl.true)
table(diagDA(m.iris, cl.true, m.iris, pool=FALSE), cl.true)

## Crossvalidation: The same example as knn() & knn1() from "class" :
data(iris3)
train <- rbind(iris3[1:25,,1],  iris3[1:25,,2],  iris3[1:25,,3])
test  <- rbind(iris3[26:50,,1], iris3[26:50,,2], iris3[26:50,,3])
cl <- rep(1:3, each = 25)

pcl <- diagDA(train, cl, test)
table(pcl, cl)## 0 + 1 + 2 misclassified
## knn (   k=1) has 0 + 1 + 3
## knn ( *, k=3) has 0 + 2 + 3  ==> ``diagDA() is best ..''

stopifnot(pcl == diagDA(train, cl, test, pool = FALSE))
 # i.e. quadratic identical here

### Test 'NA' in predict dat.fr
RNGversion("3.5.0")# -- so w/ sample() still stays unchanged:
set.seed(753)
itr <- sample(n, 0.9 * n)
lrn <- m.iris[ itr,]
tst <- m.iris[-itr,]
dd  <- dDA(lrn, cl.true[itr])
pd0 <- predict(dd, tst)                    # reference predictions without NAs

## Put one NA into each of the test rows 'i.NA' (in a random column)
i.NA <- c(3:5,7,11)
j.NA <- sample(seq_len(ncol(tst)), size=length(i.NA), replace=TRUE)
tst[cbind(i.NA, j.NA)] <- NA
pdd <- predict(dd, tst)
pcl <- diagDA(lrn, cl.true[itr], tst)
stopifnot(length(pdd) == nrow(tst),
          identical(pdd, pcl),
          pdd[-i.NA] == pd0[-i.NA],
          ## Exact comparison: a plain '==' would pass vacuously if which()
          ## returned integer(0), since stopifnot(logical(0)) succeeds.
          identical(unname(which(is.na(pdd))), as.integer(i.NA)))

## Now do some (randomized) CV :
## for each observation, count how often it's misclassified
M <- 200
set.seed(234)
missCl  <- integer(n)   # per observation: number of misclassifications
nTested <- integer(n)   # per observation: number of times it was held out
for(m in seq_len(M)) {
    itr <- sample(n, 0.9 * n)
    lrn <- m.iris[ itr,]
    tst <- m.iris[-itr,]
    pcl <- diagDA(lrn, cl.true[itr], tst)
    stopifnot(pcl == predict(dDA(lrn, cl.true[itr]), tst))
    ## Update only the held-out observations.  Adding the short result to the
    ## full-length 'missCl' would be silently recycled (n is a multiple of
    ## the test-set size) and attribute errors to the wrong observations.
    missCl[-itr]  <- missCl[-itr]  + as.integer(pcl != cl.true[-itr])
    nTested[-itr] <- nTested[-itr] + 1L
}
## Per-observation counts, then the overall misclassification rate
## (misclassified test predictions / all test predictions):
missCl ; sum(missCl) / sum(nTested)

## The "same" with 'pool=FALSE' :
missCl  <- integer(n)   # per observation: number of misclassifications
nTested <- integer(n)   # per observation: number of times it was held out
for(m in seq_len(M)) {
    itr <- sample(n, 0.9 * n)
    lrn <- m.iris[ itr,]
    tst <- m.iris[-itr,]
    pcl <- diagDA(lrn, cl.true[itr], tst, pool=FALSE)
    stopifnot(pcl == predict(dDA(lrn, cl.true[itr], pool=FALSE), tst))
    ## Update only the held-out observations.  Adding the short result to the
    ## full-length 'missCl' would be silently recycled (n is a multiple of
    ## the test-set size) and attribute errors to the wrong observations.
    missCl[-itr]  <- missCl[-itr]  + as.integer(pcl != cl.true[-itr])
    nTested[-itr] <- nTested[-itr] + 1L
}
## Per-observation counts, then the overall misclassification rate
## (misclassified test predictions / all test predictions):
missCl ; sum(missCl) / sum(nTested) ## here somewhat worse than linear

cat('Time elapsed: ', proc.time() - .proctime00,'\n')