1library(sfsmisc)
2
3###--------------- "Iris Example for ever" ----------------------------
## Built-in iris data: the 'Species' factor supplies the true class labels
## (as integer codes), all remaining columns form the numeric feature matrix.
data(iris)
m.iris <- data.matrix(iris[, -match("Species", names(iris))])
cl.true <- as.integer(iris[["Species"]])
n <- length(cl.true)
## sanity check: the three classes come in consecutive runs of 50 rows each
stopifnot(cl.true == rep(1:3, times = rep(50L, 3L)))
9
## Start the stopwatch; the script reports the elapsed time at its very end.
.proctime00 <- proc.time()

## Self Prediction (the training data doubles as test data):
## Not too good (2+4 and 3+3 misclass.)
selfPred <- list(dflt   = diagDA(m.iris, cl.true, m.iris),
                 noPool = diagDA(m.iris, cl.true, m.iris, pool=FALSE))
## Confusion tables (predicted vs. true class).  The first argument is kept
## as an expression, not a bare symbol, so table() leaves that dimension
## unnamed in the printout.
table(selfPred[["dflt"]],   cl.true)
table(selfPred[["noPool"]], cl.true)
15
## Crossvalidation:  The same example as  knn() & knn1() from "class" :
## rows 1..25 of every species slice of 'iris3' are used for training,
## rows 26..50 for testing; 'cl' labels both halves (25 rows per class).
data(iris3)
train <- do.call(rbind, lapply(1:3, function(k) iris3[ 1:25, , k]))
test  <- do.call(rbind, lapply(1:3, function(k) iris3[26:50, , k]))
cl <- rep(1:3, times = rep(25L, 3L))

pcl <- diagDA(train, cl, test)
table(pcl, cl)## 0 + 1 + 2 misclassified
## knn (    k=1) has 0 + 1 + 3
## knn ( *, k=3) has 0 + 2 + 3   ==> ``diagDA() is best ..''

## the pool = FALSE ("quadratic") variant predicts identically here
pcl.noPool <- diagDA(train, cl, test, pool = FALSE)
stopifnot(pcl == pcl.noPool)
29
### Test 'NA' in the data that is predicted
## Fix the RNG behaviour so that sample() keeps returning the same draws:
RNGversion("3.5.0")
set.seed(753)
itr <- sample(n, 0.9 * n)
lrn <- m.iris[ itr,]
tst <- m.iris[-itr,]
dd <- dDA(lrn, cl.true[itr])
pd0 <- predict(dd, tst) # reference predictions for the complete test rows

## Blank out one randomly chosen cell in each of the test rows 'i.NA' ..
i.NA <- c(3:5,7,11)
j.NA <- sample(seq_len(ncol(tst)), size=length(i.NA), replace=TRUE)
tst[cbind(i.NA, j.NA)] <- NA
## .. and predict again, via the fitted object and via the one-step diagDA()
pdd <- predict(dd, tst)
pcl <- diagDA(lrn, cl.true[itr],  tst)
i.pNA <- which(is.na(pdd)) # rows for which the prediction is NA
stopifnot(length(pdd) == nrow(tst),
          identical(pdd, pcl),
          ## rows without NA must be predicted as before
          pdd[-i.NA] == pd0[-i.NA],
          ## Exactly the rows with an NA cell must give an NA prediction.
          ## The lengths are compared first: a bare 'i.pNA == i.NA' is
          ## logical(0) when nothing is NA, which stopifnot() accepts, and
          ## it recycles when the two index vectors differ in length.
          length(i.pNA) == length(i.NA),
          i.pNA == i.NA)
48
## Now do some (randomized) CV :
## for each observation, count how often it's misclassified while it is
## part of the held-out test set
M <- 200
set.seed(234)
missCl <- integer(n)
nTst <- 0L # total number of test-set predictions made over all M splits
for(m in seq_len(M)) {
    itr <- sample(n, 0.9 * n)
    lrn <- m.iris[ itr,]
    tst <- m.iris[-itr,]
    pcl <- diagDA(lrn, cl.true[itr],  tst)
    ## the two-step dDA() + predict() route must agree with diagDA()
    stopifnot(pcl == predict(dDA(lrn, cl.true[itr]),  tst))
    ## Update the held-out observations only.  Adding the short error vector
    ## to all of 'missCl' would silently recycle it (n is a multiple of the
    ## test-set size) and book the errors on the wrong observations.
    missCl[-itr] <- missCl[-itr] + as.integer(pcl != cl.true[-itr])
    nTst <- nTst + nrow(tst)
}
## per-observation error counts, then the overall test-set error rate
missCl ; sum(missCl) / nTst
63
## The "same" with  'pool=FALSE' :
## (no new seed is set: the random splits continue the current RNG stream)
missCl <- integer(n)
nTst <- 0L # total number of test-set predictions made over all M splits
for(m in seq_len(M)) {
    itr <- sample(n, 0.9 * n)
    lrn <- m.iris[ itr,]
    tst <- m.iris[-itr,]
    pcl <- diagDA(lrn, cl.true[itr],  tst, pool=FALSE)
    ## the two-step dDA() + predict() route must agree with diagDA()
    stopifnot(pcl == predict(dDA(lrn, cl.true[itr], pool=FALSE),  tst))
    ## Update the held-out observations only.  Adding the short error vector
    ## to all of 'missCl' would silently recycle it (n is a multiple of the
    ## test-set size) and book the errors on the wrong observations.
    missCl[-itr] <- missCl[-itr] + as.integer(pcl != cl.true[-itr])
    nTst <- nTst + nrow(tst)
}
## per-observation error counts, then the overall test-set error rate
missCl ; sum(missCl) / nTst ## here somewhat worse than linear
75
## Report the time used since '.proctime00' was recorded.
.elapsed00 <- proc.time() - .proctime00
cat('Time elapsed: ', .elapsed00, '\n')
77
78