1 /** 2 * @file tests/adaboost_test.cpp 3 * @author Udit Saxena 4 * 5 * Tests for AdaBoost class. 6 * 7 * mlpack is free software; you may redistribute it and/or modify it under the 8 * terms of the 3-clause BSD license. You should have received a copy of the 9 * 3-clause BSD license along with mlpack. If not, see 10 * http://www.opensource.org/licenses/BSD-3-Clause for more information. 11 */ 12 #include <mlpack/core.hpp> 13 #include <mlpack/methods/adaboost/adaboost.hpp> 14 15 #include "serialization_catch.hpp" 16 #include "test_catch_tools.hpp" 17 #include "catch.hpp" 18 19 using namespace arma; 20 using namespace mlpack; 21 using namespace mlpack::adaboost; 22 using namespace mlpack::tree; 23 using namespace mlpack::perceptron; 24 25 /** 26 * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It 27 * checks whether the hamming loss breaches the upperbound, which is provided by 28 * ztAccumulator. 29 */ 30 TEST_CASE("HammingLossBoundIris", "[AdaBoostTest]") 31 { 32 arma::mat inputData; 33 34 if (!data::Load("iris.csv", inputData)) 35 FAIL("Cannot load test dataset iris.csv!"); 36 37 arma::Mat<size_t> labels; 38 if (!data::Load("iris_labels.txt", labels)) 39 FAIL("Cannot load labels for iris iris_labels.txt"); 40 41 const size_t numClasses = max(labels.row(0)) + 1; 42 43 // Define your own weak learner, perceptron in this case. 44 // Run the perceptron for perceptronIter iterations. 45 int perceptronIter = 400; 46 47 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 48 49 // Define parameters for AdaBoost. 50 size_t iterations = 100; 51 double tolerance = 1e-10; 52 AdaBoost<> a(tolerance); 53 double ztProduct = a.Train(inputData, labels.row(0), numClasses, p, 54 iterations, tolerance); 55 56 arma::Row<size_t> predictedLabels; 57 a.Classify(inputData, predictedLabels); 58 59 size_t countError = arma::accu(labels != predictedLabels); 60 double hammingLoss = (double) countError / labels.n_cols; 61 62 // Check that ztProduct is finite. 63 REQUIRE(std::isfinite(ztProduct) == true); 64 REQUIRE(hammingLoss <= ztProduct); 65 } 66 67 /** 68 * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It 69 * checks if the error returned by running a single instance of the weak learner 70 * is worse than running the boosted weak learner using adaboost. 71 */ 72 TEST_CASE("WeakLearnerErrorIris", "[AdaBoostTest]") 73 { 74 arma::mat inputData; 75 76 if (!data::Load("iris.csv", inputData)) 77 FAIL("Cannot load test dataset iris.csv!"); 78 79 arma::Mat<size_t> labels; 80 81 if (!data::Load("iris_labels.txt", labels)) 82 FAIL("Cannot load labels for iris iris_labels.txt"); 83 84 const size_t numClasses = max(labels.row(0)) + 1; 85 86 // Define your own weak learner, perceptron in this case. 87 // Run the perceptron for perceptronIter iterations. 88 int perceptronIter = 400; 89 90 arma::Row<size_t> perceptronPrediction(labels.n_cols); 91 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 92 p.Classify(inputData, perceptronPrediction); 93 94 size_t countWeakLearnerError = arma::accu(labels != perceptronPrediction); 95 double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols; 96 97 // Define parameters for AdaBoost. 98 size_t iterations = 100; 99 double tolerance = 1e-10; 100 AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance); 101 102 arma::Row<size_t> predictedLabels; 103 a.Classify(inputData, predictedLabels); 104 105 size_t countError = arma::accu(labels != predictedLabels);; 106 double error = (double) countError / labels.n_cols; 107 108 REQUIRE(error <= weakLearnerErrorRate); 109 } 110 111 /** 112 * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column 113 * dataset. It checks whether the hamming loss breaches the upperbound, which 114 * is provided by ztAccumulator. 115 */ 116 TEST_CASE("HammingLossBoundVertebralColumn", "[AdaBoostTest]") 117 { 118 arma::mat inputData; 119 if (!data::Load("vc2.csv", inputData)) 120 FAIL("Cannot load test dataset vc2.csv!"); 121 122 arma::Mat<size_t> labels; 123 if (!data::Load("vc2_labels.txt", labels)) 124 FAIL("Cannot load labels for vc2_labels.txt"); 125 126 const size_t numClasses = max(labels.row(0)) + 1; 127 128 // Define your own weak learner, perceptron in this case. 129 // Run the perceptron for perceptronIter iterations. 130 size_t perceptronIter = 800; 131 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 132 133 // Define parameters for AdaBoost. 134 size_t iterations = 50; 135 double tolerance = 1e-10; 136 AdaBoost<> a(tolerance); 137 double ztProduct = a.Train(inputData, labels.row(0), numClasses, p, 138 iterations, tolerance); 139 140 arma::Row<size_t> predictedLabels; 141 a.Classify(inputData, predictedLabels); 142 143 size_t countError = arma::accu(labels != predictedLabels); 144 double hammingLoss = (double) countError / labels.n_cols; 145 146 // Check that ztProduct is finite. 147 REQUIRE(std::isfinite(ztProduct) == true); 148 REQUIRE(hammingLoss <= ztProduct); 149 } 150 151 /** 152 * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column 153 * dataset. It checks if the error returned by running a single instance of the 154 * weak learner is worse than running the boosted weak learner using adaboost. 155 */ 156 TEST_CASE("WeakLearnerErrorVertebralColumn", "[AdaBoostTest]") 157 { 158 arma::mat inputData; 159 if (!data::Load("vc2.csv", inputData)) 160 FAIL("Cannot load test dataset vc2.csv!"); 161 162 arma::Mat<size_t> labels; 163 if (!data::Load("vc2_labels.txt", labels)) 164 FAIL("Cannot load labels for vc2_labels.txt"); 165 166 const size_t numClasses = max(labels.row(0)) + 1; 167 168 // Define your own weak learner, perceptron in this case. 169 // Run the perceptron for perceptronIter iterations. 170 size_t perceptronIter = 800; 171 172 Row<size_t> perceptronPrediction(labels.n_cols); 173 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 174 p.Classify(inputData, perceptronPrediction); 175 176 size_t countWeakLearnerError = arma::accu(labels != perceptronPrediction); 177 double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols; 178 179 // Define parameters for AdaBoost. 180 size_t iterations = 50; 181 double tolerance = 1e-10; 182 AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance); 183 184 arma::Row<size_t> predictedLabels; 185 a.Classify(inputData, predictedLabels); 186 187 size_t countError = arma::accu(labels != predictedLabels); 188 double error = (double) countError / labels.n_cols; 189 190 REQUIRE(error <= weakLearnerErrorRate); 191 } 192 193 /** 194 * This test case runs the AdaBoost.mh algorithm on non-linearly separable 195 * dataset. It checks whether the hamming loss breaches the upperbound, which 196 * is provided by ztAccumulator. 197 */ 198 TEST_CASE("HammingLossBoundNonLinearSepData", "[AdaBoostTest]") 199 { 200 arma::mat inputData; 201 if (!data::Load("train_nonlinsep.txt", inputData)) 202 FAIL("Cannot load test dataset train_nonlinsep.txt!"); 203 204 arma::Mat<size_t> labels; 205 if (!data::Load("train_labels_nonlinsep.txt", labels)) 206 FAIL("Cannot load labels for train_labels_nonlinsep.txt"); 207 208 const size_t numClasses = max(labels.row(0)) + 1; 209 210 // Define your own weak learner, perceptron in this case. 211 // Run the perceptron for perceptronIter iterations. 212 size_t perceptronIter = 800; 213 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 214 215 // Define parameters for AdaBoost. 216 size_t iterations = 50; 217 double tolerance = 1e-10; 218 AdaBoost<> a(tolerance); 219 double ztProduct = a.Train(inputData, labels.row(0), numClasses, p, 220 iterations, tolerance); 221 222 arma::Row<size_t> predictedLabels; 223 a.Classify(inputData, predictedLabels); 224 225 size_t countError = arma::accu(labels == predictedLabels); 226 double hammingLoss = (double) countError / labels.n_cols; 227 228 // Check that ztProduct is finite. 229 REQUIRE(std::isfinite(ztProduct) <= true); 230 REQUIRE(hammingLoss <= ztProduct); 231 } 232 233 /** 234 * This test case runs the AdaBoost.mh algorithm on a non-linearly separable 235 * dataset. It checks if the error returned by running a single instance of the 236 * weak learner is worse than running the boosted weak learner using AdaBoost. 237 */ 238 TEST_CASE("WeakLearnerErrorNonLinearSepData", "[AdaBoostTest]") 239 { 240 arma::mat inputData; 241 if (!data::Load("train_nonlinsep.txt", inputData)) 242 FAIL("Cannot load test dataset train_nonlinsep.txt!"); 243 244 arma::Mat<size_t> labels; 245 if (!data::Load("train_labels_nonlinsep.txt", labels)) 246 FAIL("Cannot load labels for train_labels_nonlinsep.txt"); 247 248 const size_t numClasses = max(labels.row(0)) + 1; 249 250 // Define your own weak learner, perceptron in this case. 251 // Run the perceptron for perceptronIter iterations. 252 size_t perceptronIter = 800; 253 254 Row<size_t> perceptronPrediction(labels.n_cols); 255 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 256 p.Classify(inputData, perceptronPrediction); 257 258 size_t countWeakLearnerError = arma::accu(labels != perceptronPrediction); 259 double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols; 260 261 // Define parameters for AdaBoost. 262 size_t iterations = 50; 263 double tolerance = 1e-10; 264 AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance); 265 266 arma::Row<size_t> predictedLabels; 267 a.Classify(inputData, predictedLabels); 268 269 size_t countError = arma::accu(labels != predictedLabels); 270 double error = (double) countError / labels.n_cols; 271 272 REQUIRE(error == weakLearnerErrorRate); 273 } 274 275 /** 276 * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It 277 * checks whether the Hamming loss breaches the upper bound, which is provided 278 * by ztAccumulator. This uses decision stumps as the weak learner. 279 */ 280 TEST_CASE("HammingLossIris_DS", "[AdaBoostTest]") 281 { 282 arma::mat inputData; 283 if (!data::Load("iris.csv", inputData)) 284 FAIL("Cannot load test dataset iris.csv!"); 285 286 arma::Mat<size_t> labels; 287 if (!data::Load("iris_labels.txt", labels)) 288 FAIL("Cannot load labels for iris_labels.txt"); 289 290 // Define your own weak learner, decision stumps in this case. 291 const size_t numClasses = 3; 292 const size_t inpBucketSize = 6; 293 arma::Row<size_t> labelsvec = labels.row(0); 294 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 295 296 // Define parameters for AdaBoost. 297 size_t iterations = 50; 298 double tolerance = 1e-10; 299 AdaBoost<ID3DecisionStump> a(tolerance); 300 double ztProduct = a.Train(inputData, labelsvec, numClasses, ds, 301 iterations, tolerance); 302 303 arma::Row<size_t> predictedLabels; 304 a.Classify(inputData, predictedLabels); 305 306 size_t countError = arma::accu(labels != predictedLabels); 307 double hammingLoss = (double) countError / labels.n_cols; 308 309 // Check that ztProduct is finite. 310 REQUIRE(std::isfinite(ztProduct) == true); 311 REQUIRE(hammingLoss <= ztProduct); 312 } 313 314 /** 315 * This test case runs the AdaBoost.mh algorithm on a non-linearly separable 316 * dataset. It checks if the error returned by running a single instance of the 317 * weak learner is worse than running the boosted weak learner using adaboost. 318 * This is for the weak learner: decision stumps. 319 */ 320 TEST_CASE("WeakLearnerErrorIris_DS", "[AdaBoostTest]") 321 { 322 arma::mat inputData; 323 if (!data::Load("iris.csv", inputData)) 324 FAIL("Cannot load test dataset iris.csv!"); 325 326 arma::Mat<size_t> labels; 327 if (!data::Load("iris_labels.txt", labels)) 328 FAIL("Cannot load labels for iris_labels.txt"); 329 330 // no need to map the labels here 331 332 // Define your own weak learner, decision stumps in this case. 333 const size_t numClasses = 3; 334 const size_t inpBucketSize = 6; 335 arma::Row<size_t> labelsvec = labels.row(0); 336 337 arma::Row<size_t> dsPrediction(labels.n_cols); 338 339 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 340 ds.Classify(inputData, dsPrediction); 341 342 size_t countWeakLearnerError = arma::accu(labels != dsPrediction); 343 double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols; 344 345 // Define parameters for AdaBoost. 346 size_t iterations = 50; 347 double tolerance = 1e-10; 348 349 AdaBoost<ID3DecisionStump> a(inputData, labelsvec, numClasses, ds, 350 iterations, tolerance); 351 352 arma::Row<size_t> predictedLabels; 353 a.Classify(inputData, predictedLabels); 354 355 size_t countError = arma::accu(labels != predictedLabels); 356 double error = (double) countError / labels.n_cols; 357 358 REQUIRE(error <= weakLearnerErrorRate); 359 } 360 361 /** 362 * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column 363 * dataset. It checks if the error returned by running a single instance of the 364 * weak learner is worse than running the boosted weak learner using adaboost. 365 * This is for the weak learner: decision stumps. 366 */ 367 TEST_CASE("HammingLossBoundVertebralColumn_DS", "[AdaBoostTest]") 368 { 369 arma::mat inputData; 370 if (!data::Load("vc2.csv", inputData)) 371 FAIL("Cannot load test dataset vc2.csv!"); 372 373 arma::Mat<size_t> labels; 374 if (!data::Load("vc2_labels.txt", labels)) 375 FAIL("Cannot load labels for vc2_labels.txt"); 376 377 // Define your own weak learner, decision stumps in this case. 378 const size_t numClasses = 3; 379 const size_t inpBucketSize = 6; 380 arma::Row<size_t> labelsvec = labels.row(0); 381 382 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 383 384 // Define parameters for AdaBoost. 385 size_t iterations = 50; 386 double tolerance = 1e-10; 387 388 AdaBoost<ID3DecisionStump> a(tolerance); 389 double ztProduct = a.Train(inputData, labelsvec, numClasses, ds, 390 iterations, tolerance); 391 392 arma::Row<size_t> predictedLabels; 393 a.Classify(inputData, predictedLabels); 394 395 size_t countError = arma::accu(labels != predictedLabels); 396 double hammingLoss = (double) countError / labels.n_cols; 397 398 // Check that ztProduct is finite. 399 REQUIRE(std::isfinite(ztProduct) == true); 400 REQUIRE(hammingLoss <= ztProduct); 401 } 402 403 /** 404 * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column 405 * dataset. It checks if the error returned by running a single instance of the 406 * weak learner is worse than running the boosted weak learner using adaboost. 407 * This is for the weak learner: decision stumps. 408 */ 409 TEST_CASE("WeakLearnerErrorVertebralColumn_DS", "[AdaBoostTest]") 410 { 411 arma::mat inputData; 412 if (!data::Load("vc2.csv", inputData)) 413 FAIL("Cannot load test dataset vc2.csv!"); 414 415 arma::Mat<size_t> labels; 416 if (!data::Load("vc2_labels.txt", labels)) 417 FAIL("Cannot load labels for vc2_labels.txt"); 418 419 // Define your own weak learner, decision stumps in this case. 420 const size_t numClasses = 3; 421 const size_t inpBucketSize = 6; 422 arma::Row<size_t> dsPrediction(labels.n_cols); 423 arma::Row<size_t> labelsvec = labels.row(0); 424 425 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 426 ds.Classify(inputData, dsPrediction); 427 428 size_t countWeakLearnerError = arma::accu(labels != dsPrediction); 429 double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols; 430 431 // Define parameters for AdaBoost. 432 size_t iterations = 50; 433 double tolerance = 1e-10; 434 AdaBoost<ID3DecisionStump> a(inputData, labelsvec, numClasses, ds, 435 iterations, tolerance); 436 437 arma::Row<size_t> predictedLabels; 438 a.Classify(inputData, predictedLabels); 439 440 size_t countError = arma::accu(labels != predictedLabels); 441 double error = (double) countError / labels.n_cols; 442 443 REQUIRE(error <= weakLearnerErrorRate); 444 } 445 446 /** 447 * This test case runs the AdaBoost.mh algorithm on non-linearly separable 448 * dataset. It checks whether the hamming loss breaches the upperbound, which 449 * is provided by ztAccumulator. This is for the weak learner: decision stumps. 450 */ 451 TEST_CASE("HammingLossBoundNonLinearSepData_DS", "[AdaBoostTest]") 452 { 453 arma::mat inputData; 454 if (!data::Load("train_nonlinsep.txt", inputData)) 455 FAIL("Cannot load test dataset train_nonlinsep.txt!"); 456 457 arma::Mat<size_t> labels; 458 if (!data::Load("train_labels_nonlinsep.txt", labels)) 459 FAIL("Cannot load labels for train_labels_nonlinsep.txt"); 460 461 // Define your own weak learner, decision stumps in this case. 462 const size_t numClasses = 2; 463 const size_t inpBucketSize = 6; 464 arma::Row<size_t> labelsvec = labels.row(0); 465 466 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 467 468 // Define parameters for Adaboost. 469 size_t iterations = 50; 470 double tolerance = 1e-10; 471 472 AdaBoost<ID3DecisionStump> a(tolerance); 473 double ztProduct = a.Train(inputData, labelsvec, numClasses, ds, 474 iterations, tolerance); 475 476 arma::Row<size_t> predictedLabels; 477 a.Classify(inputData, predictedLabels); 478 479 size_t countError = arma::accu(labels != predictedLabels); 480 double hammingLoss = (double) countError / labels.n_cols; 481 482 // Check that ztProduct is finite. 483 REQUIRE(std::isfinite(ztProduct) == true); 484 REQUIRE(hammingLoss <= ztProduct); 485 } 486 487 /** 488 * This test case runs the AdaBoost.mh algorithm on a non-linearly separable 489 * dataset. It checks if the error returned by running a single instance of the 490 * weak learner is worse than running the boosted weak learner using adaboost. 491 * This for the weak learner: decision stumps. 492 */ 493 TEST_CASE("WeakLearnerErrorNonLinearSepData_DS", "[AdaBoostTest]") 494 { 495 arma::mat inputData; 496 if (!data::Load("train_nonlinsep.txt", inputData)) 497 FAIL("Cannot load test dataset train_nonlinsep.txt!"); 498 499 arma::Mat<size_t> labels; 500 if (!data::Load("train_labels_nonlinsep.txt", labels)) 501 FAIL("Cannot load labels for train_labels_nonlinsep.txt"); 502 503 // Define your own weak learner, decision stumps in this case. 504 const size_t numClasses = 2; 505 const size_t inpBucketSize = 3; 506 arma::Row<size_t> labelsvec = labels.row(0); 507 508 arma::Row<size_t> dsPrediction(labels.n_cols); 509 510 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 511 ds.Classify(inputData, dsPrediction); 512 513 size_t countWeakLearnerError = arma::accu(labels != dsPrediction); 514 double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols; 515 516 // Define parameters for AdaBoost. 517 size_t iterations = 500; 518 double tolerance = 1e-23; 519 520 AdaBoost<ID3DecisionStump > a(inputData, labelsvec, numClasses, ds, 521 iterations, tolerance); 522 523 arma::Row<size_t> predictedLabels; 524 a.Classify(inputData, predictedLabels); 525 526 size_t countError = arma::accu(labels != predictedLabels); 527 double error = (double) countError / labels.n_cols; 528 529 REQUIRE(error <= weakLearnerErrorRate); 530 } 531 532 /** 533 * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column 534 * dataset. It tests the Classify function and checks for a satisfactory error 535 * rate. 536 */ 537 TEST_CASE("ClassifyTest_VERTEBRALCOL", "[AdaBoostTest]") 538 { 539 arma::mat inputData; 540 if (!data::Load("vc2.csv", inputData)) 541 FAIL("Cannot load test dataset vc2.csv!"); 542 543 arma::Mat<size_t> labels; 544 if (!data::Load("vc2_labels.txt", labels)) 545 FAIL("Cannot load labels for vc2_labels.txt"); 546 547 // Define your own weak learner, perceptron in this case. 548 // Run the perceptron for perceptronIter iterations. 549 size_t perceptronIter = 1000; 550 551 arma::mat testData; 552 553 if (!data::Load("vc2_test.csv", testData)) 554 FAIL("Cannot load test dataset vc2_test.csv!"); 555 556 arma::Mat<size_t> trueTestLabels; 557 558 if (!data::Load("vc2_test_labels.txt", trueTestLabels)) 559 FAIL("Cannot load labels for vc2_test_labels.txt"); 560 561 const size_t numClasses = max(labels.row(0)) + 1; 562 563 Row<size_t> perceptronPrediction(labels.n_cols); 564 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 565 p.Classify(inputData, perceptronPrediction); 566 567 // Define parameters for AdaBoost. 568 size_t iterations = 100; 569 double tolerance = 1e-10; 570 AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance); 571 572 arma::Row<size_t> predictedLabels1(testData.n_cols), 573 predictedLabels2(testData.n_cols); 574 arma::mat probabilities; 575 576 a.Classify(testData, predictedLabels1); 577 a.Classify(testData, predictedLabels2, probabilities); 578 579 REQUIRE(probabilities.n_cols == testData.n_cols); 580 REQUIRE(probabilities.n_rows == numClasses); 581 582 for (size_t i = 0; i < predictedLabels1.n_cols; ++i) 583 REQUIRE(predictedLabels1[i] == predictedLabels2[i]); 584 585 arma::colvec pRow; 586 arma::uword maxIndex = 0; 587 588 for (size_t i = 0; i < predictedLabels1.n_cols; ++i) 589 { 590 pRow = probabilities.unsafe_col(i); 591 pRow.max(maxIndex); 592 REQUIRE(predictedLabels1(i) == maxIndex); 593 REQUIRE(arma::accu(probabilities.col(i)) == Approx(1).epsilon(1e-7)); 594 } 595 596 size_t localError = arma::accu(trueTestLabels != predictedLabels1); 597 double lError = (double) localError / trueTestLabels.n_cols; 598 REQUIRE(lError <= 0.30); 599 } 600 601 /** 602 * This test case runs the AdaBoost.mh algorithm on a non linearly separable 603 * dataset. It tests the Classify function and checks for a satisfactory error 604 * rate. 605 */ 606 TEST_CASE("ClassifyTest_NONLINSEP", "[AdaBoostTest]") 607 { 608 arma::mat inputData; 609 if (!data::Load("train_nonlinsep.txt", inputData)) 610 FAIL("Cannot load test dataset train_nonlinsep.txt!"); 611 612 arma::Mat<size_t> labels; 613 if (!data::Load("train_labels_nonlinsep.txt", labels)) 614 FAIL("Cannot load labels for train_labels_nonlinsep.txt"); 615 616 // Define your own weak learner; in this test decision stumps are used. 617 const size_t numClasses = 2; 618 const size_t inpBucketSize = 3; 619 arma::Row<size_t> labelsvec = labels.row(0); 620 621 arma::mat testData; 622 623 if (!data::Load("test_nonlinsep.txt", testData)) 624 FAIL("Cannot load test dataset test_nonlinsep.txt!"); 625 626 arma::Mat<size_t> trueTestLabels; 627 628 if (!data::Load("test_labels_nonlinsep.txt", trueTestLabels)) 629 FAIL("Cannot load labels for test_labels_nonlinsep.txt"); 630 631 arma::Row<size_t> dsPrediction(labels.n_cols); 632 633 ID3DecisionStump ds(inputData, labelsvec, numClasses, inpBucketSize); 634 635 // Define parameters for AdaBoost. 636 size_t iterations = 50; 637 double tolerance = 1e-10; 638 AdaBoost<ID3DecisionStump > a(inputData, labelsvec, numClasses, ds, 639 iterations, tolerance); 640 641 arma::Row<size_t> predictedLabels1(testData.n_cols), 642 predictedLabels2(testData.n_cols); 643 arma::mat probabilities; 644 645 a.Classify(testData, predictedLabels1); 646 a.Classify(testData, predictedLabels2, probabilities); 647 648 REQUIRE(probabilities.n_cols == testData.n_cols); 649 650 for (size_t i = 0; i < predictedLabels1.n_cols; ++i) 651 REQUIRE(predictedLabels1[i] == predictedLabels2[i]); 652 653 arma::colvec pRow; 654 arma::uword maxIndex = 0; 655 656 for (size_t i = 0; i < predictedLabels1.n_cols; ++i) 657 { 658 pRow = probabilities.unsafe_col(i); 659 pRow.max(maxIndex); 660 REQUIRE(predictedLabels1(i) == maxIndex); 661 REQUIRE(arma::accu(probabilities.col(i)) == Approx(1).epsilon(1e-7)); 662 } 663 664 size_t localError = arma::accu(trueTestLabels != predictedLabels1); 665 double lError = (double) localError / trueTestLabels.n_cols; 666 REQUIRE(lError <= 0.30); 667 } 668 669 /** 670 * This test case runs the AdaBoost.mh algorithm on the UCI Iris Dataset. It 671 * trains it on two thirds of the Iris dataset (iris_train.csv), and tests on 672 * the remaining third of the dataset (iris_test.csv). It tests the Classify() 673 * function and checks for a satisfactory error rate. 674 */ 675 TEST_CASE("ClassifyTest_IRIS", "[AdaBoostTest]") 676 { 677 arma::mat inputData; 678 if (!data::Load("iris_train.csv", inputData)) 679 FAIL("Cannot load test dataset iris_train.csv!"); 680 681 arma::Mat<size_t> labels; 682 if (!data::Load("iris_train_labels.csv", labels)) 683 FAIL("Cannot load labels for iris_train_labels.csv"); 684 const size_t numClasses = max(labels.row(0)) + 1; 685 686 // Define your own weak learner, perceptron in this case. 687 // Run the perceptron for perceptronIter iterations. 688 size_t perceptronIter = 800; 689 690 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 691 692 // Define parameters for AdaBoost. 693 size_t iterations = 50; 694 double tolerance = 1e-10; 695 AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance); 696 697 arma::mat testData; 698 if (!data::Load("iris_test.csv", testData)) 699 FAIL("Cannot load test dataset iris_test.csv!"); 700 701 arma::Row<size_t> predictedLabels(testData.n_cols); 702 a.Classify(testData, predictedLabels); 703 704 arma::Mat<size_t> trueTestLabels; 705 if (!data::Load("iris_test_labels.csv", trueTestLabels)) 706 FAIL("Cannot load test dataset iris_test_labels.csv!"); 707 708 arma::Row<size_t> predictedLabels1(testData.n_cols), 709 predictedLabels2(testData.n_cols); 710 arma::mat probabilities; 711 712 a.Classify(testData, predictedLabels1); 713 a.Classify(testData, predictedLabels2, probabilities); 714 715 REQUIRE(probabilities.n_cols == testData.n_cols); 716 717 for (size_t i = 0; i < predictedLabels1.n_cols; ++i) 718 REQUIRE(predictedLabels1[i] == predictedLabels2[i]); 719 720 arma::colvec pRow; 721 arma::uword maxIndex = 0; 722 723 for (size_t i = 0; i < predictedLabels1.n_cols; ++i) 724 { 725 pRow = probabilities.unsafe_col(i); 726 pRow.max(maxIndex); 727 REQUIRE(predictedLabels1(i) == maxIndex); 728 REQUIRE(arma::accu(probabilities.col(i)) == Approx(1).epsilon(1e-7)); 729 } 730 731 size_t localError = arma::accu(trueTestLabels != predictedLabels1); 732 double lError = (double) localError / labels.n_cols; 733 REQUIRE(lError <= 0.30); 734 } 735 736 /** 737 * Ensure that the Train() function works like it is supposed to, by building 738 * AdaBoost on one dataset and then re-training on another dataset. 739 */ 740 TEST_CASE("TrainTest", "[AdaBoostTest]") 741 { 742 // First train on the iris dataset. 743 arma::mat inputData; 744 if (!data::Load("iris_train.csv", inputData)) 745 FAIL("Cannot load test dataset iris_train.csv!"); 746 747 arma::Mat<size_t> labels; 748 if (!data::Load("iris_train_labels.csv", labels)) 749 FAIL("Cannot load labels for iris_train_labels.csv"); 750 751 const size_t numClasses = max(labels.row(0)) + 1; 752 753 size_t perceptronIter = 800; 754 Perceptron<> p(inputData, labels.row(0), numClasses, perceptronIter); 755 756 // Now train AdaBoost. 757 size_t iterations = 50; 758 double tolerance = 1e-10; 759 AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance); 760 761 // Now load another dataset... 762 if (!data::Load("vc2.csv", inputData)) 763 FAIL("Cannot load test dataset vc2.csv!"); 764 if (!data::Load("vc2_labels.txt", labels)) 765 FAIL("Cannot load labels for vc2_labels.txt"); 766 767 const size_t newNumClasses = max(labels.row(0)) + 1; 768 769 Perceptron<> p2(inputData, labels.row(0), newNumClasses, perceptronIter); 770 771 a.Train(inputData, labels.row(0), newNumClasses, p2, iterations, tolerance); 772 773 // Load test set to see if it trained on vc2 correctly. 774 arma::mat testData; 775 if (!data::Load("vc2_test.csv", testData)) 776 FAIL("Cannot load test dataset vc2_test.csv!"); 777 778 arma::Mat<size_t> trueTestLabels; 779 if (!data::Load("vc2_test_labels.txt", trueTestLabels)) 780 FAIL("Cannot load labels for vc2_test_labels.txt"); 781 782 // Define parameters for AdaBoost. 783 arma::Row<size_t> predictedLabels(testData.n_cols); 784 a.Classify(testData, predictedLabels); 785 786 int localError = arma::accu(trueTestLabels != predictedLabels); 787 double lError = (double) localError / trueTestLabels.n_cols; 788 789 REQUIRE(lError <= 0.30); 790 } 791 792 TEST_CASE("PerceptronSerializationTest", "[AdaBoostTest]") 793 { 794 // Build an AdaBoost object. 795 mat data = randu<mat>(10, 500); 796 Row<size_t> labels(500); 797 for (size_t i = 0; i < 250; ++i) 798 labels[i] = 0; 799 for (size_t i = 250; i < 500; ++i) 800 labels[i] = 1; 801 802 Perceptron<> p(data, labels, 2, 800); 803 AdaBoost<> ab(data, labels, 2, p, 50, 1e-10); 804 805 // Now create another dataset to train with. 806 mat otherData = randu<mat>(5, 200); 807 Row<size_t> otherLabels(200); 808 for (size_t i = 0; i < 100; ++i) 809 otherLabels[i] = 1; 810 for (size_t i = 100; i < 150; ++i) 811 otherLabels[i] = 0; 812 for (size_t i = 150; i < 200; ++i) 813 otherLabels[i] = 2; 814 815 Perceptron<> p2(otherData, otherLabels, 3, 500); 816 AdaBoost<> abText(otherData, otherLabels, 3, p2, 50, 1e-10); 817 818 AdaBoost<> abXml, abBinary; 819 820 SerializeObjectAll(ab, abXml, abText, abBinary); 821 822 // Now check that the objects are the same. 823 REQUIRE(ab.Tolerance() == Approx(abXml.Tolerance()).epsilon(1e-7)); 824 REQUIRE(ab.Tolerance() == Approx(abText.Tolerance()).epsilon(1e-7)); 825 REQUIRE(ab.Tolerance() == Approx(abBinary.Tolerance()).epsilon(1e-7)); 826 827 REQUIRE(ab.WeakLearners() == abXml.WeakLearners()); 828 REQUIRE(ab.WeakLearners() == abText.WeakLearners()); 829 REQUIRE(ab.WeakLearners() == abBinary.WeakLearners()); 830 831 for (size_t i = 0; i < ab.WeakLearners(); ++i) 832 { 833 CheckMatrices(ab.WeakLearner(i).Weights(), 834 abXml.WeakLearner(i).Weights(), 835 abText.WeakLearner(i).Weights(), 836 abBinary.WeakLearner(i).Weights()); 837 838 CheckMatrices(ab.WeakLearner(i).Biases(), 839 abXml.WeakLearner(i).Biases(), 840 abText.WeakLearner(i).Biases(), 841 abBinary.WeakLearner(i).Biases()); 842 } 843 } 844 845 TEST_CASE("ID3DecisionStumpSerializationTest", "[AdaBoostTest]") 846 { 847 // Build an AdaBoost object. 848 mat data = randu<mat>(10, 500); 849 Row<size_t> labels(500); 850 for (size_t i = 0; i < 250; ++i) 851 labels[i] = 0; 852 for (size_t i = 250; i < 500; ++i) 853 labels[i] = 1; 854 855 ID3DecisionStump p(data, labels, 2, 800); 856 AdaBoost<ID3DecisionStump> ab(data, labels, 2, p, 50, 1e-10); 857 858 // Now create another dataset to train with. 859 mat otherData = randu<mat>(5, 200); 860 Row<size_t> otherLabels(200); 861 for (size_t i = 0; i < 100; ++i) 862 otherLabels[i] = 1; 863 for (size_t i = 100; i < 150; ++i) 864 otherLabels[i] = 0; 865 for (size_t i = 150; i < 200; ++i) 866 otherLabels[i] = 2; 867 868 ID3DecisionStump p2(otherData, otherLabels, 3, 500); 869 AdaBoost<ID3DecisionStump> abText(otherData, otherLabels, 3, p2, 50, 1e-10); 870 871 AdaBoost<ID3DecisionStump> abXml, abBinary; 872 873 SerializeObjectAll(ab, abXml, abText, abBinary); 874 875 // Now check that the objects are the same. 876 REQUIRE(ab.Tolerance() == Approx(abXml.Tolerance()).epsilon(1e-7)); 877 REQUIRE(ab.Tolerance() == Approx(abText.Tolerance()).epsilon(1e-7)); 878 REQUIRE(ab.Tolerance() == Approx(abBinary.Tolerance()).epsilon(1e-7)); 879 880 REQUIRE(ab.WeakLearners() == abXml.WeakLearners()); 881 REQUIRE(ab.WeakLearners() == abText.WeakLearners()); 882 REQUIRE(ab.WeakLearners() == abBinary.WeakLearners()); 883 884 for (size_t i = 0; i < ab.WeakLearners(); ++i) 885 { 886 REQUIRE(ab.WeakLearner(i).SplitDimension() == 887 abXml.WeakLearner(i).SplitDimension()); 888 REQUIRE(ab.WeakLearner(i).SplitDimension() == 889 abText.WeakLearner(i).SplitDimension()); 890 REQUIRE(ab.WeakLearner(i).SplitDimension() == 891 abBinary.WeakLearner(i).SplitDimension()); 892 } 893 } 894