1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.algorithm.clustering; 22 23 import static org.junit.Assert.assertEquals; 24 import static org.junit.Assert.assertTrue; 25 26 import java.util.Arrays; 27 import java.util.List; 28 29 import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; 30 import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; 31 import de.lmu.ifi.dbs.elki.data.Cluster; 32 import de.lmu.ifi.dbs.elki.data.Clustering; 33 import de.lmu.ifi.dbs.elki.data.model.Model; 34 import de.lmu.ifi.dbs.elki.database.Database; 35 import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable; 36 import de.lmu.ifi.dbs.elki.logging.Logging; 37 import de.lmu.ifi.dbs.elki.result.Result; 38 import de.lmu.ifi.dbs.elki.utilities.io.FormatUtil; 39 40 /** 41 * Abstract unit test for clustering algorithms. 42 * 43 * Includes code for cluster evaluation. 44 * 45 * @author Erich Schubert 46 */ 47 public abstract class AbstractClusterAlgorithmTest extends AbstractSimpleAlgorithmTest { 48 /** 49 * Find a clustering result, fail if there is more than one or none. 50 * 51 * @param result Base result 52 * @return Clustering 53 */ findSingleClustering(Result result)54 protected Clustering<?> findSingleClustering(Result result) { 55 List<Clustering<? extends Model>> clusterresults = Clustering.getClusteringResults(result); 56 assertTrue("No unique clustering found in result.", clusterresults.size() == 1); 57 Clustering<? extends Model> clustering = clusterresults.get(0); 58 return clustering; 59 } 60 61 /** 62 * Test the clustering result by comparing the score with an expected value. 63 * 64 * @param database Database to test 65 * @param clustering Clustering result 66 * @param expected Expected score 67 */ testFMeasure(Database database, Clustering<?> clustering, double expected)68 protected <O> void testFMeasure(Database database, Clustering<?> clustering, double expected) { 69 // Run by-label as reference 70 ByLabelClustering bylabel = new ByLabelClustering(); 71 Clustering<Model> rbl = bylabel.run(database); 72 73 ClusterContingencyTable ct = new ClusterContingencyTable(true, false); 74 ct.process(clustering, rbl); 75 double score = ct.getPaircount().f1Measure(); 76 Logging.getLogger(this.getClass()).verbose(this.getClass().getSimpleName() + " score: " + score + " expect: " + expected); 77 assertEquals(this.getClass().getSimpleName() + ": Score does not match.", expected, score, 0.0001); 78 } 79 80 /** 81 * Validate the cluster sizes with an expected result. 82 * 83 * @param clustering Clustering to test 84 * @param expected Expected cluster sizes 85 */ testClusterSizes(Clustering<?> clustering, int[] expected)86 protected void testClusterSizes(Clustering<?> clustering, int[] expected) { 87 List<? extends Cluster<?>> clusters = clustering.getAllClusters(); 88 int[] sizes = new int[clusters.size()]; 89 for(int i = 0; i < sizes.length; ++i) { 90 sizes[i] = clusters.get(i).size(); 91 } 92 // Sort both 93 Arrays.sort(sizes); 94 Arrays.sort(expected); 95 // Test 96 assertEquals("Number of clusters does not match expectations. " + FormatUtil.format(sizes), expected.length, sizes.length); 97 for(int i = 0; i < expected.length; i++) { 98 assertEquals("Cluster size does not match at position " + i + " in " + FormatUtil.format(sizes), expected[i], sizes[i]); 99 } 100 } 101 } 102