// Copyright (c) 2017 GeometryFactory Sarl (France).
// All rights reserved.
//
// This file is part of CGAL (www.cgal.org).
//
// $URL: https://github.com/CGAL/cgal/blob/v5.3/Classification/include/CGAL/Classification/Evaluation.h $
// $Id: Evaluation.h 76d1fe8 2021-01-07T16:45:08+01:00 Dmitry Anisimov
// SPDX-License-Identifier: GPL-3.0-or-later OR LicenseRef-Commercial
//
// Author(s)     : Simon Giraudot

#ifndef CGAL_CLASSIFICATION_EVALUATION_H
#define CGAL_CLASSIFICATION_EVALUATION_H

#include <CGAL/license/Classification.h>

#include <CGAL/Classification/Label.h>
#include <CGAL/Classification/Label_set.h>

#include <CGAL/Iterator_range.h>

#include <boost/iterator/zip_iterator.hpp>

#include <map>
#include <cmath>   // for std::isnan
#include <limits>  // for std::numeric_limits
#include <vector>  // for std::vector (confusion matrix storage)
#include <ostream> // for the output functions

namespace CGAL {

namespace Classification {


/*!
  \ingroup PkgClassificationDataStructures

  \brief Class to compute several measurements to evaluate the quality
  of a classification output.
*/
class Evaluation
{
  const Label_set& m_labels;

  // Confusion matrix: m_confusion[i][j] is the number of items classified
  // with label i whose ground truth is label j (see append()).
  std::vector<std::vector<std::size_t> > m_confusion;

public:

  /// \name Constructors
  /// @{

  /*!
    \brief instantiates an empty evaluation object.

    \param labels labels used.
  */
  Evaluation (const Label_set& labels)
    : m_labels (labels)
  {
    init();
  }

  /*!

    \brief instantiates an evaluation object and computes all
    measurements.

    \param labels labels used.

    \param ground_truth vector of label indices: it should contain the
    index of the corresponding label in the `Label_set` provided in the
    constructor. Input items that have no ground truth information
    should be given the value `-1`.

    \param result similar to `ground_truth` but containing the result of
    a classification.

  */
  template <typename GroundTruthIndexRange, typename ResultIndexRange>
  Evaluation (const Label_set& labels,
              const GroundTruthIndexRange& ground_truth,
              const ResultIndexRange& result)
    : m_labels (labels)
  {
    init();
    append(ground_truth, result);
  }
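
  // A minimal usage sketch (not part of the API): builds a two-label set,
  // evaluates an illustrative prediction vector against a ground truth
  // vector (using -1 for items without ground truth), and prints the
  // resulting measurements via operator<<.
  //
  //   CGAL::Classification::Label_set labels;
  //   CGAL::Classification::Label_handle ground   = labels.add ("ground");
  //   CGAL::Classification::Label_handle building = labels.add ("building");
  //
  //   std::vector<int> gt   = { 0, 0, 1, 1, -1 }; // -1 = no ground truth
  //   std::vector<int> pred = { 0, 1, 1, 1,  0 };
  //
  //   CGAL::Classification::Evaluation evaluation (labels, gt, pred);
  //   std::cerr << evaluation;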

  /// \cond SKIP_IN_MANUAL
  void init()
  {
    m_confusion.resize (m_labels.size());
    for (std::size_t i = 0; i < m_confusion.size(); ++ i)
      m_confusion[i].resize (m_labels.size(), 0);
  }

  // Returns true if at least one evaluated item has `label_idx` as ground
  // truth, i.e. if the corresponding column of the confusion matrix is
  // not empty.
  bool label_has_ground_truth (std::size_t label_idx) const
  {
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      if (m_confusion[i][label_idx] != 0)
        return true;
    return false;
  }
  /// \endcond

  /// @}

  /// \name Modification
  /// @{

  /*!
    \brief appends more items to the evaluation object.

    \param ground_truth vector of label indices: it should contain the
    index of the corresponding label in the `Label_set` provided in the
    constructor. Input items that have no ground truth information
    should be given the value `-1`.

    \param result similar to `ground_truth` but containing the result of
    a classification.

  */
  template <typename GroundTruthIndexRange, typename ResultIndexRange>
  void append (const GroundTruthIndexRange& ground_truth,
               const ResultIndexRange& result)
  {
    CGAL_precondition (m_labels.is_valid_ground_truth (ground_truth));
    CGAL_precondition (m_labels.is_valid_ground_truth (result));

    for (const auto p : CGAL::make_range
           (boost::make_zip_iterator(boost::make_tuple(ground_truth.begin(), result.begin())),
            boost::make_zip_iterator(boost::make_tuple(ground_truth.end(), result.end()))))
    {
      int gt = static_cast<int>(boost::get<0>(p));
      int res = static_cast<int>(boost::get<1>(p));
      if (gt == -1 || res == -1)
        continue;

      // first index = predicted label, second index = ground truth label
      ++ m_confusion[std::size_t(res)][std::size_t(gt)];
    }
  }
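
  // Incremental usage sketch: the same evaluation object can accumulate
  // results over several batches (e.g. tiles of a large point cloud).
  // `tiles`, `load_tile_ground_truth()` and `load_tile_result()` are
  // hypothetical user-side names, not part of CGAL.
  //
  //   CGAL::Classification::Evaluation evaluation (labels);
  //   for (const std::string& tile : tiles)
  //     evaluation.append (load_tile_ground_truth (tile),
  //                        load_tile_result (tile));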

  /// @}

  /// \name Label Evaluation
  /// @{

  /*!
    \brief returns the number of items whose ground truth is
    `ground_truth` and which were classified as `result`.
  */
  std::size_t confusion (Label_handle ground_truth, Label_handle result) const
  {
    std::size_t idx_gt = ground_truth->index();
    std::size_t idx_r = result->index();
    // The matrix is indexed as [predicted][ground truth] (see append()),
    // so the result index comes first.
    return m_confusion[idx_r][idx_gt];
  }

  /*!

    \brief returns the precision of the training for the given label.

    Precision is the number of true positives divided by the sum of
    the true positives and the false positives.

  */
  float precision (Label_handle label) const
  {
    std::size_t idx = label->index();
    if (!label_has_ground_truth(idx))
      return std::numeric_limits<float>::quiet_NaN();

    std::size_t total = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      total += m_confusion[idx][i];

    if (total == 0)
      return 0.f;

    return m_confusion[idx][idx] / float(total);
  }

  /*!

    \brief returns the recall of the training for the given label.

    Recall is the number of true positives divided by the sum of
    the true positives and the false negatives.

  */
  float recall (Label_handle label) const
  {
    std::size_t idx = label->index();
    if (!label_has_ground_truth(idx))
      return std::numeric_limits<float>::quiet_NaN();

    std::size_t total = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      total += m_confusion[i][idx];
    // label_has_ground_truth() guarantees total > 0 here
    return m_confusion[idx][idx] / float(total);
  }

  /*!

    \brief returns the \f$F_1\f$ score of the training for the given label.

    \f$F_1\f$ score is the harmonic mean of `precision()` and `recall()`:

    \f[
    F_1 = 2 \times \frac{precision \times recall}{precision + recall}
    \f]

  */
  float f1_score (Label_handle label) const
  {
    float p = precision(label);
    float r = recall(label);

    if (p == 0.f && r == 0.f)
      return 0.f;

    return 2.f * p * r / (p + r);
  }
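
  // Worked example (illustrative values only): with precision = 0.8 and
  // recall = 0.5, F1 = 2 * (0.8 * 0.5) / (0.8 + 0.5) = 0.8 / 1.3 ≈ 0.615;
  // the harmonic mean is dominated by the weaker of the two measurements.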

  /*!
    \brief returns the intersection over union of the training for the
    given label.

    Intersection over union is the number of true positives divided by
    the sum of the true positives, of the false positives and of the
    false negatives.
  */
  float intersection_over_union (Label_handle label) const
  {
    std::size_t idx = label->index();

    std::size_t total = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
    {
      total += m_confusion[i][idx];
      if (i != idx)
        total += m_confusion[idx][i];
    }

    // If the label appears neither in the ground truth nor in the result,
    // total == 0 and the division yields NaN, which
    // mean_intersection_over_union() filters out.
    return m_confusion[idx][idx] / float(total);
  }
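
  // Worked example (illustrative values only): if, for a given label,
  // TP = 50, FP = 10 and FN = 40, then
  // IoU = 50 / (50 + 10 + 40) = 0.5, while
  // F1 = 2*50 / (2*50 + 10 + 40) = 2/3; IoU never exceeds the F1 score.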

  /// @}

  /// \name Global Evaluation
  /// @{

  /*!
    \brief returns the number of misclassified items.
  */
  std::size_t number_of_misclassified_items() const
  {
    std::size_t total = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      for (std::size_t j = 0; j < m_labels.size(); ++ j)
        if (i != j)
          total += m_confusion[i][j];
    return total;
  }

  /*!
    \brief returns the total number of items used for evaluation.
  */
  std::size_t number_of_items() const
  {
    std::size_t total = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      for (std::size_t j = 0; j < m_labels.size(); ++ j)
        total += m_confusion[i][j];
    return total;
  }

  /*!
    \brief returns the accuracy of the training.

    Accuracy is the total number of true positives divided by the
    total number of evaluated items (items without ground truth are
    not counted).
  */
  float accuracy() const
  {
    std::size_t true_positives = 0;
    std::size_t total = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
    {
      true_positives += m_confusion[i][i];
      for (std::size_t j = 0; j < m_labels.size(); ++ j)
        total += m_confusion[i][j];
    }
    return true_positives / float(total);
  }
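
  // Worked example (illustrative values only): for the confusion matrix
  //   [ 50 10 ]
  //   [  5 35 ]
  // accuracy = (50 + 35) / (50 + 10 + 5 + 35) = 85 / 100 = 0.85.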

  /*!
    \brief returns the mean \f$F_1\f$ score of the training over all
    labels (see `f1_score()`).
  */
  float mean_f1_score() const
  {
    float mean = 0;
    std::size_t nb = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      if (label_has_ground_truth(i))
      {
        mean += f1_score(m_labels[i]);
        ++ nb;
      }
    return mean / nb;
  }

  /*!
    \brief returns the mean intersection over union of the training
    over all labels (see `intersection_over_union()`).
  */
  float mean_intersection_over_union() const
  {
    float mean = 0;
    std::size_t nb = 0;
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
    {
      float iou = intersection_over_union(m_labels[i]);
      if (!std::isnan(iou))
      {
        mean += iou;
        ++ nb;
      }
    }
    return mean / nb;
  }

  /// @}

  /// \name Output Formatting Functions
  /// @{

  /*!
    \brief outputs the evaluation in a simple ASCII format to the stream `os`.
  */
  friend std::ostream& operator<< (std::ostream& os, const Evaluation& evaluation)
  {
    os << "Evaluation of classification:" << std::endl;
    os << " * Global results:" << std::endl;
    os << "   - " << evaluation.number_of_misclassified_items()
       << " misclassified item(s) out of " << evaluation.number_of_items() << std::endl
       << "   - Accuracy = " << evaluation.accuracy() << std::endl
       << "   - Mean F1 score = " << evaluation.mean_f1_score() << std::endl
       << "   - Mean IoU = " << evaluation.mean_intersection_over_union() << std::endl;
    os << " * Detailed results:" << std::endl;
    for (std::size_t i = 0; i < evaluation.m_labels.size(); ++ i)
    {
      os << "   - \"" << evaluation.m_labels[i]->name() << "\": ";
      if (evaluation.label_has_ground_truth(i))
        os << "Precision = " << evaluation.precision(evaluation.m_labels[i]) << " ; "
           << "Recall = " << evaluation.recall(evaluation.m_labels[i]) << " ; "
           << "F1 score = " << evaluation.f1_score(evaluation.m_labels[i]) << " ; "
           << "IoU = " << evaluation.intersection_over_union(evaluation.m_labels[i]) << std::endl;
      else
        os << "(no ground truth)" << std::endl;
    }
    return os;
  }

  /*!
    \brief outputs the evaluation as an HTML page to the stream `os`.
  */
  static std::ostream& output_to_html (std::ostream& os, const Evaluation& evaluation)
  {
    os << "<!DOCTYPE html>" << std::endl
       << "<html>" << std::endl
       << "<head>" << std::endl
       << "<style type=\"text/css\">" << std::endl
       << "  body{margin:40px auto; max-width:900px; line-height:1.5; color:#333}" << std::endl
       << "  h1,h2{line-height:1.2}" << std::endl
       << "  table{width:100%}" << std::endl
       << "  table,th,td{border: 1px solid black; border-collapse: collapse; }" << std::endl
       << "  th,td{padding: 5px;}" << std::endl
       << "</style>" << std::endl
       << "<title>Evaluation of CGAL Classification results</title>" << std::endl
       << "</head>" << std::endl
       << "<body>" << std::endl
       << "<h1>Evaluation of CGAL Classification results</h1>" << std::endl;

    os << "<h2>Global Results</h2>" << std::endl
       << "<ul>" << std::endl
       << "  <li>" << evaluation.number_of_misclassified_items()
       << " misclassified item(s) out of " << evaluation.number_of_items() << "</li>" << std::endl
       << "  <li>Accuracy = " << evaluation.accuracy() << "</li>" << std::endl
       << "  <li>Mean F1 score = " << evaluation.mean_f1_score() << "</li>" << std::endl
       << "  <li>Mean IoU = " << evaluation.mean_intersection_over_union() << "</li>" << std::endl
       << "</ul>" << std::endl;

    const Label_set& labels = evaluation.m_labels;

    os << "<h2>Detailed Results</h2>" << std::endl
       << "<table>" << std::endl
       << "  <tr>" << std::endl
       << "    <th><strong>Label</strong></th>" << std::endl
       << "    <th><strong>Precision</strong></th>" << std::endl
       << "    <th><strong>Recall</strong></th>" << std::endl
       << "    <th><strong>F1 score</strong></th>" << std::endl
       << "    <th><strong>IoU</strong></th>" << std::endl
       << "  </tr>" << std::endl;
    for (std::size_t i = 0; i < labels.size(); ++ i)
      if (evaluation.label_has_ground_truth(i))
        os << "  <tr>" << std::endl
           << "    <td>" << labels[i]->name() << "</td>" << std::endl
           << "    <td>" << evaluation.precision(labels[i]) << "</td>" << std::endl
           << "    <td>" << evaluation.recall(labels[i]) << "</td>" << std::endl
           << "    <td>" << evaluation.f1_score(labels[i]) << "</td>" << std::endl
           << "    <td>" << evaluation.intersection_over_union(labels[i]) << "</td>" << std::endl
           << "  </tr>" << std::endl;
      else
        os << "  <tr>" << std::endl
           << "    <td>" << labels[i]->name() << "</td>" << std::endl
           << "    <td><em>(no ground truth)</em></td>" << std::endl
           << "    <td></td>" << std::endl
           << "    <td></td>" << std::endl
           << "    <td></td>" << std::endl
           << "  </tr>" << std::endl;

    os << "</table>" << std::endl;

    os << "<h2>Confusion Matrix</h2>" << std::endl
       << "<table>" << std::endl
       << "  <tr>" << std::endl
       << "    <th></th>" << std::endl;
    for (std::size_t i = 0; i < labels.size(); ++ i)
      os << "    <th><strong>" << labels[i]->name() << "</strong></th>" << std::endl;
    os << "    <th><strong>PREDICTIONS</strong></th>" << std::endl;
    os << "  </tr>" << std::endl;

    // Each row of the HTML matrix is a predicted label, each column a
    // ground truth label; the row and column sums give the totals per
    // prediction and per ground truth label.
    std::vector<std::size_t> sums (labels.size(), 0);
    for (std::size_t i = 0; i < labels.size(); ++ i)
    {
      os << "  <tr>" << std::endl
         << "    <td><strong>" << labels[i]->name() << "</strong></td>" << std::endl;
      std::size_t sum = 0;
      for (std::size_t j = 0; j < labels.size(); ++ j)
      {
        if (i == j)
          os << "    <td><strong>" << evaluation.m_confusion[i][j] << "</strong></td>" << std::endl;
        else
          os << "    <td>" << evaluation.m_confusion[i][j] << "</td>" << std::endl;
        sum += evaluation.m_confusion[i][j];
        sums[j] += evaluation.m_confusion[i][j];
      }
      os << "    <td><strong>" << sum << "</strong></td>" << std::endl;
      os << "  </tr>" << std::endl;
    }

    os << "  <tr>" << std::endl
       << "    <td><strong>GROUND TRUTH</strong></td>" << std::endl;
    std::size_t total = 0;
    for (std::size_t j = 0; j < labels.size(); ++ j)
    {
      os << "    <td><strong>" << sums[j] << "</strong></td>" << std::endl;
      total += sums[j];
    }
    os << "    <td><strong>" << total << "</strong></td>" << std::endl
       << "  </tr>" << std::endl
       << "</table>" << std::endl
       << "<p><em>This page was generated by the <a \
href=\"https://doc.cgal.org/latest/Classification/index.html\">CGAL \
Classification package</a>.</em></p>" << std::endl
       << "</body>" << std::endl
       << "</html>" << std::endl;

    return os;
  }
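
  // Usage sketch (hypothetical file name): writing the HTML report to
  // disk requires an output file stream (<fstream>).
  //
  //   std::ofstream html ("evaluation.html");
  //   CGAL::Classification::Evaluation::output_to_html (html, evaluation);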

  /// @}

};


} // namespace Classification

} // namespace CGAL

#endif // CGAL_CLASSIFICATION_EVALUATION_H