1 //   OpenNN: Open Neural Networks Library
2 //   www.opennn.net
3 //
4 //   S T A T I S T I C S   S O U R C E
5 //
6 //   Artificial Intelligence Techniques, SL
7 //   artelnics@artelnics.com
8 
9 #include "statistics.h"
10 
11 namespace OpenNN
12 {
13 
14 /// Default constructor.
15 
Descriptives()16 Descriptives::Descriptives()
17 {
18     name = "Descriptives";
19     minimum = -1.0;
20     maximum = 1.0;
21     mean = 0;
22     standard_deviation = 1.0;
23 }
24 
25 
26 /// Values constructor.
27 
Descriptives(const type & new_minimum,const type & new_maximum,const type & new_mean,const type & new_standard_deviation)28 Descriptives::Descriptives(const type &new_minimum, const type &new_maximum,
29                            const type &new_mean, const type &new_standard_deviation)
30 {
31     minimum = new_minimum;
32     maximum = new_maximum;
33     mean = new_mean;
34     standard_deviation = new_standard_deviation;
35 }
36 
37 
38 /// Destructor.
39 
~Descriptives()40 Descriptives::~Descriptives()
41 {}
42 
43 
44 /// Sets a new minimum value in the descriptives structure.
45 /// @param new_minimum Minimum value.
46 
set_minimum(const type & new_minimum)47 void Descriptives::set_minimum(const type &new_minimum)
48 {
49     minimum = new_minimum;
50 }
51 
52 
53 /// Sets a new maximum value in the descriptives structure.
54 /// @param new_maximum Maximum value.
55 
set_maximum(const type & new_maximum)56 void Descriptives::set_maximum(const type &new_maximum)
57 {
58     maximum = new_maximum;
59 }
60 
61 
62 /// Sets a new mean value in the descriptives structure.
63 /// @param new_mean Mean value.
64 
set_mean(const type & new_mean)65 void Descriptives::set_mean(const type &new_mean)
66 {
67     mean = new_mean;
68 }
69 
70 
71 /// Sets a new standard deviation value in the descriptives structure.
72 /// @param new_standard_deviation Standard deviation value.
73 
set_standard_deviation(const type & new_standard_deviation)74 void Descriptives::set_standard_deviation(const type &new_standard_deviation)
75 {
76     standard_deviation = new_standard_deviation;
77 }
78 
79 
80 /// Returns all the statistical parameters contained in a single vector.
81 /// The size of that vector is four.
82 /// The elements correspond to the minimum, maximum, mean and standard deviation
83 /// values respectively.
84 
to_vector() const85 Tensor<type, 1> Descriptives::to_vector() const
86 {
87     Tensor<type, 1> statistics_vector(4);
88     statistics_vector[0] = minimum;
89     statistics_vector[1] = maximum;
90     statistics_vector[2] = mean;
91     statistics_vector[3] = standard_deviation;
92 
93     return statistics_vector;
94 }
95 
96 
97 /// Returns true if the minimum value is -1 and the maximum value is +1,
98 /// and false otherwise.
99 
has_minimum_minus_one_maximum_one()100 bool Descriptives::has_minimum_minus_one_maximum_one()
101 {
102     if(abs(minimum + 1) < numeric_limits<type>::min() && abs(maximum - 1) < numeric_limits<type>::min())
103     {
104         return true;
105     }
106 
107     return false;
108 }
109 
110 
111 /// Returns true if the mean value is 0 and the standard deviation value is 1,
112 /// and false otherwise.
113 
has_mean_zero_standard_deviation_one()114 bool Descriptives::has_mean_zero_standard_deviation_one()
115 {
116     if(abs(mean) < numeric_limits<type>::min() && abs(standard_deviation - 1) < numeric_limits<type>::min())
117     {
118         return true;
119     }
120     else
121     {
122         return false;
123     }
124 
125 }
126 
127 
128 /// Print the tittle of descriptives structure
129 
print(const string & title) const130 void Descriptives::print(const string& title) const
131 {
132     cout << title << endl
133          << "Minimum: " << minimum << endl
134          << "Maximum: " << maximum << endl
135          << "Mean: " << mean << endl
136          << "Standard deviation: " << standard_deviation << endl;
137 }
138 
139 
BoxPlot(const type & new_minimum,const type & new_first_cuartile,const type & new_median,const type & new_third_quartile,const type & new_maximum)140 BoxPlot::BoxPlot(const type& new_minimum, const type& new_first_cuartile, const type& new_median, const type& new_third_quartile, const type& new_maximum)
141 {
142     minimum = new_minimum;
143     first_quartile = new_first_cuartile;
144     median = new_median;
145     third_quartile = new_third_quartile;
146     maximum = new_maximum;
147 }
148 
149 
150 /// Saves to a file the minimum, maximum, mean and standard deviation
151 /// of the descriptives structure.
152 /// @param file_name Name of descriptives data file.
153 
save(const string & file_name) const154 void Descriptives::save(const string &file_name) const
155 {
156     ofstream file(file_name.c_str());
157 
158     if(!file.is_open())
159     {
160         ostringstream buffer;
161 
162         buffer << "OpenNN Exception: Statistics Class.\n"
163                << "void save(const string&) const method.\n"
164                << "Cannot open descriptives data file.\n";
165 
166         throw logic_error(buffer.str());
167     }
168 
169     // Write file
170 
171     file << "Minimum: " << minimum << endl
172          << "Maximum: " << maximum << endl
173          << "Mean: " << mean << endl
174          << "Standard deviation: " << standard_deviation << endl;
175 
176     // Close file
177 
178     file.close();
179 }
180 
181 
Histogram()182 Histogram::Histogram() {}
183 
184 
185 /// Destructor.
186 
~Histogram()187 Histogram::~Histogram() {}
188 
189 
190 /// Bins number constructor.
191 /// @param bins_number Number of bins in the histogram.
192 
Histogram(const Index & bins_number)193 Histogram::Histogram(const Index &bins_number)
194 {
195     centers.resize(bins_number);
196     frequencies.resize(bins_number);
197 }
198 
199 
200 /// Values constructor.
201 /// @param new_centers Center values for the bins.
202 /// @param new_frequencies Number of variates in each bin.
203 
Histogram(const Tensor<type,1> & new_centers,const Tensor<Index,1> & new_frequencies)204 Histogram::Histogram(const Tensor<type, 1>&new_centers,
205                      const Tensor<Index, 1>&new_frequencies)
206 {
207     centers = new_centers;
208     frequencies = new_frequencies;
209 }
210 
211 
212 // Data constructor
213 /// @param data Numerical data.
214 /// @param number_of_bins Number of bins.
215 
Histogram(const Tensor<type,1> & data,const Index & number_of_bins)216 Histogram::Histogram(const Tensor<type, 1>& data,
217                      const Index& number_of_bins)
218 {
219     const type data_maximum = maximum(data);
220     const type data_minimum = minimum(data);
221     const type step = (data_maximum - data_minimum) / number_of_bins;
222 
223 
224     Tensor<type, 1> new_centers(number_of_bins);
225     for(Index i = 0; i < number_of_bins; i++)
226     {
227         new_centers(i) = data_minimum + (0.5 * step) + (step * i);
228     }
229 
230     Tensor<Index, 1> new_frequencies(number_of_bins);
231     new_frequencies.setZero();
232 
233     type value;
234     Index corresponding_bin;
235 
236     for(Index i = 0; i < data.dimension(0); i++)
237     {
238         value = data(i);
239         corresponding_bin = int((value - data_minimum) / step);
240 
241         new_frequencies(corresponding_bin)++;
242     }
243 
244     centers = new_centers;
245     frequencies = new_frequencies;
246 }
247 
248 
249 // Probabilities constructor
250 /// @param data Numerical probabilities data.
251 
Histogram(const Tensor<type,1> & probability_data)252 Histogram::Histogram(const Tensor<type, 1>& probability_data)
253 {
254     const size_t number_of_bins = 10;
255     type data_maximum = maximum(probability_data);
256     const type data_minimum = 0.0;
257 
258     if(data_maximum > 1)
259     {
260         data_maximum = 100.0;
261     }
262     else
263     {
264         data_maximum = 1.0;
265     }
266 
267     const type step = (data_maximum - data_minimum) / number_of_bins;
268 
269 
270     Tensor<type, 1> new_centers(number_of_bins);
271     for(size_t i = 0; i < number_of_bins; i++)
272     {
273         new_centers(i) = data_minimum + (0.5 * step) + (step * i);
274     }
275 
276     Tensor<Index, 1> new_frequencies(number_of_bins);
277     new_frequencies.setZero();
278 
279     type value;
280     Index corresponding_bin;
281 
282     for(Index i = 0; i < probability_data.dimension(0); i++)
283     {
284         value = probability_data(i);
285         corresponding_bin = int((value - data_minimum) / step);
286 
287         new_frequencies(corresponding_bin)++;
288     }
289 
290     centers = new_centers;
291     frequencies = new_frequencies;
292 }
293 
294 /// Returns the number of bins in the histogram.
295 
get_bins_number() const296 Index Histogram::get_bins_number() const
297 {
298     return centers.size();
299 }
300 
301 
302 /// Returns the number of bins with zero variates.
303 
count_empty_bins() const304 Index Histogram::count_empty_bins() const
305 {
306     const auto size = frequencies.dimension(0);
307 
308     Index count = 0;
309 
310     for(Index i = 0; i < size; i++)
311     {
312         if(frequencies(i) == 0) count++;
313     }
314 
315     return count;
316 }
317 
318 
319 /// Returns the number of variates in the less populated bin.
320 
calculate_minimum_frequency() const321 Index Histogram::calculate_minimum_frequency() const
322 {
323     return minimum(frequencies);
324 }
325 
326 
327 /// Returns the number of variates in the most populated bin.
328 
calculate_maximum_frequency() const329 Index Histogram::calculate_maximum_frequency() const
330 {
331     return maximum(frequencies);
332 
333 }
334 
335 
336 /// Retuns the index of the most populated bin.
337 
calculate_most_populated_bin() const338 Index Histogram::calculate_most_populated_bin() const
339 {
340 
341     const Tensor<Index, 0> max_element = frequencies.maximum();
342 
343     for(Index i = 0; i < frequencies.size(); i++)
344     {
345         if(max_element(0) == frequencies(i)) return i;
346     }
347 
348     return 0;
349 }
350 
351 
352 /// Returns a vector with the centers of the less populated bins.
353 
calculate_minimal_centers() const354 Tensor<type, 1> Histogram::calculate_minimal_centers() const
355 {
356     const Index minimum_frequency = calculate_minimum_frequency();
357 
358     Index minimal_indices_size = 0;
359 
360     if (frequencies.size() == 0)
361     {
362         Tensor<type, 1> nan(1);
363         nan.setValues({static_cast<type>(NAN)});
364         return nan;
365     }
366 
367     for(Index i = 0; i < frequencies.size(); i++)
368     {
369         if(frequencies(i) == minimum_frequency)
370         {
371             minimal_indices_size++;
372         }
373     }
374 
375     Index index = 0;
376 
377     Tensor<type, 1> minimal_centers(minimal_indices_size);
378 
379     for(Index i = 0; i < frequencies.size(); i++)
380     {
381         if(frequencies(i) == minimum_frequency)
382         {
383             minimal_centers(index) = static_cast<type>(centers(i));
384 
385             index++;
386         }
387     }
388 
389     return minimal_centers;
390 }
391 
392 
393 /// Returns a vector with the centers of the most populated bins.
394 
calculate_maximal_centers() const395 Tensor<type, 1> Histogram::calculate_maximal_centers() const
396 {
397     const Index maximum_frequency = calculate_maximum_frequency();
398 
399     Index maximal_indices_size = 0;
400 
401     if (frequencies.size() == 0)
402     {
403         Tensor<type, 1> nan(1);
404         nan.setValues({static_cast<type>(NAN)});
405         return nan;
406     }
407 
408     for(Index i = 0; i < frequencies.size(); i++)
409     {
410         if(frequencies(i) == maximum_frequency)
411         {
412             maximal_indices_size++;
413         }
414     }
415 
416     Index index = 0;
417 
418     Tensor<type, 1> maximal_centers(maximal_indices_size);
419 
420     for(Index i = 0; i < frequencies.size(); i++)
421     {
422         if(maximum_frequency == frequencies(i))
423         {
424             maximal_centers(index) = static_cast<type>(centers(i));
425 
426             index++;
427         }
428     }
429 
430     return maximal_centers;
431 }
432 
433 
434 /// Returns the number of the bin to which a given value belongs to.
435 /// @param value Value for which we want to get the bin.
436 
calculate_bin(const type & value) const437 Index Histogram::calculate_bin(const type&value) const
438 {
439     const Index bins_number = get_bins_number();
440 
441     if(bins_number == 0) return 0;
442 
443     const type minimum_center = centers[0];
444     const type maximum_center = centers[bins_number - 1];
445 
446     const type length = static_cast<type>(maximum_center - minimum_center)/static_cast<type>(bins_number - 1.0);
447 
448     type minimum_value = centers[0] - length / 2;
449     type maximum_value = minimum_value + length;
450 
451     if(value < maximum_value)
452     {
453         return 0;
454     }
455 
456     for(Index j = 1; j < bins_number - 1; j++)
457     {
458         minimum_value = minimum_value + length;
459         maximum_value = maximum_value + length;
460 
461         if(value >= minimum_value && value < maximum_value)
462         {
463             return j;
464         }
465     }
466 
467     if(value >= maximum_value)
468     {
469         return bins_number - 1;
470     }
471     else
472     {
473         ostringstream buffer;
474 
475         buffer << "OpenNN Exception: Statistics Class.\n"
476                << "Index Histogram::calculate_bin(const type&) const.\n"
477                << "Unknown return value.\n";
478 
479         throw logic_error(buffer.str());
480     }
481 }
482 
483 
484 /// Returns the frequency of the bin to which a given value belongs to.
485 /// @param value Value for which we want to get the frequency.
486 
calculate_frequency(const type & value) const487 Index Histogram::calculate_frequency(const type&value) const
488 {
489     const Index bins_number = get_bins_number();
490 
491     if(bins_number == 0) return 0;
492 
493     const Index bin_number = calculate_bin(value);
494 
495     const Index frequency = frequencies[bin_number];
496 
497     return frequency;
498 }
499 
500 
save(const string & histogram_file_name) const501 void Histogram::save(const string& histogram_file_name) const
502 {
503     const Index number_of_bins = centers.dimension(0);
504     ofstream histogram_file(histogram_file_name);
505 
506 
507     histogram_file << "centers,frequencies" << endl;
508     for(Index i = 0; i < number_of_bins; i++)
509     {
510         histogram_file << centers(i) << ",";
511         histogram_file << frequencies(i) << endl;
512     }
513 
514     histogram_file.close();
515 
516 }
517 
518 
519 /// Returns the smallest element of a type vector.
520 /// @param vector Vector to obtain the minimum value.
521 
minimum(const Tensor<type,1> & vector)522 type minimum(const Tensor<type, 1>& vector)
523 {
524     const Index size = vector.dimension(0);
525 
526     if(size == 0) return NAN;
527 
528     type minimum = numeric_limits<type>::max();
529 
530     for(Index i = 0; i < size; i++)
531     {
532         if(vector(i) < minimum && !::isnan(vector(i)))
533         {
534             minimum = vector(i);
535         }
536     }
537 
538     return minimum;
539 }
540 
541 
542 /// Returns the smallest element of a index vector.
543 /// @param vector Vector to obtain the minimum value.
544 
minimum(const Tensor<Index,1> & vector)545 Index minimum(const Tensor<Index, 1>& vector)
546 {
547     const Index size = vector.size();
548 
549     if(size == 0) return NAN;
550 
551     Index minimum = numeric_limits<Index>::max();
552 
553     for(Index i = 0; i < size; i++)
554     {
555         if(vector(i) < minimum)
556         {
557             minimum = vector(i);
558         }
559     }
560 
561     return minimum;
562 }
563 
564 
565 /// Returns the smallest element of a type vector.
566 /// @param vector Vector to obtain the minimum value.
567 /// @param indices Vector of used indices.
568 
minimum(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)569 type minimum(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
570 {
571     const Index size = indices.dimension(0);
572 
573     if(size == 0) return NAN;
574 
575     type minimum = numeric_limits<type>::max();
576 
577     Index index;
578 
579     for(Index i = 0; i < size; i++)
580     {
581         index = indices(i);
582 
583         if(vector(index) < minimum && !::isnan(vector(index)))
584         {
585             minimum = vector(index);
586         }
587     }
588 
589     return minimum;
590 }
591 
592 
593 
// Disabled overload: returns the smallest element of a time_t vector.
595 
596 //time_t minimum(const Tensor<time_t, 1>& vector)
597 //{
598 
599 //    const Tensor<time_t, 0> min_element = vector.minimum();
600 
601 //    return min_element(0);
602 //}
603 
604 
605 /// Returns the largest element in the vector.
606 /// @param vector Vector to obtain the maximum value.
607 
maximum(const Tensor<type,1> & vector)608 type maximum(const Tensor<type, 1>& vector)
609 {
610     const Index size = vector.dimension(0);
611 
612     if(size == 0) return NAN;
613 
614     type maximum = -numeric_limits<type>::max();
615 
616     for(Index i = 0; i < size; i++)
617     {
618         if(!::isnan(vector(i)) && vector(i) > maximum)
619         {
620             maximum = vector(i);
621         }
622     }
623 
624     return maximum;
625 }
626 
627 
628 /// Returns the largest element in the vector.
629 /// @param vector Vector to obtain the maximum value.
630 /// @param indices Vector of used indices.
631 
maximum(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)632 type maximum(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
633 {
634     const Index size = indices.dimension(0);
635 
636     if(size == 0) return NAN;
637 
638     type maximum = -numeric_limits<type>::max();
639 
640     Index index;
641 
642     for(Index i = 0; i < size; i++)
643     {
644         index = indices(i);
645 
646         if(!::isnan(vector(index)) && vector(index) > maximum)
647         {
648             maximum = vector(index);
649         }
650     }
651 
652     return maximum;
653 }
654 
655 /// Returns the largest element of a index vector.
656 /// @param vector Vector to obtain the maximum value.
657 
maximum(const Tensor<Index,1> & vector)658 Index maximum(const Tensor<Index, 1>& vector)
659 {
660     const Index size = vector.size();
661 
662     if(size == 0) return NAN;
663 
664     Index maximum = -numeric_limits<Index>::max();
665 
666     for(Index i = 0; i < size; i++)
667     {
668         if(vector(i) > maximum)
669         {
670             maximum = vector(i);
671         }
672     }
673 
674     return maximum;
675 }
676 
677 
678 //time_t maximum(const Tensor<time_t, 1>& vector)
679 //{
680 //    const Tensor<time_t,0> max_element = vector.maximum();
681 
682 //    return max_element(0);
683 //}
684 
685 
686 /// Returns the maximums values of given columns.
687 /// The format is a vector of type values.
688 /// The size of that vector is equal to the number of given columns.
689 /// @param matrix Used matrix.
690 /// @param rows_indices Indices of the rows for which the maximums are to be computed.
691 /// @param columns_indices Indices of the columns for which the maximums are to be computed.
692 
columns_maximums(const Tensor<type,2> & matrix,const Tensor<Index,1> & rows_indices,const Tensor<Index,1> & columns_indices)693 Tensor<type, 1> columns_maximums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
694 {
695     const Index rows_number = matrix.dimension(0);
696     const Index columns_number = matrix.dimension(1);
697 
698     Tensor<Index, 1> used_columns_indices;
699 
700     if(columns_indices.dimension(0) == 0)
701     {
702         used_columns_indices.resize(columns_number);
703 
704         for (Index i = 0; i < columns_number; i++)
705         {
706             used_columns_indices(i) = i;
707         }
708     }
709     else
710     {
711         used_columns_indices = columns_indices;
712     }
713 
714     Tensor<Index, 1> used_rows_indices;
715 
716     if(rows_indices.dimension(0) == 0)
717     {
718         used_rows_indices.resize(rows_number);
719 
720         for (Index i = 0; i < rows_number; i++)
721         {
722             used_rows_indices(i) = i;
723         }
724     }
725     else
726     {
727         used_rows_indices = rows_indices;
728     }
729 
730     const Index rows_indices_size = used_rows_indices.size();
731     const Index columns_indices_size = used_columns_indices.size();
732 
733     Tensor<type, 1> maximums(columns_indices_size);
734 
735     Index row_index;
736     Index column_index;
737 
738     Tensor<type, 1> column(rows_indices_size);
739 
740     for(Index j = 0; j < columns_indices_size; j++)
741     {
742         column_index = used_columns_indices(j);
743 
744         for(Index i = 0; i < rows_indices_size; i++)
745         {
746             row_index = used_rows_indices(i);
747 
748             column(i) = matrix(row_index,column_index);
749         }
750 
751         maximums(j) = maximum(column);
752     }
753 
754     return maximums;
755 }
756 
757 
758 /// Returns the mean of the subvector defined by a start and end elements.
759 /// @param vector Vector to be evaluated.
760 /// @param begin Start element.
761 /// @param end End element.
762 
mean(const Tensor<type,1> & vector,const Index & begin,const Index & end)763 type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end)
764 {
765 #ifdef __OPENNN_DEBUG__
766 
767     if(begin > end)
768     {
769         ostringstream buffer;
770 
771         buffer << "OpenNN Exception: Statistics class.\n"
772                << "type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end) \n"
773                << "Begin must be less or equal than end.\n";
774 
775         throw logic_error(buffer.str());
776     }
777 
778 #endif
779 
780     if(end == begin) return vector[begin];
781 
782     type sum = 0;
783 
784     for(Index i = begin; i <= end; i++)
785     {
786         sum += vector(i);
787     }
788 
789     return sum /static_cast<type>(end-begin+1);
790 }
791 
792 
793 /// Returns the mean of the elements in the vector.
794 /// @param vector Vector to be evaluated.
795 
mean(const Tensor<type,1> & vector)796 type mean(const Tensor<type, 1>& vector)
797 {
798     const Index size = vector.dimension(0);
799 
800     if (size == 0) return 0;
801 
802 #ifdef __OPENNN_DEBUG__
803 
804     if(size == 0)
805     {
806         ostringstream buffer;
807 
808         buffer << "OpenNN Exception: Statistics Class.\n"
809                << "type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end) "
810                "const method.\n"
811                << "Size must be greater than zero.\n";
812 
813         throw logic_error(buffer.str());
814     }
815 
816 #endif
817 
818     type sum = 0;
819 
820     Index count = 0;
821 
822     for(Index i = 0; i < size; i++)
823     {
824         if(!::isnan(vector(i)))
825         {
826             sum += vector(i);
827             count++;
828         }
829     }
830 
831     const type mean = sum /static_cast<type>(count);
832 
833     return mean;
834 }
835 
836 
837 /// Returns the variance of the elements in the vector.
838 /// @param vector Vector to be evaluated.
839 
variance(const Tensor<type,1> & vector)840 type variance(const Tensor<type, 1>& vector)
841 {
842     const Index size = vector.dimension(0);
843 
844 #ifdef __OPENNN_DEBUG__
845 
846     if(size == 0)
847     {
848         ostringstream buffer;
849 
850         buffer << "OpenNN Exception: Statistics Class.\n"
851                << "type variance(const Tensor<type, 1>& vector) "
852                "const method.\n"
853                << "Size must be greater than zero.\n";
854 
855         throw logic_error(buffer.str());
856     }
857 
858 #endif
859 
860     type sum = 0;
861     type squared_sum = 0;
862 
863     Index count = 0;
864 
865     for(Index i = 0; i < size; i++)
866     {
867         if(!::isnan(vector(i)))
868         {
869             sum += vector(i);
870             squared_sum += vector(i) * vector(i);
871 
872             count++;
873         }
874     }
875 
876     if(count <= 1)
877     {
878         return 0.0;
879     }
880 
881     const type variance = squared_sum/static_cast<type>(count - 1) -(sum/static_cast<type>(count))*(sum/static_cast<type>(count))*static_cast<type>(count)/static_cast<type>(count-1);
882 
883     return variance;
884 }
885 
886 
887 /// Returns the variance of the elements in the vector.
888 /// @param vector Vector to be evaluated.
889 
variance(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)890 type variance(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
891 {
892     const Index size = indices.dimension(0);
893 
894 #ifdef __OPENNN_DEBUG__
895 
896     if(size == 0)
897     {
898         ostringstream buffer;
899 
900         buffer << "OpenNN Exception: Statistics Class.\n"
901                << "type variance(const Tensor<type, 1>&, const Tensor<Index, 1>&) "
902                "const method.\n"
903                << "Indeces size must be greater than zero.\n";
904 
905         throw logic_error(buffer.str());
906     }
907 
908 #endif
909 
910     type sum = 0;
911     type squared_sum = 0;
912 
913     Index count = 0;
914 
915     Index index = 0;
916 
917     for(Index i = 0; i < size; i++)
918     {
919         index = indices(i);
920 
921         if(!::isnan(vector(index)))
922         {
923             sum += vector(index);
924             squared_sum += vector(index) * vector(index);
925 
926             count++;
927         }
928     }
929 
930     if(count <= 1)
931     {
932         return 0.0;
933     }
934 
935     const type variance = squared_sum/static_cast<type>(count - 1) -(sum/static_cast<type>(count))*(sum/static_cast<type>(count))*static_cast<type>(count)/static_cast<type>(count-1);
936 
937     return variance;
938 }
939 
940 
941 /// Returns the standard deviation of the elements in the vector.
942 /// @param vector Vector to be evaluated.
943 
standard_deviation(const Tensor<type,1> & vector)944 type standard_deviation(const Tensor<type, 1>& vector)
945 {
946 #ifdef __OPENNN_DEBUG__
947 
948     const Index size = vector.dimension(0);
949 
950     if(size == 0)
951     {
952         ostringstream buffer;
953 
954         buffer << "OpenNN Exception: Statistics Class.\n"
955                << "type standard_deviation(const Tensor<type, 1>&) const method.\n"
956                << "Size must be greater than zero.\n";
957 
958         throw logic_error(buffer.str());
959     }
960 
961 #endif
962     if(vector.size() == 0) return 0;
963 
964     if(variance(vector)<static_cast<double>(1e-9)){
965         return static_cast<double>(0);
966     }else{
967         return sqrt(variance(vector));
968     }
969 }
970 
971 
972 /// Returns the standard deviation of the elements in the vector.
973 /// @param vector Vector to be evaluated.
974 
standard_deviation(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)975 type standard_deviation(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
976 {
977 #ifdef __OPENNN_DEBUG__
978 
979     const Index size = vector.dimension(0);
980 
981     if(size == 0)
982     {
983         ostringstream buffer;
984 
985         buffer << "OpenNN Exception: Statistics Class.\n"
986                << "type standard_deviation(const Tensor<type, 1>&, const Tensor<Index, 1>&) const method.\n"
987                << "Size must be greater than zero.\n";
988 
989         throw logic_error(buffer.str());
990     }
991 
992 #endif
993     if(variance(vector, indices)<static_cast<double>(1e-9)){
994         return static_cast<double>(0);
995     }else{
996         return sqrt(variance(vector, indices));
997     }
998 }
999 
1000 
1001 
1002 /// @todo check
1003 
standard_deviation(const Tensor<type,1> & vector,const Index & period)1004 Tensor<type, 1> standard_deviation(const Tensor<type, 1>& vector, const Index& period)
1005 {
1006     const Index size = vector.dimension(0);
1007 
1008     Tensor<type, 1> std(size);
1009 
1010     type mean_value = 0;
1011     type sum = 0;
1012 
1013     for(Index i = 0; i < size; i++)
1014     {
1015         const Index begin = i < period ? 0 : i - period + 1;
1016         const Index end = i;
1017 
1018         mean_value = mean(vector, begin,end);
1019 
1020         for(Index j = begin; j < end+1; j++)
1021         {
1022             sum += (vector(j) - mean_value) *(vector(j) - mean_value);
1023         }
1024 
1025         std(i) = sqrt(sum / type(period));
1026 
1027         mean_value = 0;
1028         sum = 0;
1029     }
1030 
1031 
1032     return std;
1033 }
1034 
1035 
1036 /// Returns the asymmetry of the elements in the vector.
1037 /// @param vector Vector to be evaluated.
1038 
asymmetry(const Tensor<type,1> & vector)1039 type asymmetry(const Tensor<type, 1>& vector)
1040 {
1041 
1042     const Index size = vector.dimension(0);
1043 
1044 #ifdef __OPENNN_DEBUG__
1045 
1046     if(size == 0)
1047     {
1048         ostringstream buffer;
1049 
1050         buffer << "OpenNN Exception: Statistics Class.\n"
1051                << "type asymmetry(const Tensor<type, 1>& vector) const method.\n"
1052                << "Size must be greater than zero.\n";
1053 
1054         throw logic_error(buffer.str());
1055     }
1056 
1057 #endif
1058 
1059     if(size == 0 || 1)
1060     {
1061         return 0.0;
1062     }
1063 
1064     const type standard_deviation_value = standard_deviation(vector);
1065 
1066     const type mean_value = mean(vector);
1067 
1068     type sum = 0;
1069 
1070     Index count = 0;
1071 
1072     for(Index i = 0; i < size; i++)
1073     {
1074         if(!::isnan(vector(i)))
1075         {
1076             sum += (vector(i) - mean_value) *(vector(i) - mean_value) *(vector(i) - mean_value);
1077 
1078             count++;
1079         }
1080     }
1081 
1082     const type numerator = sum /count;
1083     const type denominator = standard_deviation_value * standard_deviation_value * standard_deviation_value;
1084 
1085     return numerator/denominator;
1086 
1087 }
1088 
1089 /// Returns the kurtosis of the elements in the vector.
1090 /// @param vector Vector to be evaluated.
1091 
kurtosis(const Tensor<type,1> & vector)1092 type kurtosis(const Tensor<type, 1>& vector)
1093 {
1094     const Index size = vector.dimension(0);
1095 #ifdef __OPENNN_DEBUG__
1096 
1097     if(size == 0)
1098     {
1099         ostringstream buffer;
1100 
1101         buffer << "OpenNN Exception: Statistics Class.\n"
1102                << "type kurtosis(const Tensor<type, 1>& vector) const method.\n"
1103                << "Size must be greater than zero.\n";
1104 
1105         throw logic_error(buffer.str());
1106     }
1107 
1108 #endif
1109 
1110     if(size == 1)
1111     {
1112         return 0.0;
1113     }
1114 
1115     const type standard_deviation_value = standard_deviation(vector);
1116 
1117     const type mean_value = mean(vector);
1118 
1119     type sum = 0;
1120 
1121     Index count = 0;
1122 
1123     for(Index i = 0; i < size; i++)
1124     {
1125         if(!::isnan(vector(i)))
1126         {
1127             sum += (vector(i) - mean_value)*(vector(i) - mean_value)*(vector(i) - mean_value)*(vector(i) - mean_value);
1128 
1129             count++;
1130         }
1131     }
1132 
1133     const type numerator = sum /count;
1134     const type denominator = standard_deviation_value*standard_deviation_value*standard_deviation_value*standard_deviation_value;
1135 
1136     return numerator/denominator - 3;
1137 
1138 }
1139 
1140 
1141 /// Returns the median of the elements in the vector
1142 /// @param vector Vector to be evaluated.
1143 
median(const Tensor<type,1> & vector)1144 type median(const Tensor<type, 1>& vector)
1145 {
1146     const Index size = vector.dimension(0);
1147 
1148     // Fix missing values
1149 
1150     Index new_size = 0;
1151 
1152     for(Index i = 0; i < size; i++)
1153     {
1154         if(!isnan(vector(i)))
1155         {
1156             new_size++;
1157         }
1158     }
1159 
1160     Tensor<type, 1> sorted_vector;
1161     sorted_vector.resize(new_size);
1162 
1163     Index sorted_index = 0;
1164 
1165     for(Index i = 0; i < size; i++)
1166     {
1167         if(!isnan(vector(i)))
1168         {
1169             sorted_vector(sorted_index) = vector(i);
1170 
1171             sorted_index++;
1172         }
1173     }
1174 
1175     // Calculate median
1176 
1177     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
1178 
1179     Index median_index;
1180 
1181     if(new_size % 2 == 0)
1182     {
1183         median_index = static_cast<Index>(new_size / 2);
1184 
1185         return (sorted_vector(median_index-1) + sorted_vector(median_index)) / static_cast<type>(2.0);
1186     }
1187     else
1188     {
1189         median_index = static_cast<Index>(new_size / 2);
1190 
1191         return sorted_vector(median_index);
1192     }
1193 }
1194 
1195 
1196 /// Returns the quartiles of the elements in the vector.
1197 /// @param vector Vector to be evaluated.
1198 
quartiles(const Tensor<type,1> & vector)1199 Tensor<type, 1> quartiles(const Tensor<type, 1>& vector)
1200 {
1201     const Index size = vector.dimension(0);
1202 
1203     // Fix missing values
1204 
1205     Index new_size = 0;
1206 
1207     for(Index i = 0; i < size; i++)
1208     {
1209         if(!::isnan(vector(i)))
1210         {
1211             new_size++;
1212         }
1213     }
1214 
1215     Tensor<type, 1> sorted_vector;
1216     sorted_vector.resize(new_size);
1217 
1218     Index sorted_index = 0;
1219 
1220     for(Index i = 0; i < size; i++)
1221     {
1222         if(!::isnan(vector(i)))
1223         {
1224             sorted_vector(sorted_index) = vector(i);
1225 
1226             sorted_index++;
1227         }
1228     }
1229 
1230     sort(sorted_vector.data(), sorted_vector.data() + new_size, less<type>());
1231 
1232     // Calculate quartiles
1233 
1234     Tensor<type, 1> first_sorted_vector(new_size/2);
1235     Tensor<type, 1> last_sorted_vector(new_size/2);
1236 
1237     if (new_size % 2 == 0)
1238     {
1239         for(Index i = 0; i < new_size/2 ; i++)
1240         {
1241             first_sorted_vector(i) = sorted_vector(i);
1242             last_sorted_vector(i) = sorted_vector[i + new_size/2];
1243         }
1244     }
1245     else
1246     {
1247         for(Index i = 0; i < new_size/2 ; i++)
1248         {
1249             first_sorted_vector(i) = sorted_vector(i);
1250             last_sorted_vector(i) = sorted_vector[i + new_size/2 + 1];
1251         }
1252     }
1253 
1254 
1255     Tensor<type, 1> quartiles(3);
1256 
1257     if(new_size == 1)
1258     {
1259         quartiles(0) = sorted_vector(0);
1260         quartiles(1) = sorted_vector(0);
1261         quartiles(2) = sorted_vector(0);
1262     }
1263     else if(new_size == 2)
1264     {
1265         quartiles(0) = (sorted_vector(0)+sorted_vector(1))/4;
1266         quartiles(1) = (sorted_vector(0)+sorted_vector(1))/2;
1267         quartiles(2) = (sorted_vector(0)+sorted_vector(1))*3/4;
1268     }
1269     else if(new_size == 3)
1270     {
1271         quartiles(0) = (sorted_vector(0)+sorted_vector(1))/2;
1272         quartiles(1) = sorted_vector(1);
1273         quartiles(2) = (sorted_vector(2)+sorted_vector(1))/2;
1274     }
1275     else
1276     {
1277         quartiles(0) = median(first_sorted_vector);
1278         quartiles(1) = median(sorted_vector);
1279         quartiles(2) = median(last_sorted_vector);
1280     }
1281     return quartiles;
1282 }
1283 
1284 
1285 /// Returns the quartiles of the elements of the vector that correspond to the given indices.
1286 /// @param vector Vector to be evaluated.
1287 /// @param indices Indices of the elements of the vector to be evaluated.
1288 
quartiles(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)1289 Tensor<type, 1> quartiles(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
1290 {
1291     const Index indices_size = indices.dimension(0);
1292 
1293     // Fix missing values
1294 
1295     Index index;
1296     Index new_size = 0;
1297 
1298     for(Index i = 0; i < indices_size; i++)
1299     {
1300         index = indices(i);
1301 
1302         if(!isnan(vector(index)))
1303         {
1304             new_size++;
1305         }
1306     }
1307 
1308     Tensor<type, 1> sorted_vector;
1309     sorted_vector.resize(new_size);
1310 
1311     Index sorted_index = 0;
1312 
1313     for(Index i = 0; i < indices_size; i++)
1314     {
1315         index = indices(i);
1316 
1317         if(!isnan(vector(index)))
1318         {
1319             sorted_vector(sorted_index) = vector(index);
1320 
1321             sorted_index++;
1322         }
1323     }
1324 
1325     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
1326 
1327     // Calculate quartiles
1328 
1329     Tensor<type, 1> first_sorted_vector(new_size/2);
1330     Tensor<type, 1> last_sorted_vector(new_size/2);
1331 
1332     for(Index i = 0; i < new_size/2 ; i++)
1333     {
1334         first_sorted_vector(i) = sorted_vector(i);
1335     }
1336 
1337     for(Index i = 0; i < new_size/2; i++)
1338     {
1339         last_sorted_vector(i) = sorted_vector(i + new_size - new_size/2);
1340     }
1341 
1342     Tensor<type, 1> quartiles(3);
1343 
1344     if(new_size == 1)
1345     {
1346         quartiles(0) = sorted_vector(0);
1347         quartiles(1) = sorted_vector(0);
1348         quartiles(2) = sorted_vector(0);
1349     }
1350     else if(new_size == 2)
1351     {
1352         quartiles(0) = (sorted_vector(0)+sorted_vector(1))/4;
1353         quartiles(1) = (sorted_vector(0)+sorted_vector(1))/2;
1354         quartiles(2) = (sorted_vector(0)+sorted_vector(1))*3/4;
1355     }
1356     else if(new_size == 3)
1357     {
1358         quartiles(0) = (sorted_vector(0)+sorted_vector(1))/2;
1359         quartiles(1) = sorted_vector(1);
1360         quartiles(2) = (sorted_vector(2)+sorted_vector(1))/2;
1361     }
1362     else if(new_size % 2 == 0)
1363     {
1364         Index median_index = static_cast<Index>(first_sorted_vector.size() / 2);
1365         quartiles(0) = (first_sorted_vector(median_index-1) + first_sorted_vector(median_index)) / static_cast<type>(2.0);
1366 
1367         median_index = static_cast<Index>(new_size / 2);
1368         quartiles(1) = (sorted_vector(median_index-1) + sorted_vector(median_index)) / static_cast<type>(2.0);
1369 
1370         median_index = static_cast<Index>(last_sorted_vector.size() / 2);
1371         quartiles(2) = (last_sorted_vector(median_index-1) + last_sorted_vector(median_index)) / static_cast<type>(2.0);
1372     }
1373     else
1374     {
1375         quartiles(0) = sorted_vector(new_size/4);
1376         quartiles(1) = sorted_vector(new_size/2);
1377         quartiles(2) = sorted_vector(new_size*3/4);
1378     }
1379 
1380     return quartiles;
1381 }
1382 
1383 
1384 
1385 /// Returns the box and whispers for a vector.
1386 /// @param vector Vector to be evaluated.
1387 
box_plot(const Tensor<type,1> & vector)1388 BoxPlot box_plot(const Tensor<type, 1>& vector)
1389 {
1390     BoxPlot boxplot;
1391 
1392     if(vector.dimension(0) == 0) {
1393         boxplot.minimum = NAN;
1394         boxplot.first_quartile = NAN;
1395         boxplot.median = NAN;
1396         boxplot.third_quartile = NAN;
1397         boxplot.maximum = NAN;
1398         return boxplot;
1399     }
1400 
1401 
1402     const Tensor<type, 1> quartiles = OpenNN::quartiles(vector);
1403 
1404     boxplot.minimum = minimum(vector);
1405     boxplot.first_quartile = quartiles(0);
1406     boxplot.median = quartiles(1);
1407     boxplot.third_quartile = quartiles(2);
1408     boxplot.maximum = maximum(vector);
1409 
1410     return boxplot;
1411 }
1412 
1413 
1414 /// Returns the box and whispers for the elements of the vector that correspond to the given indices.
1415 /// @param vector Vector to be evaluated.
1416 /// @param indices Indices of the elements of the vector to be evaluated.
1417 
box_plot(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)1418 BoxPlot box_plot(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
1419 {
1420     BoxPlot boxplot;
1421 
1422     if(vector.dimension(0) == 0 || indices.dimension(0) == 0) return boxplot;
1423 
1424     const Tensor<type, 1> quartiles = OpenNN::quartiles(vector, indices);
1425 
1426     boxplot.minimum = minimum(vector, indices);
1427     boxplot.first_quartile = quartiles(0);
1428     boxplot.median = quartiles(1);
1429     boxplot.third_quartile = quartiles(2);
1430     boxplot.maximum = maximum(vector, indices);
1431 
1432     return boxplot;
1433 }
1434 
1435 
1436 /// This method bins the elements of the vector into a given number of equally
1437 /// spaced containers.
1438 /// It returns a vector of two vectors.
1439 /// The size of both subvectors is the number of bins.
1440 /// The first subvector contains the frequency of the bins.
1441 /// The second subvector contains the center of the bins.
1442 /// @param vector Vector to obtain the histogram.
1443 /// @param bins_number Number of bins to split the histogram.
1444 
histogram(const Tensor<type,1> & vector,const Index & bins_number)1445 Histogram histogram(const Tensor<type, 1>& vector, const Index &bins_number)
1446 {
1447 #ifdef __OPENNN_DEBUG__
1448 
1449     if(bins_number < 1)
1450     {
1451         ostringstream buffer;
1452 
1453         buffer << "OpenNN Exception: Statistics Class.\n"
1454                << "Histogram histogram(const Tensor<type, 1>&, "
1455                "const Index&) const method.\n"
1456                << "Number of bins is less than one.\n";
1457 
1458         throw logic_error(buffer.str());
1459     }
1460 
1461 #endif
1462 
1463     const Index size = vector.dimension(0);
1464 
1465     Tensor<type, 1> minimums(bins_number);
1466     Tensor<type, 1> maximums(bins_number);
1467 
1468     Tensor<type, 1> centers(bins_number);
1469     Tensor<Index, 1> frequencies(bins_number);
1470     frequencies.setZero();
1471 
1472     Index unique_values_number = 1;
1473     Tensor<type, 1> old_unique_values(1);
1474     Tensor<type, 1> unique_values(1);
1475     unique_values(0) = vector(0);
1476     old_unique_values = unique_values;
1477 
1478     for(Index i = 1; i < size; i++)
1479     {
1480         if(std::find(unique_values.data(), unique_values.data()+unique_values.size(), vector(i)) == unique_values.data()+unique_values.size())
1481         {
1482             unique_values_number++;
1483 
1484             unique_values.resize(unique_values_number);
1485 
1486             for(Index j = 0; j < unique_values_number-1; j++) unique_values(j) = old_unique_values(j);
1487 
1488             unique_values(unique_values_number-1) = vector(i);
1489 
1490             old_unique_values = unique_values;
1491         }
1492 
1493         if(unique_values_number > bins_number) break;
1494     }
1495 
1496     if(unique_values_number <= bins_number)
1497     {
1498         sort(unique_values.data(), unique_values.data() + unique_values.size(), less<type>());
1499 
1500         centers = unique_values;
1501         minimums = unique_values;
1502         maximums = unique_values;
1503 
1504         frequencies.resize(unique_values_number);
1505         frequencies.setZero();
1506 
1507         for(Index i = 0; i < size; i++)
1508         {
1509             if(isnan(vector(i))) continue;
1510 
1511             for(Index j = 0; j < unique_values_number; j++)
1512             {
1513                 if(static_cast<Index>(vector(i)) == static_cast<Index>(centers(j)))
1514                 {
1515                     frequencies(j)++;
1516                     break;
1517                 }
1518             }
1519         }
1520     }
1521     else
1522     {
1523         const type min = minimum(vector);
1524         const type max = maximum(vector);
1525 
1526         const type length = (max - min) /static_cast<type>(bins_number);
1527 
1528         minimums(0) = min;
1529         maximums(0) = min + length;
1530         centers(0) = (maximums(0) + minimums(0)) /static_cast<type>(2.0);
1531 
1532         // Calculate bins center
1533 
1534         for(Index i = 1; i < bins_number; i++)
1535         {
1536             minimums(i) = minimums(i - 1) + length;
1537             maximums(i) = maximums(i - 1) + length;
1538 
1539             centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1540         }
1541 
1542         // Calculate bins frequency
1543 
1544         const Index size = vector.dimension(0);
1545 
1546         for(Index i = 0; i < size; i++)
1547         {
1548             if(isnan(vector(i)))
1549             {
1550                 continue;
1551             }
1552 
1553             for(Index j = 0; j < bins_number - 1; j++)
1554             {
1555                 if(vector(i) >= minimums(j) && vector(i) < maximums(j))
1556                 {
1557                     frequencies(j)++;
1558                     break;
1559                 }
1560             }
1561 
1562             if(vector(i) >= minimums(bins_number - 1))
1563             {
1564                 frequencies(bins_number - 1)++;
1565             }
1566         }
1567     }
1568 
1569     Histogram histogram;
1570     histogram.centers = centers;
1571     histogram.minimums = minimums;
1572     histogram.maximums = maximums;
1573     histogram.frequencies = frequencies;
1574 
1575     return histogram;
1576 }
1577 
1578 
1579 /// This method bins the elements of the vector into a given number of equally
1580 /// spaced containers.
1581 /// It returns a vector of two vectors.
1582 /// The size of both subvectors is the number of bins.
1583 /// The first subvector contains the frequency of the bins.
1584 /// The second subvector contains the center of the bins.
1585 /// @param vector
1586 /// @param center
1587 /// @param bins_number
1588 
1589 
histogram_centered(const Tensor<type,1> & vector,const type & center,const Index & bins_number)1590 Histogram histogram_centered(const Tensor<type, 1>& vector, const type& center, const Index & bins_number)
1591 {
1592 #ifdef __OPENNN_DEBUG__
1593 
1594     if(bins_number < 1)
1595     {
1596         ostringstream buffer;
1597 
1598         buffer << "OpenNN Exception: Statistics Class.\n"
1599                << "Histogram histogram_centered(const Tensor<type, 1>&, "
1600                "const type&, const Index&) const method.\n"
1601                << "Number of bins is less than one.\n";
1602 
1603         throw logic_error(buffer.str());
1604     }
1605 
1606 #endif
1607 
1608     Index bin_center;
1609 
1610     if(bins_number%2 == 0)
1611     {
1612         bin_center = static_cast<Index>(static_cast<type>(bins_number)/static_cast<type>(2.0));
1613     }
1614     else
1615     {
1616         bin_center = static_cast<Index>(static_cast<type>(bins_number)/static_cast<type>(2.0) + static_cast<type>(0.5));
1617     }
1618 
1619     Tensor<type, 1> minimums(bins_number);
1620     Tensor<type, 1> maximums(bins_number);
1621 
1622     Tensor<type, 1> centers(bins_number);
1623     Tensor<Index, 1> frequencies(bins_number);
1624     frequencies.setZero();
1625 
1626     const type min = minimum(vector);
1627     const type max = maximum(vector);
1628 
1629     const type length = (max - min)/static_cast<type>(bins_number);
1630 
1631     minimums(bin_center-1) = center - length;
1632     maximums(bin_center-1) = center + length;
1633     centers(bin_center-1) = center;
1634 
1635     // Calculate bins center
1636 
1637     for(Index i = bin_center; i < bins_number; i++) // Upper centers
1638     {
1639         minimums(i) = minimums(i - 1) + length;
1640         maximums(i) = maximums(i - 1) + length;
1641 
1642         centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1643     }
1644 
1645     for(Index i = static_cast<Index>(bin_center)-2; i >= 0; i--) // Lower centers
1646     {
1647         minimums(i) = minimums(i + 1) - length;
1648         maximums(i) = maximums(i + 1) - length;
1649 
1650         centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1651     }
1652 
1653     // Calculate bins frequency
1654 
1655     const Index size = vector.dimension(0);
1656 
1657     for(Index i = 0; i < size; i++)
1658     {
1659         for(Index j = 0; j < bins_number - 1; j++)
1660         {
1661             if(vector(i) >= minimums(j) && vector(i) < maximums(j))
1662             {
1663                 frequencies(j)++;
1664             }
1665         }
1666 
1667         if(vector(i) >= minimums(bins_number - 1))
1668         {
1669             frequencies(bins_number - 1)++;
1670         }
1671     }
1672 
1673     Histogram histogram(bins_number);
1674     histogram.centers = centers;
1675     histogram.minimums = minimums;
1676     histogram.maximums = maximums;
1677     histogram.frequencies = frequencies;
1678 
1679     return histogram;
1680 }
1681 
1682 
1683 /// This method bins the elements of the vector into a given number of equally
1684 /// spaced containers.
1685 /// It returns a vector of two vectors.
1686 /// The size of both subvectors is the number of bins.
1687 /// The first subvector contains the frequency of the bins.
1688 /// The second subvector contains the center of the bins.
1689 /// @todo isnan is not defined for bool.
1690 
histogram(const Tensor<bool,1> & v)1691 Histogram histogram(const Tensor<bool, 1>& v)
1692 {
1693     Tensor<type, 1> minimums(2);
1694     minimums.setZero();
1695     Tensor<type, 1> maximums(2);
1696     maximums.setConstant(1);
1697 
1698     Tensor<type, 1> centers(2);
1699     centers.setValues({0,1});
1700     Tensor<Index, 1> frequencies(2);
1701     frequencies.setZero();
1702 
1703     // Calculate bins frequency
1704 
1705     const Index size = v.dimension(0);
1706 
1707     for(Index i = 0; i < size; i++)
1708     {
1709 //        if(isnan(v(i))) continue;
1710 
1711         for(Index j = 0; j < 2; j++)
1712         {
1713             if(static_cast<Index>(v(i)) == static_cast<Index>(minimums(j)))
1714             {
1715                 frequencies(j)++;
1716             }
1717         }
1718     }
1719 
1720     Histogram histogram(2);
1721     histogram.centers = centers;
1722     histogram.minimums = minimums;
1723     histogram.maximums = maximums;
1724     histogram.frequencies = frequencies;
1725 
1726 //    Histogram histogram;
1727     return histogram;
1728 }
1729 
1730 
1731 /// This method bins the elements of the vector into a given number of equally
1732 /// spaced containers.
1733 /// It returns a vector of two vectors.
1734 /// The size of both subvectors is the number of bins.
1735 /// The first subvector contains the frequency of the bins.
1736 /// The second subvector contains the center of the bins.
1737 /// @param vector
1738 /// @param bins_number
1739 
histogram(const Tensor<Index,1> & vector,const Index & bins_number)1740 Histogram histogram(const Tensor<Index, 1>& vector, const Index& bins_number)
1741 {
1742 #ifdef __OPENNN_DEBUG__
1743 
1744     if(bins_number < 1)
1745     {
1746         ostringstream buffer;
1747 
1748         buffer << "OpenNN Exception: Statistics Class.\n"
1749                << "Histogram calculate_histogram_integers(const Tensor<Index, 1>&, "
1750                "const Index&) const method.\n"
1751                << "Number of bins is less than one.\n";
1752 
1753         throw logic_error(buffer.str());
1754     }
1755 
1756 #endif
1757 /*
1758     Tensor<Index, 1> centers = vector.get_integer_elements(bins_number);
1759     const Index centers_number = centers.size();
1760 
1761     sort(centers.data(), centers.data() + centers.size(), less<Index>());
1762 
1763     Tensor<type, 1> minimums(centers_number);
1764     Tensor<type, 1> maximums(centers_number);
1765     Tensor<Index, 1> frequencies(centers_number);
1766 
1767     for(Index i = 0; i < centers_number; i++)
1768     {
1769       minimums(i) = centers(i);
1770       maximums(i) = centers(i);
1771       frequencies(i) = vector.count_equal_to(centers(i));
1772     }
1773 
1774     Histogram histogram(centers_number);
1775     histogram.centers = centers.cast<type>();
1776     histogram.minimums = minimums;
1777     histogram.maximums = maximums;
1778     histogram.frequencies = frequencies;
1779 
1780     return histogram;
1781     */
1782 
1783     return Histogram();
1784 }
1785 
1786 
1787 /// Returns a vector containing the sum of the frequencies of the bins to which
1788 /// this vector belongs.
1789 /// @param histograms Used histograms.
1790 
total_frequencies(const Tensor<Histogram,1> & histograms)1791 Tensor<Index, 1> total_frequencies(const Tensor<Histogram, 1>&histograms)
1792 {
1793     const Index histograms_number = histograms.size();
1794 
1795     Tensor<Index, 1> total_frequencies(histograms_number);
1796 
1797     for(Index i = 0; i < histograms_number; i++)
1798     {
1799         total_frequencies(i) = histograms(i).frequencies(i);
1800     }
1801 
1802     return total_frequencies;
1803 }
1804 
1805 
1806 /// Calculates a histogram for each column, each having a given number of bins.
1807 /// It returns a vector of vectors.
1808 /// The size of the main vector is the number of columns.
1809 /// Each subvector contains the frequencies and centers of that colums.
1810 /// @param matrix Data to calculate histograms
1811 /// @param bins_number Number of bins for each histogram.
1812 
histograms(const Tensor<type,2> & matrix,const Index & bins_number)1813 Tensor<Histogram, 1> histograms(const Tensor<type, 2>& matrix, const Index& bins_number)
1814 {
1815     const Index rows_number = matrix.dimension(0);
1816     const Index columns_number = matrix.dimension(1);
1817 
1818     Tensor<Histogram, 1> histograms(columns_number);
1819 
1820     Tensor<type, 1> column(rows_number);
1821 
1822     for(Index i = 0; i < columns_number; i++)
1823     {
1824         column = matrix.chip(i,1);
1825 
1826         histograms(i) = histogram(column, bins_number);
1827 
1828         /*
1829               if(column.is_binary())
1830               {
1831                   histograms(i) = histogram(column.to_bool_vector());
1832               }
1833               else
1834               {
1835                   histograms(i) = histogram(column, bins_number);
1836               }
1837         */
1838     }
1839 
1840     return histograms;
1841 }
1842 
1843 
1844 /// Returns the basic descriptives of the columns.
1845 /// The format is a vector of descriptives structures.
1846 /// The size of that vector is equal to the number of columns in this matrix.
1847 /// @param matrix Used matrix.
1848 
descriptives(const Tensor<type,2> & matrix)1849 Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix)
1850 {
1851     const Index rows_number = matrix.dimension(0);
1852     const Index columns_number = matrix.dimension(1);
1853 
1854 #ifdef __OPENNN_DEBUG__
1855 
1856     if(rows_number == 0)
1857     {
1858         ostringstream buffer;
1859 
1860         buffer << "OpenNN Exception: Statistics Class.\n"
1861                << "Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>&) "
1862                "const method.\n"
1863                << "Number of rows must be greater than one.\n";
1864 
1865         throw logic_error(buffer.str());
1866     }
1867 
1868 #endif
1869 
1870     Tensor<Descriptives, 1> descriptives(columns_number);
1871 
1872     Tensor<type, 1> column(rows_number);
1873 
1874     #pragma omp parallel for private(column)
1875 
1876     for(Index i = 0; i < columns_number; i++)
1877     {
1878         column = matrix.chip(i,1);
1879 
1880         descriptives(i) = OpenNN::descriptives(column);
1881     }
1882 
1883     return descriptives;
1884 }
1885 
1886 
1887 /// Returns the basic descriptives of given columns for given rows.
1888 /// The format is a vector of descriptives structures.
1889 /// The size of that vector is equal to the number of given columns.
1890 /// @param row_indices Indices of the rows for which the descriptives are to be computed.
1891 /// @param columns_indices Indices of the columns for which the descriptives are to be computed.
1892 
descriptives(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices,const Tensor<Index,1> & columns_indices)1893 Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
1894 {
1895     const Index row_indices_size = row_indices.size();
1896     const Index columns_indices_size = columns_indices.size();
1897 
1898     Tensor<Descriptives, 1> descriptives(columns_indices_size);
1899 
1900     Index row_index, column_index;
1901 
1902     Tensor<type, 1> minimums(columns_indices_size);
1903     minimums.setConstant(numeric_limits<type>::max());
1904 
1905     Tensor<type, 1> maximums(columns_indices_size);
1906     maximums.setConstant(numeric_limits<type>::min());
1907 
1908     Tensor<double, 1> sums(columns_indices_size);
1909     Tensor<double, 1> squared_sums(columns_indices_size);
1910     Tensor<Index, 1> count(columns_indices_size);
1911 
1912     sums.setZero();
1913     squared_sums.setZero();
1914     count.setZero();
1915 
1916     for(Index i = 0; i < row_indices_size; i++)
1917     {
1918         row_index = row_indices(i);
1919 
1920         #pragma omp parallel for private(column_index)
1921 
1922         for(Index j = 0; j < columns_indices_size; j++)
1923         {
1924             column_index = columns_indices(j);
1925 
1926             const type value = matrix(row_index,column_index);
1927 
1928             if(isnan(value)) continue;
1929 
1930             if(value < minimums(j)) minimums(j) = value;
1931 
1932             if(value > maximums(j)) maximums(j) = value;
1933 
1934             sums(j) += value;
1935             squared_sums(j) += value*value;
1936             count(j)++;
1937         }
1938     }
1939 
1940     const Tensor<double, 1> mean = sums/count;
1941 
1942     Tensor<double, 1> standard_deviation(columns_indices_size);
1943 
1944     if(row_indices_size > 1)
1945     {
1946         #pragma omp parallel for
1947 
1948         for(Index i = 0; i < columns_indices_size; i++)
1949         {
1950             const double variance = squared_sums(i)/static_cast<double>(count(i)-1)
1951                     - (sums(i)/static_cast<double>(count(i)))*(sums(i)/static_cast<double>(count(i)))*static_cast<double>(count(i))/static_cast<double>(count(i)-1);
1952 
1953             standard_deviation(i) = sqrt(variance);
1954         }
1955     }
1956 
1957     for(Index i = 0; i < columns_indices_size; i++)
1958     {
1959         descriptives(i).minimum = minimums(i);
1960         descriptives(i).maximum = maximums(i);
1961         descriptives(i).mean = mean(i);
1962         descriptives(i).standard_deviation = standard_deviation(i);
1963     }
1964 
1965     return descriptives;
1966 }
1967 
1968 
1969 /// Returns the means of given rows.
1970 /// The format is a vector of type values.
1971 /// The size of that vector is equal to the number of given rows.
1972 /// @param matrix Used matrix.
1973 /// @param columns_indices Indices of the columns for which the descriptives are to be computed.
1974 
rows_means(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices)1975 Tensor<type, 1> rows_means(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices)
1976 {
1977     const Index columns_number = matrix.dimension(1);
1978 
1979     Tensor<Index, 1> used_row_indices;
1980 
1981     if(matrix.dimension(0) == 0 && matrix.dimension(1) == 0)
1982     {
1983         used_row_indices.resize(matrix.dimension(0));
1984     }
1985     else
1986     {
1987         used_row_indices = row_indices;
1988     }
1989 
1990     const Index row_indices_size = used_row_indices.size();
1991 
1992     Tensor<type, 1> means(columns_number);
1993 
1994     Tensor<type, 1> column(row_indices_size);
1995 
1996     for(Index i = 0; i < columns_number; i++)
1997     {
1998         for(Index j = 0; j < row_indices_size; j++)
1999         {
2000             Index row_index = row_indices(j);
2001 
2002             column(j) = matrix(row_index,i);
2003         }
2004 
2005         means(i) = mean(column);
2006     }
2007 
2008     return means;
2009 }
2010 
2011 
2012 /// Returns the minimums values of given columns.
2013 /// The format is a vector of type values.
2014 /// The size of that vector is equal to the number of given columns.
2015 /// @param matrix Used matrix.
2016 /// @param rows_indices Indices of the rows for which the minimums are to be computed.
2017 /// @param columns_indices Indices of the columns for which the minimums are to be computed.
2018 
columns_minimums(const Tensor<type,2> & matrix,const Tensor<Index,1> & rows_indices,const Tensor<Index,1> & columns_indices)2019 Tensor<type, 1> columns_minimums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
2020 {
2021     const Index rows_number = matrix.dimension(0);
2022     const Index columns_number = matrix.dimension(1);
2023 
2024     Tensor<Index, 1> used_columns_indices;
2025 
2026     if(columns_indices.dimension(0) == 0)
2027     {
2028         used_columns_indices.resize(columns_number);
2029 
2030         for (Index i = 0; i < columns_number; i++)
2031         {
2032             used_columns_indices(i) = i;
2033         }
2034     }
2035     else
2036     {
2037         used_columns_indices = columns_indices;
2038     }
2039 
2040     Tensor<Index, 1> used_rows_indices;
2041 
2042     if(rows_indices.dimension(0) == 0)
2043     {
2044         used_rows_indices.resize(rows_number);
2045 
2046         for (Index i = 0; i < rows_number; i++)
2047         {
2048             used_rows_indices(i) = i;
2049         }
2050     }
2051     else
2052     {
2053         used_rows_indices = rows_indices;
2054     }
2055 
2056     const Index rows_indices_size = used_rows_indices.size();
2057     const Index columns_indices_size = used_columns_indices.size();
2058 
2059     Tensor<type, 1> minimums(columns_indices_size);
2060 
2061     Index row_index;
2062     Index column_index;
2063 
2064     for(Index j = 0; j < columns_indices_size; j++)
2065     {
2066         column_index = used_columns_indices(j);
2067 
2068         Tensor<type, 1> column(rows_indices_size);
2069 
2070         for(Index i = 0; i < rows_indices_size; i++)
2071         {
2072             row_index = used_rows_indices(i);
2073 
2074             column(i) = matrix(row_index,column_index);
2075         }
2076 
2077         minimums(j) = minimum(column);
2078     }
2079 
2080     return minimums;
2081 }
2082 
2083 
2084 /// Returns the maximums values of given columns.
2085 /// The format is a vector of type values.
2086 /// The size of that vector is equal to the number of given columns.
2087 /// @param matrix Used matrix.
2088 /// @param columns_indices Indices of the columns for which the descriptives are to be computed.
2089 
columns_maximums(const Tensor<type,2> & matrix,const Tensor<Index,1> & columns_indices)2090 Tensor<type, 1> columns_maximums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2091 {
2092     const Index rows_number = matrix.dimension(0);
2093     const Index columns_number = matrix.dimension(1);
2094 
2095     Tensor<Index, 1> used_columns_indices;
2096 
2097     if(columns_indices.dimension(0) == 0 && columns_indices.dimension(1) == 0)
2098     {
2099         used_columns_indices.resize(columns_number);
2100     }
2101     else
2102     {
2103         used_columns_indices = columns_indices;
2104     }
2105 
2106     const Index columns_indices_size = used_columns_indices.size();
2107 
2108     Tensor<type, 1> maximums(columns_indices_size);
2109 
2110     Index column_index;
2111     Tensor<type, 1> column(rows_number);
2112 
2113     for(Index i = 0; i < columns_indices_size; i++)
2114     {
2115         column_index = used_columns_indices(i);
2116 
2117         column = matrix.chip(column_index,1);
2118 
2119         maximums(i) = maximum(column);
2120     }
2121 
2122     return maximums;
2123 }
2124 
2125 
range(const Tensor<type,1> & vector)2126 type range(const Tensor<type, 1>& vector)
2127 {
2128     const type min = minimum(vector);
2129     const type max = maximum(vector);
2130 
2131     return abs(max - min);
2132 }
2133 
2134 /*
2135 /// Calculates the box plots for a set of rows of each of the given columns of this matrix.
2136 /// @param matrix Used matrix.
2137 /// @param rows_indices Rows to be used for the box plot.
2138 /// @param columns_indices Indices of the columns for which box plots are going to be calculated.
2139 /// @todo remove?
2140 
2141 Tensor<BoxPlot, 1> box_plots(const Tensor<type, 2>& matrix, const Tensor<Tensor<Index, 1>, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
2142 {
2143     const Index columns_number = columns_indices.size();
2144 
2145 #ifdef __OPENNN_DEBUG__
2146 
2147     if(columns_number == rows_indices.size())
2148     {
2149         ostringstream buffer;
2150 
2151         buffer << "OpenNN Exception: Statistics class."
2152                << "void box_plots(const Tensor<type, 2>&, "
2153                "const Tensor<Tensor<Index, 1>, 1>&, const Tensor<Index, 1>&) const method.\n"
2154                << "Size of row indices must be equal to the number of columns.\n";
2155 
2156         throw logic_error(buffer.str());
2157     }
2158 
2159 #endif
2160 
2161     Tensor<BoxPlot, 1> box_plots(columns_number);
2162 
2163     for(Index i = 0; i < matrix.dimension(1); i++)
2164     {
2165 
2166 
2167     }
2168 
2169         Tensor<type, 1> column;
2170 
2171          #pragma omp parallel for private(column)
2172 
2173         for(Index i = 0; i < columns_number; i++)
2174         {
2175             box_plots(i).resize(5);
2176 
2177             const Index rows_number = rows_indices(i).size();
2178 
2179             column = matrix.get_column(columns_indices(i)).get_subvector(rows_indices(i));
2180 
2181             sort(column.begin(), column.end(), less<type>());
2182 
2183             // Minimum
2184 
2185             box_plots(i)[0] = column[0];
2186 
2187             if(rows_number % 2 == 0)
2188             {
2189                 // First quartile
2190 
2191                 box_plots(i)[1] = (column[rows_number / 4] + column[rows_number / 4 + 1]) /static_cast<type>(2.0);
2192 
2193                 // Second quartile
2194 
2195                 box_plots(i)[2] = (column[rows_number * 2 / 4] +
2196                                column[rows_number * 2 / 4 + 1]) /
2197                               2.0;
2198 
2199                 // Third quartile
2200 
2201                 box_plots(i)[3] = (column[rows_number * 3 / 4] +
2202                                column[rows_number * 3 / 4 + 1]) /
2203                               2.0;
2204             }
2205             else
2206             {
2207                 // First quartile
2208 
2209                 box_plots(i)[1] = column[rows_number / 4];
2210 
2211                 // Second quartile
2212 
2213                 box_plots(i)[2] = column[rows_number * 2 / 4];
2214 
2215                 //Third quartile
2216 
2217                 box_plots(i)[3] = column[rows_number * 3 / 4];
2218             }
2219 
2220             // Maximum
2221 
2222             box_plots(i)[4] = column[rows_number-1];
2223         }
2224 
2225     return box_plots;
2226 }
2227 */
2228 
2229 /// Returns the minimum, maximum, mean and standard deviation of the elements in the vector.
2230 /// @param vector Vector to be evaluated.
2231 
descriptives(const Tensor<type,1> & vector)2232 Descriptives descriptives(const Tensor<type, 1>& vector)
2233 {
2234     const Index size = vector.dimension(0);
2235 
2236 #ifdef __OPENNN_DEBUG__
2237 
2238     if(size == 0)
2239     {
2240         ostringstream buffer;
2241 
2242         buffer << "OpenNN Exception: Statistics Class.\n"
2243                << "type descriptives(const Tensor<type, 1>&, "
2244                "const Tensor<Index, 1>&).\n"
2245                << "Size must be greater than zero.\n";
2246 
2247         throw logic_error(buffer.str());
2248     }
2249 
2250 #endif
2251 
2252     Descriptives descriptives;
2253 
2254     type minimum = numeric_limits<type>::max();
2255     type maximum;
2256 
2257     type sum = 0;
2258     type squared_sum = 0;
2259     Index count = 0;
2260 
2261     maximum = -numeric_limits<type>::max();
2262 
2263     for(Index i = 0; i < size; i++)
2264     {
2265         if(!::isnan(vector(i)))
2266         {
2267             if(vector(i) < minimum)
2268             {
2269                 minimum = vector(i);
2270             }
2271 
2272             if(vector(i) > maximum)
2273             {
2274                 maximum = vector(i);
2275             }
2276 
2277             sum += vector(i);
2278             squared_sum += vector(i) *vector(i);
2279 
2280             count++;
2281         }
2282     }
2283 
2284     const type mean = sum/static_cast<type>(count);
2285 
2286     type standard_deviation;
2287 
2288     if(count <= 1)
2289     {
2290         standard_deviation = 0;
2291     }
2292     else
2293     {
2294         const type numerator = squared_sum -(sum * sum) / count;
2295         const type denominator = size - static_cast<type>(1.0);
2296 
2297         standard_deviation = numerator / denominator;
2298     }
2299 
2300     standard_deviation = sqrt(standard_deviation);
2301 
2302     descriptives.minimum = minimum;
2303     descriptives.maximum = maximum;
2304     descriptives.mean = mean;
2305     descriptives.standard_deviation = standard_deviation;
2306 
2307     return descriptives;
2308 }
2309 
2310 
2311 /// Calculates the distance between the empirical distribution of the vector and
2312 /// the normal, half-normal and uniform cumulative distribution. It returns 0, 1
2313 /// or 2 if the closest distribution is the normal, half-normal or the uniform,
2314 /// respectively.
2315 /// @todo review.
2316 
perform_distribution_distance_analysis(const Tensor<type,1> & vector)2317 Index perform_distribution_distance_analysis(const Tensor<type, 1>& vector)
2318 {
2319     Tensor<type, 1> distances(2);
2320     distances.setZero();
2321 
2322     const Index nans = count_nan(vector);
2323 
2324     const Index new_size = vector.size() - nans;
2325 
2326     Tensor<type, 1> new_vector(new_size);
2327 
2328     Index index = 0;
2329 
2330     for(Index i = 0; i < vector.size(); i++)
2331     {
2332         if(!::isnan(vector(i)))
2333         {
2334             new_vector(index) = vector(i);
2335             index++;
2336         }
2337     }
2338 
2339     const Index n = vector.dimension(0);
2340 
2341     Tensor<type, 1> sorted_vector(new_vector);
2342 
2343     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2344 
2345     const Descriptives descriptives = OpenNN::descriptives(vector);
2346 
2347     const type mean = descriptives.mean;
2348     const type standard_deviation = descriptives.standard_deviation;
2349     const type minimum = sorted_vector(0);
2350     const type maximum = sorted_vector(n-1);
2351 
2352     #pragma omp parallel for schedule(dynamic)
2353 
2354     for(Index i = 0; i < n; i++)
2355     {
2356         const type normal_distribution = static_cast<type>(0.5)
2357                 * static_cast<type>(erfc((mean - sorted_vector(i)))/static_cast<type>((standard_deviation*static_cast<type>(sqrt(2)))));
2358 
2359         const type uniform_distribution = (sorted_vector(i)-minimum)/(maximum-minimum);
2360 
2361         type empirical_distribution;
2362 
2363         Index counter = 0;
2364 
2365         if(vector(i) < sorted_vector(0))
2366         {
2367             empirical_distribution = 0;
2368         }
2369         else if(vector(i) >= sorted_vector(n-1))
2370         {
2371             empirical_distribution = 1.0;
2372         }
2373         else
2374         {
2375             counter = static_cast<Index>(i + 1);
2376 
2377             for(Index j = i+1; j < n; j++)
2378             {
2379                 if(sorted_vector(j) <= sorted_vector(i))
2380                 {
2381                     counter++;
2382                 }
2383                 else
2384                 {
2385                     break;
2386                 }
2387             }
2388 
2389             empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
2390         }
2391 
2392         #pragma omp critical
2393         {
2394             distances(0) += abs(normal_distribution - empirical_distribution);
2395             distances(1) += abs(uniform_distribution - empirical_distribution);
2396         }
2397     }
2398 
2399     return minimal_index(distances);
2400 }
2401 
2402 
2403 /*
2404 /// Returns a vector with the mean values of all the matrix columns.
2405 /// The size is equal to the number of columns in the matrix.
2406 /// @param matrix Matrix used.
2407 /// @todo to delete.
2408 
2409 Tensor<type, 1> columns_mean(const Tensor<type, 2>& matrix)
2410 {
2411 
2412     const Index rows_number = matrix.dimension(0);
2413 
2414     const Index columns_number = matrix.dimension(1);
2415 
2416 #ifdef __OPENNN_DEBUG__
2417 
2418     if(rows_number == 0)
2419     {
2420         ostringstream buffer;
2421 
2422         buffer << "OpenNN Exception: Statistics class.\n"
2423                << "Tensor<type, 1> mean(const Tensor<type, 2>&) const method.\n"
2424                << "Number of rows must be greater than one.\n";
2425 
2426         throw logic_error(buffer.str());
2427     }
2428 
2429 #endif
2430 
2431     Tensor<type, 1> columns_mean(columns_number);
2432 
2433     for(Index i = 0; i < columns_number; i++)
2434     {
2435         Tensor<type, 0> mean = matrix.chip(i,1).mean();
2436 
2437         columns_mean(i) = mean(0);
2438     }
2439 
2440     return columns_mean;
2441 }
2442 
2443 */
2444 /// Returns a vector with the mean values of all the matrix columns.
2445 /// The size is equal to the number of columns in the matrix.
2446 /// @param matrix Matrix used.
2447 
mean(const Tensor<type,2> & matrix)2448 Tensor<type, 1> mean(const Tensor<type, 2>& matrix)
2449 {
2450     const Index rows_number = matrix.dimension(0);
2451     const Index columns_number = matrix.dimension(1);
2452 
2453 #ifdef __OPENNN_DEBUG__
2454 
2455     if(rows_number == 0)
2456     {
2457         ostringstream buffer;
2458 
2459         buffer << "OpenNN Exception: Statistics class.\n"
2460                << "Tensor<type, 1> mean(const Tensor<type, 2>&) const method.\n"
2461                << "Number of rows must be greater than one.\n";
2462 
2463         throw logic_error(buffer.str());
2464     }
2465 
2466 #endif
2467 
2468     // Mean
2469 
2470     Tensor<type, 1> mean(columns_number);
2471     mean.setZero();
2472 
2473     for(Index j = 0; j < columns_number; j++)
2474     {
2475         for(Index i = 0; i < rows_number; i++)
2476         {
2477             if(!::isnan(matrix(i,j)))
2478             {
2479                 mean(j) += matrix(i,j);
2480             }
2481         }
2482 
2483         mean(j) /= static_cast<type>(rows_number);
2484     }
2485 
2486     return mean;
2487 }
2488 
2489 
2490 /// Returns a vector with the mean values of given columns.
2491 /// The size of the vector is equal to the size of the column indices vector.
2492 /// @param columns_indices Indices of columns.
2493 
mean(const Tensor<type,2> & matrix,const Tensor<Index,1> & columns_indices)2494 Tensor<type, 1> mean(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2495 {
2496     const Index rows_number = matrix.dimension(0);
2497 
2498     const Index columns_indices_size = columns_indices.size();
2499 
2500     Index column_index;
2501 
2502     // Mean
2503 
2504     Tensor<type, 1> mean(columns_indices_size);
2505     mean.setZero();
2506 
2507     for(Index j = 0; j < columns_indices_size; j++)
2508     {
2509         column_index = columns_indices(j);
2510 
2511         for(Index i = 0; i < rows_number; i++)
2512         {
2513             mean(j) += matrix(i, column_index);
2514         }
2515 
2516         mean(j) /= static_cast<type>(rows_number);
2517     }
2518 
2519     return mean;
2520 }
2521 
2522 
2523 /// Returns a vector with the mean values of given columns for given rows.
2524 /// The size of the vector is equal to the size of the column indices vector.
2525 /// @param matrix Matrix used.
2526 /// @param row_indices Indices of rows.
2527 /// @param columns_indices Indices of columns.
2528 
mean(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices,const Tensor<Index,1> & columns_indices)2529 Tensor<type, 1> mean(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
2530 {
2531     const Index row_indices_size = row_indices.size();
2532     const Index columns_indices_size = columns_indices.size();
2533 
2534     if (row_indices_size == 0 && columns_indices_size == 0) return NAN;
2535 
2536 #ifdef __OPENNN_DEBUG__
2537 
2538     const Index rows_number = matrix.dimension(0);
2539     const Index columns_number = matrix.dimension(1);
2540 
2541     // Rows check
2542 
2543     if(row_indices_size > rows_number)
2544     {
2545         ostringstream buffer;
2546 
2547         buffer << "OpenNN Exception: Statistics class.\n"
2548                << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2549                "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2550                << "Size of row indices(" << row_indices_size << ") is greater than number of rows(" << rows_number << ").\n";
2551 
2552         throw logic_error(buffer.str());
2553     }
2554 
2555     for(Index i = 0; i < row_indices_size; i++)
2556     {
2557         if(row_indices(i) >= rows_number)
2558         {
2559             ostringstream buffer;
2560 
2561             buffer << "OpenNN Exception: Statistics class.\n"
2562                    << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2563                    "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2564                    << "Row index " << i << " must be less than rows number.\n";
2565 
2566             throw logic_error(buffer.str());
2567         }
2568     }
2569 
2570     if(row_indices_size == 0)
2571     {
2572         ostringstream buffer;
2573 
2574         buffer << "OpenNN Exception: Statistics class.\n"
2575                << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2576                "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2577                << "Size of row indices must be greater than zero.\n";
2578 
2579         throw logic_error(buffer.str());
2580     }
2581 
2582     // Columns check
2583 
2584     if(columns_indices_size > columns_number)
2585     {
2586         ostringstream buffer;
2587 
2588         buffer << "OpenNN Exception: Statistics class.\n"
2589                << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2590                "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2591                << "Column indices size must be equal or less than columns number.\n";
2592 
2593         throw logic_error(buffer.str());
2594     }
2595 
2596     for(Index i = 0; i < columns_indices_size; i++)
2597     {
2598         if(columns_indices(i) >= columns_number)
2599         {
2600             ostringstream buffer;
2601 
2602             buffer << "OpenNN Exception: Statistics class.\n"
2603                    << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2604                    "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2605                    << "Column index " << i << " must be less than columns number.\n";
2606 
2607             throw logic_error(buffer.str());
2608         }
2609     }
2610 
2611 #endif
2612 
2613     Index row_index;
2614     Index column_index;
2615 
2616     Index count = 0;
2617 
2618     // Mean
2619 
2620     Tensor<type, 1> mean(columns_indices_size);
2621     mean.setZero();
2622 
2623     for(Index j = 0; j < columns_indices_size; j++)
2624     {
2625         column_index = columns_indices(j);
2626 
2627         count = 0;
2628 
2629         for(Index i = 0; i < row_indices_size; i++)
2630         {
2631             row_index = row_indices(i);
2632 
2633             if(!::isnan(matrix(row_index,column_index)))
2634             {
2635                 mean(j) += matrix(row_index,column_index);
2636                 count++;
2637             }
2638         }
2639 
2640         mean(j) /= static_cast<type>(count);
2641     }
2642 
2643     return mean;
2644 }
2645 
2646 
2647 /// Returns a vector with the mean values of all the matrix columns.
2648 /// The size is equal to the number of columns in the matrix.
2649 
mean(const Tensor<type,2> & matrix,const Index & column_index)2650 type mean(const Tensor<type, 2>& matrix, const Index& column_index)
2651 {
2652     const Index rows_number = matrix.dimension(0);
2653     const Index columns_number = matrix.dimension(1);
2654 
2655     if (rows_number == 0 && columns_number == 0) return NAN;
2656 
2657 #ifdef __OPENNN_DEBUG__
2658 
2659     if(rows_number == 0)
2660     {
2661         ostringstream buffer;
2662 
2663         buffer << "OpenNN Exception: Statistics class.\n"
2664                << "type mean(const Tensor<type, 2>&, const Index&) const method.\n"
2665                << "Number of rows must be greater than one.\n";
2666 
2667         throw logic_error(buffer.str());
2668     }
2669 
2670     if(column_index >= columns_number)
2671     {
2672         ostringstream buffer;
2673 
2674         buffer << "OpenNN Exception: Statistics class.\n"
2675                << "type mean(const Tensor<type, 2>&, const Index&) const method.\n"
2676                << "Index of column must be less than number of columns.\n";
2677 
2678         throw logic_error(buffer.str());
2679     }
2680 
2681 #endif
2682 
2683     if (rows_number == 0 && columns_number == 0) return NAN;
2684 
2685     // Mean
2686 
2687     type mean = 0;
2688 
2689     Index count = 0;
2690 
2691     for(Index i = 0; i < rows_number; i++)
2692     {
2693         if(!::isnan(matrix(i,column_index)))
2694         {
2695             mean += matrix(i,column_index);
2696             count++;
2697         }
2698     }
2699 
2700     mean /= static_cast<type>(count);
2701 
2702     return mean;
2703 }
2704 
2705 
2706 /// Returns a vector with the median values of all the matrix columns.
2707 /// The size is equal to the number of columns in the matrix.
2708 
median(const Tensor<type,2> & matrix)2709 Tensor<type, 1> median(const Tensor<type, 2>& matrix)
2710 {
2711     const Index rows_number = matrix.dimension(0);
2712     const Index columns_number = matrix.dimension(1);
2713 
2714 #ifdef __OPENNN_DEBUG__
2715 
2716     if(rows_number == 0)
2717     {
2718         ostringstream buffer;
2719 
2720         buffer << "OpenNN Exception: Matrix template.\n"
2721                << "Tensor<type, 1> median() const method.\n"
2722                << "Number of rows must be greater than one.\n";
2723 
2724         throw logic_error(buffer.str());
2725     }
2726 
2727 #endif
2728 
2729     // median
2730 
2731     Tensor<type, 1> median(columns_number);
2732 
2733     for(Index j = 0; j < columns_number; j++)
2734     {
2735         Tensor<type, 1> sorted_column(matrix.chip(j,1));
2736 
2737         sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2738 
2739         if(rows_number % 2 == 0)
2740         {
2741             median(j) = (sorted_column[rows_number*2/4] + sorted_column[rows_number*2/4+1])/2;
2742         }
2743         else
2744         {
2745             median(j) = sorted_column[rows_number*2/4];
2746         }
2747     }
2748 
2749     return median;
2750 }
2751 
2752 
2753 /// Returns a vector with the median values of all the matrix columns.
2754 /// The size is equal to the number of columns in the matrix.
2755 
median(const Tensor<type,2> & matrix,const Index & column_index)2756 type median(const Tensor<type, 2>& matrix, const Index& column_index)
2757 {
2758     const Index rows_number = matrix.dimension(0);
2759 
2760 #ifdef __OPENNN_DEBUG__
2761 
2762     const Index columns_number = matrix.dimension(1);
2763 
2764     if(rows_number == 0)
2765     {
2766         ostringstream buffer;
2767 
2768         buffer << "OpenNN Exception: Matrix template.\n"
2769                << "type median(const Index&) const method.\n"
2770                << "Number of rows must be greater than one.\n";
2771 
2772         throw logic_error(buffer.str());
2773     }
2774 
2775     if(column_index >= columns_number)
2776     {
2777         ostringstream buffer;
2778 
2779         buffer << "OpenNN Exception: Matrix template.\n"
2780                << "type median(const Index&) const method.\n"
2781                << "Index of column must be less than number of columns.\n";
2782 
2783         throw logic_error(buffer.str());
2784     }
2785 
2786 #endif
2787 
2788     // median
2789 
2790     type median = 0;
2791 
2792     Tensor<type, 1> sorted_column(matrix.chip(column_index,1));
2793 
2794     sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2795 
2796     if(rows_number % 2 == 0)
2797     {
2798         median = (sorted_column[rows_number*2/4] + sorted_column[rows_number*2/4+1])/2;
2799     }
2800     else
2801     {
2802         median = sorted_column[rows_number*2/4];
2803     }
2804 
2805     return median;
2806 }
2807 
2808 
2809 /// Returns a vector with the median values of given columns.
2810 /// The size of the vector is equal to the size of the column indices vector.
2811 /// @param columns_indices Indices of columns.
2812 
2813 
median(const Tensor<type,2> & matrix,const Tensor<Index,1> & columns_indices)2814 Tensor<type, 1> median(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2815 {
2816     const Index rows_number = matrix.dimension(0);
2817 
2818     const Index columns_indices_size = columns_indices.size();
2819 
2820     Index column_index;
2821 
2822     // median
2823 
2824     Tensor<type, 1> median(columns_indices_size);
2825 
2826     for(Index j = 0; j < columns_indices_size; j++)
2827     {
2828         column_index = columns_indices(j);
2829 
2830         Tensor<type, 1> sorted_column(matrix.chip(column_index, 1));
2831 
2832         sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2833 
2834         if(rows_number % 2 == 0)
2835         {
2836             median(j) = (sorted_column[rows_number*2/4] + sorted_column[rows_number*2/4+1])/2;
2837         }
2838         else
2839         {
2840             median(j) = sorted_column[rows_number*2/4];
2841         }
2842     }
2843 
2844     return median;
2845 }
2846 
2847 
2848 /// Returns a vector with the median values of given columns for given rows.
2849 /// The size of the vector is equal to the size of the column indices vector.
2850 /// @param row_indices Indices of rows.
2851 /// @param columns_indices Indices of columns.
2852 
median(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices,const Tensor<Index,1> & columns_indices)2853 Tensor<type, 1> median(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
2854 {
2855 
2856     const Index row_indices_size = row_indices.size();
2857     const Index columns_indices_size = columns_indices.size();
2858 
2859 #ifdef __OPENNN_DEBUG__
2860 
2861     const Index rows_number = matrix.dimension(0);
2862     const Index columns_number = matrix.dimension(1);
2863 
2864     // Rows check
2865 
2866     if(row_indices_size > rows_number)
2867     {
2868         ostringstream buffer;
2869 
2870         buffer << "OpenNN Exception: Matrix template.\n"
2871                << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2872                << "Size of row indices(" << row_indices_size << ") is greater than number of rows(" << rows_number << ").\n";
2873 
2874         throw logic_error(buffer.str());
2875     }
2876 
2877     for(Index i = 0; i < row_indices_size; i++)
2878     {
2879         if(row_indices(i) >= rows_number)
2880         {
2881             ostringstream buffer;
2882 
2883             buffer << "OpenNN Exception: Matrix template.\n"
2884                    << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2885                    << "Row index " << i << " must be less than rows number.\n";
2886 
2887             throw logic_error(buffer.str());
2888         }
2889     }
2890 
2891     if(row_indices_size == 0)
2892     {
2893         ostringstream buffer;
2894 
2895         buffer << "OpenNN Exception: Matrix template.\n"
2896                << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2897                << "Size of row indices must be greater than zero.\n";
2898 
2899         throw logic_error(buffer.str());
2900     }
2901 
2902     // Columns check
2903 
2904     if(columns_indices_size > columns_number)
2905     {
2906         ostringstream buffer;
2907 
2908         buffer << "OpenNN Exception: Matrix template.\n"
2909                << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2910                << "Column indices size must be equal or less than columns number.\n";
2911 
2912         throw logic_error(buffer.str());
2913     }
2914 
2915     for(Index i = 0; i < columns_indices_size; i++)
2916     {
2917         if(columns_indices(i) >= columns_number)
2918         {
2919             ostringstream buffer;
2920 
2921             buffer << "OpenNN Exception: Matrix template.\n"
2922                    << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2923                    << "Column index " << i << " must be less than columns number.\n";
2924 
2925             throw logic_error(buffer.str());
2926         }
2927     }
2928 
2929 #endif
2930 
2931     Index column_index;
2932 
2933     // median
2934 
2935     Tensor<type, 1> median(columns_indices_size);
2936 
2937     for(Index j = 0; j < columns_indices_size; j++)
2938     {
2939         column_index = columns_indices(j);
2940 
2941         Tensor<type, 1> sorted_column(row_indices_size);
2942 
2943         for(Index k = 0; k < row_indices_size; k++)
2944         {
2945             const Index row_index = row_indices(k);
2946 
2947             sorted_column(k) = matrix(row_index, column_index);
2948         }
2949 
2950         sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2951 
2952         if(row_indices_size % 2 == 0)
2953         {
2954             median(j) = (sorted_column[row_indices_size*2/4] + sorted_column[row_indices_size*2/4 + 1])/2;
2955         }
2956         else
2957         {
2958             median(j) = sorted_column[row_indices_size * 2 / 4];
2959         }
2960     }
2961 
2962     return median;
2963 }
2964 
2965 
2966 /// Calculates the distance between the empirical distribution of the vector and the
2967 /// normal distribution.
2968 /// @param vector Vector to be evaluated.
2969 
normal_distribution_distance(const Tensor<type,1> & vector)2970 type normal_distribution_distance(const Tensor<type, 1>& vector)
2971 {
2972     type normal_distribution_distance = 0;
2973 
2974     const Index n = vector.dimension(0);
2975 
2976     const type mean_value = mean(vector);
2977     const type standard_deviation = OpenNN::standard_deviation(vector);
2978 
2979     type normal_distribution; // Normal distribution
2980     type empirical_distribution; // Empirical distribution
2981 
2982     Tensor<type, 1> sorted_vector(vector);
2983 
2984     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2985 
2986     Index counter = 0;
2987 
2988     for(Index i = 0; i < n; i++)
2989     {
2990         normal_distribution = static_cast<type>(0.5) * static_cast<type>(erfc((mean_value - sorted_vector(i)))/(standard_deviation*static_cast<type>(sqrt(2.0))));
2991         counter = 0;
2992 
2993         for(Index j = 0; j < n; j++)
2994         {
2995             if(sorted_vector(j) <= sorted_vector(i))
2996             {
2997                 counter++;
2998             }
2999             else
3000             {
3001                 break;
3002             }
3003         }
3004 
3005         empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3006 
3007         normal_distribution_distance += abs(normal_distribution - empirical_distribution);
3008     }
3009 
3010     return normal_distribution_distance;
3011 }
3012 
3013 
3014 /// Calculates the distance between the empirical distribution of the vector and the
3015 /// half normal distribution.
3016 /// @param vector Vector to be evaluated.
3017 
half_normal_distribution_distance(const Tensor<type,1> & vector)3018 type half_normal_distribution_distance(const Tensor<type, 1>& vector)
3019 {
3020     type half_normal_distribution_distance = 0;
3021 
3022     const Index n = vector.dimension(0);
3023 
3024     const type standard_deviation = OpenNN::standard_deviation(vector);
3025 
3026     type half_normal_distribution; // Half normal distribution
3027     type empirical_distribution; // Empirical distribution
3028 
3029     Tensor<type, 1> sorted_vector(vector);
3030 
3031     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
3032 
3033     Index counter = 0;
3034 
3035     for(Index i = 0; i < n; i++)
3036     {
3037         half_normal_distribution = static_cast<type>(erf((sorted_vector(i)))/(standard_deviation * static_cast<type>(sqrt(2))));
3038         counter = 0;
3039 
3040         for(Index j = 0; j < n; j++)
3041         {
3042             if(sorted_vector(j) <= sorted_vector(i))
3043             {
3044                 counter++;
3045             }
3046             else
3047             {
3048                 break;
3049             }
3050         }
3051 
3052         empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3053 
3054         half_normal_distribution_distance += abs(half_normal_distribution - empirical_distribution);
3055     }
3056 
3057     return half_normal_distribution_distance;
3058 }
3059 
3060 
3061 /// Calculates the distance between the empirical distribution of the vector and the
3062 /// uniform distribution.
3063 /// @param vector Vector to be evaluated.
3064 
uniform_distribution_distance(const Tensor<type,1> & vector)3065 type uniform_distribution_distance(const Tensor<type, 1>& vector)
3066 {
3067     type uniform_distribution_distance = 0;
3068 
3069     const Index n = vector.dimension(0);
3070 
3071     type uniform_distribution; // Uniform distribution
3072     type empirical_distribution; // Empirical distribution
3073 
3074     Tensor<type, 1> sorted_vector(vector);
3075 
3076     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
3077 
3078     const type minimum = sorted_vector[0];
3079     const type maximum = sorted_vector[n-1];
3080 
3081     Index counter = 0;
3082 
3083     for(Index i = 0; i < n; i++)
3084     {
3085         uniform_distribution = (sorted_vector(i)-minimum)/(maximum-minimum);
3086         counter = 0;
3087 
3088         for(Index j = 0; j < n; j++)
3089         {
3090             if(sorted_vector(j) <= sorted_vector(i))
3091             {
3092                 counter++;
3093             }
3094             else
3095             {
3096                 break;
3097             }
3098         }
3099 
3100         empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3101 
3102         uniform_distribution_distance += abs(uniform_distribution - empirical_distribution);
3103     }
3104 
3105     return uniform_distribution_distance;
3106 }
3107 
3108 
3109 ///@todo
3110 
normality_parameter(const Tensor<type,1> & vector)3111 type normality_parameter(const Tensor<type, 1>& vector)
3112 {
3113     const type max = maximum(vector);
3114     const type min = minimum(vector);
3115 
3116     const Index n = vector.dimension(0);
3117 
3118     const type mean_value = mean(vector);
3119     const type standard_deviation = OpenNN::standard_deviation(vector);
3120 
3121     type normal_distribution;
3122     type empirical_distribution;
3123     type previous_normal_distribution = 0;
3124     type previous_empirical_distribution = 0;
3125 
3126     Tensor<type, 1> sorted_vector(vector);
3127 
3128     sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
3129 
3130     type empirical_area = 0;
3131     type normal_area = 0;
3132 
3133     Index counter = 0;
3134 
3135     for(Index i = 0; i < n; i++)
3136     {
3137         normal_distribution = static_cast<type>(0.5) * static_cast<type>(erfc((mean_value - sorted_vector(i)))/(standard_deviation*static_cast<type>(sqrt(2.0))));
3138         counter = 0;
3139 
3140         for(Index j = 0; j < n; j++)
3141         {
3142             if(sorted_vector(j) <= sorted_vector(i))
3143             {
3144                 counter++;
3145             }
3146             else
3147             {
3148                 break;
3149             }
3150         }
3151 
3152         empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3153 
3154         if(i == 0)
3155         {
3156             previous_normal_distribution = normal_distribution;
3157             previous_empirical_distribution = empirical_distribution;
3158         }
3159         else
3160         {
3161             normal_area += static_cast<type>(0.5)*(sorted_vector(i)-sorted_vector[i-1])*(normal_distribution+previous_normal_distribution);
3162             empirical_area += static_cast<type>(0.5)*(sorted_vector(i)-sorted_vector[i-1])*(empirical_distribution+previous_empirical_distribution);
3163 
3164             previous_normal_distribution = normal_distribution;
3165             previous_empirical_distribution = empirical_distribution;
3166         }
3167     }
3168 
3169     const type uniform_area = (max - min)/static_cast<type>(2.0);
3170 
3171     return uniform_area;
3172 }
3173 
3174 
/// Returns the percentage variation of each element with respect to the previous one,
/// that is 100*(vector(i) - vector(i-1))/vector(i-1).
/// The first element has no predecessor and is set to zero. An element whose predecessor
/// has a magnitude below numeric_limits<type>::min() is also set to zero, to avoid dividing by zero.
/// @param vector Vector to be evaluated.

Tensor<type, 1> variation_percentage(const Tensor<type, 1>& vector)
{
    const Index size = vector.dimension(0);

    Tensor<type, 1> new_vector(size);

    // The tensor storage is not initialized on construction.
    new_vector.setZero();

    for(Index i = 1; i < size; i++)
    {
        const type previous = vector(i-1);

        // No relative variation can be computed from a zero reference value.
        if(abs(previous) < numeric_limits<type>::min()) continue;

        new_vector(i) = (vector(i) - previous)*static_cast<type>(100.0)/previous;
    }

    return new_vector;
}
3191 
3192 
3193 /// Returns the index of the smallest element in the vector.
3194 
minimal_index(const Tensor<type,1> & vector)3195 Index minimal_index(const Tensor<type, 1>& vector)
3196 {
3197     const Index size = vector.dimension(0);
3198 
3199     if(size == 0) return Index();
3200 
3201     Index minimal_index = 0;
3202     type minimum = vector[0];
3203 
3204     for(Index i = 1; i < size; i++)
3205     {
3206         if(vector(i) < minimum)
3207         {
3208             minimal_index = i;
3209             minimum = vector(i);
3210         }
3211     }
3212 
3213     return minimal_index;
3214 }
3215 
3216 
3217 /// Returns the index of the largest element in the vector.
3218 
maximal_index(const Tensor<type,1> & vector)3219 Index maximal_index(const Tensor<type, 1>& vector)
3220 {
3221     const Index size = vector.dimension(0);
3222 
3223     if(size == 0) return Index();
3224 
3225     Index maximal_index = 0;
3226     type maximum = vector[0];
3227 
3228     for(Index i = 1; i < size; i++)
3229     {
3230         if(vector(i) > maximum)
3231         {
3232             maximal_index = i;
3233             maximum = vector(i);
3234         }
3235     }
3236 
3237     return maximal_index;
3238 }
3239 
3240 /// Returns the indices of the smallest elements in the vector.
3241 /// @param number Number of minimal indices to be computed.
3242 
minimal_indices(const Tensor<type,1> & vector,const Index & number)3243 Tensor<Index, 1> minimal_indices(const Tensor<type, 1>& vector, const Index &number)
3244 {
3245     Eigen::Tensor<type, 1> vector_ = vector;
3246 
3247     const Index size = vector.dimension(0);
3248     Tensor<Index, 1> minimal_indices(number);
3249     Eigen::Tensor<type, 0> maxim = vector.maximum();
3250 
3251 #ifdef __OPENNN_DEBUG__
3252 
3253 if(number > size)
3254 {
3255    ostringstream buffer;
3256 
3257    buffer << "OpenNN Exception: Statistics class.\n"
3258           << "Tensor<Index, 1> minimal_indices(Tensor<type, 1>& , const Index &) \n"
3259           << "Number of minimal indices to be computed must be lower (or equal) than the size of the imput vector.\n";
3260 
3261    throw logic_error(buffer.str());
3262 }
3263 #endif
3264 
3265     for(Index j = 0; j < number; j++)
3266     {
3267         Index minimal_index = 0;
3268         type minimum = vector_(0);
3269 
3270         for(Index i = 0; i < size; i++)
3271         {
3272             if(vector_(i) < minimum)
3273             {
3274                 minimal_index = i;
3275                 minimum = vector_(i);
3276             }
3277         }
3278         vector_(minimal_index) = maxim(0)+1;
3279         minimal_indices(j) = minimal_index;
3280     }
3281       return minimal_indices;
3282 }
3283 
3284 
3285 /// Returns the indices of the largest elements in the vector.
3286 /// @param number Number of maximal indices to be computed.
3287 
maximal_indices(const Tensor<type,1> & vector,const Index & number)3288 Tensor<Index, 1> maximal_indices(const Tensor<type, 1>& vector, const Index &number)
3289 {
3290     Eigen::Tensor<type, 1> vector_ = vector;
3291 
3292     const Index size = vector.dimension(0);
3293     Tensor<Index, 1> maximal_indices(number);
3294     Eigen::Tensor<type, 0> minim = vector.minimum();
3295 
3296 #ifdef __OPENNN_DEBUG__
3297 
3298 if(number > size)
3299 {
3300    ostringstream buffer;
3301 
3302    buffer << "OpenNN Exception: Statistics class.\n"
3303           << "Tensor<Index, 1> maximal_indices(Tensor<type, 1>& , const Index &) \n"
3304           << "Number of maximal indices to be computed must be lower (or equal) than the size of the imput vector.\n";
3305 
3306    throw logic_error(buffer.str());
3307 }
3308 #endif
3309 
3310     for(Index j = 0; j < number; j++)
3311     {
3312         Index maximal_index = 0;
3313         type maximal = vector_(0);
3314 
3315         for(Index i = 0; i < size; i++)
3316         {
3317             if(vector_(i) > maximal)
3318             {
3319                 maximal_index = i;
3320                 maximal = vector_(i);
3321             }
3322         }
3323         vector_(maximal_index) = minim(0)-1;
3324         maximal_indices(j) = maximal_index;
3325     }
3326       return maximal_indices;
3327 }
3328 
3329 /// Returns the row and column indices corresponding to the entry with minimum value.
3330 
minimal_indices(const Tensor<type,2> & matrix)3331 Tensor<Index, 1> minimal_indices(const Tensor<type, 2>& matrix)
3332 {
3333     const Index rows_number = matrix.dimension(0);
3334     const Index columns_number = matrix.dimension(1);
3335 
3336     type minimum = matrix(0,0);
3337     Tensor<Index, 1> minimal_indices(2);
3338 
3339     for(Index i = 0; i < rows_number; i++)
3340     {
3341         for(Index j = 0; j < columns_number; j++)
3342         {
3343             if(!::isnan(matrix(i,j))  && matrix(i,j) < minimum)
3344             {
3345                 minimum = matrix(i,j);
3346                 minimal_indices(0) = i;
3347                 minimal_indices(1) = j;
3348             }
3349         }
3350     }
3351 
3352     return minimal_indices;
3353 }
3354 
3355 
3356 /// Returns the row and column indices corresponding to the entry with maximum value.
3357 
maximal_indices(const Tensor<type,2> & matrix)3358 Tensor<Index, 1> maximal_indices(const Tensor<type, 2>& matrix)
3359 {
3360     const Index rows_number = matrix.dimension(0);
3361     const Index columns_number = matrix.dimension(1);
3362 
3363     type maximum = matrix(0,0);
3364 
3365     Tensor<Index, 1> maximal_indices(2);
3366 
3367     for(Index i = 0; i < rows_number; i++)
3368     {
3369         for(Index j = 0; j < columns_number; j++)
3370         {
3371             if(!::isnan(matrix(i,j)) && matrix(i,j) > maximum)
3372             {
3373                 maximum = matrix(i,j);
3374                 maximal_indices(0) = i;
3375                 maximal_indices(1) = j;
3376             }
3377         }
3378     }
3379 
3380     return maximal_indices;
3381 }
3382 
3383 
3384 /// Returns a matrix in which each of the columns contain the maximal indices of each of the columns of the
3385 /// original matrix.
3386 
maximal_columns_indices(const Tensor<type,2> & matrix,const Index & maximum_number)3387 Tensor<Index, 2> maximal_columns_indices(const Tensor<type,2>& matrix, const Index& maximum_number)
3388 {
3389     const Index rows_number = matrix.dimension(0);
3390     const Index columns_number = matrix.dimension(1);
3391 
3392     Tensor<Index, 2> maximal_columns_indices(maximum_number, columns_number);
3393 
3394     Tensor<type, 1> columns_minimums = OpenNN::columns_minimums(matrix);
3395 
3396     for(Index j = 0; j < columns_number; j++)
3397     {
3398         Tensor<type, 1> column = matrix.chip(j,1);
3399 
3400         for(Index i = 0; i < maximum_number; i++)
3401         {
3402             Index maximal_index = 0;
3403             type maximal = column(0);
3404 
3405             for(Index k = 0; k < rows_number; k++)
3406             {
3407                 if(column(k) > maximal && !::isnan(column(k)))
3408                 {
3409                     maximal_index = k;
3410                     maximal = column(k);
3411                 }
3412             }
3413 
3414             column(maximal_index) = columns_minimums(j)-static_cast<type>(1);
3415             maximal_columns_indices(i,j) = maximal_index;
3416         }
3417     }
3418 
3419     return maximal_columns_indices;
3420 }
3421 
3422 
strongest(const Tensor<type,1> & vector)3423 type strongest(const Tensor<type, 1>& vector)
3424 {
3425     const Index size = vector.dimension(0);
3426 
3427     if(size == 0) return 0.0;
3428 
3429     type strongest = vector[0];
3430 
3431     for(Index i = 0; i < size; i++)
3432     {
3433         if(fabs(vector(i)) > fabs(strongest))
3434         {
3435             strongest = vector(i);
3436         }
3437     }
3438 
3439     return strongest;
3440 }
3441 
3442 
3443 /// Returns the l2 norm of a vector
3444 
l2_norm(const Tensor<type,1> & vector)3445 type l2_norm(const Tensor<type, 1>& vector)
3446 {
3447     const Index size = vector.dimension(0);
3448 
3449     if(size == 0) return NAN;
3450 
3451     type square_sum = 0;
3452 
3453     for(Index i = 0; i < size; i++)
3454     {
3455         square_sum = square_sum + vector[i] * vector[i];
3456     }
3457 
3458     return sqrt(square_sum);
3459 }
3460 
3461 
3462 /// Returns a vector containing the means of the subsets which correspond
3463 /// to each of the given integers. The matrix must have 2 columns, the first
3464 /// one containing the integers and the second one the corresponding values.
3465 
means_by_categories(const Tensor<type,2> & matrix)3466 Tensor<type, 1> means_by_categories(const Tensor<type, 2>& matrix)
3467 {
3468 /*
3469     const Index integers_number = matrix.size();
3470     Tensor<type, 1> elements_uniques = matrix.get_column(0).get_unique_elements();
3471     Tensor<type, 1> values = matrix.chip(1,1);
3472 
3473     #ifdef __OPENNN_DEBUG__
3474 
3475     if(integers_number == 0)
3476     {
3477        ostringstream buffer;
3478 
3479        buffer << "OpenNN Exception: Matrix template.\n"
3480               << "Tensor<type, 1> calculate_means_integers(const Tensor<type, 2>& \n"
3481               << "Number of integers must be greater than 0.\n";
3482 
3483        throw logic_error(buffer.str());
3484     }
3485 
3486     #endif
3487 
3488     const Index rows_number = matrix.dimension(0);
3489 
3490     Tensor<type, 1> means(elements_uniques);
3491 
3492     type sum = 0;
3493     Index count = 0;
3494 
3495     for(Index i = 0; i < integers_number; i++)
3496     {
3497         sum = 0;
3498         count = 0;
3499 
3500         for(unsigned j = 0; j < rows_number; j++)
3501         {
3502             if(matrix(j,0) == elements_uniques(i) && !::isnan(values(j)))
3503             {
3504                 sum += matrix(j,1);
3505                 count++;
3506             }
3507         }
3508 
3509         if(count != 0)
3510         {
3511             means(i) = static_cast<type>(sum)/static_cast<type>(count);
3512 
3513         }
3514         else
3515         {
3516             means(i) = 0;
3517         }
3518     }
3519 
3520     return means;
3521 */
3522     return Tensor<type, 1>();
3523 }
3524 
3525 
3526 
3527 /// Returns a vector containing the values of the means for the 0s and 1s of a
3528 /// binary column.
3529 /// The matrix must have 2 columns, the first one has to be binary.
3530 
means_binary_column(const Tensor<type,2> & matrix)3531 Tensor<type, 1> means_binary_column(const Tensor<type, 2>& matrix)
3532 {
3533     Tensor<type, 1> means(2);
3534     means.setZero();
3535 
3536     Index count = 0;
3537 
3538     for(Index i = 0; i < matrix.dimension(0); i++)
3539     {
3540         if(abs(matrix(i,0)) < numeric_limits<type>::min())
3541         {
3542             means[0] += matrix(i,1);
3543             count++;
3544         }
3545         else if(static_cast<Index>(matrix(i,0)) == 1)
3546         {
3547             means[1] += matrix(i,1);
3548             count++;
3549         }
3550     }
3551 
3552     if(count != 0)
3553     {
3554         means[0] = static_cast<type>(means[0])/static_cast<type>(count);
3555         means[1] = static_cast<type>(means[1])/static_cast<type>(count);
3556     }
3557     else
3558     {
3559         means[0] = 0;
3560         means[1] = 0;
3561     }
3562 
3563     return means;
3564 }
3565 
3566 
3567 /// Returns a vector containing the values of the means for the 1s of each
3568 /// of all binary columns.
3569 /// All the columns except the last one must be binary.
3570 
means_binary_columns(const Tensor<type,2> & matrix)3571 Tensor<type, 1> means_binary_columns(const Tensor<type, 2>& matrix)
3572 {
3573     Tensor<type, 1> means(matrix.dimension(1)-1);
3574 
3575     type sum = 0;
3576     Index count = 0;
3577 
3578     for(Index i = 0; i < matrix.dimension(1)-1; i++)
3579     {
3580         sum = 0;
3581         count = 0;
3582 
3583         for(Index j = 0; j < matrix.dimension(0); j++)
3584         {
3585             if(static_cast<Index>(matrix(j,i)) == 1)
3586             {
3587                 sum += matrix(j,matrix.dimension(1)-1);
3588 
3589                 count++;
3590             }
3591         }
3592 
3593         if(count != 0)
3594         {
3595             means(i) = static_cast<type>(sum)/static_cast<type>(count);
3596 
3597         }
3598         else
3599         {
3600             means(i) = 0;
3601         }
3602     }
3603     return means;
3604 }
3605 
3606 
3607 ///Returns a vector with the percentiles of a vector given.
3608 
percentiles(const Tensor<type,1> & vector)3609 Tensor<type, 1> percentiles(const Tensor<type, 1>& vector)
3610 {
3611     const Index size = vector.dimension(0);
3612 
3613 #ifdef __OPENNN_DEBUG__
3614 
3615     if(size < 10)
3616     {
3617         ostringstream buffer;
3618 
3619         buffer << "OpenNN Exception: vector Template.\n"
3620                << "Tensor<type, 1> percentiles(const Tensor<type, 1>& vector) method.\n"
3621                << "Size must be greater than 10.\n";
3622 
3623         throw logic_error(buffer.str());
3624     }
3625 
3626 #endif
3627 
3628       Index new_size = 0;
3629 
3630       for(Index i = 0; i < size; i++)
3631       {
3632           if(!::isnan(vector(i)))
3633           {
3634               new_size++;
3635           }
3636       }
3637 
3638       if(new_size == 0)
3639       {
3640           Tensor<type, 1> nan(1);
3641           nan.setValues({static_cast<type>(NAN)});
3642           return nan;
3643       }
3644 
3645       Index index = 0;
3646       Tensor<type, 1> new_vector(new_size);
3647 
3648       for(Index i = 0; i < size; i++)
3649       {
3650           if(!::isnan(vector(i)))
3651           {
3652               new_vector(index) = vector(i);
3653               index++;
3654           }
3655       }
3656 
3657       Tensor<type, 1> sorted_vector(new_vector);
3658 
3659       sort(sorted_vector.data(), sorted_vector.data() + new_size, less<type>());
3660 
3661 
3662       /// Aempirical method
3663       Tensor<type, 1> percentiles(10);
3664 
3665       for(Index i = 0; i < 9; i++)
3666       {
3667           if(new_size * (i + 1) % 10 == 0)
3668               percentiles[i] = (sorted_vector[new_size * (i + 1) / 10 - 1] + sorted_vector[new_size * (i + 1) / 10]) / static_cast<type>(2.0);
3669 
3670           else
3671               percentiles[i] = static_cast<type>(sorted_vector[new_size * (i + 1) / 10]);
3672       }
3673       percentiles[9] = maximum(new_vector);
3674 
3675       return percentiles;
3676 }
3677 
3678 
3679 /// Returns the weighted mean of the vector.
3680 /// @param weights Weights of the elements of the vector in the mean.
3681 
weighted_mean(const Tensor<type,1> & vector,const Tensor<type,1> & weights)3682 type weighted_mean(const Tensor<type, 1>& vector, const Tensor<type, 1>& weights)
3683 {
3684     const Index size = vector.dimension(0);
3685 
3686 #ifdef __OPENNN_DEBUG__
3687 
3688     if(size == 0)
3689     {
3690         ostringstream buffer;
3691 
3692         buffer << "OpenNN Exception: vector Template.\n"
3693                << "type calculate_weighted_mean(const Tensor<type, 1>&) const method.\n"
3694                << "Size must be greater than zero.\n";
3695 
3696         throw logic_error(buffer.str());
3697     }
3698 
3699     const Index weights_size = weights.size();
3700 
3701     if(size != weights_size)
3702     {
3703         ostringstream buffer;
3704 
3705         buffer << "OpenNN Exception: vector Template.\n"
3706                << "type calculate_weighted_mean(const Tensor<type, 1>&) "
3707                "const method.\n"
3708                << "Size of weights must be equal to vector size.\n";
3709 
3710         throw logic_error(buffer.str());
3711     }
3712 #endif
3713 
3714     type weights_sum = 0;
3715 
3716     type sum = 0;
3717 
3718     for(Index i = 0; i < size; i++)
3719     {
3720         sum += weights(i)*vector(i);
3721         weights_sum += weights(i);
3722     }
3723 
3724     const type mean = sum / weights_sum;
3725 
3726     return mean;
3727 }
3728 
3729 /// Returns the number of nans in the vector.
3730 /// @param vector Vector to count the NANs
3731 
count_nan(const Tensor<type,1> & vector)3732 Index count_nan(const Tensor<type, 1>& vector)
3733 {
3734     Index nan_number = 0;
3735 
3736     for(Index i = 0; i < vector.dimension(0); i++)
3737     {
3738         if(isnan(vector(i))) nan_number++;
3739     }
3740 
3741     return nan_number;
3742 }
3743 
3744 }
3745 
3746 
3747 // OpenNN: Open Neural Networks Library.
3748 // Copyright(C) 2005-2020 Artificial Intelligence Techniques, SL.
3749 //
3750 // This library is free software; you can redistribute it and/or
3751 // modify it under the terms of the GNU Lesser General Public
3752 // License as published by the Free Software Foundation; either
3753 // version 2.1 of the License, or any later version.
3754 //
3755 // This library is distributed in the hope that it will be useful,
3756 // but WITHOUT ANY WARRANTY; without even the implied warranty of
3757 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
3758 // Lesser General Public License for more details.
3759 
3760 // You should have received a copy of the GNU Lesser General Public
3761 // License along with this library; if not, write to the Free Software
3762 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
3763