1 // OpenNN: Open Neural Networks Library
2 // www.opennn.net
3 //
4 // S T A T I S T I C S S O U R C E
5 //
6 // Artificial Intelligence Techniques, SL
7 // artelnics@artelnics.com
8
9 #include "statistics.h"
10
11 namespace OpenNN
12 {
13
14 /// Default constructor.
15
Descriptives()16 Descriptives::Descriptives()
17 {
18 name = "Descriptives";
19 minimum = -1.0;
20 maximum = 1.0;
21 mean = 0;
22 standard_deviation = 1.0;
23 }
24
25
26 /// Values constructor.
27
Descriptives(const type & new_minimum,const type & new_maximum,const type & new_mean,const type & new_standard_deviation)28 Descriptives::Descriptives(const type &new_minimum, const type &new_maximum,
29 const type &new_mean, const type &new_standard_deviation)
30 {
31 minimum = new_minimum;
32 maximum = new_maximum;
33 mean = new_mean;
34 standard_deviation = new_standard_deviation;
35 }
36
37
38 /// Destructor.
39
~Descriptives()40 Descriptives::~Descriptives()
41 {}
42
43
44 /// Sets a new minimum value in the descriptives structure.
45 /// @param new_minimum Minimum value.
46
set_minimum(const type & new_minimum)47 void Descriptives::set_minimum(const type &new_minimum)
48 {
49 minimum = new_minimum;
50 }
51
52
53 /// Sets a new maximum value in the descriptives structure.
54 /// @param new_maximum Maximum value.
55
set_maximum(const type & new_maximum)56 void Descriptives::set_maximum(const type &new_maximum)
57 {
58 maximum = new_maximum;
59 }
60
61
62 /// Sets a new mean value in the descriptives structure.
63 /// @param new_mean Mean value.
64
set_mean(const type & new_mean)65 void Descriptives::set_mean(const type &new_mean)
66 {
67 mean = new_mean;
68 }
69
70
71 /// Sets a new standard deviation value in the descriptives structure.
72 /// @param new_standard_deviation Standard deviation value.
73
set_standard_deviation(const type & new_standard_deviation)74 void Descriptives::set_standard_deviation(const type &new_standard_deviation)
75 {
76 standard_deviation = new_standard_deviation;
77 }
78
79
80 /// Returns all the statistical parameters contained in a single vector.
81 /// The size of that vector is four.
82 /// The elements correspond to the minimum, maximum, mean and standard deviation
83 /// values respectively.
84
to_vector() const85 Tensor<type, 1> Descriptives::to_vector() const
86 {
87 Tensor<type, 1> statistics_vector(4);
88 statistics_vector[0] = minimum;
89 statistics_vector[1] = maximum;
90 statistics_vector[2] = mean;
91 statistics_vector[3] = standard_deviation;
92
93 return statistics_vector;
94 }
95
96
97 /// Returns true if the minimum value is -1 and the maximum value is +1,
98 /// and false otherwise.
99
has_minimum_minus_one_maximum_one()100 bool Descriptives::has_minimum_minus_one_maximum_one()
101 {
102 if(abs(minimum + 1) < numeric_limits<type>::min() && abs(maximum - 1) < numeric_limits<type>::min())
103 {
104 return true;
105 }
106
107 return false;
108 }
109
110
111 /// Returns true if the mean value is 0 and the standard deviation value is 1,
112 /// and false otherwise.
113
has_mean_zero_standard_deviation_one()114 bool Descriptives::has_mean_zero_standard_deviation_one()
115 {
116 if(abs(mean) < numeric_limits<type>::min() && abs(standard_deviation - 1) < numeric_limits<type>::min())
117 {
118 return true;
119 }
120 else
121 {
122 return false;
123 }
124
125 }
126
127
128 /// Print the tittle of descriptives structure
129
print(const string & title) const130 void Descriptives::print(const string& title) const
131 {
132 cout << title << endl
133 << "Minimum: " << minimum << endl
134 << "Maximum: " << maximum << endl
135 << "Mean: " << mean << endl
136 << "Standard deviation: " << standard_deviation << endl;
137 }
138
139
BoxPlot(const type & new_minimum,const type & new_first_cuartile,const type & new_median,const type & new_third_quartile,const type & new_maximum)140 BoxPlot::BoxPlot(const type& new_minimum, const type& new_first_cuartile, const type& new_median, const type& new_third_quartile, const type& new_maximum)
141 {
142 minimum = new_minimum;
143 first_quartile = new_first_cuartile;
144 median = new_median;
145 third_quartile = new_third_quartile;
146 maximum = new_maximum;
147 }
148
149
150 /// Saves to a file the minimum, maximum, mean and standard deviation
151 /// of the descriptives structure.
152 /// @param file_name Name of descriptives data file.
153
save(const string & file_name) const154 void Descriptives::save(const string &file_name) const
155 {
156 ofstream file(file_name.c_str());
157
158 if(!file.is_open())
159 {
160 ostringstream buffer;
161
162 buffer << "OpenNN Exception: Statistics Class.\n"
163 << "void save(const string&) const method.\n"
164 << "Cannot open descriptives data file.\n";
165
166 throw logic_error(buffer.str());
167 }
168
169 // Write file
170
171 file << "Minimum: " << minimum << endl
172 << "Maximum: " << maximum << endl
173 << "Mean: " << mean << endl
174 << "Standard deviation: " << standard_deviation << endl;
175
176 // Close file
177
178 file.close();
179 }
180
181
Histogram()182 Histogram::Histogram() {}
183
184
185 /// Destructor.
186
~Histogram()187 Histogram::~Histogram() {}
188
189
190 /// Bins number constructor.
191 /// @param bins_number Number of bins in the histogram.
192
Histogram(const Index & bins_number)193 Histogram::Histogram(const Index &bins_number)
194 {
195 centers.resize(bins_number);
196 frequencies.resize(bins_number);
197 }
198
199
200 /// Values constructor.
201 /// @param new_centers Center values for the bins.
202 /// @param new_frequencies Number of variates in each bin.
203
Histogram(const Tensor<type,1> & new_centers,const Tensor<Index,1> & new_frequencies)204 Histogram::Histogram(const Tensor<type, 1>&new_centers,
205 const Tensor<Index, 1>&new_frequencies)
206 {
207 centers = new_centers;
208 frequencies = new_frequencies;
209 }
210
211
212 // Data constructor
213 /// @param data Numerical data.
214 /// @param number_of_bins Number of bins.
215
Histogram(const Tensor<type,1> & data,const Index & number_of_bins)216 Histogram::Histogram(const Tensor<type, 1>& data,
217 const Index& number_of_bins)
218 {
219 const type data_maximum = maximum(data);
220 const type data_minimum = minimum(data);
221 const type step = (data_maximum - data_minimum) / number_of_bins;
222
223
224 Tensor<type, 1> new_centers(number_of_bins);
225 for(Index i = 0; i < number_of_bins; i++)
226 {
227 new_centers(i) = data_minimum + (0.5 * step) + (step * i);
228 }
229
230 Tensor<Index, 1> new_frequencies(number_of_bins);
231 new_frequencies.setZero();
232
233 type value;
234 Index corresponding_bin;
235
236 for(Index i = 0; i < data.dimension(0); i++)
237 {
238 value = data(i);
239 corresponding_bin = int((value - data_minimum) / step);
240
241 new_frequencies(corresponding_bin)++;
242 }
243
244 centers = new_centers;
245 frequencies = new_frequencies;
246 }
247
248
249 // Probabilities constructor
250 /// @param data Numerical probabilities data.
251
Histogram(const Tensor<type,1> & probability_data)252 Histogram::Histogram(const Tensor<type, 1>& probability_data)
253 {
254 const size_t number_of_bins = 10;
255 type data_maximum = maximum(probability_data);
256 const type data_minimum = 0.0;
257
258 if(data_maximum > 1)
259 {
260 data_maximum = 100.0;
261 }
262 else
263 {
264 data_maximum = 1.0;
265 }
266
267 const type step = (data_maximum - data_minimum) / number_of_bins;
268
269
270 Tensor<type, 1> new_centers(number_of_bins);
271 for(size_t i = 0; i < number_of_bins; i++)
272 {
273 new_centers(i) = data_minimum + (0.5 * step) + (step * i);
274 }
275
276 Tensor<Index, 1> new_frequencies(number_of_bins);
277 new_frequencies.setZero();
278
279 type value;
280 Index corresponding_bin;
281
282 for(Index i = 0; i < probability_data.dimension(0); i++)
283 {
284 value = probability_data(i);
285 corresponding_bin = int((value - data_minimum) / step);
286
287 new_frequencies(corresponding_bin)++;
288 }
289
290 centers = new_centers;
291 frequencies = new_frequencies;
292 }
293
294 /// Returns the number of bins in the histogram.
295
get_bins_number() const296 Index Histogram::get_bins_number() const
297 {
298 return centers.size();
299 }
300
301
302 /// Returns the number of bins with zero variates.
303
count_empty_bins() const304 Index Histogram::count_empty_bins() const
305 {
306 const auto size = frequencies.dimension(0);
307
308 Index count = 0;
309
310 for(Index i = 0; i < size; i++)
311 {
312 if(frequencies(i) == 0) count++;
313 }
314
315 return count;
316 }
317
318
319 /// Returns the number of variates in the less populated bin.
320
calculate_minimum_frequency() const321 Index Histogram::calculate_minimum_frequency() const
322 {
323 return minimum(frequencies);
324 }
325
326
327 /// Returns the number of variates in the most populated bin.
328
calculate_maximum_frequency() const329 Index Histogram::calculate_maximum_frequency() const
330 {
331 return maximum(frequencies);
332
333 }
334
335
336 /// Retuns the index of the most populated bin.
337
calculate_most_populated_bin() const338 Index Histogram::calculate_most_populated_bin() const
339 {
340
341 const Tensor<Index, 0> max_element = frequencies.maximum();
342
343 for(Index i = 0; i < frequencies.size(); i++)
344 {
345 if(max_element(0) == frequencies(i)) return i;
346 }
347
348 return 0;
349 }
350
351
352 /// Returns a vector with the centers of the less populated bins.
353
calculate_minimal_centers() const354 Tensor<type, 1> Histogram::calculate_minimal_centers() const
355 {
356 const Index minimum_frequency = calculate_minimum_frequency();
357
358 Index minimal_indices_size = 0;
359
360 if (frequencies.size() == 0)
361 {
362 Tensor<type, 1> nan(1);
363 nan.setValues({static_cast<type>(NAN)});
364 return nan;
365 }
366
367 for(Index i = 0; i < frequencies.size(); i++)
368 {
369 if(frequencies(i) == minimum_frequency)
370 {
371 minimal_indices_size++;
372 }
373 }
374
375 Index index = 0;
376
377 Tensor<type, 1> minimal_centers(minimal_indices_size);
378
379 for(Index i = 0; i < frequencies.size(); i++)
380 {
381 if(frequencies(i) == minimum_frequency)
382 {
383 minimal_centers(index) = static_cast<type>(centers(i));
384
385 index++;
386 }
387 }
388
389 return minimal_centers;
390 }
391
392
393 /// Returns a vector with the centers of the most populated bins.
394
calculate_maximal_centers() const395 Tensor<type, 1> Histogram::calculate_maximal_centers() const
396 {
397 const Index maximum_frequency = calculate_maximum_frequency();
398
399 Index maximal_indices_size = 0;
400
401 if (frequencies.size() == 0)
402 {
403 Tensor<type, 1> nan(1);
404 nan.setValues({static_cast<type>(NAN)});
405 return nan;
406 }
407
408 for(Index i = 0; i < frequencies.size(); i++)
409 {
410 if(frequencies(i) == maximum_frequency)
411 {
412 maximal_indices_size++;
413 }
414 }
415
416 Index index = 0;
417
418 Tensor<type, 1> maximal_centers(maximal_indices_size);
419
420 for(Index i = 0; i < frequencies.size(); i++)
421 {
422 if(maximum_frequency == frequencies(i))
423 {
424 maximal_centers(index) = static_cast<type>(centers(i));
425
426 index++;
427 }
428 }
429
430 return maximal_centers;
431 }
432
433
434 /// Returns the number of the bin to which a given value belongs to.
435 /// @param value Value for which we want to get the bin.
436
calculate_bin(const type & value) const437 Index Histogram::calculate_bin(const type&value) const
438 {
439 const Index bins_number = get_bins_number();
440
441 if(bins_number == 0) return 0;
442
443 const type minimum_center = centers[0];
444 const type maximum_center = centers[bins_number - 1];
445
446 const type length = static_cast<type>(maximum_center - minimum_center)/static_cast<type>(bins_number - 1.0);
447
448 type minimum_value = centers[0] - length / 2;
449 type maximum_value = minimum_value + length;
450
451 if(value < maximum_value)
452 {
453 return 0;
454 }
455
456 for(Index j = 1; j < bins_number - 1; j++)
457 {
458 minimum_value = minimum_value + length;
459 maximum_value = maximum_value + length;
460
461 if(value >= minimum_value && value < maximum_value)
462 {
463 return j;
464 }
465 }
466
467 if(value >= maximum_value)
468 {
469 return bins_number - 1;
470 }
471 else
472 {
473 ostringstream buffer;
474
475 buffer << "OpenNN Exception: Statistics Class.\n"
476 << "Index Histogram::calculate_bin(const type&) const.\n"
477 << "Unknown return value.\n";
478
479 throw logic_error(buffer.str());
480 }
481 }
482
483
484 /// Returns the frequency of the bin to which a given value belongs to.
485 /// @param value Value for which we want to get the frequency.
486
calculate_frequency(const type & value) const487 Index Histogram::calculate_frequency(const type&value) const
488 {
489 const Index bins_number = get_bins_number();
490
491 if(bins_number == 0) return 0;
492
493 const Index bin_number = calculate_bin(value);
494
495 const Index frequency = frequencies[bin_number];
496
497 return frequency;
498 }
499
500
save(const string & histogram_file_name) const501 void Histogram::save(const string& histogram_file_name) const
502 {
503 const Index number_of_bins = centers.dimension(0);
504 ofstream histogram_file(histogram_file_name);
505
506
507 histogram_file << "centers,frequencies" << endl;
508 for(Index i = 0; i < number_of_bins; i++)
509 {
510 histogram_file << centers(i) << ",";
511 histogram_file << frequencies(i) << endl;
512 }
513
514 histogram_file.close();
515
516 }
517
518
519 /// Returns the smallest element of a type vector.
520 /// @param vector Vector to obtain the minimum value.
521
minimum(const Tensor<type,1> & vector)522 type minimum(const Tensor<type, 1>& vector)
523 {
524 const Index size = vector.dimension(0);
525
526 if(size == 0) return NAN;
527
528 type minimum = numeric_limits<type>::max();
529
530 for(Index i = 0; i < size; i++)
531 {
532 if(vector(i) < minimum && !::isnan(vector(i)))
533 {
534 minimum = vector(i);
535 }
536 }
537
538 return minimum;
539 }
540
541
542 /// Returns the smallest element of a index vector.
543 /// @param vector Vector to obtain the minimum value.
544
minimum(const Tensor<Index,1> & vector)545 Index minimum(const Tensor<Index, 1>& vector)
546 {
547 const Index size = vector.size();
548
549 if(size == 0) return NAN;
550
551 Index minimum = numeric_limits<Index>::max();
552
553 for(Index i = 0; i < size; i++)
554 {
555 if(vector(i) < minimum)
556 {
557 minimum = vector(i);
558 }
559 }
560
561 return minimum;
562 }
563
564
565 /// Returns the smallest element of a type vector.
566 /// @param vector Vector to obtain the minimum value.
567 /// @param indices Vector of used indices.
568
minimum(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)569 type minimum(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
570 {
571 const Index size = indices.dimension(0);
572
573 if(size == 0) return NAN;
574
575 type minimum = numeric_limits<type>::max();
576
577 Index index;
578
579 for(Index i = 0; i < size; i++)
580 {
581 index = indices(i);
582
583 if(vector(index) < minimum && !::isnan(vector(index)))
584 {
585 minimum = vector(index);
586 }
587 }
588
589 return minimum;
590 }
591
592
593
594 /// Returns the smallest element of a Index vector.
595
596 //time_t minimum(const Tensor<time_t, 1>& vector)
597 //{
598
599 // const Tensor<time_t, 0> min_element = vector.minimum();
600
601 // return min_element(0);
602 //}
603
604
605 /// Returns the largest element in the vector.
606 /// @param vector Vector to obtain the maximum value.
607
maximum(const Tensor<type,1> & vector)608 type maximum(const Tensor<type, 1>& vector)
609 {
610 const Index size = vector.dimension(0);
611
612 if(size == 0) return NAN;
613
614 type maximum = -numeric_limits<type>::max();
615
616 for(Index i = 0; i < size; i++)
617 {
618 if(!::isnan(vector(i)) && vector(i) > maximum)
619 {
620 maximum = vector(i);
621 }
622 }
623
624 return maximum;
625 }
626
627
628 /// Returns the largest element in the vector.
629 /// @param vector Vector to obtain the maximum value.
630 /// @param indices Vector of used indices.
631
maximum(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)632 type maximum(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
633 {
634 const Index size = indices.dimension(0);
635
636 if(size == 0) return NAN;
637
638 type maximum = -numeric_limits<type>::max();
639
640 Index index;
641
642 for(Index i = 0; i < size; i++)
643 {
644 index = indices(i);
645
646 if(!::isnan(vector(index)) && vector(index) > maximum)
647 {
648 maximum = vector(index);
649 }
650 }
651
652 return maximum;
653 }
654
655 /// Returns the largest element of a index vector.
656 /// @param vector Vector to obtain the maximum value.
657
maximum(const Tensor<Index,1> & vector)658 Index maximum(const Tensor<Index, 1>& vector)
659 {
660 const Index size = vector.size();
661
662 if(size == 0) return NAN;
663
664 Index maximum = -numeric_limits<Index>::max();
665
666 for(Index i = 0; i < size; i++)
667 {
668 if(vector(i) > maximum)
669 {
670 maximum = vector(i);
671 }
672 }
673
674 return maximum;
675 }
676
677
678 //time_t maximum(const Tensor<time_t, 1>& vector)
679 //{
680 // const Tensor<time_t,0> max_element = vector.maximum();
681
682 // return max_element(0);
683 //}
684
685
686 /// Returns the maximums values of given columns.
687 /// The format is a vector of type values.
688 /// The size of that vector is equal to the number of given columns.
689 /// @param matrix Used matrix.
690 /// @param rows_indices Indices of the rows for which the maximums are to be computed.
691 /// @param columns_indices Indices of the columns for which the maximums are to be computed.
692
columns_maximums(const Tensor<type,2> & matrix,const Tensor<Index,1> & rows_indices,const Tensor<Index,1> & columns_indices)693 Tensor<type, 1> columns_maximums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
694 {
695 const Index rows_number = matrix.dimension(0);
696 const Index columns_number = matrix.dimension(1);
697
698 Tensor<Index, 1> used_columns_indices;
699
700 if(columns_indices.dimension(0) == 0)
701 {
702 used_columns_indices.resize(columns_number);
703
704 for (Index i = 0; i < columns_number; i++)
705 {
706 used_columns_indices(i) = i;
707 }
708 }
709 else
710 {
711 used_columns_indices = columns_indices;
712 }
713
714 Tensor<Index, 1> used_rows_indices;
715
716 if(rows_indices.dimension(0) == 0)
717 {
718 used_rows_indices.resize(rows_number);
719
720 for (Index i = 0; i < rows_number; i++)
721 {
722 used_rows_indices(i) = i;
723 }
724 }
725 else
726 {
727 used_rows_indices = rows_indices;
728 }
729
730 const Index rows_indices_size = used_rows_indices.size();
731 const Index columns_indices_size = used_columns_indices.size();
732
733 Tensor<type, 1> maximums(columns_indices_size);
734
735 Index row_index;
736 Index column_index;
737
738 Tensor<type, 1> column(rows_indices_size);
739
740 for(Index j = 0; j < columns_indices_size; j++)
741 {
742 column_index = used_columns_indices(j);
743
744 for(Index i = 0; i < rows_indices_size; i++)
745 {
746 row_index = used_rows_indices(i);
747
748 column(i) = matrix(row_index,column_index);
749 }
750
751 maximums(j) = maximum(column);
752 }
753
754 return maximums;
755 }
756
757
758 /// Returns the mean of the subvector defined by a start and end elements.
759 /// @param vector Vector to be evaluated.
760 /// @param begin Start element.
761 /// @param end End element.
762
mean(const Tensor<type,1> & vector,const Index & begin,const Index & end)763 type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end)
764 {
765 #ifdef __OPENNN_DEBUG__
766
767 if(begin > end)
768 {
769 ostringstream buffer;
770
771 buffer << "OpenNN Exception: Statistics class.\n"
772 << "type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end) \n"
773 << "Begin must be less or equal than end.\n";
774
775 throw logic_error(buffer.str());
776 }
777
778 #endif
779
780 if(end == begin) return vector[begin];
781
782 type sum = 0;
783
784 for(Index i = begin; i <= end; i++)
785 {
786 sum += vector(i);
787 }
788
789 return sum /static_cast<type>(end-begin+1);
790 }
791
792
793 /// Returns the mean of the elements in the vector.
794 /// @param vector Vector to be evaluated.
795
mean(const Tensor<type,1> & vector)796 type mean(const Tensor<type, 1>& vector)
797 {
798 const Index size = vector.dimension(0);
799
800 if (size == 0) return 0;
801
802 #ifdef __OPENNN_DEBUG__
803
804 if(size == 0)
805 {
806 ostringstream buffer;
807
808 buffer << "OpenNN Exception: Statistics Class.\n"
809 << "type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end) "
810 "const method.\n"
811 << "Size must be greater than zero.\n";
812
813 throw logic_error(buffer.str());
814 }
815
816 #endif
817
818 type sum = 0;
819
820 Index count = 0;
821
822 for(Index i = 0; i < size; i++)
823 {
824 if(!::isnan(vector(i)))
825 {
826 sum += vector(i);
827 count++;
828 }
829 }
830
831 const type mean = sum /static_cast<type>(count);
832
833 return mean;
834 }
835
836
837 /// Returns the variance of the elements in the vector.
838 /// @param vector Vector to be evaluated.
839
variance(const Tensor<type,1> & vector)840 type variance(const Tensor<type, 1>& vector)
841 {
842 const Index size = vector.dimension(0);
843
844 #ifdef __OPENNN_DEBUG__
845
846 if(size == 0)
847 {
848 ostringstream buffer;
849
850 buffer << "OpenNN Exception: Statistics Class.\n"
851 << "type variance(const Tensor<type, 1>& vector) "
852 "const method.\n"
853 << "Size must be greater than zero.\n";
854
855 throw logic_error(buffer.str());
856 }
857
858 #endif
859
860 type sum = 0;
861 type squared_sum = 0;
862
863 Index count = 0;
864
865 for(Index i = 0; i < size; i++)
866 {
867 if(!::isnan(vector(i)))
868 {
869 sum += vector(i);
870 squared_sum += vector(i) * vector(i);
871
872 count++;
873 }
874 }
875
876 if(count <= 1)
877 {
878 return 0.0;
879 }
880
881 const type variance = squared_sum/static_cast<type>(count - 1) -(sum/static_cast<type>(count))*(sum/static_cast<type>(count))*static_cast<type>(count)/static_cast<type>(count-1);
882
883 return variance;
884 }
885
886
887 /// Returns the variance of the elements in the vector.
888 /// @param vector Vector to be evaluated.
889
variance(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)890 type variance(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
891 {
892 const Index size = indices.dimension(0);
893
894 #ifdef __OPENNN_DEBUG__
895
896 if(size == 0)
897 {
898 ostringstream buffer;
899
900 buffer << "OpenNN Exception: Statistics Class.\n"
901 << "type variance(const Tensor<type, 1>&, const Tensor<Index, 1>&) "
902 "const method.\n"
903 << "Indeces size must be greater than zero.\n";
904
905 throw logic_error(buffer.str());
906 }
907
908 #endif
909
910 type sum = 0;
911 type squared_sum = 0;
912
913 Index count = 0;
914
915 Index index = 0;
916
917 for(Index i = 0; i < size; i++)
918 {
919 index = indices(i);
920
921 if(!::isnan(vector(index)))
922 {
923 sum += vector(index);
924 squared_sum += vector(index) * vector(index);
925
926 count++;
927 }
928 }
929
930 if(count <= 1)
931 {
932 return 0.0;
933 }
934
935 const type variance = squared_sum/static_cast<type>(count - 1) -(sum/static_cast<type>(count))*(sum/static_cast<type>(count))*static_cast<type>(count)/static_cast<type>(count-1);
936
937 return variance;
938 }
939
940
941 /// Returns the standard deviation of the elements in the vector.
942 /// @param vector Vector to be evaluated.
943
standard_deviation(const Tensor<type,1> & vector)944 type standard_deviation(const Tensor<type, 1>& vector)
945 {
946 #ifdef __OPENNN_DEBUG__
947
948 const Index size = vector.dimension(0);
949
950 if(size == 0)
951 {
952 ostringstream buffer;
953
954 buffer << "OpenNN Exception: Statistics Class.\n"
955 << "type standard_deviation(const Tensor<type, 1>&) const method.\n"
956 << "Size must be greater than zero.\n";
957
958 throw logic_error(buffer.str());
959 }
960
961 #endif
962 if(vector.size() == 0) return 0;
963
964 if(variance(vector)<static_cast<double>(1e-9)){
965 return static_cast<double>(0);
966 }else{
967 return sqrt(variance(vector));
968 }
969 }
970
971
972 /// Returns the standard deviation of the elements in the vector.
973 /// @param vector Vector to be evaluated.
974
standard_deviation(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)975 type standard_deviation(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
976 {
977 #ifdef __OPENNN_DEBUG__
978
979 const Index size = vector.dimension(0);
980
981 if(size == 0)
982 {
983 ostringstream buffer;
984
985 buffer << "OpenNN Exception: Statistics Class.\n"
986 << "type standard_deviation(const Tensor<type, 1>&, const Tensor<Index, 1>&) const method.\n"
987 << "Size must be greater than zero.\n";
988
989 throw logic_error(buffer.str());
990 }
991
992 #endif
993 if(variance(vector, indices)<static_cast<double>(1e-9)){
994 return static_cast<double>(0);
995 }else{
996 return sqrt(variance(vector, indices));
997 }
998 }
999
1000
1001
1002 /// @todo check
1003
standard_deviation(const Tensor<type,1> & vector,const Index & period)1004 Tensor<type, 1> standard_deviation(const Tensor<type, 1>& vector, const Index& period)
1005 {
1006 const Index size = vector.dimension(0);
1007
1008 Tensor<type, 1> std(size);
1009
1010 type mean_value = 0;
1011 type sum = 0;
1012
1013 for(Index i = 0; i < size; i++)
1014 {
1015 const Index begin = i < period ? 0 : i - period + 1;
1016 const Index end = i;
1017
1018 mean_value = mean(vector, begin,end);
1019
1020 for(Index j = begin; j < end+1; j++)
1021 {
1022 sum += (vector(j) - mean_value) *(vector(j) - mean_value);
1023 }
1024
1025 std(i) = sqrt(sum / type(period));
1026
1027 mean_value = 0;
1028 sum = 0;
1029 }
1030
1031
1032 return std;
1033 }
1034
1035
1036 /// Returns the asymmetry of the elements in the vector.
1037 /// @param vector Vector to be evaluated.
1038
asymmetry(const Tensor<type,1> & vector)1039 type asymmetry(const Tensor<type, 1>& vector)
1040 {
1041
1042 const Index size = vector.dimension(0);
1043
1044 #ifdef __OPENNN_DEBUG__
1045
1046 if(size == 0)
1047 {
1048 ostringstream buffer;
1049
1050 buffer << "OpenNN Exception: Statistics Class.\n"
1051 << "type asymmetry(const Tensor<type, 1>& vector) const method.\n"
1052 << "Size must be greater than zero.\n";
1053
1054 throw logic_error(buffer.str());
1055 }
1056
1057 #endif
1058
1059 if(size == 0 || 1)
1060 {
1061 return 0.0;
1062 }
1063
1064 const type standard_deviation_value = standard_deviation(vector);
1065
1066 const type mean_value = mean(vector);
1067
1068 type sum = 0;
1069
1070 Index count = 0;
1071
1072 for(Index i = 0; i < size; i++)
1073 {
1074 if(!::isnan(vector(i)))
1075 {
1076 sum += (vector(i) - mean_value) *(vector(i) - mean_value) *(vector(i) - mean_value);
1077
1078 count++;
1079 }
1080 }
1081
1082 const type numerator = sum /count;
1083 const type denominator = standard_deviation_value * standard_deviation_value * standard_deviation_value;
1084
1085 return numerator/denominator;
1086
1087 }
1088
1089 /// Returns the kurtosis of the elements in the vector.
1090 /// @param vector Vector to be evaluated.
1091
kurtosis(const Tensor<type,1> & vector)1092 type kurtosis(const Tensor<type, 1>& vector)
1093 {
1094 const Index size = vector.dimension(0);
1095 #ifdef __OPENNN_DEBUG__
1096
1097 if(size == 0)
1098 {
1099 ostringstream buffer;
1100
1101 buffer << "OpenNN Exception: Statistics Class.\n"
1102 << "type kurtosis(const Tensor<type, 1>& vector) const method.\n"
1103 << "Size must be greater than zero.\n";
1104
1105 throw logic_error(buffer.str());
1106 }
1107
1108 #endif
1109
1110 if(size == 1)
1111 {
1112 return 0.0;
1113 }
1114
1115 const type standard_deviation_value = standard_deviation(vector);
1116
1117 const type mean_value = mean(vector);
1118
1119 type sum = 0;
1120
1121 Index count = 0;
1122
1123 for(Index i = 0; i < size; i++)
1124 {
1125 if(!::isnan(vector(i)))
1126 {
1127 sum += (vector(i) - mean_value)*(vector(i) - mean_value)*(vector(i) - mean_value)*(vector(i) - mean_value);
1128
1129 count++;
1130 }
1131 }
1132
1133 const type numerator = sum /count;
1134 const type denominator = standard_deviation_value*standard_deviation_value*standard_deviation_value*standard_deviation_value;
1135
1136 return numerator/denominator - 3;
1137
1138 }
1139
1140
1141 /// Returns the median of the elements in the vector
1142 /// @param vector Vector to be evaluated.
1143
median(const Tensor<type,1> & vector)1144 type median(const Tensor<type, 1>& vector)
1145 {
1146 const Index size = vector.dimension(0);
1147
1148 // Fix missing values
1149
1150 Index new_size = 0;
1151
1152 for(Index i = 0; i < size; i++)
1153 {
1154 if(!isnan(vector(i)))
1155 {
1156 new_size++;
1157 }
1158 }
1159
1160 Tensor<type, 1> sorted_vector;
1161 sorted_vector.resize(new_size);
1162
1163 Index sorted_index = 0;
1164
1165 for(Index i = 0; i < size; i++)
1166 {
1167 if(!isnan(vector(i)))
1168 {
1169 sorted_vector(sorted_index) = vector(i);
1170
1171 sorted_index++;
1172 }
1173 }
1174
1175 // Calculate median
1176
1177 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
1178
1179 Index median_index;
1180
1181 if(new_size % 2 == 0)
1182 {
1183 median_index = static_cast<Index>(new_size / 2);
1184
1185 return (sorted_vector(median_index-1) + sorted_vector(median_index)) / static_cast<type>(2.0);
1186 }
1187 else
1188 {
1189 median_index = static_cast<Index>(new_size / 2);
1190
1191 return sorted_vector(median_index);
1192 }
1193 }
1194
1195
1196 /// Returns the quartiles of the elements in the vector.
1197 /// @param vector Vector to be evaluated.
1198
quartiles(const Tensor<type,1> & vector)1199 Tensor<type, 1> quartiles(const Tensor<type, 1>& vector)
1200 {
1201 const Index size = vector.dimension(0);
1202
1203 // Fix missing values
1204
1205 Index new_size = 0;
1206
1207 for(Index i = 0; i < size; i++)
1208 {
1209 if(!::isnan(vector(i)))
1210 {
1211 new_size++;
1212 }
1213 }
1214
1215 Tensor<type, 1> sorted_vector;
1216 sorted_vector.resize(new_size);
1217
1218 Index sorted_index = 0;
1219
1220 for(Index i = 0; i < size; i++)
1221 {
1222 if(!::isnan(vector(i)))
1223 {
1224 sorted_vector(sorted_index) = vector(i);
1225
1226 sorted_index++;
1227 }
1228 }
1229
1230 sort(sorted_vector.data(), sorted_vector.data() + new_size, less<type>());
1231
1232 // Calculate quartiles
1233
1234 Tensor<type, 1> first_sorted_vector(new_size/2);
1235 Tensor<type, 1> last_sorted_vector(new_size/2);
1236
1237 if (new_size % 2 == 0)
1238 {
1239 for(Index i = 0; i < new_size/2 ; i++)
1240 {
1241 first_sorted_vector(i) = sorted_vector(i);
1242 last_sorted_vector(i) = sorted_vector[i + new_size/2];
1243 }
1244 }
1245 else
1246 {
1247 for(Index i = 0; i < new_size/2 ; i++)
1248 {
1249 first_sorted_vector(i) = sorted_vector(i);
1250 last_sorted_vector(i) = sorted_vector[i + new_size/2 + 1];
1251 }
1252 }
1253
1254
1255 Tensor<type, 1> quartiles(3);
1256
1257 if(new_size == 1)
1258 {
1259 quartiles(0) = sorted_vector(0);
1260 quartiles(1) = sorted_vector(0);
1261 quartiles(2) = sorted_vector(0);
1262 }
1263 else if(new_size == 2)
1264 {
1265 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/4;
1266 quartiles(1) = (sorted_vector(0)+sorted_vector(1))/2;
1267 quartiles(2) = (sorted_vector(0)+sorted_vector(1))*3/4;
1268 }
1269 else if(new_size == 3)
1270 {
1271 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/2;
1272 quartiles(1) = sorted_vector(1);
1273 quartiles(2) = (sorted_vector(2)+sorted_vector(1))/2;
1274 }
1275 else
1276 {
1277 quartiles(0) = median(first_sorted_vector);
1278 quartiles(1) = median(sorted_vector);
1279 quartiles(2) = median(last_sorted_vector);
1280 }
1281 return quartiles;
1282 }
1283
1284
1285 /// Returns the quartiles of the elements of the vector that correspond to the given indices.
1286 /// @param vector Vector to be evaluated.
1287 /// @param indices Indices of the elements of the vector to be evaluated.
1288
quartiles(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)1289 Tensor<type, 1> quartiles(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
1290 {
1291 const Index indices_size = indices.dimension(0);
1292
1293 // Fix missing values
1294
1295 Index index;
1296 Index new_size = 0;
1297
1298 for(Index i = 0; i < indices_size; i++)
1299 {
1300 index = indices(i);
1301
1302 if(!isnan(vector(index)))
1303 {
1304 new_size++;
1305 }
1306 }
1307
1308 Tensor<type, 1> sorted_vector;
1309 sorted_vector.resize(new_size);
1310
1311 Index sorted_index = 0;
1312
1313 for(Index i = 0; i < indices_size; i++)
1314 {
1315 index = indices(i);
1316
1317 if(!isnan(vector(index)))
1318 {
1319 sorted_vector(sorted_index) = vector(index);
1320
1321 sorted_index++;
1322 }
1323 }
1324
1325 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
1326
1327 // Calculate quartiles
1328
1329 Tensor<type, 1> first_sorted_vector(new_size/2);
1330 Tensor<type, 1> last_sorted_vector(new_size/2);
1331
1332 for(Index i = 0; i < new_size/2 ; i++)
1333 {
1334 first_sorted_vector(i) = sorted_vector(i);
1335 }
1336
1337 for(Index i = 0; i < new_size/2; i++)
1338 {
1339 last_sorted_vector(i) = sorted_vector(i + new_size - new_size/2);
1340 }
1341
1342 Tensor<type, 1> quartiles(3);
1343
1344 if(new_size == 1)
1345 {
1346 quartiles(0) = sorted_vector(0);
1347 quartiles(1) = sorted_vector(0);
1348 quartiles(2) = sorted_vector(0);
1349 }
1350 else if(new_size == 2)
1351 {
1352 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/4;
1353 quartiles(1) = (sorted_vector(0)+sorted_vector(1))/2;
1354 quartiles(2) = (sorted_vector(0)+sorted_vector(1))*3/4;
1355 }
1356 else if(new_size == 3)
1357 {
1358 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/2;
1359 quartiles(1) = sorted_vector(1);
1360 quartiles(2) = (sorted_vector(2)+sorted_vector(1))/2;
1361 }
1362 else if(new_size % 2 == 0)
1363 {
1364 Index median_index = static_cast<Index>(first_sorted_vector.size() / 2);
1365 quartiles(0) = (first_sorted_vector(median_index-1) + first_sorted_vector(median_index)) / static_cast<type>(2.0);
1366
1367 median_index = static_cast<Index>(new_size / 2);
1368 quartiles(1) = (sorted_vector(median_index-1) + sorted_vector(median_index)) / static_cast<type>(2.0);
1369
1370 median_index = static_cast<Index>(last_sorted_vector.size() / 2);
1371 quartiles(2) = (last_sorted_vector(median_index-1) + last_sorted_vector(median_index)) / static_cast<type>(2.0);
1372 }
1373 else
1374 {
1375 quartiles(0) = sorted_vector(new_size/4);
1376 quartiles(1) = sorted_vector(new_size/2);
1377 quartiles(2) = sorted_vector(new_size*3/4);
1378 }
1379
1380 return quartiles;
1381 }
1382
1383
1384
1385 /// Returns the box and whispers for a vector.
1386 /// @param vector Vector to be evaluated.
1387
box_plot(const Tensor<type,1> & vector)1388 BoxPlot box_plot(const Tensor<type, 1>& vector)
1389 {
1390 BoxPlot boxplot;
1391
1392 if(vector.dimension(0) == 0) {
1393 boxplot.minimum = NAN;
1394 boxplot.first_quartile = NAN;
1395 boxplot.median = NAN;
1396 boxplot.third_quartile = NAN;
1397 boxplot.maximum = NAN;
1398 return boxplot;
1399 }
1400
1401
1402 const Tensor<type, 1> quartiles = OpenNN::quartiles(vector);
1403
1404 boxplot.minimum = minimum(vector);
1405 boxplot.first_quartile = quartiles(0);
1406 boxplot.median = quartiles(1);
1407 boxplot.third_quartile = quartiles(2);
1408 boxplot.maximum = maximum(vector);
1409
1410 return boxplot;
1411 }
1412
1413
1414 /// Returns the box and whispers for the elements of the vector that correspond to the given indices.
1415 /// @param vector Vector to be evaluated.
1416 /// @param indices Indices of the elements of the vector to be evaluated.
1417
box_plot(const Tensor<type,1> & vector,const Tensor<Index,1> & indices)1418 BoxPlot box_plot(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
1419 {
1420 BoxPlot boxplot;
1421
1422 if(vector.dimension(0) == 0 || indices.dimension(0) == 0) return boxplot;
1423
1424 const Tensor<type, 1> quartiles = OpenNN::quartiles(vector, indices);
1425
1426 boxplot.minimum = minimum(vector, indices);
1427 boxplot.first_quartile = quartiles(0);
1428 boxplot.median = quartiles(1);
1429 boxplot.third_quartile = quartiles(2);
1430 boxplot.maximum = maximum(vector, indices);
1431
1432 return boxplot;
1433 }
1434
1435
1436 /// This method bins the elements of the vector into a given number of equally
1437 /// spaced containers.
1438 /// It returns a vector of two vectors.
1439 /// The size of both subvectors is the number of bins.
1440 /// The first subvector contains the frequency of the bins.
1441 /// The second subvector contains the center of the bins.
1442 /// @param vector Vector to obtain the histogram.
1443 /// @param bins_number Number of bins to split the histogram.
1444
histogram(const Tensor<type,1> & vector,const Index & bins_number)1445 Histogram histogram(const Tensor<type, 1>& vector, const Index &bins_number)
1446 {
1447 #ifdef __OPENNN_DEBUG__
1448
1449 if(bins_number < 1)
1450 {
1451 ostringstream buffer;
1452
1453 buffer << "OpenNN Exception: Statistics Class.\n"
1454 << "Histogram histogram(const Tensor<type, 1>&, "
1455 "const Index&) const method.\n"
1456 << "Number of bins is less than one.\n";
1457
1458 throw logic_error(buffer.str());
1459 }
1460
1461 #endif
1462
1463 const Index size = vector.dimension(0);
1464
1465 Tensor<type, 1> minimums(bins_number);
1466 Tensor<type, 1> maximums(bins_number);
1467
1468 Tensor<type, 1> centers(bins_number);
1469 Tensor<Index, 1> frequencies(bins_number);
1470 frequencies.setZero();
1471
1472 Index unique_values_number = 1;
1473 Tensor<type, 1> old_unique_values(1);
1474 Tensor<type, 1> unique_values(1);
1475 unique_values(0) = vector(0);
1476 old_unique_values = unique_values;
1477
1478 for(Index i = 1; i < size; i++)
1479 {
1480 if(std::find(unique_values.data(), unique_values.data()+unique_values.size(), vector(i)) == unique_values.data()+unique_values.size())
1481 {
1482 unique_values_number++;
1483
1484 unique_values.resize(unique_values_number);
1485
1486 for(Index j = 0; j < unique_values_number-1; j++) unique_values(j) = old_unique_values(j);
1487
1488 unique_values(unique_values_number-1) = vector(i);
1489
1490 old_unique_values = unique_values;
1491 }
1492
1493 if(unique_values_number > bins_number) break;
1494 }
1495
1496 if(unique_values_number <= bins_number)
1497 {
1498 sort(unique_values.data(), unique_values.data() + unique_values.size(), less<type>());
1499
1500 centers = unique_values;
1501 minimums = unique_values;
1502 maximums = unique_values;
1503
1504 frequencies.resize(unique_values_number);
1505 frequencies.setZero();
1506
1507 for(Index i = 0; i < size; i++)
1508 {
1509 if(isnan(vector(i))) continue;
1510
1511 for(Index j = 0; j < unique_values_number; j++)
1512 {
1513 if(static_cast<Index>(vector(i)) == static_cast<Index>(centers(j)))
1514 {
1515 frequencies(j)++;
1516 break;
1517 }
1518 }
1519 }
1520 }
1521 else
1522 {
1523 const type min = minimum(vector);
1524 const type max = maximum(vector);
1525
1526 const type length = (max - min) /static_cast<type>(bins_number);
1527
1528 minimums(0) = min;
1529 maximums(0) = min + length;
1530 centers(0) = (maximums(0) + minimums(0)) /static_cast<type>(2.0);
1531
1532 // Calculate bins center
1533
1534 for(Index i = 1; i < bins_number; i++)
1535 {
1536 minimums(i) = minimums(i - 1) + length;
1537 maximums(i) = maximums(i - 1) + length;
1538
1539 centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1540 }
1541
1542 // Calculate bins frequency
1543
1544 const Index size = vector.dimension(0);
1545
1546 for(Index i = 0; i < size; i++)
1547 {
1548 if(isnan(vector(i)))
1549 {
1550 continue;
1551 }
1552
1553 for(Index j = 0; j < bins_number - 1; j++)
1554 {
1555 if(vector(i) >= minimums(j) && vector(i) < maximums(j))
1556 {
1557 frequencies(j)++;
1558 break;
1559 }
1560 }
1561
1562 if(vector(i) >= minimums(bins_number - 1))
1563 {
1564 frequencies(bins_number - 1)++;
1565 }
1566 }
1567 }
1568
1569 Histogram histogram;
1570 histogram.centers = centers;
1571 histogram.minimums = minimums;
1572 histogram.maximums = maximums;
1573 histogram.frequencies = frequencies;
1574
1575 return histogram;
1576 }
1577
1578
1579 /// This method bins the elements of the vector into a given number of equally
1580 /// spaced containers.
1581 /// It returns a vector of two vectors.
1582 /// The size of both subvectors is the number of bins.
1583 /// The first subvector contains the frequency of the bins.
1584 /// The second subvector contains the center of the bins.
1585 /// @param vector
1586 /// @param center
1587 /// @param bins_number
1588
1589
histogram_centered(const Tensor<type,1> & vector,const type & center,const Index & bins_number)1590 Histogram histogram_centered(const Tensor<type, 1>& vector, const type& center, const Index & bins_number)
1591 {
1592 #ifdef __OPENNN_DEBUG__
1593
1594 if(bins_number < 1)
1595 {
1596 ostringstream buffer;
1597
1598 buffer << "OpenNN Exception: Statistics Class.\n"
1599 << "Histogram histogram_centered(const Tensor<type, 1>&, "
1600 "const type&, const Index&) const method.\n"
1601 << "Number of bins is less than one.\n";
1602
1603 throw logic_error(buffer.str());
1604 }
1605
1606 #endif
1607
1608 Index bin_center;
1609
1610 if(bins_number%2 == 0)
1611 {
1612 bin_center = static_cast<Index>(static_cast<type>(bins_number)/static_cast<type>(2.0));
1613 }
1614 else
1615 {
1616 bin_center = static_cast<Index>(static_cast<type>(bins_number)/static_cast<type>(2.0) + static_cast<type>(0.5));
1617 }
1618
1619 Tensor<type, 1> minimums(bins_number);
1620 Tensor<type, 1> maximums(bins_number);
1621
1622 Tensor<type, 1> centers(bins_number);
1623 Tensor<Index, 1> frequencies(bins_number);
1624 frequencies.setZero();
1625
1626 const type min = minimum(vector);
1627 const type max = maximum(vector);
1628
1629 const type length = (max - min)/static_cast<type>(bins_number);
1630
1631 minimums(bin_center-1) = center - length;
1632 maximums(bin_center-1) = center + length;
1633 centers(bin_center-1) = center;
1634
1635 // Calculate bins center
1636
1637 for(Index i = bin_center; i < bins_number; i++) // Upper centers
1638 {
1639 minimums(i) = minimums(i - 1) + length;
1640 maximums(i) = maximums(i - 1) + length;
1641
1642 centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1643 }
1644
1645 for(Index i = static_cast<Index>(bin_center)-2; i >= 0; i--) // Lower centers
1646 {
1647 minimums(i) = minimums(i + 1) - length;
1648 maximums(i) = maximums(i + 1) - length;
1649
1650 centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1651 }
1652
1653 // Calculate bins frequency
1654
1655 const Index size = vector.dimension(0);
1656
1657 for(Index i = 0; i < size; i++)
1658 {
1659 for(Index j = 0; j < bins_number - 1; j++)
1660 {
1661 if(vector(i) >= minimums(j) && vector(i) < maximums(j))
1662 {
1663 frequencies(j)++;
1664 }
1665 }
1666
1667 if(vector(i) >= minimums(bins_number - 1))
1668 {
1669 frequencies(bins_number - 1)++;
1670 }
1671 }
1672
1673 Histogram histogram(bins_number);
1674 histogram.centers = centers;
1675 histogram.minimums = minimums;
1676 histogram.maximums = maximums;
1677 histogram.frequencies = frequencies;
1678
1679 return histogram;
1680 }
1681
1682
1683 /// This method bins the elements of the vector into a given number of equally
1684 /// spaced containers.
1685 /// It returns a vector of two vectors.
1686 /// The size of both subvectors is the number of bins.
1687 /// The first subvector contains the frequency of the bins.
1688 /// The second subvector contains the center of the bins.
1689 /// @todo isnan is not defined for bool.
1690
histogram(const Tensor<bool,1> & v)1691 Histogram histogram(const Tensor<bool, 1>& v)
1692 {
1693 Tensor<type, 1> minimums(2);
1694 minimums.setZero();
1695 Tensor<type, 1> maximums(2);
1696 maximums.setConstant(1);
1697
1698 Tensor<type, 1> centers(2);
1699 centers.setValues({0,1});
1700 Tensor<Index, 1> frequencies(2);
1701 frequencies.setZero();
1702
1703 // Calculate bins frequency
1704
1705 const Index size = v.dimension(0);
1706
1707 for(Index i = 0; i < size; i++)
1708 {
1709 // if(isnan(v(i))) continue;
1710
1711 for(Index j = 0; j < 2; j++)
1712 {
1713 if(static_cast<Index>(v(i)) == static_cast<Index>(minimums(j)))
1714 {
1715 frequencies(j)++;
1716 }
1717 }
1718 }
1719
1720 Histogram histogram(2);
1721 histogram.centers = centers;
1722 histogram.minimums = minimums;
1723 histogram.maximums = maximums;
1724 histogram.frequencies = frequencies;
1725
1726 // Histogram histogram;
1727 return histogram;
1728 }
1729
1730
/// Histogram of a vector of integer values.
/// @note This overload is currently a stub: apart from the debug-only check of bins_number,
/// it does not read its arguments and returns a default-constructed Histogram.
/// A disabled implementation is kept commented out inside the body.
/// @param vector Vector of integer values (not read at present).
/// @param bins_number Number of bins (only checked when __OPENNN_DEBUG__ is defined).

Histogram histogram(const Tensor<Index, 1>& vector, const Index& bins_number)
{
#ifdef __OPENNN_DEBUG__

    // Debug builds reject a number of bins smaller than one

    if(bins_number < 1)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: Statistics Class.\n"
               << "Histogram calculate_histogram_integers(const Tensor<Index, 1>&, "
               "const Index&) const method.\n"
               << "Number of bins is less than one.\n";

        throw logic_error(buffer.str());
    }

#endif
    /*
        Tensor<Index, 1> centers = vector.get_integer_elements(bins_number);
        const Index centers_number = centers.size();

        sort(centers.data(), centers.data() + centers.size(), less<Index>());

        Tensor<type, 1> minimums(centers_number);
        Tensor<type, 1> maximums(centers_number);
        Tensor<Index, 1> frequencies(centers_number);

        for(Index i = 0; i < centers_number; i++)
        {
            minimums(i) = centers(i);
            maximums(i) = centers(i);
            frequencies(i) = vector.count_equal_to(centers(i));
        }

        Histogram histogram(centers_number);
        histogram.centers = centers.cast<type>();
        histogram.minimums = minimums;
        histogram.maximums = maximums;
        histogram.frequencies = frequencies;

        return histogram;
    */

    // Placeholder result while the implementation above stays disabled

    return Histogram();
}
1785
1786
1787 /// Returns a vector containing the sum of the frequencies of the bins to which
1788 /// this vector belongs.
1789 /// @param histograms Used histograms.
1790
total_frequencies(const Tensor<Histogram,1> & histograms)1791 Tensor<Index, 1> total_frequencies(const Tensor<Histogram, 1>&histograms)
1792 {
1793 const Index histograms_number = histograms.size();
1794
1795 Tensor<Index, 1> total_frequencies(histograms_number);
1796
1797 for(Index i = 0; i < histograms_number; i++)
1798 {
1799 total_frequencies(i) = histograms(i).frequencies(i);
1800 }
1801
1802 return total_frequencies;
1803 }
1804
1805
1806 /// Calculates a histogram for each column, each having a given number of bins.
1807 /// It returns a vector of vectors.
1808 /// The size of the main vector is the number of columns.
1809 /// Each subvector contains the frequencies and centers of that colums.
1810 /// @param matrix Data to calculate histograms
1811 /// @param bins_number Number of bins for each histogram.
1812
histograms(const Tensor<type,2> & matrix,const Index & bins_number)1813 Tensor<Histogram, 1> histograms(const Tensor<type, 2>& matrix, const Index& bins_number)
1814 {
1815 const Index rows_number = matrix.dimension(0);
1816 const Index columns_number = matrix.dimension(1);
1817
1818 Tensor<Histogram, 1> histograms(columns_number);
1819
1820 Tensor<type, 1> column(rows_number);
1821
1822 for(Index i = 0; i < columns_number; i++)
1823 {
1824 column = matrix.chip(i,1);
1825
1826 histograms(i) = histogram(column, bins_number);
1827
1828 /*
1829 if(column.is_binary())
1830 {
1831 histograms(i) = histogram(column.to_bool_vector());
1832 }
1833 else
1834 {
1835 histograms(i) = histogram(column, bins_number);
1836 }
1837 */
1838 }
1839
1840 return histograms;
1841 }
1842
1843
1844 /// Returns the basic descriptives of the columns.
1845 /// The format is a vector of descriptives structures.
1846 /// The size of that vector is equal to the number of columns in this matrix.
1847 /// @param matrix Used matrix.
1848
descriptives(const Tensor<type,2> & matrix)1849 Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix)
1850 {
1851 const Index rows_number = matrix.dimension(0);
1852 const Index columns_number = matrix.dimension(1);
1853
1854 #ifdef __OPENNN_DEBUG__
1855
1856 if(rows_number == 0)
1857 {
1858 ostringstream buffer;
1859
1860 buffer << "OpenNN Exception: Statistics Class.\n"
1861 << "Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>&) "
1862 "const method.\n"
1863 << "Number of rows must be greater than one.\n";
1864
1865 throw logic_error(buffer.str());
1866 }
1867
1868 #endif
1869
1870 Tensor<Descriptives, 1> descriptives(columns_number);
1871
1872 Tensor<type, 1> column(rows_number);
1873
1874 #pragma omp parallel for private(column)
1875
1876 for(Index i = 0; i < columns_number; i++)
1877 {
1878 column = matrix.chip(i,1);
1879
1880 descriptives(i) = OpenNN::descriptives(column);
1881 }
1882
1883 return descriptives;
1884 }
1885
1886
1887 /// Returns the basic descriptives of given columns for given rows.
1888 /// The format is a vector of descriptives structures.
1889 /// The size of that vector is equal to the number of given columns.
1890 /// @param row_indices Indices of the rows for which the descriptives are to be computed.
1891 /// @param columns_indices Indices of the columns for which the descriptives are to be computed.
1892
descriptives(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices,const Tensor<Index,1> & columns_indices)1893 Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
1894 {
1895 const Index row_indices_size = row_indices.size();
1896 const Index columns_indices_size = columns_indices.size();
1897
1898 Tensor<Descriptives, 1> descriptives(columns_indices_size);
1899
1900 Index row_index, column_index;
1901
1902 Tensor<type, 1> minimums(columns_indices_size);
1903 minimums.setConstant(numeric_limits<type>::max());
1904
1905 Tensor<type, 1> maximums(columns_indices_size);
1906 maximums.setConstant(numeric_limits<type>::min());
1907
1908 Tensor<double, 1> sums(columns_indices_size);
1909 Tensor<double, 1> squared_sums(columns_indices_size);
1910 Tensor<Index, 1> count(columns_indices_size);
1911
1912 sums.setZero();
1913 squared_sums.setZero();
1914 count.setZero();
1915
1916 for(Index i = 0; i < row_indices_size; i++)
1917 {
1918 row_index = row_indices(i);
1919
1920 #pragma omp parallel for private(column_index)
1921
1922 for(Index j = 0; j < columns_indices_size; j++)
1923 {
1924 column_index = columns_indices(j);
1925
1926 const type value = matrix(row_index,column_index);
1927
1928 if(isnan(value)) continue;
1929
1930 if(value < minimums(j)) minimums(j) = value;
1931
1932 if(value > maximums(j)) maximums(j) = value;
1933
1934 sums(j) += value;
1935 squared_sums(j) += value*value;
1936 count(j)++;
1937 }
1938 }
1939
1940 const Tensor<double, 1> mean = sums/count;
1941
1942 Tensor<double, 1> standard_deviation(columns_indices_size);
1943
1944 if(row_indices_size > 1)
1945 {
1946 #pragma omp parallel for
1947
1948 for(Index i = 0; i < columns_indices_size; i++)
1949 {
1950 const double variance = squared_sums(i)/static_cast<double>(count(i)-1)
1951 - (sums(i)/static_cast<double>(count(i)))*(sums(i)/static_cast<double>(count(i)))*static_cast<double>(count(i))/static_cast<double>(count(i)-1);
1952
1953 standard_deviation(i) = sqrt(variance);
1954 }
1955 }
1956
1957 for(Index i = 0; i < columns_indices_size; i++)
1958 {
1959 descriptives(i).minimum = minimums(i);
1960 descriptives(i).maximum = maximums(i);
1961 descriptives(i).mean = mean(i);
1962 descriptives(i).standard_deviation = standard_deviation(i);
1963 }
1964
1965 return descriptives;
1966 }
1967
1968
1969 /// Returns the means of given rows.
1970 /// The format is a vector of type values.
1971 /// The size of that vector is equal to the number of given rows.
1972 /// @param matrix Used matrix.
1973 /// @param columns_indices Indices of the columns for which the descriptives are to be computed.
1974
rows_means(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices)1975 Tensor<type, 1> rows_means(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices)
1976 {
1977 const Index columns_number = matrix.dimension(1);
1978
1979 Tensor<Index, 1> used_row_indices;
1980
1981 if(matrix.dimension(0) == 0 && matrix.dimension(1) == 0)
1982 {
1983 used_row_indices.resize(matrix.dimension(0));
1984 }
1985 else
1986 {
1987 used_row_indices = row_indices;
1988 }
1989
1990 const Index row_indices_size = used_row_indices.size();
1991
1992 Tensor<type, 1> means(columns_number);
1993
1994 Tensor<type, 1> column(row_indices_size);
1995
1996 for(Index i = 0; i < columns_number; i++)
1997 {
1998 for(Index j = 0; j < row_indices_size; j++)
1999 {
2000 Index row_index = row_indices(j);
2001
2002 column(j) = matrix(row_index,i);
2003 }
2004
2005 means(i) = mean(column);
2006 }
2007
2008 return means;
2009 }
2010
2011
2012 /// Returns the minimums values of given columns.
2013 /// The format is a vector of type values.
2014 /// The size of that vector is equal to the number of given columns.
2015 /// @param matrix Used matrix.
2016 /// @param rows_indices Indices of the rows for which the minimums are to be computed.
2017 /// @param columns_indices Indices of the columns for which the minimums are to be computed.
2018
columns_minimums(const Tensor<type,2> & matrix,const Tensor<Index,1> & rows_indices,const Tensor<Index,1> & columns_indices)2019 Tensor<type, 1> columns_minimums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
2020 {
2021 const Index rows_number = matrix.dimension(0);
2022 const Index columns_number = matrix.dimension(1);
2023
2024 Tensor<Index, 1> used_columns_indices;
2025
2026 if(columns_indices.dimension(0) == 0)
2027 {
2028 used_columns_indices.resize(columns_number);
2029
2030 for (Index i = 0; i < columns_number; i++)
2031 {
2032 used_columns_indices(i) = i;
2033 }
2034 }
2035 else
2036 {
2037 used_columns_indices = columns_indices;
2038 }
2039
2040 Tensor<Index, 1> used_rows_indices;
2041
2042 if(rows_indices.dimension(0) == 0)
2043 {
2044 used_rows_indices.resize(rows_number);
2045
2046 for (Index i = 0; i < rows_number; i++)
2047 {
2048 used_rows_indices(i) = i;
2049 }
2050 }
2051 else
2052 {
2053 used_rows_indices = rows_indices;
2054 }
2055
2056 const Index rows_indices_size = used_rows_indices.size();
2057 const Index columns_indices_size = used_columns_indices.size();
2058
2059 Tensor<type, 1> minimums(columns_indices_size);
2060
2061 Index row_index;
2062 Index column_index;
2063
2064 for(Index j = 0; j < columns_indices_size; j++)
2065 {
2066 column_index = used_columns_indices(j);
2067
2068 Tensor<type, 1> column(rows_indices_size);
2069
2070 for(Index i = 0; i < rows_indices_size; i++)
2071 {
2072 row_index = used_rows_indices(i);
2073
2074 column(i) = matrix(row_index,column_index);
2075 }
2076
2077 minimums(j) = minimum(column);
2078 }
2079
2080 return minimums;
2081 }
2082
2083
2084 /// Returns the maximums values of given columns.
2085 /// The format is a vector of type values.
2086 /// The size of that vector is equal to the number of given columns.
2087 /// @param matrix Used matrix.
2088 /// @param columns_indices Indices of the columns for which the descriptives are to be computed.
2089
columns_maximums(const Tensor<type,2> & matrix,const Tensor<Index,1> & columns_indices)2090 Tensor<type, 1> columns_maximums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2091 {
2092 const Index rows_number = matrix.dimension(0);
2093 const Index columns_number = matrix.dimension(1);
2094
2095 Tensor<Index, 1> used_columns_indices;
2096
2097 if(columns_indices.dimension(0) == 0 && columns_indices.dimension(1) == 0)
2098 {
2099 used_columns_indices.resize(columns_number);
2100 }
2101 else
2102 {
2103 used_columns_indices = columns_indices;
2104 }
2105
2106 const Index columns_indices_size = used_columns_indices.size();
2107
2108 Tensor<type, 1> maximums(columns_indices_size);
2109
2110 Index column_index;
2111 Tensor<type, 1> column(rows_number);
2112
2113 for(Index i = 0; i < columns_indices_size; i++)
2114 {
2115 column_index = used_columns_indices(i);
2116
2117 column = matrix.chip(column_index,1);
2118
2119 maximums(i) = maximum(column);
2120 }
2121
2122 return maximums;
2123 }
2124
2125
range(const Tensor<type,1> & vector)2126 type range(const Tensor<type, 1>& vector)
2127 {
2128 const type min = minimum(vector);
2129 const type max = maximum(vector);
2130
2131 return abs(max - min);
2132 }
2133
2134 /*
2135 /// Calculates the box plots for a set of rows of each of the given columns of this matrix.
2136 /// @param matrix Used matrix.
2137 /// @param rows_indices Rows to be used for the box plot.
2138 /// @param columns_indices Indices of the columns for which box plots are going to be calculated.
2139 /// @todo remove?
2140
2141 Tensor<BoxPlot, 1> box_plots(const Tensor<type, 2>& matrix, const Tensor<Tensor<Index, 1>, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
2142 {
2143 const Index columns_number = columns_indices.size();
2144
2145 #ifdef __OPENNN_DEBUG__
2146
2147 if(columns_number == rows_indices.size())
2148 {
2149 ostringstream buffer;
2150
2151 buffer << "OpenNN Exception: Statistics class."
2152 << "void box_plots(const Tensor<type, 2>&, "
2153 "const Tensor<Tensor<Index, 1>, 1>&, const Tensor<Index, 1>&) const method.\n"
2154 << "Size of row indices must be equal to the number of columns.\n";
2155
2156 throw logic_error(buffer.str());
2157 }
2158
2159 #endif
2160
2161 Tensor<BoxPlot, 1> box_plots(columns_number);
2162
2163 for(Index i = 0; i < matrix.dimension(1); i++)
2164 {
2165
2166
2167 }
2168
2169 Tensor<type, 1> column;
2170
2171 #pragma omp parallel for private(column)
2172
2173 for(Index i = 0; i < columns_number; i++)
2174 {
2175 box_plots(i).resize(5);
2176
2177 const Index rows_number = rows_indices(i).size();
2178
2179 column = matrix.get_column(columns_indices(i)).get_subvector(rows_indices(i));
2180
2181 sort(column.begin(), column.end(), less<type>());
2182
2183 // Minimum
2184
2185 box_plots(i)[0] = column[0];
2186
2187 if(rows_number % 2 == 0)
2188 {
2189 // First quartile
2190
2191 box_plots(i)[1] = (column[rows_number / 4] + column[rows_number / 4 + 1]) /static_cast<type>(2.0);
2192
2193 // Second quartile
2194
2195 box_plots(i)[2] = (column[rows_number * 2 / 4] +
2196 column[rows_number * 2 / 4 + 1]) /
2197 2.0;
2198
2199 // Third quartile
2200
2201 box_plots(i)[3] = (column[rows_number * 3 / 4] +
2202 column[rows_number * 3 / 4 + 1]) /
2203 2.0;
2204 }
2205 else
2206 {
2207 // First quartile
2208
2209 box_plots(i)[1] = column[rows_number / 4];
2210
2211 // Second quartile
2212
2213 box_plots(i)[2] = column[rows_number * 2 / 4];
2214
2215 //Third quartile
2216
2217 box_plots(i)[3] = column[rows_number * 3 / 4];
2218 }
2219
2220 // Maximum
2221
2222 box_plots(i)[4] = column[rows_number-1];
2223 }
2224
2225 return box_plots;
2226 }
2227 */
2228
2229 /// Returns the minimum, maximum, mean and standard deviation of the elements in the vector.
2230 /// @param vector Vector to be evaluated.
2231
descriptives(const Tensor<type,1> & vector)2232 Descriptives descriptives(const Tensor<type, 1>& vector)
2233 {
2234 const Index size = vector.dimension(0);
2235
2236 #ifdef __OPENNN_DEBUG__
2237
2238 if(size == 0)
2239 {
2240 ostringstream buffer;
2241
2242 buffer << "OpenNN Exception: Statistics Class.\n"
2243 << "type descriptives(const Tensor<type, 1>&, "
2244 "const Tensor<Index, 1>&).\n"
2245 << "Size must be greater than zero.\n";
2246
2247 throw logic_error(buffer.str());
2248 }
2249
2250 #endif
2251
2252 Descriptives descriptives;
2253
2254 type minimum = numeric_limits<type>::max();
2255 type maximum;
2256
2257 type sum = 0;
2258 type squared_sum = 0;
2259 Index count = 0;
2260
2261 maximum = -numeric_limits<type>::max();
2262
2263 for(Index i = 0; i < size; i++)
2264 {
2265 if(!::isnan(vector(i)))
2266 {
2267 if(vector(i) < minimum)
2268 {
2269 minimum = vector(i);
2270 }
2271
2272 if(vector(i) > maximum)
2273 {
2274 maximum = vector(i);
2275 }
2276
2277 sum += vector(i);
2278 squared_sum += vector(i) *vector(i);
2279
2280 count++;
2281 }
2282 }
2283
2284 const type mean = sum/static_cast<type>(count);
2285
2286 type standard_deviation;
2287
2288 if(count <= 1)
2289 {
2290 standard_deviation = 0;
2291 }
2292 else
2293 {
2294 const type numerator = squared_sum -(sum * sum) / count;
2295 const type denominator = size - static_cast<type>(1.0);
2296
2297 standard_deviation = numerator / denominator;
2298 }
2299
2300 standard_deviation = sqrt(standard_deviation);
2301
2302 descriptives.minimum = minimum;
2303 descriptives.maximum = maximum;
2304 descriptives.mean = mean;
2305 descriptives.standard_deviation = standard_deviation;
2306
2307 return descriptives;
2308 }
2309
2310
2311 /// Calculates the distance between the empirical distribution of the vector and
2312 /// the normal, half-normal and uniform cumulative distribution. It returns 0, 1
2313 /// or 2 if the closest distribution is the normal, half-normal or the uniform,
2314 /// respectively.
2315 /// @todo review.
2316
perform_distribution_distance_analysis(const Tensor<type,1> & vector)2317 Index perform_distribution_distance_analysis(const Tensor<type, 1>& vector)
2318 {
2319 Tensor<type, 1> distances(2);
2320 distances.setZero();
2321
2322 const Index nans = count_nan(vector);
2323
2324 const Index new_size = vector.size() - nans;
2325
2326 Tensor<type, 1> new_vector(new_size);
2327
2328 Index index = 0;
2329
2330 for(Index i = 0; i < vector.size(); i++)
2331 {
2332 if(!::isnan(vector(i)))
2333 {
2334 new_vector(index) = vector(i);
2335 index++;
2336 }
2337 }
2338
2339 const Index n = vector.dimension(0);
2340
2341 Tensor<type, 1> sorted_vector(new_vector);
2342
2343 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2344
2345 const Descriptives descriptives = OpenNN::descriptives(vector);
2346
2347 const type mean = descriptives.mean;
2348 const type standard_deviation = descriptives.standard_deviation;
2349 const type minimum = sorted_vector(0);
2350 const type maximum = sorted_vector(n-1);
2351
2352 #pragma omp parallel for schedule(dynamic)
2353
2354 for(Index i = 0; i < n; i++)
2355 {
2356 const type normal_distribution = static_cast<type>(0.5)
2357 * static_cast<type>(erfc((mean - sorted_vector(i)))/static_cast<type>((standard_deviation*static_cast<type>(sqrt(2)))));
2358
2359 const type uniform_distribution = (sorted_vector(i)-minimum)/(maximum-minimum);
2360
2361 type empirical_distribution;
2362
2363 Index counter = 0;
2364
2365 if(vector(i) < sorted_vector(0))
2366 {
2367 empirical_distribution = 0;
2368 }
2369 else if(vector(i) >= sorted_vector(n-1))
2370 {
2371 empirical_distribution = 1.0;
2372 }
2373 else
2374 {
2375 counter = static_cast<Index>(i + 1);
2376
2377 for(Index j = i+1; j < n; j++)
2378 {
2379 if(sorted_vector(j) <= sorted_vector(i))
2380 {
2381 counter++;
2382 }
2383 else
2384 {
2385 break;
2386 }
2387 }
2388
2389 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
2390 }
2391
2392 #pragma omp critical
2393 {
2394 distances(0) += abs(normal_distribution - empirical_distribution);
2395 distances(1) += abs(uniform_distribution - empirical_distribution);
2396 }
2397 }
2398
2399 return minimal_index(distances);
2400 }
2401
2402
2403 /*
2404 /// Returns a vector with the mean values of all the matrix columns.
2405 /// The size is equal to the number of columns in the matrix.
2406 /// @param matrix Matrix used.
2407 /// @todo to delete.
2408
2409 Tensor<type, 1> columns_mean(const Tensor<type, 2>& matrix)
2410 {
2411
2412 const Index rows_number = matrix.dimension(0);
2413
2414 const Index columns_number = matrix.dimension(1);
2415
2416 #ifdef __OPENNN_DEBUG__
2417
2418 if(rows_number == 0)
2419 {
2420 ostringstream buffer;
2421
2422 buffer << "OpenNN Exception: Statistics class.\n"
2423 << "Tensor<type, 1> mean(const Tensor<type, 2>&) const method.\n"
2424 << "Number of rows must be greater than one.\n";
2425
2426 throw logic_error(buffer.str());
2427 }
2428
2429 #endif
2430
2431 Tensor<type, 1> columns_mean(columns_number);
2432
2433 for(Index i = 0; i < columns_number; i++)
2434 {
2435 Tensor<type, 0> mean = matrix.chip(i,1).mean();
2436
2437 columns_mean(i) = mean(0);
2438 }
2439
2440 return columns_mean;
2441 }
2442
2443 */
2444 /// Returns a vector with the mean values of all the matrix columns.
2445 /// The size is equal to the number of columns in the matrix.
2446 /// @param matrix Matrix used.
2447
mean(const Tensor<type,2> & matrix)2448 Tensor<type, 1> mean(const Tensor<type, 2>& matrix)
2449 {
2450 const Index rows_number = matrix.dimension(0);
2451 const Index columns_number = matrix.dimension(1);
2452
2453 #ifdef __OPENNN_DEBUG__
2454
2455 if(rows_number == 0)
2456 {
2457 ostringstream buffer;
2458
2459 buffer << "OpenNN Exception: Statistics class.\n"
2460 << "Tensor<type, 1> mean(const Tensor<type, 2>&) const method.\n"
2461 << "Number of rows must be greater than one.\n";
2462
2463 throw logic_error(buffer.str());
2464 }
2465
2466 #endif
2467
2468 // Mean
2469
2470 Tensor<type, 1> mean(columns_number);
2471 mean.setZero();
2472
2473 for(Index j = 0; j < columns_number; j++)
2474 {
2475 for(Index i = 0; i < rows_number; i++)
2476 {
2477 if(!::isnan(matrix(i,j)))
2478 {
2479 mean(j) += matrix(i,j);
2480 }
2481 }
2482
2483 mean(j) /= static_cast<type>(rows_number);
2484 }
2485
2486 return mean;
2487 }
2488
2489
2490 /// Returns a vector with the mean values of given columns.
2491 /// The size of the vector is equal to the size of the column indices vector.
2492 /// @param columns_indices Indices of columns.
2493
mean(const Tensor<type,2> & matrix,const Tensor<Index,1> & columns_indices)2494 Tensor<type, 1> mean(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2495 {
2496 const Index rows_number = matrix.dimension(0);
2497
2498 const Index columns_indices_size = columns_indices.size();
2499
2500 Index column_index;
2501
2502 // Mean
2503
2504 Tensor<type, 1> mean(columns_indices_size);
2505 mean.setZero();
2506
2507 for(Index j = 0; j < columns_indices_size; j++)
2508 {
2509 column_index = columns_indices(j);
2510
2511 for(Index i = 0; i < rows_number; i++)
2512 {
2513 mean(j) += matrix(i, column_index);
2514 }
2515
2516 mean(j) /= static_cast<type>(rows_number);
2517 }
2518
2519 return mean;
2520 }
2521
2522
2523 /// Returns a vector with the mean values of given columns for given rows.
2524 /// The size of the vector is equal to the size of the column indices vector.
2525 /// @param matrix Matrix used.
2526 /// @param row_indices Indices of rows.
2527 /// @param columns_indices Indices of columns.
2528
mean(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices,const Tensor<Index,1> & columns_indices)2529 Tensor<type, 1> mean(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
2530 {
2531 const Index row_indices_size = row_indices.size();
2532 const Index columns_indices_size = columns_indices.size();
2533
2534 if (row_indices_size == 0 && columns_indices_size == 0) return NAN;
2535
2536 #ifdef __OPENNN_DEBUG__
2537
2538 const Index rows_number = matrix.dimension(0);
2539 const Index columns_number = matrix.dimension(1);
2540
2541 // Rows check
2542
2543 if(row_indices_size > rows_number)
2544 {
2545 ostringstream buffer;
2546
2547 buffer << "OpenNN Exception: Statistics class.\n"
2548 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2549 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2550 << "Size of row indices(" << row_indices_size << ") is greater than number of rows(" << rows_number << ").\n";
2551
2552 throw logic_error(buffer.str());
2553 }
2554
2555 for(Index i = 0; i < row_indices_size; i++)
2556 {
2557 if(row_indices(i) >= rows_number)
2558 {
2559 ostringstream buffer;
2560
2561 buffer << "OpenNN Exception: Statistics class.\n"
2562 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2563 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2564 << "Row index " << i << " must be less than rows number.\n";
2565
2566 throw logic_error(buffer.str());
2567 }
2568 }
2569
2570 if(row_indices_size == 0)
2571 {
2572 ostringstream buffer;
2573
2574 buffer << "OpenNN Exception: Statistics class.\n"
2575 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2576 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2577 << "Size of row indices must be greater than zero.\n";
2578
2579 throw logic_error(buffer.str());
2580 }
2581
2582 // Columns check
2583
2584 if(columns_indices_size > columns_number)
2585 {
2586 ostringstream buffer;
2587
2588 buffer << "OpenNN Exception: Statistics class.\n"
2589 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2590 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2591 << "Column indices size must be equal or less than columns number.\n";
2592
2593 throw logic_error(buffer.str());
2594 }
2595
2596 for(Index i = 0; i < columns_indices_size; i++)
2597 {
2598 if(columns_indices(i) >= columns_number)
2599 {
2600 ostringstream buffer;
2601
2602 buffer << "OpenNN Exception: Statistics class.\n"
2603 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2604 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2605 << "Column index " << i << " must be less than columns number.\n";
2606
2607 throw logic_error(buffer.str());
2608 }
2609 }
2610
2611 #endif
2612
2613 Index row_index;
2614 Index column_index;
2615
2616 Index count = 0;
2617
2618 // Mean
2619
2620 Tensor<type, 1> mean(columns_indices_size);
2621 mean.setZero();
2622
2623 for(Index j = 0; j < columns_indices_size; j++)
2624 {
2625 column_index = columns_indices(j);
2626
2627 count = 0;
2628
2629 for(Index i = 0; i < row_indices_size; i++)
2630 {
2631 row_index = row_indices(i);
2632
2633 if(!::isnan(matrix(row_index,column_index)))
2634 {
2635 mean(j) += matrix(row_index,column_index);
2636 count++;
2637 }
2638 }
2639
2640 mean(j) /= static_cast<type>(count);
2641 }
2642
2643 return mean;
2644 }
2645
2646
2647 /// Returns a vector with the mean values of all the matrix columns.
2648 /// The size is equal to the number of columns in the matrix.
2649
mean(const Tensor<type,2> & matrix,const Index & column_index)2650 type mean(const Tensor<type, 2>& matrix, const Index& column_index)
2651 {
2652 const Index rows_number = matrix.dimension(0);
2653 const Index columns_number = matrix.dimension(1);
2654
2655 if (rows_number == 0 && columns_number == 0) return NAN;
2656
2657 #ifdef __OPENNN_DEBUG__
2658
2659 if(rows_number == 0)
2660 {
2661 ostringstream buffer;
2662
2663 buffer << "OpenNN Exception: Statistics class.\n"
2664 << "type mean(const Tensor<type, 2>&, const Index&) const method.\n"
2665 << "Number of rows must be greater than one.\n";
2666
2667 throw logic_error(buffer.str());
2668 }
2669
2670 if(column_index >= columns_number)
2671 {
2672 ostringstream buffer;
2673
2674 buffer << "OpenNN Exception: Statistics class.\n"
2675 << "type mean(const Tensor<type, 2>&, const Index&) const method.\n"
2676 << "Index of column must be less than number of columns.\n";
2677
2678 throw logic_error(buffer.str());
2679 }
2680
2681 #endif
2682
2683 if (rows_number == 0 && columns_number == 0) return NAN;
2684
2685 // Mean
2686
2687 type mean = 0;
2688
2689 Index count = 0;
2690
2691 for(Index i = 0; i < rows_number; i++)
2692 {
2693 if(!::isnan(matrix(i,column_index)))
2694 {
2695 mean += matrix(i,column_index);
2696 count++;
2697 }
2698 }
2699
2700 mean /= static_cast<type>(count);
2701
2702 return mean;
2703 }
2704
2705
2706 /// Returns a vector with the median values of all the matrix columns.
2707 /// The size is equal to the number of columns in the matrix.
2708
median(const Tensor<type,2> & matrix)2709 Tensor<type, 1> median(const Tensor<type, 2>& matrix)
2710 {
2711 const Index rows_number = matrix.dimension(0);
2712 const Index columns_number = matrix.dimension(1);
2713
2714 #ifdef __OPENNN_DEBUG__
2715
2716 if(rows_number == 0)
2717 {
2718 ostringstream buffer;
2719
2720 buffer << "OpenNN Exception: Matrix template.\n"
2721 << "Tensor<type, 1> median() const method.\n"
2722 << "Number of rows must be greater than one.\n";
2723
2724 throw logic_error(buffer.str());
2725 }
2726
2727 #endif
2728
2729 // median
2730
2731 Tensor<type, 1> median(columns_number);
2732
2733 for(Index j = 0; j < columns_number; j++)
2734 {
2735 Tensor<type, 1> sorted_column(matrix.chip(j,1));
2736
2737 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2738
2739 if(rows_number % 2 == 0)
2740 {
2741 median(j) = (sorted_column[rows_number*2/4] + sorted_column[rows_number*2/4+1])/2;
2742 }
2743 else
2744 {
2745 median(j) = sorted_column[rows_number*2/4];
2746 }
2747 }
2748
2749 return median;
2750 }
2751
2752
2753 /// Returns a vector with the median values of all the matrix columns.
2754 /// The size is equal to the number of columns in the matrix.
2755
median(const Tensor<type,2> & matrix,const Index & column_index)2756 type median(const Tensor<type, 2>& matrix, const Index& column_index)
2757 {
2758 const Index rows_number = matrix.dimension(0);
2759
2760 #ifdef __OPENNN_DEBUG__
2761
2762 const Index columns_number = matrix.dimension(1);
2763
2764 if(rows_number == 0)
2765 {
2766 ostringstream buffer;
2767
2768 buffer << "OpenNN Exception: Matrix template.\n"
2769 << "type median(const Index&) const method.\n"
2770 << "Number of rows must be greater than one.\n";
2771
2772 throw logic_error(buffer.str());
2773 }
2774
2775 if(column_index >= columns_number)
2776 {
2777 ostringstream buffer;
2778
2779 buffer << "OpenNN Exception: Matrix template.\n"
2780 << "type median(const Index&) const method.\n"
2781 << "Index of column must be less than number of columns.\n";
2782
2783 throw logic_error(buffer.str());
2784 }
2785
2786 #endif
2787
2788 // median
2789
2790 type median = 0;
2791
2792 Tensor<type, 1> sorted_column(matrix.chip(column_index,1));
2793
2794 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2795
2796 if(rows_number % 2 == 0)
2797 {
2798 median = (sorted_column[rows_number*2/4] + sorted_column[rows_number*2/4+1])/2;
2799 }
2800 else
2801 {
2802 median = sorted_column[rows_number*2/4];
2803 }
2804
2805 return median;
2806 }
2807
2808
2809 /// Returns a vector with the median values of given columns.
2810 /// The size of the vector is equal to the size of the column indices vector.
2811 /// @param columns_indices Indices of columns.
2812
2813
median(const Tensor<type,2> & matrix,const Tensor<Index,1> & columns_indices)2814 Tensor<type, 1> median(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2815 {
2816 const Index rows_number = matrix.dimension(0);
2817
2818 const Index columns_indices_size = columns_indices.size();
2819
2820 Index column_index;
2821
2822 // median
2823
2824 Tensor<type, 1> median(columns_indices_size);
2825
2826 for(Index j = 0; j < columns_indices_size; j++)
2827 {
2828 column_index = columns_indices(j);
2829
2830 Tensor<type, 1> sorted_column(matrix.chip(column_index, 1));
2831
2832 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2833
2834 if(rows_number % 2 == 0)
2835 {
2836 median(j) = (sorted_column[rows_number*2/4] + sorted_column[rows_number*2/4+1])/2;
2837 }
2838 else
2839 {
2840 median(j) = sorted_column[rows_number*2/4];
2841 }
2842 }
2843
2844 return median;
2845 }
2846
2847
2848 /// Returns a vector with the median values of given columns for given rows.
2849 /// The size of the vector is equal to the size of the column indices vector.
2850 /// @param row_indices Indices of rows.
2851 /// @param columns_indices Indices of columns.
2852
median(const Tensor<type,2> & matrix,const Tensor<Index,1> & row_indices,const Tensor<Index,1> & columns_indices)2853 Tensor<type, 1> median(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
2854 {
2855
2856 const Index row_indices_size = row_indices.size();
2857 const Index columns_indices_size = columns_indices.size();
2858
2859 #ifdef __OPENNN_DEBUG__
2860
2861 const Index rows_number = matrix.dimension(0);
2862 const Index columns_number = matrix.dimension(1);
2863
2864 // Rows check
2865
2866 if(row_indices_size > rows_number)
2867 {
2868 ostringstream buffer;
2869
2870 buffer << "OpenNN Exception: Matrix template.\n"
2871 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2872 << "Size of row indices(" << row_indices_size << ") is greater than number of rows(" << rows_number << ").\n";
2873
2874 throw logic_error(buffer.str());
2875 }
2876
2877 for(Index i = 0; i < row_indices_size; i++)
2878 {
2879 if(row_indices(i) >= rows_number)
2880 {
2881 ostringstream buffer;
2882
2883 buffer << "OpenNN Exception: Matrix template.\n"
2884 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2885 << "Row index " << i << " must be less than rows number.\n";
2886
2887 throw logic_error(buffer.str());
2888 }
2889 }
2890
2891 if(row_indices_size == 0)
2892 {
2893 ostringstream buffer;
2894
2895 buffer << "OpenNN Exception: Matrix template.\n"
2896 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2897 << "Size of row indices must be greater than zero.\n";
2898
2899 throw logic_error(buffer.str());
2900 }
2901
2902 // Columns check
2903
2904 if(columns_indices_size > columns_number)
2905 {
2906 ostringstream buffer;
2907
2908 buffer << "OpenNN Exception: Matrix template.\n"
2909 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2910 << "Column indices size must be equal or less than columns number.\n";
2911
2912 throw logic_error(buffer.str());
2913 }
2914
2915 for(Index i = 0; i < columns_indices_size; i++)
2916 {
2917 if(columns_indices(i) >= columns_number)
2918 {
2919 ostringstream buffer;
2920
2921 buffer << "OpenNN Exception: Matrix template.\n"
2922 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2923 << "Column index " << i << " must be less than columns number.\n";
2924
2925 throw logic_error(buffer.str());
2926 }
2927 }
2928
2929 #endif
2930
2931 Index column_index;
2932
2933 // median
2934
2935 Tensor<type, 1> median(columns_indices_size);
2936
2937 for(Index j = 0; j < columns_indices_size; j++)
2938 {
2939 column_index = columns_indices(j);
2940
2941 Tensor<type, 1> sorted_column(row_indices_size);
2942
2943 for(Index k = 0; k < row_indices_size; k++)
2944 {
2945 const Index row_index = row_indices(k);
2946
2947 sorted_column(k) = matrix(row_index, column_index);
2948 }
2949
2950 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2951
2952 if(row_indices_size % 2 == 0)
2953 {
2954 median(j) = (sorted_column[row_indices_size*2/4] + sorted_column[row_indices_size*2/4 + 1])/2;
2955 }
2956 else
2957 {
2958 median(j) = sorted_column[row_indices_size * 2 / 4];
2959 }
2960 }
2961
2962 return median;
2963 }
2964
2965
2966 /// Calculates the distance between the empirical distribution of the vector and the
2967 /// normal distribution.
2968 /// @param vector Vector to be evaluated.
2969
normal_distribution_distance(const Tensor<type,1> & vector)2970 type normal_distribution_distance(const Tensor<type, 1>& vector)
2971 {
2972 type normal_distribution_distance = 0;
2973
2974 const Index n = vector.dimension(0);
2975
2976 const type mean_value = mean(vector);
2977 const type standard_deviation = OpenNN::standard_deviation(vector);
2978
2979 type normal_distribution; // Normal distribution
2980 type empirical_distribution; // Empirical distribution
2981
2982 Tensor<type, 1> sorted_vector(vector);
2983
2984 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2985
2986 Index counter = 0;
2987
2988 for(Index i = 0; i < n; i++)
2989 {
2990 normal_distribution = static_cast<type>(0.5) * static_cast<type>(erfc((mean_value - sorted_vector(i)))/(standard_deviation*static_cast<type>(sqrt(2.0))));
2991 counter = 0;
2992
2993 for(Index j = 0; j < n; j++)
2994 {
2995 if(sorted_vector(j) <= sorted_vector(i))
2996 {
2997 counter++;
2998 }
2999 else
3000 {
3001 break;
3002 }
3003 }
3004
3005 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3006
3007 normal_distribution_distance += abs(normal_distribution - empirical_distribution);
3008 }
3009
3010 return normal_distribution_distance;
3011 }
3012
3013
3014 /// Calculates the distance between the empirical distribution of the vector and the
3015 /// half normal distribution.
3016 /// @param vector Vector to be evaluated.
3017
half_normal_distribution_distance(const Tensor<type,1> & vector)3018 type half_normal_distribution_distance(const Tensor<type, 1>& vector)
3019 {
3020 type half_normal_distribution_distance = 0;
3021
3022 const Index n = vector.dimension(0);
3023
3024 const type standard_deviation = OpenNN::standard_deviation(vector);
3025
3026 type half_normal_distribution; // Half normal distribution
3027 type empirical_distribution; // Empirical distribution
3028
3029 Tensor<type, 1> sorted_vector(vector);
3030
3031 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
3032
3033 Index counter = 0;
3034
3035 for(Index i = 0; i < n; i++)
3036 {
3037 half_normal_distribution = static_cast<type>(erf((sorted_vector(i)))/(standard_deviation * static_cast<type>(sqrt(2))));
3038 counter = 0;
3039
3040 for(Index j = 0; j < n; j++)
3041 {
3042 if(sorted_vector(j) <= sorted_vector(i))
3043 {
3044 counter++;
3045 }
3046 else
3047 {
3048 break;
3049 }
3050 }
3051
3052 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3053
3054 half_normal_distribution_distance += abs(half_normal_distribution - empirical_distribution);
3055 }
3056
3057 return half_normal_distribution_distance;
3058 }
3059
3060
3061 /// Calculates the distance between the empirical distribution of the vector and the
3062 /// uniform distribution.
3063 /// @param vector Vector to be evaluated.
3064
uniform_distribution_distance(const Tensor<type,1> & vector)3065 type uniform_distribution_distance(const Tensor<type, 1>& vector)
3066 {
3067 type uniform_distribution_distance = 0;
3068
3069 const Index n = vector.dimension(0);
3070
3071 type uniform_distribution; // Uniform distribution
3072 type empirical_distribution; // Empirical distribution
3073
3074 Tensor<type, 1> sorted_vector(vector);
3075
3076 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
3077
3078 const type minimum = sorted_vector[0];
3079 const type maximum = sorted_vector[n-1];
3080
3081 Index counter = 0;
3082
3083 for(Index i = 0; i < n; i++)
3084 {
3085 uniform_distribution = (sorted_vector(i)-minimum)/(maximum-minimum);
3086 counter = 0;
3087
3088 for(Index j = 0; j < n; j++)
3089 {
3090 if(sorted_vector(j) <= sorted_vector(i))
3091 {
3092 counter++;
3093 }
3094 else
3095 {
3096 break;
3097 }
3098 }
3099
3100 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3101
3102 uniform_distribution_distance += abs(uniform_distribution - empirical_distribution);
3103 }
3104
3105 return uniform_distribution_distance;
3106 }
3107
3108
3109 ///@todo
3110
normality_parameter(const Tensor<type,1> & vector)3111 type normality_parameter(const Tensor<type, 1>& vector)
3112 {
3113 const type max = maximum(vector);
3114 const type min = minimum(vector);
3115
3116 const Index n = vector.dimension(0);
3117
3118 const type mean_value = mean(vector);
3119 const type standard_deviation = OpenNN::standard_deviation(vector);
3120
3121 type normal_distribution;
3122 type empirical_distribution;
3123 type previous_normal_distribution = 0;
3124 type previous_empirical_distribution = 0;
3125
3126 Tensor<type, 1> sorted_vector(vector);
3127
3128 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
3129
3130 type empirical_area = 0;
3131 type normal_area = 0;
3132
3133 Index counter = 0;
3134
3135 for(Index i = 0; i < n; i++)
3136 {
3137 normal_distribution = static_cast<type>(0.5) * static_cast<type>(erfc((mean_value - sorted_vector(i)))/(standard_deviation*static_cast<type>(sqrt(2.0))));
3138 counter = 0;
3139
3140 for(Index j = 0; j < n; j++)
3141 {
3142 if(sorted_vector(j) <= sorted_vector(i))
3143 {
3144 counter++;
3145 }
3146 else
3147 {
3148 break;
3149 }
3150 }
3151
3152 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
3153
3154 if(i == 0)
3155 {
3156 previous_normal_distribution = normal_distribution;
3157 previous_empirical_distribution = empirical_distribution;
3158 }
3159 else
3160 {
3161 normal_area += static_cast<type>(0.5)*(sorted_vector(i)-sorted_vector[i-1])*(normal_distribution+previous_normal_distribution);
3162 empirical_area += static_cast<type>(0.5)*(sorted_vector(i)-sorted_vector[i-1])*(empirical_distribution+previous_empirical_distribution);
3163
3164 previous_normal_distribution = normal_distribution;
3165 previous_empirical_distribution = empirical_distribution;
3166 }
3167 }
3168
3169 const type uniform_area = (max - min)/static_cast<type>(2.0);
3170
3171 return uniform_area;
3172 }
3173
3174
/// Returns the percentage of variation of each element with respect to the previous one,
/// 100*(vector(i) - vector(i-1))/vector(i-1).
/// The first element, and any element whose predecessor is zero, are set to zero.
/// @param vector Vector to be evaluated.
/// @return Vector with the same size as the input.

Tensor<type, 1> variation_percentage(const Tensor<type, 1>& vector)
{
    const Index size = vector.dimension(0);

    // Start from zero so that the entries that are not computed below hold a defined value.
    Tensor<type, 1> new_vector(size);
    new_vector.setZero();

    for(Index i = 1; i < size; i++)
    {
        const type previous = vector(i-1);

        // Only divide when the previous value is not zero; the former "<" test
        // selected exactly the zero predecessors and skipped all the valid ones.
        if(abs(previous) >= numeric_limits<type>::min())
        {
            new_vector(i) = (vector(i) - previous)*static_cast<type>(100.0)/previous;
        }
    }

    return new_vector;
}
3191
3192
3193 /// Returns the index of the smallest element in the vector.
3194
minimal_index(const Tensor<type,1> & vector)3195 Index minimal_index(const Tensor<type, 1>& vector)
3196 {
3197 const Index size = vector.dimension(0);
3198
3199 if(size == 0) return Index();
3200
3201 Index minimal_index = 0;
3202 type minimum = vector[0];
3203
3204 for(Index i = 1; i < size; i++)
3205 {
3206 if(vector(i) < minimum)
3207 {
3208 minimal_index = i;
3209 minimum = vector(i);
3210 }
3211 }
3212
3213 return minimal_index;
3214 }
3215
3216
3217 /// Returns the index of the largest element in the vector.
3218
maximal_index(const Tensor<type,1> & vector)3219 Index maximal_index(const Tensor<type, 1>& vector)
3220 {
3221 const Index size = vector.dimension(0);
3222
3223 if(size == 0) return Index();
3224
3225 Index maximal_index = 0;
3226 type maximum = vector[0];
3227
3228 for(Index i = 1; i < size; i++)
3229 {
3230 if(vector(i) > maximum)
3231 {
3232 maximal_index = i;
3233 maximum = vector(i);
3234 }
3235 }
3236
3237 return maximal_index;
3238 }
3239
3240 /// Returns the indices of the smallest elements in the vector.
3241 /// @param number Number of minimal indices to be computed.
3242
minimal_indices(const Tensor<type,1> & vector,const Index & number)3243 Tensor<Index, 1> minimal_indices(const Tensor<type, 1>& vector, const Index &number)
3244 {
3245 Eigen::Tensor<type, 1> vector_ = vector;
3246
3247 const Index size = vector.dimension(0);
3248 Tensor<Index, 1> minimal_indices(number);
3249 Eigen::Tensor<type, 0> maxim = vector.maximum();
3250
3251 #ifdef __OPENNN_DEBUG__
3252
3253 if(number > size)
3254 {
3255 ostringstream buffer;
3256
3257 buffer << "OpenNN Exception: Statistics class.\n"
3258 << "Tensor<Index, 1> minimal_indices(Tensor<type, 1>& , const Index &) \n"
3259 << "Number of minimal indices to be computed must be lower (or equal) than the size of the imput vector.\n";
3260
3261 throw logic_error(buffer.str());
3262 }
3263 #endif
3264
3265 for(Index j = 0; j < number; j++)
3266 {
3267 Index minimal_index = 0;
3268 type minimum = vector_(0);
3269
3270 for(Index i = 0; i < size; i++)
3271 {
3272 if(vector_(i) < minimum)
3273 {
3274 minimal_index = i;
3275 minimum = vector_(i);
3276 }
3277 }
3278 vector_(minimal_index) = maxim(0)+1;
3279 minimal_indices(j) = minimal_index;
3280 }
3281 return minimal_indices;
3282 }
3283
3284
3285 /// Returns the indices of the largest elements in the vector.
3286 /// @param number Number of maximal indices to be computed.
3287
maximal_indices(const Tensor<type,1> & vector,const Index & number)3288 Tensor<Index, 1> maximal_indices(const Tensor<type, 1>& vector, const Index &number)
3289 {
3290 Eigen::Tensor<type, 1> vector_ = vector;
3291
3292 const Index size = vector.dimension(0);
3293 Tensor<Index, 1> maximal_indices(number);
3294 Eigen::Tensor<type, 0> minim = vector.minimum();
3295
3296 #ifdef __OPENNN_DEBUG__
3297
3298 if(number > size)
3299 {
3300 ostringstream buffer;
3301
3302 buffer << "OpenNN Exception: Statistics class.\n"
3303 << "Tensor<Index, 1> maximal_indices(Tensor<type, 1>& , const Index &) \n"
3304 << "Number of maximal indices to be computed must be lower (or equal) than the size of the imput vector.\n";
3305
3306 throw logic_error(buffer.str());
3307 }
3308 #endif
3309
3310 for(Index j = 0; j < number; j++)
3311 {
3312 Index maximal_index = 0;
3313 type maximal = vector_(0);
3314
3315 for(Index i = 0; i < size; i++)
3316 {
3317 if(vector_(i) > maximal)
3318 {
3319 maximal_index = i;
3320 maximal = vector_(i);
3321 }
3322 }
3323 vector_(maximal_index) = minim(0)-1;
3324 maximal_indices(j) = maximal_index;
3325 }
3326 return maximal_indices;
3327 }
3328
3329 /// Returns the row and column indices corresponding to the entry with minimum value.
3330
minimal_indices(const Tensor<type,2> & matrix)3331 Tensor<Index, 1> minimal_indices(const Tensor<type, 2>& matrix)
3332 {
3333 const Index rows_number = matrix.dimension(0);
3334 const Index columns_number = matrix.dimension(1);
3335
3336 type minimum = matrix(0,0);
3337 Tensor<Index, 1> minimal_indices(2);
3338
3339 for(Index i = 0; i < rows_number; i++)
3340 {
3341 for(Index j = 0; j < columns_number; j++)
3342 {
3343 if(!::isnan(matrix(i,j)) && matrix(i,j) < minimum)
3344 {
3345 minimum = matrix(i,j);
3346 minimal_indices(0) = i;
3347 minimal_indices(1) = j;
3348 }
3349 }
3350 }
3351
3352 return minimal_indices;
3353 }
3354
3355
3356 /// Returns the row and column indices corresponding to the entry with maximum value.
3357
maximal_indices(const Tensor<type,2> & matrix)3358 Tensor<Index, 1> maximal_indices(const Tensor<type, 2>& matrix)
3359 {
3360 const Index rows_number = matrix.dimension(0);
3361 const Index columns_number = matrix.dimension(1);
3362
3363 type maximum = matrix(0,0);
3364
3365 Tensor<Index, 1> maximal_indices(2);
3366
3367 for(Index i = 0; i < rows_number; i++)
3368 {
3369 for(Index j = 0; j < columns_number; j++)
3370 {
3371 if(!::isnan(matrix(i,j)) && matrix(i,j) > maximum)
3372 {
3373 maximum = matrix(i,j);
3374 maximal_indices(0) = i;
3375 maximal_indices(1) = j;
3376 }
3377 }
3378 }
3379
3380 return maximal_indices;
3381 }
3382
3383
3384 /// Returns a matrix in which each of the columns contain the maximal indices of each of the columns of the
3385 /// original matrix.
3386
maximal_columns_indices(const Tensor<type,2> & matrix,const Index & maximum_number)3387 Tensor<Index, 2> maximal_columns_indices(const Tensor<type,2>& matrix, const Index& maximum_number)
3388 {
3389 const Index rows_number = matrix.dimension(0);
3390 const Index columns_number = matrix.dimension(1);
3391
3392 Tensor<Index, 2> maximal_columns_indices(maximum_number, columns_number);
3393
3394 Tensor<type, 1> columns_minimums = OpenNN::columns_minimums(matrix);
3395
3396 for(Index j = 0; j < columns_number; j++)
3397 {
3398 Tensor<type, 1> column = matrix.chip(j,1);
3399
3400 for(Index i = 0; i < maximum_number; i++)
3401 {
3402 Index maximal_index = 0;
3403 type maximal = column(0);
3404
3405 for(Index k = 0; k < rows_number; k++)
3406 {
3407 if(column(k) > maximal && !::isnan(column(k)))
3408 {
3409 maximal_index = k;
3410 maximal = column(k);
3411 }
3412 }
3413
3414 column(maximal_index) = columns_minimums(j)-static_cast<type>(1);
3415 maximal_columns_indices(i,j) = maximal_index;
3416 }
3417 }
3418
3419 return maximal_columns_indices;
3420 }
3421
3422
strongest(const Tensor<type,1> & vector)3423 type strongest(const Tensor<type, 1>& vector)
3424 {
3425 const Index size = vector.dimension(0);
3426
3427 if(size == 0) return 0.0;
3428
3429 type strongest = vector[0];
3430
3431 for(Index i = 0; i < size; i++)
3432 {
3433 if(fabs(vector(i)) > fabs(strongest))
3434 {
3435 strongest = vector(i);
3436 }
3437 }
3438
3439 return strongest;
3440 }
3441
3442
3443 /// Returns the l2 norm of a vector
3444
l2_norm(const Tensor<type,1> & vector)3445 type l2_norm(const Tensor<type, 1>& vector)
3446 {
3447 const Index size = vector.dimension(0);
3448
3449 if(size == 0) return NAN;
3450
3451 type square_sum = 0;
3452
3453 for(Index i = 0; i < size; i++)
3454 {
3455 square_sum = square_sum + vector[i] * vector[i];
3456 }
3457
3458 return sqrt(square_sum);
3459 }
3460
3461
3462 /// Returns a vector containing the means of the subsets which correspond
3463 /// to each of the given integers. The matrix must have 2 columns, the first
3464 /// one containing the integers and the second one the corresponding values.
3465
means_by_categories(const Tensor<type,2> & matrix)3466 Tensor<type, 1> means_by_categories(const Tensor<type, 2>& matrix)
3467 {
3468 /*
3469 const Index integers_number = matrix.size();
3470 Tensor<type, 1> elements_uniques = matrix.get_column(0).get_unique_elements();
3471 Tensor<type, 1> values = matrix.chip(1,1);
3472
3473 #ifdef __OPENNN_DEBUG__
3474
3475 if(integers_number == 0)
3476 {
3477 ostringstream buffer;
3478
3479 buffer << "OpenNN Exception: Matrix template.\n"
3480 << "Tensor<type, 1> calculate_means_integers(const Tensor<type, 2>& \n"
3481 << "Number of integers must be greater than 0.\n";
3482
3483 throw logic_error(buffer.str());
3484 }
3485
3486 #endif
3487
3488 const Index rows_number = matrix.dimension(0);
3489
3490 Tensor<type, 1> means(elements_uniques);
3491
3492 type sum = 0;
3493 Index count = 0;
3494
3495 for(Index i = 0; i < integers_number; i++)
3496 {
3497 sum = 0;
3498 count = 0;
3499
3500 for(unsigned j = 0; j < rows_number; j++)
3501 {
3502 if(matrix(j,0) == elements_uniques(i) && !::isnan(values(j)))
3503 {
3504 sum += matrix(j,1);
3505 count++;
3506 }
3507 }
3508
3509 if(count != 0)
3510 {
3511 means(i) = static_cast<type>(sum)/static_cast<type>(count);
3512
3513 }
3514 else
3515 {
3516 means(i) = 0;
3517 }
3518 }
3519
3520 return means;
3521 */
3522 return Tensor<type, 1>();
3523 }
3524
3525
3526
3527 /// Returns a vector containing the values of the means for the 0s and 1s of a
3528 /// binary column.
3529 /// The matrix must have 2 columns, the first one has to be binary.
3530
means_binary_column(const Tensor<type,2> & matrix)3531 Tensor<type, 1> means_binary_column(const Tensor<type, 2>& matrix)
3532 {
3533 Tensor<type, 1> means(2);
3534 means.setZero();
3535
3536 Index count = 0;
3537
3538 for(Index i = 0; i < matrix.dimension(0); i++)
3539 {
3540 if(abs(matrix(i,0)) < numeric_limits<type>::min())
3541 {
3542 means[0] += matrix(i,1);
3543 count++;
3544 }
3545 else if(static_cast<Index>(matrix(i,0)) == 1)
3546 {
3547 means[1] += matrix(i,1);
3548 count++;
3549 }
3550 }
3551
3552 if(count != 0)
3553 {
3554 means[0] = static_cast<type>(means[0])/static_cast<type>(count);
3555 means[1] = static_cast<type>(means[1])/static_cast<type>(count);
3556 }
3557 else
3558 {
3559 means[0] = 0;
3560 means[1] = 0;
3561 }
3562
3563 return means;
3564 }
3565
3566
3567 /// Returns a vector containing the values of the means for the 1s of each
3568 /// of all binary columns.
3569 /// All the columns except the last one must be binary.
3570
means_binary_columns(const Tensor<type,2> & matrix)3571 Tensor<type, 1> means_binary_columns(const Tensor<type, 2>& matrix)
3572 {
3573 Tensor<type, 1> means(matrix.dimension(1)-1);
3574
3575 type sum = 0;
3576 Index count = 0;
3577
3578 for(Index i = 0; i < matrix.dimension(1)-1; i++)
3579 {
3580 sum = 0;
3581 count = 0;
3582
3583 for(Index j = 0; j < matrix.dimension(0); j++)
3584 {
3585 if(static_cast<Index>(matrix(j,i)) == 1)
3586 {
3587 sum += matrix(j,matrix.dimension(1)-1);
3588
3589 count++;
3590 }
3591 }
3592
3593 if(count != 0)
3594 {
3595 means(i) = static_cast<type>(sum)/static_cast<type>(count);
3596
3597 }
3598 else
3599 {
3600 means(i) = 0;
3601 }
3602 }
3603 return means;
3604 }
3605
3606
3607 ///Returns a vector with the percentiles of a vector given.
3608
percentiles(const Tensor<type,1> & vector)3609 Tensor<type, 1> percentiles(const Tensor<type, 1>& vector)
3610 {
3611 const Index size = vector.dimension(0);
3612
3613 #ifdef __OPENNN_DEBUG__
3614
3615 if(size < 10)
3616 {
3617 ostringstream buffer;
3618
3619 buffer << "OpenNN Exception: vector Template.\n"
3620 << "Tensor<type, 1> percentiles(const Tensor<type, 1>& vector) method.\n"
3621 << "Size must be greater than 10.\n";
3622
3623 throw logic_error(buffer.str());
3624 }
3625
3626 #endif
3627
3628 Index new_size = 0;
3629
3630 for(Index i = 0; i < size; i++)
3631 {
3632 if(!::isnan(vector(i)))
3633 {
3634 new_size++;
3635 }
3636 }
3637
3638 if(new_size == 0)
3639 {
3640 Tensor<type, 1> nan(1);
3641 nan.setValues({static_cast<type>(NAN)});
3642 return nan;
3643 }
3644
3645 Index index = 0;
3646 Tensor<type, 1> new_vector(new_size);
3647
3648 for(Index i = 0; i < size; i++)
3649 {
3650 if(!::isnan(vector(i)))
3651 {
3652 new_vector(index) = vector(i);
3653 index++;
3654 }
3655 }
3656
3657 Tensor<type, 1> sorted_vector(new_vector);
3658
3659 sort(sorted_vector.data(), sorted_vector.data() + new_size, less<type>());
3660
3661
3662 /// Aempirical method
3663 Tensor<type, 1> percentiles(10);
3664
3665 for(Index i = 0; i < 9; i++)
3666 {
3667 if(new_size * (i + 1) % 10 == 0)
3668 percentiles[i] = (sorted_vector[new_size * (i + 1) / 10 - 1] + sorted_vector[new_size * (i + 1) / 10]) / static_cast<type>(2.0);
3669
3670 else
3671 percentiles[i] = static_cast<type>(sorted_vector[new_size * (i + 1) / 10]);
3672 }
3673 percentiles[9] = maximum(new_vector);
3674
3675 return percentiles;
3676 }
3677
3678
3679 /// Returns the weighted mean of the vector.
3680 /// @param weights Weights of the elements of the vector in the mean.
3681
weighted_mean(const Tensor<type,1> & vector,const Tensor<type,1> & weights)3682 type weighted_mean(const Tensor<type, 1>& vector, const Tensor<type, 1>& weights)
3683 {
3684 const Index size = vector.dimension(0);
3685
3686 #ifdef __OPENNN_DEBUG__
3687
3688 if(size == 0)
3689 {
3690 ostringstream buffer;
3691
3692 buffer << "OpenNN Exception: vector Template.\n"
3693 << "type calculate_weighted_mean(const Tensor<type, 1>&) const method.\n"
3694 << "Size must be greater than zero.\n";
3695
3696 throw logic_error(buffer.str());
3697 }
3698
3699 const Index weights_size = weights.size();
3700
3701 if(size != weights_size)
3702 {
3703 ostringstream buffer;
3704
3705 buffer << "OpenNN Exception: vector Template.\n"
3706 << "type calculate_weighted_mean(const Tensor<type, 1>&) "
3707 "const method.\n"
3708 << "Size of weights must be equal to vector size.\n";
3709
3710 throw logic_error(buffer.str());
3711 }
3712 #endif
3713
3714 type weights_sum = 0;
3715
3716 type sum = 0;
3717
3718 for(Index i = 0; i < size; i++)
3719 {
3720 sum += weights(i)*vector(i);
3721 weights_sum += weights(i);
3722 }
3723
3724 const type mean = sum / weights_sum;
3725
3726 return mean;
3727 }
3728
3729 /// Returns the number of nans in the vector.
3730 /// @param vector Vector to count the NANs
3731
count_nan(const Tensor<type,1> & vector)3732 Index count_nan(const Tensor<type, 1>& vector)
3733 {
3734 Index nan_number = 0;
3735
3736 for(Index i = 0; i < vector.dimension(0); i++)
3737 {
3738 if(isnan(vector(i))) nan_number++;
3739 }
3740
3741 return nan_number;
3742 }
3743
3744 }
3745
3746
3747 // OpenNN: Open Neural Networks Library.
3748 // Copyright(C) 2005-2020 Artificial Intelligence Techniques, SL.
3749 //
3750 // This library is free software; you can redistribute it and/or
3751 // modify it under the terms of the GNU Lesser General Public
3752 // License as published by the Free Software Foundation; either
3753 // version 2.1 of the License, or any later version.
3754 //
3755 // This library is distributed in the hope that it will be useful,
3756 // but WITHOUT ANY WARRANTY; without even the implied warranty of
3757 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3758 // Lesser General Public License for more details.
3759
3760 // You should have received a copy of the GNU Lesser General Public
3761 // License along with this library; if not, write to the Free Software
3762 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
3763