1 /*********************************************************************
2 Statistics - Statistical analysis on input dataset.
3 Statistics is part of GNU Astronomy Utilities (Gnuastro) package.
4 
5 Original author:
6      Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2015-2021, Free Software Foundation, Inc.
9 
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14 
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24 
25 #include <math.h>
26 #include <errno.h>
27 #include <error.h>
28 #include <float.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <stdlib.h>
32 
33 #include <gnuastro/wcs.h>
34 #include <gnuastro/fits.h>
35 #include <gnuastro/tile.h>
36 #include <gnuastro/blank.h>
37 #include <gnuastro/pointer.h>
38 #include <gnuastro/arithmetic.h>
39 #include <gnuastro/statistics.h>
40 #include <gnuastro/interpolate.h>
41 #include <gnuastro/permutation.h>
42 
43 #include <gnuastro-internal/timing.h>
44 #include <gnuastro-internal/checkset.h>
45 
46 #include "main.h"
47 
48 #include "ui.h"
49 #include "sky.h"
50 #include "contour.h"
51 #include "statistics.h"
52 
53 
54 
55 
56 
57 /*******************************************************************/
58 /**************           Print in one row           ***************/
59 /*******************************************************************/
60 static gal_data_t *
statistics_pull_out_element(gal_data_t * input,size_t index)61 statistics_pull_out_element(gal_data_t *input, size_t index)
62 {
63   size_t dsize=1;
64   gal_data_t *out=gal_data_alloc(NULL, input->type, 1, &dsize,
65                                  NULL, 1, -1, 1, NULL, NULL, NULL);
66   memcpy( out->array,
67           gal_pointer_increment(input->array, index, input->type),
68           gal_type_sizeof(input->type) );
69   return out;
70 }
71 
72 
73 
74 
75 
76 static double
statistics_read_check_args(struct statisticsparams * p)77 statistics_read_check_args(struct statisticsparams *p)
78 {
79   double d;
80   if(p->tp_args==NULL)
81     error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s so we can "
82           "address the problem. Not enough arguments for the requested "
83           "single measurement options", __func__, PACKAGE_BUGREPORT);
84   d=gal_list_f64_pop(&p->tp_args);
85   return d;
86 }
87 
88 
89 
90 
91 
92 static void
statistics_print_one_row(struct statisticsparams * p)93 statistics_print_one_row(struct statisticsparams *p)
94 {
95   int mustfree;
96   char *toprint;
97   double arg, *d;
98   gal_list_i32_t *tmp;
99   size_t dsize=1, counter;
100   gal_data_t *sum=NULL, *med=NULL, *meanstd=NULL, *modearr=NULL;
101   gal_data_t *tmpv, *sclip=NULL, *out=NULL, *num=NULL, *min=NULL, *max=NULL;
102 
103   /* The user can ask for any of the operators more than once, also some
104      operators might return more than one usable value (like mode). So we
105      will calculate the desired values once, and then print them any number
106      of times. */
107   for(tmp=p->singlevalue; tmp!=NULL; tmp=tmp->next)
108     switch(tmp->v)
109       {
110       /* Calculate respective values. Checking with 'if(num==NULL)' gives
111          compiler warnings of 'this if clause does not guard ...'. So we
112          are using this empty-if and else statement. */
113       case UI_KEY_NUMBER:
114         num = num ? num : gal_statistics_number(p->input);           break;
115       case UI_KEY_MINIMUM:
116         min = min ? min : gal_statistics_minimum(p->input);          break;
117       case UI_KEY_MAXIMUM:
118         max = max ? max : gal_statistics_maximum(p->input);          break;
119       case UI_KEY_SUM:
120         sum = sum ? sum : gal_statistics_sum(p->input);              break;
121       case UI_KEY_MEDIAN:
122         med = med ? med : gal_statistics_median(p->sorted, 0); break;
123       case UI_KEY_STD:
124       case UI_KEY_MEAN:
125       case UI_KEY_QUANTOFMEAN:
126         meanstd = meanstd ? meanstd : gal_statistics_mean_std(p->input);
127         break;
128       case UI_KEY_MODE:
129       case UI_KEY_MODEQUANT:
130       case UI_KEY_MODESYM:
131       case UI_KEY_MODESYMVALUE:
132         modearr = ( modearr
133                     ? modearr
134                     : gal_statistics_mode(p->sorted, p->mirrordist, 0) );
135         d=modearr->array;
136         if(d[2]<GAL_STATISTICS_MODE_GOOD_SYM) d[0]=d[1]=NAN;
137         break;
138       case UI_KEY_SIGCLIPSTD:
139       case UI_KEY_SIGCLIPMEAN:
140       case UI_KEY_SIGCLIPNUMBER:
141       case UI_KEY_SIGCLIPMEDIAN:
142         sclip = ( sclip
143                   ? sclip
144                   : gal_statistics_sigma_clip(p->sorted, p->sclipparams[0],
145                                               p->sclipparams[1], 0, 1) );
146         break;
147 
148       /* Will be calculated as printed. */
149       case UI_KEY_QUANTILE:
150       case UI_KEY_QUANTFUNC:
151         break;
152 
153       /* The option isn't recognized. */
154       default:
155         error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s so we "
156               "can address the problem. Operation code %d not recognized",
157               __func__, PACKAGE_BUGREPORT, tmp->v);
158       }
159 
160 
161   /* Print every requested number. */
162   counter=0;
163   for(tmp=p->singlevalue; tmp!=NULL; tmp=tmp->next)
164     {
165       /* By default don't free anything. */
166       mustfree=0;
167 
168       /* Get the output. */
169       switch(tmp->v)
170         {
171         /* Previously calculated values. */
172         case UI_KEY_NUMBER:     out=num;                  break;
173         case UI_KEY_MINIMUM:    out=min;                  break;
174         case UI_KEY_MAXIMUM:    out=max;                  break;
175         case UI_KEY_SUM:        out=sum;                  break;
176         case UI_KEY_MEDIAN:     out=med;                  break;
177         case UI_KEY_MEAN:
178           out=statistics_pull_out_element(meanstd, 0); mustfree=1; break;
179         case UI_KEY_STD:
180           out=statistics_pull_out_element(meanstd, 1); mustfree=1; break;
181         case UI_KEY_MODE:
182           out=statistics_pull_out_element(modearr, 0); mustfree=1; break;
183         case UI_KEY_MODEQUANT:
184           out=statistics_pull_out_element(modearr, 1); mustfree=1; break;
185         case UI_KEY_MODESYM:
186           out=statistics_pull_out_element(modearr, 2); mustfree=1; break;
187         case UI_KEY_MODESYMVALUE:
188           out=statistics_pull_out_element(modearr, 3); mustfree=1; break;
189         case UI_KEY_SIGCLIPSTD:
190           out=statistics_pull_out_element(sclip,   3); mustfree=1; break;
191         case UI_KEY_SIGCLIPMEAN:
192           out=statistics_pull_out_element(sclip,   2); mustfree=1; break;
193         case UI_KEY_SIGCLIPMEDIAN:
194           out=statistics_pull_out_element(sclip,   1); mustfree=1; break;
195         case UI_KEY_SIGCLIPNUMBER:
196           out=statistics_pull_out_element(sclip,   0); mustfree=1; break;
197 
198         /* Not previously calculated. */
199         case UI_KEY_QUANTILE:
200           mustfree=1;
201           arg = statistics_read_check_args(p);
202           out = gal_statistics_quantile(p->sorted, arg, 0);
203           break;
204 
205         case UI_KEY_QUANTFUNC:
206           mustfree=1;
207           arg = statistics_read_check_args(p);
208           tmpv = gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &dsize,
209                                 NULL, 1, -1, 1, NULL, NULL, NULL);
210           *((double *)(tmpv->array)) = arg;
211           tmpv = gal_data_copy_to_new_type_free(tmpv, p->input->type);
212           out = gal_statistics_quantile_function(p->sorted, tmpv, 0);
213           gal_data_free(tmpv);
214           break;
215 
216         case UI_KEY_QUANTOFMEAN:
217           mustfree=1;
218           tmpv=statistics_pull_out_element(meanstd, 0);
219           out = gal_statistics_quantile_function(p->sorted, tmpv, 0);
220           gal_data_free(tmpv);
221           break;
222 
223         /* The option isn't recognized. */
224         default:
225           error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s so we "
226                 "can address the problem. Operation code %d not recognized",
227                 __func__, PACKAGE_BUGREPORT, tmp->v);
228         }
229 
230       /* Print the number. Note that we don't want any extra white space
231          characters before or after the printed outputs. So we have defined
232          'counter' to add a single white space character before any element
233          except the first one. */
234       toprint=gal_type_to_string(out->array, out->type, 0);
235       printf("%s%s", counter ? " " : "", toprint);
236       free(toprint);
237 
238       /* Clean up (if necessary). */
239       ++counter;
240       if(mustfree) gal_data_free(out);
241     }
242 
243 
244   /* Print a new line. */
245   printf("\n");
246 
247 
248   /* Clean any of the allocated arrays. */
249   if(num)     gal_data_free(num);
250   if(min)     gal_data_free(min);
251   if(max)     gal_data_free(max);
252   if(sum)     gal_data_free(sum);
253   if(med)     gal_data_free(med);
254   if(sclip)   gal_data_free(sclip);
255   if(meanstd) gal_data_free(meanstd);
256   if(modearr) gal_data_free(modearr);
257 }
258 
259 
260 
261 
262 
263 
264 
265 
266 
267 
268 
269 
270 
271 
272 
273 
274 
275 
276 
277 
278 /*******************************************************************/
279 /**************         Single value on tile         ***************/
280 /*******************************************************************/
281 static void
statistics_interpolate_and_write(struct statisticsparams * p,gal_data_t * values,char * output)282 statistics_interpolate_and_write(struct statisticsparams *p,
283                                  gal_data_t *values, char *output)
284 {
285   gal_data_t *interpd;
286   struct gal_options_common_params *cp=&p->cp;
287 
288   /* Do the interpolation (if necessary). */
289   if( p->interpolate
290       && !(p->cp.interponlyblank && gal_blank_present(values, 1)==0) )
291     {
292       interpd=gal_interpolate_neighbors(values, &cp->tl,
293                                         cp->interpmetric,
294                                         cp->interpnumngb,
295                                         cp->numthreads,
296                                         cp->interponlyblank, 0,
297                                         GAL_INTERPOLATE_NEIGHBORS_FUNC_MEDIAN);
298       gal_data_free(values);
299       values=interpd;
300     }
301 
302   /* Write the values. */
303   gal_tile_full_values_write(values, &cp->tl, !p->ignoreblankintiles,
304                              output, NULL, PROGRAM_NAME);
305   gal_fits_key_write_filename("input", p->inputname, &p->cp.okeys, 1,
306                               p->cp.quiet);
307   gal_fits_key_write_config(&p->cp.okeys, "Statistics configuration",
308                             "STATISTICS-CONFIG", output, "0");
309 }
310 
311 
312 
313 
314 
315 static void
statistics_on_tile(struct statisticsparams * p)316 statistics_on_tile(struct statisticsparams *p)
317 {
318   double arg=0;
319   gal_list_i32_t *operation;
320   gal_data_t *tile, *values;
321   size_t tind, dsize=1, mind=-1;
322   uint8_t type=GAL_TYPE_INVALID;
323   gal_data_t *tmp=NULL, *tmpv=NULL, *ttmp;
324   struct gal_options_common_params *cp=&p->cp;
325   struct gal_tile_two_layer_params *tl=&p->cp.tl;
326   char *output=gal_checkset_automatic_output(cp, cp->output
327                                              ? cp->output
328                                              : p->inputname,
329                                              "_ontile.fits");
330 
331   /* Do the operation on each tile. */
332   for(operation=p->singlevalue; operation!=NULL; operation=operation->next)
333     {
334       /* Set the type of the output array. */
335       switch(operation->v)
336         {
337         case UI_KEY_NUMBER:
338           type=GAL_TYPE_INT32; break;
339 
340         case UI_KEY_MINIMUM:
341         case UI_KEY_MAXIMUM:
342         case UI_KEY_MEDIAN:
343         case UI_KEY_MODE:
344         case UI_KEY_QUANTFUNC:
345           type=p->input->type; break;
346 
347         case UI_KEY_SUM:
348         case UI_KEY_MEAN:
349         case UI_KEY_STD:
350         case UI_KEY_QUANTILE:
351         case UI_KEY_MODEQUANT:
352         case UI_KEY_MODESYM:
353         case UI_KEY_MODESYMVALUE:
354           type=GAL_TYPE_FLOAT64; break;
355 
356         default:
357           error(EXIT_FAILURE, 0, "%s: a bug! %d is not a recognized "
358                 "operation code", __func__, operation->v);
359         }
360 
361       /* Allocate the space necessary to keep the value for each tile. */
362       values=gal_data_alloc(NULL, type, p->input->ndim, tl->numtiles, NULL,
363                             0, p->input->minmapsize, p->cp.quietmmap,
364                             NULL, NULL, NULL);
365 
366       /* Read the argument for those operations that need it. This is done
367          here, because below, the functions are repeated on each tile. */
368       switch(operation->v)
369         {
370         case UI_KEY_QUANTILE:
371           arg = statistics_read_check_args(p);
372           break;
373         case UI_KEY_QUANTFUNC:
374           arg = statistics_read_check_args(p);
375           tmpv = gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &dsize,
376                                 NULL, 1, -1, 1, NULL, NULL, NULL);
377           *((double *)(tmpv->array)) = arg;
378           tmpv = gal_data_copy_to_new_type_free(tmpv, p->input->type);
379         }
380 
381       /* Do the operation on each tile. */
382       tind=0;
383       for(tile=tl->tiles; tile!=NULL; tile=tile->next)
384         {
385           /* Do the proper operation. */
386           switch(operation->v)
387             {
388             case UI_KEY_NUMBER:
389               tmp=gal_statistics_number(tile);                      break;
390 
391             case UI_KEY_MINIMUM:
392               tmp=gal_statistics_minimum(tile);                     break;
393 
394             case UI_KEY_MAXIMUM:
395               tmp=gal_statistics_maximum(tile);                     break;
396 
397             case UI_KEY_MEDIAN:
398               tmp=gal_statistics_median(tile, 1);                   break;
399 
400             case UI_KEY_QUANTFUNC:
401               tmp=gal_statistics_quantile_function(tile, tmpv, 1);  break;
402 
403             case UI_KEY_SUM:
404               tmp=gal_statistics_sum(tile);                         break;
405 
406             case UI_KEY_MEAN:
407               tmp=gal_statistics_mean(tile);                        break;
408 
409             case UI_KEY_STD:
410               tmp=gal_statistics_std(tile);                         break;
411 
412             case UI_KEY_QUANTILE:
413               tmp=gal_statistics_quantile(tile, arg, 1);            break;
414 
415             case UI_KEY_MODE:
416             case UI_KEY_MODESYM:
417             case UI_KEY_MODEQUANT:
418             case UI_KEY_MODESYMVALUE:
419               switch(operation->v)
420                 {
421                 case UI_KEY_MODE:         mind=0;  break;
422                 case UI_KEY_MODESYM:      mind=2;  break;
423                 case UI_KEY_MODEQUANT:    mind=1;  break;
424                 case UI_KEY_MODESYMVALUE: mind=3;  break;
425                 }
426               tmp=gal_statistics_mode(tile, p->mirrordist, 1);
427               ttmp=statistics_pull_out_element(tmp, mind);
428               gal_data_free(tmp);
429               tmp=ttmp;
430               break;
431 
432             default:
433               error(EXIT_FAILURE, 0, "%s: a bug! please contact us at %s to "
434                     "fix the problem. The operation code %d is not "
435                     "recognized", __func__, PACKAGE_BUGREPORT, operation->v);
436             }
437 
438           /* Put the output value into the 'values' array and clean up. */
439           tmp=gal_data_copy_to_new_type_free(tmp, type);
440           memcpy(gal_pointer_increment(values->array, tind++, values->type),
441                  tmp->array, gal_type_sizeof(type));
442           gal_data_free(tmp);
443         }
444 
445       /* Do the interpolation (if necessary) and write the array into the
446          output. */
447       statistics_interpolate_and_write(p, values, output);
448 
449       /* Clean up. */
450       gal_data_free(values);
451       if(operation->v==UI_KEY_QUANTFUNC) gal_data_free(tmpv);
452     }
453 
454   /* Clean up. */
455   free(output);
456 }
457 
458 
459 
460 
461 
462 
463 
464 
465 
466 
467 
468 
469 
470 
471 
472 
473 
474 
475 
476 /*******************************************************************/
477 /**************             ASCII plots              ***************/
478 /*******************************************************************/
479 static void
print_ascii_plot(struct statisticsparams * p,gal_data_t * plot,gal_data_t * bins,int h1_c0,int printinfo)480 print_ascii_plot(struct statisticsparams *p, gal_data_t *plot,
481                  gal_data_t *bins, int h1_c0, int printinfo)
482 {
483   int i, j;
484   size_t *s, *sf, max=0;
485   double *b, v, halfbinwidth, correction;
486 
487   /* Find the maximum of the plot. */
488   sf=(s=plot->array)+plot->size; do max = *s>max ? *s : max; while(++s<sf);
489 
490   /* Print the range so the user knows. */
491   if(printinfo)
492     {
493       b=bins->array;
494       halfbinwidth = (b[1]-b[0])/2;
495       printf("\nASCII %s:\n", ( h1_c0 ? "Histogram" :
496                                 "Cumulative frequency plot") );
497       if(h1_c0) printf("Number: %zu\n", p->input->size);
498       printf("Y: (linear: 0 to %zu)\n", max);
499       printf("X: (linear: %g -- %g, in %zu bins)\n", b[0]-halfbinwidth,
500              b[ bins->size - 1 ] + halfbinwidth, bins->size);
501     }
502 
503   /* Print the ASCII plot: */
504   s=plot->array;
505   correction = (double)(p->asciiheight) / (double)max;
506   for(i=p->asciiheight;i>=0;--i)
507     {
508       printf(" |");
509       for(j=0;j<plot->size;++j)
510         {
511           v = (double)s[j] * correction;
512           if( v >= ((double)i-0.5f) && v > 0.0f ) printf("*");
513           else printf(" ");
514         }
515       printf("\n");
516     }
517   printf(" |");
518   for(j=0;j<plot->size;++j) printf("-");
519   printf("\n\n");
520 }
521 
522 
523 
524 
525 
526 /* Data structure that must be fed into 'gal_statistics_regular_bins'.*/
527 static gal_data_t *
set_bin_range_params(struct statisticsparams * p,size_t dim)528 set_bin_range_params(struct statisticsparams *p, size_t dim)
529 {
530   size_t rsize=2;
531   gal_data_t *range=NULL;
532 
533   if(p->manualbinrange)
534     {
535       /* Allocate the range data structure. */
536       range=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &rsize, NULL,
537                            0, -1, 1, NULL, NULL, NULL);
538       switch(dim)
539         {
540         case 1:
541           ((float *)(range->array))[0]=p->greaterequal;
542           ((float *)(range->array))[1]=p->lessthan;
543           break;
544         case 2:
545           ((float *)(range->array))[0]=p->greaterequal2;
546           ((float *)(range->array))[1]=p->lessthan2;
547           break;
548         default:
549           error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to "
550                 "address the problem. The value %zu for 'dim' isn't "
551                 "recogized", __func__, PACKAGE_BUGREPORT, dim);
552         }
553     }
554   return range;
555 }
556 
557 
558 
559 
560 
561 static void
ascii_plots(struct statisticsparams * p)562 ascii_plots(struct statisticsparams *p)
563 {
564   gal_data_t *bins, *hist, *cfp=NULL, *range=NULL;
565 
566   /* Make the bins and the respective plot. */
567   range=set_bin_range_params(p, 1);
568   bins=gal_statistics_regular_bins(p->input, range, p->numasciibins, NAN);
569   hist=gal_statistics_histogram(p->input, bins, 0, 0);
570   if(p->asciicfp)
571     {
572       bins->next=hist;
573       cfp=gal_statistics_cfp(p->input, bins, 0);
574     }
575 
576   /* Print the plots. */
577   if(p->asciihist)  print_ascii_plot(p, hist, bins, 1, 1);
578   if(p->asciicfp)   print_ascii_plot(p, cfp,  bins, 0, 1);
579 
580   /* Clean up.*/
581   gal_data_free(bins);
582   gal_data_free(hist);
583   gal_data_free(range);
584   if(p->asciicfp) gal_data_free(cfp);
585 }
586 
587 
588 
589 
590 
591 
592 
593 
594 
595 
596 
597 
598 
599 
600 
601 
602 
603 
604 
605 
606 /*******************************************************************/
607 /*******    Histogram and cumulative frequency tables    ***********/
608 /*******************************************************************/
609 static char *
statistics_output_name(struct statisticsparams * p,char * suf,int * isfits)610 statistics_output_name(struct statisticsparams *p, char *suf, int *isfits)
611 {
612   int use_auto_output=0;
613   char *out, *fix, *suffix=NULL;
614 
615   /* Automatic output should be used when no output name was specified or
616      we have more than one output file. */
617   use_auto_output = p->cp.output ? (p->numoutfiles>1 ? 1 : 0) : 1;
618 
619   /* Set the 'fix' and 'suffix' strings. Note that 'fix' is necessary in
620      every case, even when no automatic output is to be used. Since it is
621      used to determine the format of the output. */
622   fix = ( *isfits
623           ? "fits"
624           : ( p->cp.output
625               ? gal_fits_name_is_fits(p->cp.output) ? "fits" : "txt"
626               : "txt" ) );
627   if(use_auto_output)
628     if( asprintf(&suffix, "%s.%s", suf, fix)<0 )
629       error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
630 
631   /* Make the output name. */
632   out = ( use_auto_output
633           ? gal_checkset_automatic_output(&p->cp, p->inputname, suffix)
634           : p->cp.output );
635 
636   /* See if it is a FITS file. */
637   *isfits=strcmp(fix, "fits")==0;
638 
639   /* Make sure it doesn't already exist. */
640   gal_checkset_writable_remove(out, 0, p->cp.dontdelete);
641 
642   /* Clean up and return. */
643   if(suffix) free(suffix);
644   return out;
645 }
646 
647 
648 
649 
650 
651 void
write_output_table(struct statisticsparams * p,gal_data_t * table,char * suf,char * contents)652 write_output_table(struct statisticsparams *p, gal_data_t *table,
653                    char *suf, char *contents)
654 {
655   int isfits=0;
656   char *tmp, *output;
657   gal_list_str_t *comments=NULL;
658 
659   /* Set the output. */
660   output=statistics_output_name(p, suf, &isfits);
661 
662   /* Write the comments, NOTE: we are writing the first two in reverse of
663      the order we want them. They will later be freed as part of the list's
664      freeing.*/
665   tmp=gal_fits_name_save_as_string(p->inputname, p->cp.hdu);
666   gal_list_str_add(&comments, tmp, 0);
667 
668   if( asprintf(&tmp, "%s created from:", contents)<0 )
669     error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
670   gal_list_str_add(&comments, tmp, 0);
671 
672   if(!isfits)  /* The intro info will be in FITS files anyway.*/
673     gal_table_comments_add_intro(&comments, PROGRAM_STRING, &p->rawtime);
674 
675 
676   /* Write the table. */
677   gal_checkset_writable_remove(output, 0, p->cp.dontdelete);
678   gal_table_write(table, NULL, comments, p->cp.tableformat, output,
679                   "TABLE", 0);
680 
681 
682   /* Write the configuration information if we have a FITS output. */
683   if(isfits)
684     {
685       gal_fits_key_write_filename("input", p->inputname, &p->cp.okeys, 1,
686                                   p->cp.quiet);
687       gal_fits_key_write_config(&p->cp.okeys, "Statistics configuration",
688                                 "STATISTICS-CONFIG", output, "0");
689     }
690 
691 
692   /* Let the user know, if we aren't in quiet mode. */
693   if(!p->cp.quiet)
694     printf("%s created.\n", output);
695 
696 
697   /* Clean up. */
698   gal_list_str_free(comments, 1);
699   if(output!=p->cp.output) free(output);
700 }
701 
702 
703 
704 
705 
706 static void
save_hist_and_or_cfp(struct statisticsparams * p)707 save_hist_and_or_cfp(struct statisticsparams *p)
708 {
709   char *suf, *contents;
710   gal_data_t *bins, *hist, *cfp=NULL, *range=NULL;
711 
712   /* Set the bins and make the histogram, this is necessary for both the
713      histogram and CFP (recall that the CFP is built from the
714      histogram). */
715   range=set_bin_range_params(p, 1);
716   bins=gal_statistics_regular_bins(p->input, range, p->numbins,
717                                    p->onebinstart);
718   hist=gal_statistics_histogram(p->input, bins, p->normalize, p->maxbinone);
719 
720 
721   /* Set the histogram as the next pointer of bins. This is again necessary
722      in both cases: when only a histogram is requested, it is used for the
723      plotting. When only a CFP is desired, it is used as input into
724      'gal_statistics_cfp'. */
725   bins->next=hist;
726 
727 
728   /* Make the cumulative frequency plot if the user wanted it. Make the
729      CFP, note that for the CFP, 'maxbinone' and 'normalize' are the same:
730      the last bin (largest value) must be one. So if any of them are given,
731      then set the last argument to 1.*/
732   if(p->cumulative)
733     cfp=gal_statistics_cfp(p->input, bins, p->normalize || p->maxbinone);
734 
735 
736   /* FITS tables don't accept 'uint64_t', so to be consistent, we'll conver
737      the histogram and CFP to 'uint32_t'.*/
738   if(hist->type==GAL_TYPE_UINT64)
739     hist=gal_data_copy_to_new_type_free(hist, GAL_TYPE_UINT32);
740   if(cfp && cfp->type==GAL_TYPE_UINT64)
741     cfp=gal_data_copy_to_new_type_free(cfp, GAL_TYPE_UINT32);
742 
743 
744   /* Finalize the next pointers. */
745   bins->next=hist;
746   hist->next=cfp;
747 
748 
749   /* Prepare the contents. */
750   if(p->histogram && p->cumulative)
751     { suf="-hist-cfp"; contents="Histogram and cumulative frequency plot"; }
752   else if(p->histogram)
753     { suf="-hist";     contents="Histogram"; }
754   else
755     { suf="-cfp";      contents="Cumulative frequency plot"; }
756 
757 
758   /* Set the output file name. */
759   write_output_table(p, bins, suf, contents);
760 
761   /* Clean up. */
762   gal_data_free(range);
763 }
764 
765 
766 
767 
768 /* In the WCS standard, '-' is meaningful, so if a column name contains
769    '-', it should be changed to '_'. */
770 static char *
histogram_2d_set_ctype(char * orig,char * backup)771 histogram_2d_set_ctype(char *orig, char *backup)
772 {
773   char *c, *out=NULL;
774 
775   /* If an original name exists, then check it. Otherwise, just return the
776      backup string so 'CTYPE' isn't left empty. */
777   if(orig)
778     {
779       /* Copy the original name into a newly allocated space because we
780          later want to free it. */
781       gal_checkset_allocate_copy(orig, &out);
782 
783       /* Parse the copy and if a dash is present, correct it. */
784       for(c=out; *c!='\0'; ++c) if(*c=='-') *c='_';
785     }
786   else
787     gal_checkset_allocate_copy(backup, &out);
788 
789   /* Return the final string. */
790   return out;
791 }
792 
793 
794 
795 
796 
797 static void
histogram_2d(struct statisticsparams * p)798 histogram_2d(struct statisticsparams *p)
799 {
800   int isfits=1;
801   int32_t *imgarr;
802   double *d1, *d2;
803   uint32_t *histarr;
804   gal_data_t *range1, *range2;
805   gal_data_t *img, *hist2d, *bins;
806   size_t i, j, dsize[2], nb1=p->numbins, nb2=p->numbins2;
807   char *output, suf[]="-hist2d", contents[]="2D Histogram";
808 
809   /* WCS-related arrays. */
810   char *cunit[2], *ctype[2];
811   double crpix[2], crval[2], cdelt[2], pc[4]={1,0,0,1};
812 
813   /* Set the bins for each dimension */
814   range1=set_bin_range_params(p, 1);
815   range2=set_bin_range_params(p, 2);
816   bins=gal_statistics_regular_bins(p->input, range1, nb1,
817                                    p->onebinstart);
818   bins->next=gal_statistics_regular_bins(p->input->next, range2,
819                                          nb2, p->onebinstart2);
820 
821   /* Build the 2D histogram. */
822   hist2d=gal_statistics_histogram2d(p->input, bins);
823 
824   /* Write the histogram into a 2D FITS image. Note that in the FITS image
825      standard, the first axis is the fastest array (unlike the default
826      format we have adopted for the tables). */
827   if(!strcmp(p->histogram2d,"image"))
828     {
829       /* Allocate the 2D image array. Note that 'dsize' is in the order of
830          C, where the first dimension is the slowest. However, in FITS the
831          fastest dimension is the first. */
832       dsize[0]=nb2;
833       dsize[1]=nb1;
834       histarr=hist2d->next->next->array;
835       img=gal_data_alloc(NULL, GAL_TYPE_INT32, 2, dsize, NULL, 0,
836                          p->cp.minmapsize, p->cp.quietmmap,
837                          NULL, NULL, NULL);
838 
839       /* Fill the array values. */
840       imgarr=img->array;
841       for(i=0;i<nb2;++i)
842         for(j=0;j<nb1;++j)
843           imgarr[i*nb1+j]=histarr[j*nb2+i];
844 
845       /* Set the WCS. */
846       d1=bins->array;
847       d2=bins->next->array;
848       crpix[0] = 1;                   crpix[1] = 1;
849       crval[0] = d1[0];               crval[1] = d2[0];
850       cdelt[0] = d1[1]-d1[0];         cdelt[1] = d2[1]-d2[0];
851       cunit[0] = p->input->unit;      cunit[1] = p->input->next->unit;
852       ctype[0] = histogram_2d_set_ctype(p->input->name, "X");
853       ctype[1] = histogram_2d_set_ctype(p->input->next->name, "Y");
854       img->wcs=gal_wcs_create(crpix, crval, cdelt, pc, cunit, ctype, 2,
855                               p->cp.wcslinearmatrix);
856 
857       /* Write the output. */
858       output=statistics_output_name(p, suf, &isfits);
859       gal_fits_img_write(img, output, NULL, PROGRAM_STRING);
860       gal_fits_key_write_filename("input", p->inputname, &p->cp.okeys, 1,
861                                   p->cp.quiet);
862       gal_fits_key_write_config(&p->cp.okeys, "Statistics configuration",
863                                 "STATISTICS-CONFIG", output, "0");
864 
865       /* Clean up and let the user know that the histogram is built. */
866       free(ctype[0]);
867       free(ctype[1]);
868       gal_data_free(img);
869       if(!p->cp.quiet) printf("%s created.\n", output);
870     }
871   /* Write 2D histogram as a table. */
872   else
873     write_output_table(p, hist2d, suf, contents);
874 
875   /* Clean up. */
876   gal_data_free(range1);
877   gal_data_free(range2);
878   gal_list_data_free(bins);
879   gal_list_data_free(hist2d);
880 }
881 
882 
883 
884 
885 
886 void
print_mirror_hist_cfp(struct statisticsparams * p)887 print_mirror_hist_cfp(struct statisticsparams *p)
888 {
889   size_t dsize=1;
890   gal_data_t *table;
891   double mirror_val;
892   gal_data_t *mirror=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &dsize,
893                                     NULL, 1, -1, 1, NULL, NULL, NULL);
894 
895   /* Convert the given mirror value into the type of the input dataset. */
896   *((double *)(mirror->array)) = p->mirror;
897   mirror=gal_data_copy_to_new_type_free(mirror, p->input->type);
898 
899   /* Make the table columns. */
900   table=gal_statistics_mode_mirror_plots(p->sorted, mirror, p->numbins, 0,
901                                          &mirror_val);
902 
903   if(p->mirror!=mirror_val)
904     {
905       fprintf(stderr, "Warning: Mirror value is %f.\n", mirror_val);
906       if(!p->cp.quiet)
907         fprintf(stderr, "\nNote that the mirror distribution is discrete "
908                 "and depends on the input data. So the closest point in "
909                 "the data to your desired mirror at %f was %f.\n\n",
910                 p->mirror, mirror_val);
911     }
912 
913   /* If the mirror value was out-of-range, then no table will be made. */
914   if(table)
915     write_output_table(p, table, "_mirror_hist_cfp",
916                        "Histogram and CFP of mirror distribution");
917   else
918     error(EXIT_FAILURE, 0, "%s: mirror value %g is out of range",
919           __func__, p->mirror);
920 }
921 
922 
923 
924 
925 
926 
927 
928 
929 
930 
931 
932 
933 
934 
935 
936 
937 
938 
939 /*******************************************************************/
940 /**************           Basic information          ***************/
941 /*******************************************************************/
942 /* To keep things in 'print_basics' clean, we'll define the input data
943    here, then only print the values there. */
944 void
print_input_info(struct statisticsparams * p)945 print_input_info(struct statisticsparams *p)
946 {
947   char *str, *name, *col=NULL;
948 
949   /* Print the program name and version. */
950   printf("%s\n", PROGRAM_NAME);
951 
952   /* Print the input information, if the input was a table, we also need to
953      give the column information. When the column has a name, it will be
954      printed, when it doesn't, we'll use the same string the user gave. */
955   printf("-------\n");
956   name=gal_fits_name_save_as_string(p->inputname, p->cp.hdu);
957   printf("Input: %s\n", name);
958 
959   /* If a table was given, print the column. */
960   if(p->columns) printf("Column: %s\n",
961                         p->input->name ? p->input->name : p->columns->v);
962 
963   /* Range. */
964   str=NULL;
965   if( !isnan(p->greaterequal) && !isnan(p->lessthan) )
966     {
967       if( asprintf(&str, "from (inclusive) %g, up to (exclusive) %g",
968                    p->greaterequal, p->lessthan)<0 )
969         error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
970     }
971   else if( !isnan(p->greaterequal) )
972     {
973       if( asprintf(&str, "from (inclusive) %g", p->greaterequal)<0 )
974         error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
975     }
976   else if( !isnan(p->lessthan) )
977     {
978       if( asprintf(&str, "up to (exclusive) %g", p->lessthan)<0 )
979         error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
980     }
981   if(str)
982     {
983       printf("Range: ");
984       if(p->refcol)
985         printf("[on column %s] ",
986                p->reference->name ? p->reference->name : p->refcol);
987       printf("%s.\n", str);
988       free(str);
989     }
990 
991   /* Units. */
992   if(p->input->unit) printf("Unit: %s\n", p->input->unit);
993 
994   /* Clean up. */
995   if(col) free(col);
996   free(name);
997   printf("-------\n");
998 }
999 
1000 
1001 
1002 
1003 
1004 /* This function will report the simple immediate statistics of the
1005    data. For the average and standard deviation, the unsorted data is
1006    used so we don't suddenly encounter rounding errors. */
1007 void
print_basics(struct statisticsparams * p)1008 print_basics(struct statisticsparams *p)
1009 {
1010   char *str;
1011   int namewidth=40;
1012   float mirrdist=1.5;
1013   double mean, std, *d;
1014   gal_data_t *tmp, *bins, *hist, *range=NULL;
1015 
1016   /* Define the input dataset. */
1017   print_input_info(p);
1018 
1019   /* Print the number: */
1020   printf("  %-*s %zu\n", namewidth, "Number of elements:", p->input->size);
1021 
1022   /* Minimum: */
1023   tmp=gal_statistics_minimum(p->input);
1024   str=gal_type_to_string(tmp->array, tmp->type, 0);
1025   printf("  %-*s %s\n", namewidth, "Minimum:", str);
1026   gal_data_free(tmp);
1027   free(str);
1028 
1029   /* Maximum: */
1030   tmp=gal_statistics_maximum(p->input);
1031   str=gal_type_to_string(tmp->array, tmp->type, 0);
1032   printf("  %-*s %s\n", namewidth, "Maximum:", str);
1033   gal_data_free(tmp);
1034   free(str);
1035 
1036   /* Find the mean and standard deviation, but don't print them, see
1037      explanations under median. */
1038   tmp=gal_statistics_mean_std(p->input);
1039   mean = ((double *)(tmp->array))[0];
1040   std  = ((double *)(tmp->array))[1];
1041   gal_data_free(tmp);
1042 
1043   /* Mode of the distribution (if it is valid). we want the mode and median
1044      to be found in place to save time/memory. But having a sorted array
1045      can decrease the floating point accuracy of the standard deviation. So
1046      we'll do the median calculation in the end.*/
1047   tmp=gal_statistics_mode(p->input, mirrdist, 1);
1048   d=tmp->array;
1049   if(d[2]>GAL_STATISTICS_MODE_GOOD_SYM)
1050     {        /* Same format as 'gal_data_write_to_string' */
1051       printf("  %-*s %.10g\n", namewidth, "Mode:", d[0]);
1052       printf("  %-*s %.10g\n", namewidth, "Mode quantile:", d[1]);
1053     }
1054   gal_data_free(tmp);
1055 
1056   /* Find and print the median:  */
1057   tmp=gal_statistics_median(p->input, 0);
1058   str=gal_type_to_string(tmp->array, tmp->type, 0);
1059   printf("  %-*s %s\n", namewidth, "Median:", str);
1060   gal_data_free(tmp);
1061   free(str);
1062 
1063   /* Print the mean and standard deviation. Same format as
1064      'gal_data_write_to_string' */
1065   printf("  %-*s %.10g\n", namewidth, "Mean:", mean);
1066   printf("  %-*s %.10g\n", namewidth, "Standard deviation:", std);
1067 
1068   /* Ascii histogram. Note that we don't want to force the user to have the
1069      plotting parameters. Also, when a reference column is defined, the
1070      range shown in the basic information section applies to that, not the
1071      range of the histogram. In that case, we want to print the histogram
1072      information. */
1073   printf("-------");
1074   range=set_bin_range_params(p, 1);
1075   p->asciiheight = p->asciiheight ? p->asciiheight : 10;
1076   p->numasciibins = p->numasciibins ? p->numasciibins : 70;
1077   bins=gal_statistics_regular_bins(p->input, range, p->numasciibins, NAN);
1078   hist=gal_statistics_histogram(p->input, bins, 0, 0);
1079   if(p->refcol==NULL) printf("\nHistogram:\n");
1080   print_ascii_plot(p, hist, bins, 1, p->refcol ? 1 : 0);
1081   gal_data_free(bins);
1082   gal_data_free(hist);
1083   gal_data_free(range);
1084 }
1085 
1086 
1087 
1088 
1089 
1090 
1091 
1092 
1093 
1094 
1095 
1096 
1097 
1098 
1099 
1100 
1101 
1102 
1103 
1104 
1105 /*******************************************************************/
1106 /**************            Sigma clipping            ***************/
1107 /*******************************************************************/
1108 void
print_sigma_clip(struct statisticsparams * p)1109 print_sigma_clip(struct statisticsparams *p)
1110 {
1111   float *a;
1112   char *mode;
1113   int namewidth=40;
1114   gal_data_t *sigclip;
1115 
1116   /* Set the mode for printing: */
1117   if( p->sclipparams[1]>=1.0f )
1118     {
1119       if( asprintf(&mode, "for %g clips", p->sclipparams[1])<0 )
1120         error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
1121     }
1122   else
1123     {
1124       if( asprintf(&mode, "until relative change in STD is less than %g",
1125                    p->sclipparams[1])<0 )
1126         error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
1127     }
1128 
1129   /* Report the status */
1130   if(!p->cp.quiet)
1131     {
1132       print_input_info(p);
1133       printf("%g-sigma clipping steps %s:\n\n", p->sclipparams[0], mode);
1134     }
1135 
1136   /* Do the Sigma clipping: */
1137   sigclip=gal_statistics_sigma_clip(p->sorted, p->sclipparams[0],
1138                                     p->sclipparams[1], 0, p->cp.quiet);
1139   a=sigclip->array;
1140 
1141   /* Finish the introduction. */
1142   if(!p->cp.quiet)
1143     printf("-------\nSummary:\n");
1144   else
1145     printf("%g-sigma clipped %s:\n", p->sclipparams[0], mode);
1146 
1147   /* Print the final results: */
1148   printf("  %-*s %zu\n", namewidth, "Number of input elements:",
1149          p->input->size);
1150   if( p->sclipparams[1] < 1.0f )
1151     printf("  %-*s %d\n", namewidth, "Number of clips:",     sigclip->status);
1152   printf("  %-*s %.0f\n", namewidth, "Final number of elements:", a[0]);
1153   printf("  %-*s %g\n", namewidth, "Median:",                a[1]);
1154   printf("  %-*s %g\n", namewidth, "Mean:",                  a[2]);
1155   printf("  %-*s %g\n", namewidth, "Standard deviation:",    a[3]);
1156 
1157   /* Clean up. */
1158   free(mode);
1159 }
1160 
1161 
1162 
1163 
1164 
1165 
1166 
1167 
1168 
1169 
1170 
1171 
1172 
1173 
1174 
1175 
1176 
1177 
1178 /*******************************************************************/
1179 /**************             Main function            ***************/
1180 /*******************************************************************/
1181 void
statistics(struct statisticsparams * p)1182 statistics(struct statisticsparams *p)
1183 {
1184   int print_basic_info=1;
1185 
1186   /* Print the one-row numbers if the user asked for them. */
1187   if(p->singlevalue)
1188     {
1189       print_basic_info=0;
1190       if(p->ontile) statistics_on_tile(p);
1191       else          statistics_print_one_row(p);
1192     }
1193 
1194   /* Find the Sky value if called. */
1195   if(p->sky)
1196     {
1197       sky(p);
1198       print_basic_info=0;
1199     }
1200 
1201   if(p->contour)
1202     {
1203       contour(p);
1204       print_basic_info=0;
1205     }
1206 
1207   /* Print the ASCII plots if requested. */
1208   if(p->asciihist || p->asciicfp)
1209     {
1210       ascii_plots(p);
1211       print_basic_info=0;
1212     }
1213 
1214   /* Save the histogram and CFP as tables if requested. */
1215   if(p->histogram || p->cumulative)
1216     {
1217       print_basic_info=0;
1218       save_hist_and_or_cfp(p);
1219     }
1220 
1221   /* 2D histogram. */
1222   if(p->histogram2d)
1223     {
1224       print_basic_info=0;
1225       histogram_2d(p);
1226     }
1227 
1228   /* Print the sigma-clipped results. */
1229   if( p->sigmaclip )
1230     {
1231       print_basic_info=0;
1232       print_sigma_clip(p);
1233     }
1234 
1235   /* Make the mirror table. */
1236   if( !isnan(p->mirror) )
1237     {
1238       print_basic_info=0;
1239       print_mirror_hist_cfp(p);
1240     }
1241 
1242   /* If nothing was requested print the simple statistics. */
1243   if(print_basic_info)
1244     print_basics(p);
1245 }
1246