1 /*********************************************************************
2 Statistics - Statistical analysis on input dataset.
3 Statistics is part of GNU Astronomy Utilities (Gnuastro) package.
4 
5 Original author:
6      Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2015-2021, Free Software Foundation, Inc.
9 
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14 
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24 
25 #include <argp.h>
26 #include <errno.h>
27 #include <error.h>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include <gnuastro/txt.h>
32 #include <gnuastro/wcs.h>
33 #include <gnuastro/fits.h>
34 #include <gnuastro/tile.h>
35 #include <gnuastro/array.h>
36 #include <gnuastro/qsort.h>
37 #include <gnuastro/blank.h>
38 #include <gnuastro/table.h>
39 #include <gnuastro/threads.h>
40 #include <gnuastro/arithmetic.h>
41 #include <gnuastro/statistics.h>
42 
43 #include <gnuastro-internal/timing.h>
44 #include <gnuastro-internal/checkset.h>
45 #include <gnuastro-internal/tableintern.h>
46 #include <gnuastro-internal/fixedstringmacros.h>
47 
48 #include "main.h"
49 
50 #include "ui.h"
51 #include "authors-cite.h"
52 
53 
54 
55 
56 
57 /**************************************************************/
58 /*********      Argp necessary global entities     ************/
59 /**************************************************************/
60 /* Definition parameters for the Argp: */
61 const char *
62 argp_program_version = PROGRAM_STRING "\n"
63                        GAL_STRINGS_COPYRIGHT
64                        "\n\nWritten/developed by "PROGRAM_AUTHORS;
65 
66 const char *
67 argp_program_bug_address = PACKAGE_BUGREPORT;
68 
69 static char
70 args_doc[] = "ASTRdata";
71 
72 const char
73 doc[] = GAL_STRINGS_TOP_HELP_INFO PROGRAM_NAME" will do statistical "
74   "analysis on the input dataset (table column or image). All blank "
75   "pixels or pixels outside of the given range are ignored. You can "
76   "either directly ask for certain statistics in one line/row as shown "
77   "below with the same order as requested, or get tables of different "
78   "statistical measures like the histogram, cumulative frequency style "
79   "and etc. If no particular statistic is requested, some basic "
80   "information about the dataset is printed on the command-line.\n"
81   GAL_STRINGS_MORE_HELP_INFO
82   /* After the list of options: */
83   "\v"
84   PACKAGE_NAME" home page: "PACKAGE_URL;
85 
86 
87 
88 
89 
90 
91 
92 
93 
94 
95 
96 
97 
98 
99 
100 
101 
102 
103 
104 
105 
106 
107 /**************************************************************/
108 /*********    Initialize & Parse command-line    **************/
109 /**************************************************************/
110 static void
ui_initialize_options(struct statisticsparams * p,struct argp_option * program_options,struct argp_option * gal_commonopts_options)111 ui_initialize_options(struct statisticsparams *p,
112                       struct argp_option *program_options,
113                       struct argp_option *gal_commonopts_options)
114 {
115   size_t i;
116   struct gal_options_common_params *cp=&p->cp;
117 
118   /* Set the necessary common parameters structure. */
119   cp->program_struct     = p;
120   cp->poptions           = program_options;
121   cp->program_name       = PROGRAM_NAME;
122   cp->program_exec       = PROGRAM_EXEC;
123   cp->program_bibtex     = PROGRAM_BIBTEX;
124   cp->program_authors    = PROGRAM_AUTHORS;
125   cp->coptions           = gal_commonopts_options;
126   cp->numthreads         = gal_threads_number();
127   cp->tl.remainderfrac   = NAN;
128 
129   /* Program-specific initializers */
130   p->lessthan            = NAN;
131   p->lessthan2           = NAN;
132   p->onebinstart         = NAN;
133   p->onebinstart2        = NAN;
134   p->greaterequal        = NAN;
135   p->greaterequal2       = NAN;
136   p->quantmin            = NAN;
137   p->quantmax            = NAN;
138   p->mirror              = NAN;
139   p->mirrordist          = NAN;
140   p->meanmedqdiff        = NAN;
141   p->sclipparams[0]      = NAN;
142   p->sclipparams[1]      = NAN;
143 
144   /* Set the mandatory common options. */
145   for(i=0; !gal_options_is_last(&cp->coptions[i]); ++i)
146     switch(cp->coptions[i].key)
147       {
148       case GAL_OPTIONS_KEY_LOG:
149       case GAL_OPTIONS_KEY_TYPE:
150         cp->coptions[i].flags=OPTION_HIDDEN;
151         break;
152 
153       case GAL_OPTIONS_KEY_SEARCHIN:
154       case GAL_OPTIONS_KEY_MINMAPSIZE:
155       case GAL_OPTIONS_KEY_TABLEFORMAT:
156         cp->coptions[i].mandatory=GAL_OPTIONS_MANDATORY;
157         break;
158       }
159 }
160 
161 
162 
163 
164 
165 /* Parse a single option: */
166 error_t
parse_opt(int key,char * arg,struct argp_state * state)167 parse_opt(int key, char *arg, struct argp_state *state)
168 {
169   struct statisticsparams *p = state->input;
170 
171   /* Pass 'gal_options_common_params' into the child parser.  */
172   state->child_inputs[0] = &p->cp;
173 
174   /* In case the user incorrectly uses the equal sign (for example
175      with a short format or with space in the long format, then 'arg'
176      start with (if the short version was called) or be (if the long
177      version was called with a space) the equal sign. So, here we
178      check if the first character of arg is the equal sign, then the
179      user is warned and the program is stopped: */
180   if(arg && arg[0]=='=')
181     argp_error(state, "incorrect use of the equal sign ('='). For short "
182                "options, '=' should not be used and for long options, "
183                "there should be no space between the option, equal sign "
184                "and value");
185 
186   /* Set the key to this option. */
187   switch(key)
188     {
189     /* Read the non-option tokens (arguments): */
190     case ARGP_KEY_ARG:
191       /* The user may give a shell variable that is empty! In that case
192          'arg' will be an empty string! We don't want to account for such
193          cases (and give a clear error that no input has been given). */
194       if(p->inputname)
195         argp_error(state, "only one argument (input file) should be given");
196       else
197         if(arg[0]!='\0') p->inputname=arg;
198       break;
199 
200     /* This is an option, set its value. */
201     default:
202       return gal_options_set_from_key(key, arg, p->cp.poptions, &p->cp);
203     }
204 
205   return 0;
206 }
207 
208 
209 
210 
211 
212 static void *
ui_add_to_single_value(struct argp_option * option,char * arg,char * filename,size_t lineno,void * params)213 ui_add_to_single_value(struct argp_option *option, char *arg,
214                       char *filename, size_t lineno, void *params)
215 {
216   size_t i;
217   double *d;
218   gal_data_t *inputs=NULL;
219   struct statisticsparams *p=(struct statisticsparams *)params;
220 
221   /* In case of printing the option values. */
222   if(lineno==-1)
223     error(EXIT_FAILURE, 0, "currently the options to be printed in one row "
224           "(like '--number', '--mean', and etc) do not support printing "
225           "with the '--printparams' ('-P'), or writing into configuration "
226           "files due to lack of time when implementing these features. "
227           "You can put them into configuration files manually. Please get "
228           "in touch with us at '%s', so we can implement it",
229           PACKAGE_BUGREPORT);
230 
231   /* Some of these options take values and some don't. */
232   if(option->type==GAL_OPTIONS_NO_ARG_TYPE)
233     {
234       /* If this option is given in a configuration file, then 'arg' will not
235          be NULL and we don't want to do anything if it is '0'. */
236       if(arg)
237         {
238           /* Make sure the value is only '0' or '1'. */
239           if( arg[1]!='\0' && *arg!='0' && *arg!='1' )
240             error_at_line(EXIT_FAILURE, 0, filename, lineno, "the '--%s' "
241                           "option takes no arguments. In a configuration "
242                           "file it can only have the values '1' or '0', "
243                           "indicating if it should be used or not",
244                           option->name);
245 
246           /* Only proceed if the (possibly given) argument is 1. */
247           if(arg[0]=='0' && arg[1]=='\0') return NULL;
248         }
249 
250       /* Add this option to the print list. */
251       gal_list_i32_add(&p->singlevalue, option->key);
252     }
253   else
254     {
255       /* Read the string of numbers. */
256       inputs=gal_options_parse_list_of_numbers(arg, filename, lineno);
257       if(inputs->size==0)
258         error(EXIT_FAILURE, 0, "'--%s' needs a value", option->name);
259 
260       /* Do the appropriate operations with the  */
261       d=inputs->array;
262       switch(option->key)
263         {
264         case UI_KEY_QUANTILE:
265         case UI_KEY_QUANTFUNC:
266           /* For the quantile and the quantile function, its possible to
267              give any number of arguments, so add the operation index and
268              the argument once for each given number. */
269           for(i=0;i<inputs->size;++i)
270             {
271               if(option->key==UI_KEY_QUANTILE && (d[i]<0 || d[i]>1) )
272                 error_at_line(EXIT_FAILURE, 0, filename, lineno, "values "
273                               "to '--quantile' ('-u') must be between 0 "
274                               "and 1, you had asked for %g (read from '%s')",
275                               d[i], arg);
276               gal_list_f64_add(&p->tp_args, d[i]);
277               gal_list_i32_add(&p->singlevalue, option->key);
278             }
279           break;
280 
281         default:
282           error_at_line(EXIT_FAILURE, 0, filename, lineno, "a bug! please "
283                         "contact us at %s so we can address the problem. "
284                         "the option given to 'ui_add_to_print_in_row' is "
285                         "marked as requiring a value, but is not recognized",
286                         PACKAGE_BUGREPORT);
287         }
288     }
289 
290   return NULL;
291 }
292 
293 
294 
295 
296 
297 static void *
ui_read_quantile_range(struct argp_option * option,char * arg,char * filename,size_t lineno,void * params)298 ui_read_quantile_range(struct argp_option *option, char *arg,
299                        char *filename, size_t lineno, void *params)
300 {
301   char *str;
302   gal_data_t *in;
303   struct statisticsparams *p=(struct statisticsparams *)params;
304 
305   /* For the '--printparams' ('-P') option:*/
306   if(lineno==-1)
307     {
308       if( isnan(p->quantmax) )
309         {
310           if( asprintf(&str, "%g", p->quantmin)<0 )
311             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
312         }
313       else
314         {
315           if( asprintf(&str, "%g,%g", p->quantmin, p->quantmax)<0 )
316             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
317         }
318       return str;
319     }
320 
321   /* Parse the inputs. */
322   in=gal_options_parse_list_of_numbers(arg, filename, lineno);
323 
324   /* Check if there was only two numbers. */
325   if(in->size!=1 && in->size!=2)
326     error_at_line(EXIT_FAILURE, 0, filename, lineno, "the '--%s' "
327                   "option takes one or two values values (separated by "
328                   "a comma) to define the range of used values with "
329                   "quantiles. However, %zu numbers were read in the "
330                   "string '%s' (value to this option).\n\n"
331                   "If there is only one number as input, it will be "
332                   "interpretted as the lower quantile (Q) range. The "
333                   "higher range will be set to the quantile (1-Q). "
334                   "When two numbers are given, they will be used as the "
335                   "lower and higher quantile range respectively",
336                   option->name, in->size, arg);
337 
338   /* Read the values in. */
339   p->quantmin = ((double *)(in->array))[0];
340   if(in->size==2) p->quantmax = ((double *)(in->array))[1];
341 
342   /* Make sure the values are between 0 and 1. */
343   if( (p->quantmin<0 || p->quantmin>1)
344       || ( !isnan(p->quantmax) && (p->quantmax<0 || p->quantmax>1) ) )
345     error_at_line(EXIT_FAILURE, 0, filename, lineno, "values to the "
346                   "'--quantrange' option must be between 0 and 1 "
347                   "(inclusive). Your input was: '%s'", arg);
348 
349   /* When only one value is given, make sure it is less than 0.5. */
350   if( !isnan(p->quantmax) && p->quantmin>0.5 )
351     error(EXIT_FAILURE, 0, "%g>=0.5! When only one value is given to the "
352           "'--%s' option, the range is defined as Q and 1-Q. Thus, the "
353           "value must be less than 0.5", p->quantmin, option->name);
354 
355   /* Clean up and return. */
356   gal_data_free(in);
357   return NULL;
358 }
359 
360 
361 
362 
363 
364 
365 
366 
367 
368 
369 
370 
371 
372 
373 
374 
375 
376 
377 
378 
379 /**************************************************************/
380 /***************       Sanity Check         *******************/
381 /**************************************************************/
382 /* Read and check ONLY the options. When arguments are involved, do the
383    check in 'ui_check_options_and_arguments'. */
384 static void
ui_read_check_only_options(struct statisticsparams * p)385 ui_read_check_only_options(struct statisticsparams *p)
386 {
387   gal_list_i32_t *tmp;
388   struct gal_tile_two_layer_params *tl=&p->cp.tl;
389 
390   /* Check if the format of the output table is valid, given the type of
391      the output. */
392   gal_tableintern_check_fits_format(p->cp.output, p->cp.tableformat);
393 
394 
395   /* If in tile-mode, we must have at least one single valued option. */
396   if(p->ontile && p->singlevalue==NULL)
397     error(EXIT_FAILURE, 0, "at least one of the single-value measurements "
398           "(for example '--median') must be requested with the '--ontile' "
399           "option: there is no value to put in each tile");
400 
401   /* Tessellation related options. */
402   if( p->ontile || p->sky )
403     {
404       /* The tile or sky mode cannot be called with any other modes. */
405       if( p->asciihist || p->asciicfp || p->histogram || p->histogram2d
406           || p->cumulative || p->sigmaclip || !isnan(p->mirror) )
407         error(EXIT_FAILURE, 0, "'--ontile' or '--sky' cannot be called with "
408               "any of the 'particular' calculation options, for example "
409               "'--histogram'. This is because the latter work over the whole "
410               "dataset and element positions are changed, but in the former "
411               "positions are significant");
412 
413       /* Make sure the tessellation defining options are given. */
414       if( tl->tilesize==NULL || tl->numchannels==NULL
415           || isnan(tl->remainderfrac) )
416          error(EXIT_FAILURE, 0, "'--tilesize', '--numchannels', and "
417                "'--remainderfrac' are mandatory options when dealing with "
418                "a tessellation (in '--ontile' or '--sky' mode). Atleast "
419                "one of these options wasn't given a value.");
420     }
421 
422 
423   /* In Sky mode, several options are mandatory. */
424   if( p->sky )
425     {
426       /* Mandatory options. */
427       if( isnan(p->meanmedqdiff) || isnan(p->sclipparams[0])
428           || p->cp.interpmetric==0 || p->cp.interpnumngb==0 )
429         error(EXIT_FAILURE, 0, "'--meanmedqdiff', '--sclipparams', "
430               "'--interpmetric' and '--interpnumngb' are mandatory when "
431               "requesting Sky measurement ('--sky')");
432 
433       /* Make sure a reasonable value is given to '--meanmedqdiff'. */
434       if(p->meanmedqdiff>0.5)
435         error(EXIT_FAILURE, 0, "%g is not acceptable for '--meanmedqdiff'! "
436               "This option is the quantile-difference between the quantile "
437               "of the mean and 0.5 (the quantile of the median). Since the "
438               "range of quantile is from 0.0 to 1.0, the maximum difference "
439               "can therefore be 0.5. For more on this option, please see "
440               "the \"Quantifying signal in a tile\" section of the Gnuastro "
441               "book (with this command: 'info gnuastro \"Quantifying "
442               "signal in a tile\"'", p->meanmedqdiff);
443       if(p->meanmedqdiff>0.3 && p->cp.quiet==0)
444         error(EXIT_SUCCESS, 0, "WARNING: %g is very high for "
445               "'--meanmedqdiff'! This option is the quantile-difference "
446               "between the quantile of the mean and 0.5 (the quantile "
447               "of the median). For more on this option, please see the "
448               "\"Quantifying signal in a tile\" section of the Gnuastro "
449               "book (with this command: 'info gnuastro \"Quantifying "
450               "signal in a tile\"'. To suppress this warning, please use "
451               "the '--quiet' option", p->meanmedqdiff);
452 
453       /* If a kernel name has been given, we need the HDU. */
454       if(p->kernelname && gal_fits_file_recognized(p->kernelname)
455          && p->khdu==NULL )
456         error(EXIT_FAILURE, 0, "no HDU specified for the kernel image. When "
457               "A HDU is necessary for FITS files. You can use the '--khdu' "
458               "('-u') option and give it the HDU number (starting from "
459               "zero), extension name, or anything acceptable by CFITSIO");
460     }
461 
462 
463   /* Sigma-clipping needs 'sclipparams'. */
464   if(p->sigmaclip && isnan(p->sclipparams[0]))
465     error(EXIT_FAILURE, 0, "'--sclipparams' is necessary with '--sigmaclip'. "
466           "'--sclipparams' takes two values (separated by a comma) for "
467           "defining the sigma-clip: the multiple of sigma, and tolerance "
468           "(<1) or number of clips (>1).");
469 
470 
471   /* If any of the mode measurements are requested, then 'mirrordist' is
472      mandatory. */
473   for(tmp=p->singlevalue; tmp!=NULL; tmp=tmp->next)
474     switch(tmp->v)
475       {
476       case UI_KEY_MODE:
477       case UI_KEY_MODESYM:
478       case UI_KEY_MODEQUANT:
479       case UI_KEY_MODESYMVALUE:
480         if( isnan(p->mirrordist) )
481           error(EXIT_FAILURE, 0, "'--mirrordist' is required for the "
482                 "mode-related single measurements ('--mode', '--modequant', "
483                 "'--modesym', and '--modesymvalue')");
484         break;
485       case UI_KEY_SIGCLIPSTD:
486       case UI_KEY_SIGCLIPMEAN:
487       case UI_KEY_SIGCLIPNUMBER:
488       case UI_KEY_SIGCLIPMEDIAN:
489         if( isnan(p->sclipparams[0]) )
490           error(EXIT_FAILURE, 0, "'--sclipparams' is necessary with "
491                 "sigma-clipping measurements.\n\n"
492                 "'--sclipparams' takes two values (separated by a comma) for "
493                 "defining the sigma-clip: the multiple of sigma, and tolerance "
494                 "(<1) or number of clips (>1).");
495         break;
496       }
497 
498 
499   /* If less than and greater than are both given, make sure that the value
500      to greater than is smaller than the value to less-than. */
501   if( !isnan(p->lessthan) && !isnan(p->greaterequal)
502       && p->lessthan < p->greaterequal )
503     error(EXIT_FAILURE, 0, "the value to '--lessthan' (%g) must be larger "
504           "than the value to '--greaterequal' (%g)", p->lessthan,
505           p->greaterequal);
506 
507 
508   /* Less-than and greater-equal cannot be called together with
509      quantrange. */
510   if( ( !isnan(p->lessthan) || !isnan(p->greaterequal) )
511       && !isnan(p->quantmin) )
512     error(EXIT_FAILURE, 0, "'--lessthan' and/or '--greaterequal' cannot "
513           "be called together with '--quantrange'");
514 
515 
516   /* When binned outputs are requested, make sure that 'numbins' is set. */
517   if( (p->histogram || p->histogram2d || p->cumulative || !isnan(p->mirror))
518       && p->numbins==0)
519     error(EXIT_FAILURE, 0, "'--numbins' isn't set. When the histogram or "
520           "cumulative frequency plots are requested, the number of bins "
521           "('--numbins') is necessary");
522   if( p->histogram2d )
523     {
524       if( p->numbins2==0 )
525         error(EXIT_FAILURE, 0, "'--numbins2' isn't set. When a 2D histogram "
526               "is requested, the number of bins in the second dimension "
527               "('--numbins2') is also necessary");
528       if( strcmp(p->histogram2d,"table") && strcmp(p->histogram2d,"image") )
529         error(EXIT_FAILURE, 0, "the value to '--histogram2d' can either be "
530               "'table' or 'image'");
531     }
532 
533   /* If an ascii plot is requested, check if the ascii number of bins and
534      height are given. */
535   if( (p->asciihist || p->asciicfp)
536       && (p->numasciibins==0 || p->asciiheight==0) )
537     error(EXIT_FAILURE, 0, "when an ascii plot is requested, "
538           "'--numasciibins' and '--asciiheight' are mandatory, but atleast "
539           "one of these has not been given");
540 
541 
542   /* Reverse the list of statistics to print in one row and also the
543      arguments, so it has the same order the user wanted. */
544   gal_list_f64_reverse(&p->tp_args);
545   gal_list_i32_reverse(&p->singlevalue);
546 }
547 
548 
549 
550 
551 
552 static void
ui_check_options_and_arguments(struct statisticsparams * p)553 ui_check_options_and_arguments(struct statisticsparams *p)
554 {
555   if(p->inputname)
556     {
557       /* If input is FITS. */
558       if( (p->isfits=gal_fits_file_recognized(p->inputname)) )
559         {
560           /* Check if a HDU is given. */
561           if( p->cp.hdu==NULL )
562             error(EXIT_FAILURE, 0, "no HDU specified. When the input is a "
563                   "FITS file, a HDU must also be specified, you can use "
564                   "the '--hdu' ('-h') option and give it the HDU number "
565                   "(starting from zero), extension name, or anything "
566                   "acceptable by CFITSIO");
567 
568           /* If its an image, make sure column isn't given (in case the
569              user confuses an image with a table). */
570           p->hdu_type=gal_fits_hdu_format(p->inputname, p->cp.hdu);
571           if(p->hdu_type==IMAGE_HDU && p->columns)
572             error(EXIT_FAILURE, 0, "%s (hdu: %s): is a FITS image "
573                   "extension. The '--column' option is only applicable "
574                   "to tables.", p->inputname, p->cp.hdu);
575         }
576     }
577 }
578 
579 
580 
581 
582 
583 
584 
585 
586 
587 
588 
589 
590 
591 
592 
593 
594 
595 
596 
597 
598 /**************************************************************/
599 /***************       Preparations         *******************/
600 /**************************************************************/
601 static void
ui_out_of_range_to_blank(struct statisticsparams * p)602 ui_out_of_range_to_blank(struct statisticsparams *p)
603 {
604   size_t one=1;
605   unsigned char flags=GAL_ARITHMETIC_FLAG_NUMOK;
606   unsigned char flagsor = ( GAL_ARITHMETIC_FLAG_INPLACE
607                             | GAL_ARITHMETIC_FLAG_NUMOK );
608   gal_data_t *tmp, *tmp2, *cond_g=NULL, *cond_l=NULL, *cond, *blank, *ref;
609 
610 
611   /* Set the dataset that should be used for the condition. */
612   ref = p->reference ? p->reference : p->input;
613 
614 
615   /* If the user has given a quantile range, then set the 'greaterequal'
616      and 'lessthan' values. */
617   if( !isnan(p->quantmin) )
618     {
619       /* If only one value was given, set the maximum quantile range. */
620       if( isnan(p->quantmax) ) p->quantmax = 1 - p->quantmin;
621 
622       /* Set the greater-equal value. */
623       tmp=gal_statistics_quantile(ref, p->quantmin, 1);
624       tmp=gal_data_copy_to_new_type_free(tmp, GAL_TYPE_FLOAT32);
625       p->greaterequal=*((float *)(tmp->array));
626 
627       /* Set the lower-than value. */
628       tmp=gal_statistics_quantile(ref, p->quantmax, 1);
629       tmp=gal_data_copy_to_new_type_free(tmp, GAL_TYPE_FLOAT32);
630       p->lessthan=*((float *)(tmp->array));
631     }
632 
633 
634   /* Set the condition. Note that the 'greaterequal' name is for the data
635      we want. So we will set the condition based on those that are
636      less-than  */
637   if(!isnan(p->greaterequal))
638     {
639       tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
640                          NULL, NULL, NULL);
641       *((float *)(tmp->array)) = p->greaterequal;
642       cond_g=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, flags, ref, tmp);
643       gal_data_free(tmp);
644     }
645   if( p->input->next && !isnan(p->greaterequal2) )
646     {
647       tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
648                          NULL, NULL, NULL);
649       *((float *)(tmp->array)) = p->greaterequal2;
650       tmp2=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, flags, p->input->next, tmp);
651       cond_g=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, cond_g, tmp2);
652       gal_data_free(tmp);
653       gal_data_free(tmp2);
654     }
655 
656 
657   /* Same reasoning as above for 'p->greaterthan'. */
658   if(!isnan(p->lessthan))
659     {
660       tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
661                          NULL, NULL, NULL);
662       *((float *)(tmp->array)) = p->lessthan;
663       cond_l=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, flags, ref, tmp);
664       gal_data_free(tmp);
665     }
666   if(p->input->next && !isnan(p->lessthan2))
667     {
668       tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
669                          NULL, NULL, NULL);
670       *((float *)(tmp->array)) = p->lessthan2;
671       tmp2=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, flags, p->input->next, tmp);
672       cond_l=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, cond_l, tmp2);
673       gal_data_free(tmp);
674       gal_data_free(tmp2);
675     }
676 
677 
678   /* Now, set the final condition. If both values were specified, then use
679      the GAL_ARITHMETIC_OP_OR to merge them into one. */
680   switch( !isnan(p->greaterequal) + !isnan(p->lessthan) )
681     {
682     case 0: return;             /* No condition was specified, return.  */
683     case 1:                     /* Only one condition was specified.    */
684       cond = isnan(p->greaterequal) ? cond_l : cond_g;
685       break;
686     case 2:
687       cond = gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, cond_l, cond_g);
688       gal_data_free(cond_g);
689       break;
690     }
691 
692 
693   /* Allocate a blank value to mask all pixels that don't satisfy the
694      condition. */
695   blank=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL,
696                        0, -1, 1, NULL, NULL, NULL);
697   *((float *)(blank->array)) = NAN;
698 
699 
700   /* Set all the pixels that satisfy the condition to blank. Note that a
701      blank value will be used in the proper type of the input in the
702      'where' operator. Also reset the blank flags so they are checked again
703      if necessary.*/
704   gal_arithmetic(GAL_ARITHMETIC_OP_WHERE, 1, flagsor, p->input, cond, blank);
705   p->input->flag &= ~GAL_DATA_FLAG_BLANK_CH;
706   p->input->flag &= ~GAL_DATA_FLAG_HASBLANK;
707   if(p->input->next)
708     {
709       gal_arithmetic(GAL_ARITHMETIC_OP_WHERE, 1, flagsor, p->input->next,
710                      cond, blank);
711       p->input->next->flag &= ~GAL_DATA_FLAG_BLANK_CH;
712       p->input->next->flag &= ~GAL_DATA_FLAG_HASBLANK;
713     }
714 
715   /* Clean up. */
716   gal_data_free(cond);
717   gal_data_free(blank);
718 }
719 
720 
721 
722 
723 
724 /* Check if a sorted array is necessary and if so, then make a sorted
725    array. */
726 static void
ui_make_sorted_if_necessary(struct statisticsparams * p)727 ui_make_sorted_if_necessary(struct statisticsparams *p)
728 {
729   int is_necessary=0;
730   gal_list_i32_t *tmp;
731 
732   /* Check in the one-row outputs. */
733   for(tmp=p->singlevalue; tmp!=NULL; tmp=tmp->next)
734     switch(tmp->v)
735       {
736       case UI_KEY_MODE:
737       case UI_KEY_MEDIAN:
738       case UI_KEY_QUANTILE:
739       case UI_KEY_QUANTFUNC:
740       case UI_KEY_SIGCLIPSTD:
741       case UI_KEY_QUANTOFMEAN:
742       case UI_KEY_SIGCLIPMEAN:
743       case UI_KEY_SIGCLIPNUMBER:
744       case UI_KEY_SIGCLIPMEDIAN:
745         is_necessary=1;
746         break;
747       }
748 
749   /* Check in the rest of the outputs. */
750   if( is_necessary==0 && ( p->sigmaclip || !isnan(p->mirror) ) )
751     is_necessary=1;
752 
753   /* Do the sorting, we will keep the sorted array in a separate space,
754      since the unsorted nature of the original dataset will help decrease
755      floating point errors. If the input is already sorted, we'll just
756      point it to the input.*/
757   if(is_necessary)
758     {
759       if( gal_statistics_is_sorted(p->input, 1) )
760         p->sorted=p->input;
761       else
762         {
763           p->sorted=gal_data_copy(p->input);
764           gal_statistics_sort_increasing(p->sorted);
765         }
766     }
767 }
768 
769 
770 
771 
772 
773 /* Merge all given columns into one list. This is because people may call
774    for different columns in one call to '--column' ('-c', separated by a
775    comma), or multiple calls to '-c'. */
776 gal_list_str_t *
ui_read_columns_in_one(gal_list_str_t * incolumns)777 ui_read_columns_in_one(gal_list_str_t *incolumns)
778 {
779   size_t i;
780   gal_data_t *strs;
781   char *c, **strarr;
782   gal_list_str_t *tmp, *final=NULL;
783 
784   /* Go over the separate calls to the '-c' option, see the explaination in
785      Table's 'ui_columns_prepare' function for more on every step. */
786   for(tmp=incolumns;tmp!=NULL;tmp=tmp->next)
787     {
788       /* Remove any new-line character. */
789       for(c=tmp->v;*c!='\0';++c)
790         if(*c=='\\' && *(c+1)=='\n') { *c=' '; *(++c)=' '; }
791 
792       /* Read the different comma-separated strings into an array (within a
793          'gal_data_t'). */
794       strs=gal_options_parse_csv_strings_raw(tmp->v, NULL, 0);
795       strarr=strs->array;
796 
797       /* Add each array element to the final list of columns. */
798       for(i=0;i<strs->size;++i)
799         gal_list_str_add(&final, strarr[i], 1);
800 
801       /* Clean up. */
802       gal_data_free(strs);
803     }
804 
805   /* Reverse the list to be in the same order. */
806   gal_list_str_reverse(&final);
807 
808   /* For a check.
809   for(tmp=final;tmp!=NULL;tmp=tmp->next)
810     printf("%s\n", tmp->v);
811   */
812 
813   /* Return the final unified list. */
814   return final;
815 }
816 
817 
818 
819 
820 
821 void
ui_read_columns(struct statisticsparams * p)822 ui_read_columns(struct statisticsparams *p)
823 {
824   int toomanycols=0, tformat;
825   gal_data_t *cols, *tmp, *cinfo;
826   size_t size, ncols, nrows, counter=0;
827   gal_list_str_t *incols, *columnlist=NULL;
828   gal_list_str_t *lines=gal_options_check_stdin(p->inputname,
829                                                 p->cp.stdintimeout,
830                                                 "input");
831 
832   /* Merge possibly multiple calls to '-c' (each possibly separated by a
833      coma) into one list. */
834   incols=ui_read_columns_in_one(p->columns);
835 
836   /* If any columns are specified, make sure there is a maximum of two
837      columns.  */
838   if(gal_list_str_number(incols)>2)
839     error(EXIT_FAILURE, 0, "%zu input columns were given but currently a "
840           "maximum of two columns are supported (two columns only for "
841           "special operations, the majority of operations are on a single "
842           "column)", gal_list_str_number(incols));
843 
844   /* If a reference column is also given, add it to the list of columns to
845      read. */
846   if(p->refcol)
847     gal_list_str_add(&columnlist, p->refcol, 0);
848 
849   /* If no column is specified, Statistics will abort and an error will be
850      printed when the table has more than one column. If there is only one
851      column, there is no need to specify any, so Statistics will use it. */
852   if(incols==NULL)
853     {
854       /* Get the basic table information. */
855       cinfo=gal_table_info(p->inputname, p->cp.hdu, lines, &ncols, &nrows,
856                            &tformat);
857       gal_data_array_free(cinfo, ncols, 1);
858 
859       /* See how many columns it has and take the proper action. */
860       switch(ncols)
861         {
862         case 0:
863           error(EXIT_FAILURE, 0, "%s contains no usable columns",
864                 ( p->inputname
865                   ? gal_checkset_dataset_name(p->inputname, p->cp.hdu)
866                   : "Standard input" ));
867         case 1:
868           gal_list_str_add(&incols, "1", 1);
869           break;
870         default:
871           error(EXIT_FAILURE, 0, "%s is a table containing more than one "
872                 "column. However, the specific column to work on isn't "
873                 "specified.\n\n"
874                 "Please use the '--column' ('-c') option to specify a "
875                 "column. You can either give it the column number "
876                 "(couting from 1), or a match/search in its meta-data (e.g., "
877                 "column names).\n\n"
878                 "For more information, please run the following command "
879                 "(press the 'SPACE' key to go down and 'q' to return to the "
880                 "command-line):\n\n"
881                 "    $ info gnuastro \"Selecting table columns\"\n",
882                 ( p->inputname
883                   ? gal_checkset_dataset_name(p->inputname, p->cp.hdu)
884                   : "Standard input" ));
885         }
886 
887     }
888   if(columnlist) columnlist->next=incols;
889   else           columnlist=incols;
890 
891   /* Read the desired column(s). */
892   cols=gal_table_read(p->inputname, p->cp.hdu, lines, columnlist,
893                       p->cp.searchin, p->cp.ignorecase, p->cp.minmapsize,
894                       p->cp.quietmmap, NULL);
895   gal_list_str_free(lines, 1);
896 
897   /* If the input was from standard input, we'll set the input name to be
898      'stdin' (for future reporting). */
899   if(p->inputname==NULL)
900     gal_checkset_allocate_copy("stdin", &p->inputname);
901 
902   /* Put the columns into the proper gal_data_t. */
903   size=cols->size;
904   while(cols!=NULL)
905     {
906       /* Pop out the top column. */
907       tmp=gal_list_data_pop(&cols);
908 
909       /* Make sure it has the proper size. */
910       if(tmp->size!=size)
911         error(EXIT_FAILURE, 0, " read column number %zu has a %zu elements, "
912               "while previous column(s) had %zu", counter, tmp->size, size);
913 
914       /* Make sure it is a usable datatype. */
915       switch(tmp->type)
916         {
917         case GAL_TYPE_BIT:
918         case GAL_TYPE_STRLL:
919         case GAL_TYPE_STRING:
920         case GAL_TYPE_COMPLEX32:
921         case GAL_TYPE_COMPLEX64:
922           error(EXIT_FAILURE, 0, " read column number %zu has a %s type, "
923                 "which is not currently supported by %s", counter,
924                 gal_type_name(tmp->type, 1), PROGRAM_NAME);
925         }
926 
927       /* Put the column into the proper pointer. */
928       switch(++counter)
929         {
930         case 1: p->input=tmp; break;
931         case 2:
932           if(gal_list_str_number(incols)==2)
933             p->input->next=tmp;
934           else
935             if(p->refcol) p->reference=tmp; else toomanycols=1;
936           break;
937         case 3:
938           if(p->refcol) p->reference=tmp; else toomanycols=1;   break;
939         default: toomanycols=1;
940         }
941 
942       /* Print an error if there are too many columns: */
943       if(toomanycols)
944         gal_tableintern_error_col_selection(p->inputname, p->cp.hdu, "too "
945                                             "many columns were selected by "
946                                             "the given values to the "
947                                             "'--column' and/or '--refcol' "
948                                             "options. Only one is "
949                                             "acceptable for each.");
950     }
951 
952   /* Clean up. */
953   gal_list_str_free(incols, 1);
954   if(columnlist!=incols)
955     {
956       columnlist->next=NULL;
957       gal_list_str_free(columnlist, 0);
958     }
959 
960   /* For a check:
961   printf("Number of input columns: %zu\n", gal_list_data_number(p->input));
962   exit(0);
963   */
964 }
965 
966 
967 
968 
969 
970 void
ui_preparations(struct statisticsparams * p)971 ui_preparations(struct statisticsparams *p)
972 {
973   int keepinputdir=p->cp.keepinputdir;
974   gal_data_t *check, *flag1, *flag2, *flag;
975   struct gal_options_common_params *cp=&p->cp;
976   struct gal_tile_two_layer_params *tl=&cp->tl;
977   unsigned char flagsor = GAL_ARITHMETIC_FLAGS_BASIC;
978   char *checkbasename = p->cp.output ? p->cp.output : p->inputname;
979 
980   /* Change 'keepinputdir' based on if an output name was given. */
981   p->cp.keepinputdir = p->cp.output ? 1 : 0;
982 
983   /* Read the input. */
984   if(p->isfits && p->hdu_type==IMAGE_HDU)
985     {
986       p->inputformat=INPUT_FORMAT_IMAGE;
987       p->input=gal_array_read_one_ch(p->inputname, cp->hdu, NULL,
988                                      cp->minmapsize, p->cp.quietmmap);
989       p->input->wcs=gal_wcs_read(p->inputname, cp->hdu,
990                                  p->cp.wcslinearmatrix, 0, 0,
991                                  &p->input->nwcs);
992       p->input->ndim=gal_dimension_remove_extra(p->input->ndim,
993                                                 p->input->dsize,
994                                                 p->input->wcs);
995     }
996   else
997     {
998       /* Read the table columns. */
999       ui_read_columns(p);
1000       p->inputformat=INPUT_FORMAT_TABLE;
1001 
1002       /* Two columns can only be given with 2D histogram mode. */
1003       if(p->histogram2d==0 && p->input->next!=NULL)
1004         error(EXIT_FAILURE, 0, "two column input is currently only "
1005               "supported for 2D histogram mode");
1006     }
1007 
1008   /* Read the convolution kernel if necessary. */
1009   if(p->sky && p->kernelname)
1010     {
1011       p->kernel=gal_fits_img_read_kernel(p->kernelname, p->khdu,
1012                                          cp->minmapsize, p->cp.quietmmap);
1013       p->kernel->ndim=gal_dimension_remove_extra(p->kernel->ndim,
1014                                                  p->kernel->dsize, NULL);
1015     }
1016 
1017   /* Tile and channel sanity checks and preparations. */
1018   if(p->ontile || p->sky)
1019     {
1020       /* Check the tiles and make the tile structure. */
1021       gal_tile_full_sanity_check(p->inputname, p->cp.hdu, p->input, tl);
1022       gal_tile_full_two_layers(p->input, tl);
1023       gal_tile_full_permutation(tl);
1024 
1025       /* Make the tile check image if requested. */
1026       if(tl->checktiles)
1027         {
1028           tl->tilecheckname=gal_checkset_automatic_output(cp, checkbasename,
1029                                                           "_tiled.fits");
1030           check=gal_tile_block_check_tiles(tl->tiles);
1031           if(p->inputformat==INPUT_FORMAT_IMAGE)
1032             gal_fits_img_write(check, tl->tilecheckname, NULL, PROGRAM_NAME);
1033           else
1034             {
1035               gal_checkset_writable_remove(tl->tilecheckname, 0,
1036                                            cp->dontdelete);
1037               gal_table_write(check, NULL, NULL, cp->tableformat,
1038                               tl->tilecheckname, "TABLE", 0);
1039             }
1040           gal_data_free(check);
1041         }
1042 
1043       /* Set the steps image name. */
1044       if(p->sky && p->checksky)
1045         p->checkskyname=gal_checkset_automatic_output(cp, checkbasename,
1046                                                       "_sky_steps.fits");
1047     }
1048 
1049   /* Set the out-of-range values in the input to blank. */
1050   ui_out_of_range_to_blank(p);
1051 
1052   /* If we are not to work on tiles, then re-order and change the input. */
1053   if(p->ontile==0 && p->sky==0 && p->contour==NULL)
1054     {
1055       /* Only keep the elements we want. Note that if we have more than one
1056          column, we need to move the same rows in both (otherwise their
1057          widths won't be equal). */
1058       if(p->input->next)
1059         {
1060           flag1=gal_blank_flag(p->input);
1061           flag2=gal_blank_flag(p->input->next);
1062           flag=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, flag1, flag2);
1063 
1064           gal_blank_flag_apply(p->input, flag);
1065           gal_blank_flag_apply(p->input->next, flag);
1066 
1067           gal_blank_remove(p->input);
1068           gal_blank_remove(p->input->next);
1069 
1070           gal_data_free(flag);
1071           p->input->next->flag &= ~GAL_DATA_FLAG_HASBLANK ;
1072           p->input->next->flag |= GAL_DATA_FLAG_BLANK_CH ;
1073         }
1074       else
1075         gal_blank_remove(p->input);
1076       p->input->flag &= ~GAL_DATA_FLAG_HASBLANK ;
1077       p->input->flag |= GAL_DATA_FLAG_BLANK_CH ;
1078 
1079       /* Make sure there actually are any (non-blank) elements left. */
1080       if(p->input->size==0)
1081         error(EXIT_FAILURE, 0, "%s: all elements are blank",
1082               gal_fits_name_save_as_string(p->inputname, cp->hdu));
1083 
1084       /* Make sure there is data remaining: */
1085       if(p->input->size==0)
1086         error(EXIT_FAILURE, 0, "%s: no data, maybe the '--greaterequal' or "
1087               "'--lessthan' options need to be adjusted",
1088               gal_fits_name_save_as_string(p->inputname, cp->hdu) );
1089 
1090       /* Make the sorted array if necessary. */
1091       ui_make_sorted_if_necessary(p);
1092 
1093       /* Set the number of output files. */
1094       if( !isnan(p->mirror) )             ++p->numoutfiles;
1095       if( p->histogram || p->cumulative ) ++p->numoutfiles;
1096     }
1097 
1098   /* Reset 'keepinputdir' to what it originally was. */
1099   p->cp.keepinputdir=keepinputdir;
1100 }
1101 
1102 
1103 
1104 
1105 
1106 
1107 
1108 
1109 
1110 
1111 
1112 
1113 
1114 
1115 
1116 
1117 
1118 
1119 
1120 /**************************************************************/
1121 /************         Set the parameters          *************/
1122 /**************************************************************/
1123 
1124 void
ui_read_check_inputs_setup(int argc,char * argv[],struct statisticsparams * p)1125 ui_read_check_inputs_setup(int argc, char *argv[], struct statisticsparams *p)
1126 {
1127   struct gal_options_common_params *cp=&p->cp;
1128 
1129 
1130   /* Include the parameters necessary for argp from this program ('args.h')
1131      and for the common options to all Gnuastro ('commonopts.h'). We want
1132      to directly put the pointers to the fields in 'p' and 'cp', so we are
1133      simply including the header here to not have to use long macros in
1134      those headers which make them hard to read and modify. This also helps
1135      in having a clean environment: everything in those headers is only
1136      available within the scope of this function. */
1137 #include <gnuastro-internal/commonopts.h>
1138 #include "args.h"
1139 
1140 
1141   /* Initialize the options and necessary information.  */
1142   ui_initialize_options(p, program_options, gal_commonopts_options);
1143 
1144 
1145   /* Read the command-line options and arguments. */
1146   errno=0;
1147   if(argp_parse(&thisargp, argc, argv, 0, 0, p))
1148     error(EXIT_FAILURE, errno, "parsing arguments");
1149 
1150 
1151   /* Read the configuration files and set the common values. */
1152   gal_options_read_config_set(&p->cp);
1153 
1154 
1155   /* Read the options into the program's structure, and check them and
1156      their relations prior to printing. */
1157   ui_read_check_only_options(p);
1158 
1159 
1160   /* Print the option values if asked. Note that this needs to be done
1161      after the option checks so un-sane values are not printed in the
1162      output state. */
1163   gal_options_print_state(&p->cp);
1164 
1165 
1166   /* Check that the options and arguments fit well with each other. Note
1167      that arguments don't go in a configuration file. So this test should
1168      be done after (possibly) printing the option values. */
1169   ui_check_options_and_arguments(p);
1170 
1171 
1172   /* Read/allocate all the necessary starting arrays. */
1173   ui_preparations(p);
1174 
1175 
1176   /* Prepare all the options as FITS keywords to write in output
1177      later. Note that in some modes, there is no output file, and
1178      'ui_add_to_single_value' isn't yet prepared. */
1179   if( (p->singlevalue && p->ontile) || p->sky || p->histogram \
1180       || p->cumulative)
1181     gal_options_as_fits_keywords(&p->cp);
1182 }
1183 
1184 
1185 
1186 
1187 
1188 
1189 
1190 
1191 
1192 
1193 
1194 
1195 
1196 
1197 
1198 
1199 
1200 
1201 
1202 
1203 /**************************************************************/
1204 /************      Free allocated, report         *************/
1205 /**************************************************************/
1206 void
ui_free_report(struct statisticsparams * p)1207 ui_free_report(struct statisticsparams *p)
1208 {
1209   /* Free the allocated arrays: */
1210   free(p->cp.hdu);
1211   free(p->cp.output);
1212   gal_data_free(p->reference);
1213   gal_list_data_free(p->input);
1214   gal_list_f64_free(p->tp_args);
1215   gal_list_i32_free(p->singlevalue);
1216   gal_tile_full_free_contents(&p->cp.tl);
1217   if(p->sorted!=p->input) gal_data_free(p->sorted);
1218 }
1219