1 /*********************************************************************
2 Statistics - Statistical analysis on input dataset.
3 Statistics is part of GNU Astronomy Utilities (Gnuastro) package.
4
5 Original author:
6 Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2015-2021, Free Software Foundation, Inc.
9
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24
25 #include <argp.h>
26 #include <errno.h>
27 #include <error.h>
28 #include <stdio.h>
29 #include <string.h>
30
31 #include <gnuastro/txt.h>
32 #include <gnuastro/wcs.h>
33 #include <gnuastro/fits.h>
34 #include <gnuastro/tile.h>
35 #include <gnuastro/array.h>
36 #include <gnuastro/qsort.h>
37 #include <gnuastro/blank.h>
38 #include <gnuastro/table.h>
39 #include <gnuastro/threads.h>
40 #include <gnuastro/arithmetic.h>
41 #include <gnuastro/statistics.h>
42
43 #include <gnuastro-internal/timing.h>
44 #include <gnuastro-internal/checkset.h>
45 #include <gnuastro-internal/tableintern.h>
46 #include <gnuastro-internal/fixedstringmacros.h>
47
48 #include "main.h"
49
50 #include "ui.h"
51 #include "authors-cite.h"
52
53
54
55
56
57 /**************************************************************/
58 /********* Argp necessary global entities ************/
59 /**************************************************************/
60 /* Definition parameters for the Argp: */
61 const char *
62 argp_program_version = PROGRAM_STRING "\n"
63 GAL_STRINGS_COPYRIGHT
64 "\n\nWritten/developed by "PROGRAM_AUTHORS;
65
66 const char *
67 argp_program_bug_address = PACKAGE_BUGREPORT;
68
69 static char
70 args_doc[] = "ASTRdata";
71
72 const char
73 doc[] = GAL_STRINGS_TOP_HELP_INFO PROGRAM_NAME" will do statistical "
74 "analysis on the input dataset (table column or image). All blank "
75 "pixels or pixels outside of the given range are ignored. You can "
76 "either directly ask for certain statistics in one line/row as shown "
77 "below with the same order as requested, or get tables of different "
78 "statistical measures like the histogram, cumulative frequency style "
79 "and etc. If no particular statistic is requested, some basic "
80 "information about the dataset is printed on the command-line.\n"
81 GAL_STRINGS_MORE_HELP_INFO
82 /* After the list of options: */
83 "\v"
84 PACKAGE_NAME" home page: "PACKAGE_URL;
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107 /**************************************************************/
108 /********* Initialize & Parse command-line **************/
109 /**************************************************************/
110 static void
ui_initialize_options(struct statisticsparams * p,struct argp_option * program_options,struct argp_option * gal_commonopts_options)111 ui_initialize_options(struct statisticsparams *p,
112 struct argp_option *program_options,
113 struct argp_option *gal_commonopts_options)
114 {
115 size_t i;
116 struct gal_options_common_params *cp=&p->cp;
117
118 /* Set the necessary common parameters structure. */
119 cp->program_struct = p;
120 cp->poptions = program_options;
121 cp->program_name = PROGRAM_NAME;
122 cp->program_exec = PROGRAM_EXEC;
123 cp->program_bibtex = PROGRAM_BIBTEX;
124 cp->program_authors = PROGRAM_AUTHORS;
125 cp->coptions = gal_commonopts_options;
126 cp->numthreads = gal_threads_number();
127 cp->tl.remainderfrac = NAN;
128
129 /* Program-specific initializers */
130 p->lessthan = NAN;
131 p->lessthan2 = NAN;
132 p->onebinstart = NAN;
133 p->onebinstart2 = NAN;
134 p->greaterequal = NAN;
135 p->greaterequal2 = NAN;
136 p->quantmin = NAN;
137 p->quantmax = NAN;
138 p->mirror = NAN;
139 p->mirrordist = NAN;
140 p->meanmedqdiff = NAN;
141 p->sclipparams[0] = NAN;
142 p->sclipparams[1] = NAN;
143
144 /* Set the mandatory common options. */
145 for(i=0; !gal_options_is_last(&cp->coptions[i]); ++i)
146 switch(cp->coptions[i].key)
147 {
148 case GAL_OPTIONS_KEY_LOG:
149 case GAL_OPTIONS_KEY_TYPE:
150 cp->coptions[i].flags=OPTION_HIDDEN;
151 break;
152
153 case GAL_OPTIONS_KEY_SEARCHIN:
154 case GAL_OPTIONS_KEY_MINMAPSIZE:
155 case GAL_OPTIONS_KEY_TABLEFORMAT:
156 cp->coptions[i].mandatory=GAL_OPTIONS_MANDATORY;
157 break;
158 }
159 }
160
161
162
163
164
165 /* Parse a single option: */
166 error_t
parse_opt(int key,char * arg,struct argp_state * state)167 parse_opt(int key, char *arg, struct argp_state *state)
168 {
169 struct statisticsparams *p = state->input;
170
171 /* Pass 'gal_options_common_params' into the child parser. */
172 state->child_inputs[0] = &p->cp;
173
174 /* In case the user incorrectly uses the equal sign (for example
175 with a short format or with space in the long format, then 'arg'
176 start with (if the short version was called) or be (if the long
177 version was called with a space) the equal sign. So, here we
178 check if the first character of arg is the equal sign, then the
179 user is warned and the program is stopped: */
180 if(arg && arg[0]=='=')
181 argp_error(state, "incorrect use of the equal sign ('='). For short "
182 "options, '=' should not be used and for long options, "
183 "there should be no space between the option, equal sign "
184 "and value");
185
186 /* Set the key to this option. */
187 switch(key)
188 {
189 /* Read the non-option tokens (arguments): */
190 case ARGP_KEY_ARG:
191 /* The user may give a shell variable that is empty! In that case
192 'arg' will be an empty string! We don't want to account for such
193 cases (and give a clear error that no input has been given). */
194 if(p->inputname)
195 argp_error(state, "only one argument (input file) should be given");
196 else
197 if(arg[0]!='\0') p->inputname=arg;
198 break;
199
200 /* This is an option, set its value. */
201 default:
202 return gal_options_set_from_key(key, arg, p->cp.poptions, &p->cp);
203 }
204
205 return 0;
206 }
207
208
209
210
211
212 static void *
ui_add_to_single_value(struct argp_option * option,char * arg,char * filename,size_t lineno,void * params)213 ui_add_to_single_value(struct argp_option *option, char *arg,
214 char *filename, size_t lineno, void *params)
215 {
216 size_t i;
217 double *d;
218 gal_data_t *inputs=NULL;
219 struct statisticsparams *p=(struct statisticsparams *)params;
220
221 /* In case of printing the option values. */
222 if(lineno==-1)
223 error(EXIT_FAILURE, 0, "currently the options to be printed in one row "
224 "(like '--number', '--mean', and etc) do not support printing "
225 "with the '--printparams' ('-P'), or writing into configuration "
226 "files due to lack of time when implementing these features. "
227 "You can put them into configuration files manually. Please get "
228 "in touch with us at '%s', so we can implement it",
229 PACKAGE_BUGREPORT);
230
231 /* Some of these options take values and some don't. */
232 if(option->type==GAL_OPTIONS_NO_ARG_TYPE)
233 {
234 /* If this option is given in a configuration file, then 'arg' will not
235 be NULL and we don't want to do anything if it is '0'. */
236 if(arg)
237 {
238 /* Make sure the value is only '0' or '1'. */
239 if( arg[1]!='\0' && *arg!='0' && *arg!='1' )
240 error_at_line(EXIT_FAILURE, 0, filename, lineno, "the '--%s' "
241 "option takes no arguments. In a configuration "
242 "file it can only have the values '1' or '0', "
243 "indicating if it should be used or not",
244 option->name);
245
246 /* Only proceed if the (possibly given) argument is 1. */
247 if(arg[0]=='0' && arg[1]=='\0') return NULL;
248 }
249
250 /* Add this option to the print list. */
251 gal_list_i32_add(&p->singlevalue, option->key);
252 }
253 else
254 {
255 /* Read the string of numbers. */
256 inputs=gal_options_parse_list_of_numbers(arg, filename, lineno);
257 if(inputs->size==0)
258 error(EXIT_FAILURE, 0, "'--%s' needs a value", option->name);
259
260 /* Do the appropriate operations with the */
261 d=inputs->array;
262 switch(option->key)
263 {
264 case UI_KEY_QUANTILE:
265 case UI_KEY_QUANTFUNC:
266 /* For the quantile and the quantile function, its possible to
267 give any number of arguments, so add the operation index and
268 the argument once for each given number. */
269 for(i=0;i<inputs->size;++i)
270 {
271 if(option->key==UI_KEY_QUANTILE && (d[i]<0 || d[i]>1) )
272 error_at_line(EXIT_FAILURE, 0, filename, lineno, "values "
273 "to '--quantile' ('-u') must be between 0 "
274 "and 1, you had asked for %g (read from '%s')",
275 d[i], arg);
276 gal_list_f64_add(&p->tp_args, d[i]);
277 gal_list_i32_add(&p->singlevalue, option->key);
278 }
279 break;
280
281 default:
282 error_at_line(EXIT_FAILURE, 0, filename, lineno, "a bug! please "
283 "contact us at %s so we can address the problem. "
284 "the option given to 'ui_add_to_print_in_row' is "
285 "marked as requiring a value, but is not recognized",
286 PACKAGE_BUGREPORT);
287 }
288 }
289
290 return NULL;
291 }
292
293
294
295
296
297 static void *
ui_read_quantile_range(struct argp_option * option,char * arg,char * filename,size_t lineno,void * params)298 ui_read_quantile_range(struct argp_option *option, char *arg,
299 char *filename, size_t lineno, void *params)
300 {
301 char *str;
302 gal_data_t *in;
303 struct statisticsparams *p=(struct statisticsparams *)params;
304
305 /* For the '--printparams' ('-P') option:*/
306 if(lineno==-1)
307 {
308 if( isnan(p->quantmax) )
309 {
310 if( asprintf(&str, "%g", p->quantmin)<0 )
311 error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
312 }
313 else
314 {
315 if( asprintf(&str, "%g,%g", p->quantmin, p->quantmax)<0 )
316 error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
317 }
318 return str;
319 }
320
321 /* Parse the inputs. */
322 in=gal_options_parse_list_of_numbers(arg, filename, lineno);
323
324 /* Check if there was only two numbers. */
325 if(in->size!=1 && in->size!=2)
326 error_at_line(EXIT_FAILURE, 0, filename, lineno, "the '--%s' "
327 "option takes one or two values values (separated by "
328 "a comma) to define the range of used values with "
329 "quantiles. However, %zu numbers were read in the "
330 "string '%s' (value to this option).\n\n"
331 "If there is only one number as input, it will be "
332 "interpretted as the lower quantile (Q) range. The "
333 "higher range will be set to the quantile (1-Q). "
334 "When two numbers are given, they will be used as the "
335 "lower and higher quantile range respectively",
336 option->name, in->size, arg);
337
338 /* Read the values in. */
339 p->quantmin = ((double *)(in->array))[0];
340 if(in->size==2) p->quantmax = ((double *)(in->array))[1];
341
342 /* Make sure the values are between 0 and 1. */
343 if( (p->quantmin<0 || p->quantmin>1)
344 || ( !isnan(p->quantmax) && (p->quantmax<0 || p->quantmax>1) ) )
345 error_at_line(EXIT_FAILURE, 0, filename, lineno, "values to the "
346 "'--quantrange' option must be between 0 and 1 "
347 "(inclusive). Your input was: '%s'", arg);
348
349 /* When only one value is given, make sure it is less than 0.5. */
350 if( !isnan(p->quantmax) && p->quantmin>0.5 )
351 error(EXIT_FAILURE, 0, "%g>=0.5! When only one value is given to the "
352 "'--%s' option, the range is defined as Q and 1-Q. Thus, the "
353 "value must be less than 0.5", p->quantmin, option->name);
354
355 /* Clean up and return. */
356 gal_data_free(in);
357 return NULL;
358 }
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379 /**************************************************************/
380 /*************** Sanity Check *******************/
381 /**************************************************************/
382 /* Read and check ONLY the options. When arguments are involved, do the
383 check in 'ui_check_options_and_arguments'. */
384 static void
ui_read_check_only_options(struct statisticsparams * p)385 ui_read_check_only_options(struct statisticsparams *p)
386 {
387 gal_list_i32_t *tmp;
388 struct gal_tile_two_layer_params *tl=&p->cp.tl;
389
390 /* Check if the format of the output table is valid, given the type of
391 the output. */
392 gal_tableintern_check_fits_format(p->cp.output, p->cp.tableformat);
393
394
395 /* If in tile-mode, we must have at least one single valued option. */
396 if(p->ontile && p->singlevalue==NULL)
397 error(EXIT_FAILURE, 0, "at least one of the single-value measurements "
398 "(for example '--median') must be requested with the '--ontile' "
399 "option: there is no value to put in each tile");
400
401 /* Tessellation related options. */
402 if( p->ontile || p->sky )
403 {
404 /* The tile or sky mode cannot be called with any other modes. */
405 if( p->asciihist || p->asciicfp || p->histogram || p->histogram2d
406 || p->cumulative || p->sigmaclip || !isnan(p->mirror) )
407 error(EXIT_FAILURE, 0, "'--ontile' or '--sky' cannot be called with "
408 "any of the 'particular' calculation options, for example "
409 "'--histogram'. This is because the latter work over the whole "
410 "dataset and element positions are changed, but in the former "
411 "positions are significant");
412
413 /* Make sure the tessellation defining options are given. */
414 if( tl->tilesize==NULL || tl->numchannels==NULL
415 || isnan(tl->remainderfrac) )
416 error(EXIT_FAILURE, 0, "'--tilesize', '--numchannels', and "
417 "'--remainderfrac' are mandatory options when dealing with "
418 "a tessellation (in '--ontile' or '--sky' mode). Atleast "
419 "one of these options wasn't given a value.");
420 }
421
422
423 /* In Sky mode, several options are mandatory. */
424 if( p->sky )
425 {
426 /* Mandatory options. */
427 if( isnan(p->meanmedqdiff) || isnan(p->sclipparams[0])
428 || p->cp.interpmetric==0 || p->cp.interpnumngb==0 )
429 error(EXIT_FAILURE, 0, "'--meanmedqdiff', '--sclipparams', "
430 "'--interpmetric' and '--interpnumngb' are mandatory when "
431 "requesting Sky measurement ('--sky')");
432
433 /* Make sure a reasonable value is given to '--meanmedqdiff'. */
434 if(p->meanmedqdiff>0.5)
435 error(EXIT_FAILURE, 0, "%g is not acceptable for '--meanmedqdiff'! "
436 "This option is the quantile-difference between the quantile "
437 "of the mean and 0.5 (the quantile of the median). Since the "
438 "range of quantile is from 0.0 to 1.0, the maximum difference "
439 "can therefore be 0.5. For more on this option, please see "
440 "the \"Quantifying signal in a tile\" section of the Gnuastro "
441 "book (with this command: 'info gnuastro \"Quantifying "
442 "signal in a tile\"'", p->meanmedqdiff);
443 if(p->meanmedqdiff>0.3 && p->cp.quiet==0)
444 error(EXIT_SUCCESS, 0, "WARNING: %g is very high for "
445 "'--meanmedqdiff'! This option is the quantile-difference "
446 "between the quantile of the mean and 0.5 (the quantile "
447 "of the median). For more on this option, please see the "
448 "\"Quantifying signal in a tile\" section of the Gnuastro "
449 "book (with this command: 'info gnuastro \"Quantifying "
450 "signal in a tile\"'. To suppress this warning, please use "
451 "the '--quiet' option", p->meanmedqdiff);
452
453 /* If a kernel name has been given, we need the HDU. */
454 if(p->kernelname && gal_fits_file_recognized(p->kernelname)
455 && p->khdu==NULL )
456 error(EXIT_FAILURE, 0, "no HDU specified for the kernel image. When "
457 "A HDU is necessary for FITS files. You can use the '--khdu' "
458 "('-u') option and give it the HDU number (starting from "
459 "zero), extension name, or anything acceptable by CFITSIO");
460 }
461
462
463 /* Sigma-clipping needs 'sclipparams'. */
464 if(p->sigmaclip && isnan(p->sclipparams[0]))
465 error(EXIT_FAILURE, 0, "'--sclipparams' is necessary with '--sigmaclip'. "
466 "'--sclipparams' takes two values (separated by a comma) for "
467 "defining the sigma-clip: the multiple of sigma, and tolerance "
468 "(<1) or number of clips (>1).");
469
470
471 /* If any of the mode measurements are requested, then 'mirrordist' is
472 mandatory. */
473 for(tmp=p->singlevalue; tmp!=NULL; tmp=tmp->next)
474 switch(tmp->v)
475 {
476 case UI_KEY_MODE:
477 case UI_KEY_MODESYM:
478 case UI_KEY_MODEQUANT:
479 case UI_KEY_MODESYMVALUE:
480 if( isnan(p->mirrordist) )
481 error(EXIT_FAILURE, 0, "'--mirrordist' is required for the "
482 "mode-related single measurements ('--mode', '--modequant', "
483 "'--modesym', and '--modesymvalue')");
484 break;
485 case UI_KEY_SIGCLIPSTD:
486 case UI_KEY_SIGCLIPMEAN:
487 case UI_KEY_SIGCLIPNUMBER:
488 case UI_KEY_SIGCLIPMEDIAN:
489 if( isnan(p->sclipparams[0]) )
490 error(EXIT_FAILURE, 0, "'--sclipparams' is necessary with "
491 "sigma-clipping measurements.\n\n"
492 "'--sclipparams' takes two values (separated by a comma) for "
493 "defining the sigma-clip: the multiple of sigma, and tolerance "
494 "(<1) or number of clips (>1).");
495 break;
496 }
497
498
499 /* If less than and greater than are both given, make sure that the value
500 to greater than is smaller than the value to less-than. */
501 if( !isnan(p->lessthan) && !isnan(p->greaterequal)
502 && p->lessthan < p->greaterequal )
503 error(EXIT_FAILURE, 0, "the value to '--lessthan' (%g) must be larger "
504 "than the value to '--greaterequal' (%g)", p->lessthan,
505 p->greaterequal);
506
507
508 /* Less-than and greater-equal cannot be called together with
509 quantrange. */
510 if( ( !isnan(p->lessthan) || !isnan(p->greaterequal) )
511 && !isnan(p->quantmin) )
512 error(EXIT_FAILURE, 0, "'--lessthan' and/or '--greaterequal' cannot "
513 "be called together with '--quantrange'");
514
515
516 /* When binned outputs are requested, make sure that 'numbins' is set. */
517 if( (p->histogram || p->histogram2d || p->cumulative || !isnan(p->mirror))
518 && p->numbins==0)
519 error(EXIT_FAILURE, 0, "'--numbins' isn't set. When the histogram or "
520 "cumulative frequency plots are requested, the number of bins "
521 "('--numbins') is necessary");
522 if( p->histogram2d )
523 {
524 if( p->numbins2==0 )
525 error(EXIT_FAILURE, 0, "'--numbins2' isn't set. When a 2D histogram "
526 "is requested, the number of bins in the second dimension "
527 "('--numbins2') is also necessary");
528 if( strcmp(p->histogram2d,"table") && strcmp(p->histogram2d,"image") )
529 error(EXIT_FAILURE, 0, "the value to '--histogram2d' can either be "
530 "'table' or 'image'");
531 }
532
533 /* If an ascii plot is requested, check if the ascii number of bins and
534 height are given. */
535 if( (p->asciihist || p->asciicfp)
536 && (p->numasciibins==0 || p->asciiheight==0) )
537 error(EXIT_FAILURE, 0, "when an ascii plot is requested, "
538 "'--numasciibins' and '--asciiheight' are mandatory, but atleast "
539 "one of these has not been given");
540
541
542 /* Reverse the list of statistics to print in one row and also the
543 arguments, so it has the same order the user wanted. */
544 gal_list_f64_reverse(&p->tp_args);
545 gal_list_i32_reverse(&p->singlevalue);
546 }
547
548
549
550
551
552 static void
ui_check_options_and_arguments(struct statisticsparams * p)553 ui_check_options_and_arguments(struct statisticsparams *p)
554 {
555 if(p->inputname)
556 {
557 /* If input is FITS. */
558 if( (p->isfits=gal_fits_file_recognized(p->inputname)) )
559 {
560 /* Check if a HDU is given. */
561 if( p->cp.hdu==NULL )
562 error(EXIT_FAILURE, 0, "no HDU specified. When the input is a "
563 "FITS file, a HDU must also be specified, you can use "
564 "the '--hdu' ('-h') option and give it the HDU number "
565 "(starting from zero), extension name, or anything "
566 "acceptable by CFITSIO");
567
568 /* If its an image, make sure column isn't given (in case the
569 user confuses an image with a table). */
570 p->hdu_type=gal_fits_hdu_format(p->inputname, p->cp.hdu);
571 if(p->hdu_type==IMAGE_HDU && p->columns)
572 error(EXIT_FAILURE, 0, "%s (hdu: %s): is a FITS image "
573 "extension. The '--column' option is only applicable "
574 "to tables.", p->inputname, p->cp.hdu);
575 }
576 }
577 }
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598 /**************************************************************/
599 /*************** Preparations *******************/
600 /**************************************************************/
601 static void
ui_out_of_range_to_blank(struct statisticsparams * p)602 ui_out_of_range_to_blank(struct statisticsparams *p)
603 {
604 size_t one=1;
605 unsigned char flags=GAL_ARITHMETIC_FLAG_NUMOK;
606 unsigned char flagsor = ( GAL_ARITHMETIC_FLAG_INPLACE
607 | GAL_ARITHMETIC_FLAG_NUMOK );
608 gal_data_t *tmp, *tmp2, *cond_g=NULL, *cond_l=NULL, *cond, *blank, *ref;
609
610
611 /* Set the dataset that should be used for the condition. */
612 ref = p->reference ? p->reference : p->input;
613
614
615 /* If the user has given a quantile range, then set the 'greaterequal'
616 and 'lessthan' values. */
617 if( !isnan(p->quantmin) )
618 {
619 /* If only one value was given, set the maximum quantile range. */
620 if( isnan(p->quantmax) ) p->quantmax = 1 - p->quantmin;
621
622 /* Set the greater-equal value. */
623 tmp=gal_statistics_quantile(ref, p->quantmin, 1);
624 tmp=gal_data_copy_to_new_type_free(tmp, GAL_TYPE_FLOAT32);
625 p->greaterequal=*((float *)(tmp->array));
626
627 /* Set the lower-than value. */
628 tmp=gal_statistics_quantile(ref, p->quantmax, 1);
629 tmp=gal_data_copy_to_new_type_free(tmp, GAL_TYPE_FLOAT32);
630 p->lessthan=*((float *)(tmp->array));
631 }
632
633
634 /* Set the condition. Note that the 'greaterequal' name is for the data
635 we want. So we will set the condition based on those that are
636 less-than */
637 if(!isnan(p->greaterequal))
638 {
639 tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
640 NULL, NULL, NULL);
641 *((float *)(tmp->array)) = p->greaterequal;
642 cond_g=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, flags, ref, tmp);
643 gal_data_free(tmp);
644 }
645 if( p->input->next && !isnan(p->greaterequal2) )
646 {
647 tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
648 NULL, NULL, NULL);
649 *((float *)(tmp->array)) = p->greaterequal2;
650 tmp2=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, flags, p->input->next, tmp);
651 cond_g=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, cond_g, tmp2);
652 gal_data_free(tmp);
653 gal_data_free(tmp2);
654 }
655
656
657 /* Same reasoning as above for 'p->greaterthan'. */
658 if(!isnan(p->lessthan))
659 {
660 tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
661 NULL, NULL, NULL);
662 *((float *)(tmp->array)) = p->lessthan;
663 cond_l=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, flags, ref, tmp);
664 gal_data_free(tmp);
665 }
666 if(p->input->next && !isnan(p->lessthan2))
667 {
668 tmp=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL, 0, -1, 1,
669 NULL, NULL, NULL);
670 *((float *)(tmp->array)) = p->lessthan2;
671 tmp2=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, flags, p->input->next, tmp);
672 cond_l=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, cond_l, tmp2);
673 gal_data_free(tmp);
674 gal_data_free(tmp2);
675 }
676
677
678 /* Now, set the final condition. If both values were specified, then use
679 the GAL_ARITHMETIC_OP_OR to merge them into one. */
680 switch( !isnan(p->greaterequal) + !isnan(p->lessthan) )
681 {
682 case 0: return; /* No condition was specified, return. */
683 case 1: /* Only one condition was specified. */
684 cond = isnan(p->greaterequal) ? cond_l : cond_g;
685 break;
686 case 2:
687 cond = gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, cond_l, cond_g);
688 gal_data_free(cond_g);
689 break;
690 }
691
692
693 /* Allocate a blank value to mask all pixels that don't satisfy the
694 condition. */
695 blank=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &one, NULL,
696 0, -1, 1, NULL, NULL, NULL);
697 *((float *)(blank->array)) = NAN;
698
699
700 /* Set all the pixels that satisfy the condition to blank. Note that a
701 blank value will be used in the proper type of the input in the
702 'where' operator. Also reset the blank flags so they are checked again
703 if necessary.*/
704 gal_arithmetic(GAL_ARITHMETIC_OP_WHERE, 1, flagsor, p->input, cond, blank);
705 p->input->flag &= ~GAL_DATA_FLAG_BLANK_CH;
706 p->input->flag &= ~GAL_DATA_FLAG_HASBLANK;
707 if(p->input->next)
708 {
709 gal_arithmetic(GAL_ARITHMETIC_OP_WHERE, 1, flagsor, p->input->next,
710 cond, blank);
711 p->input->next->flag &= ~GAL_DATA_FLAG_BLANK_CH;
712 p->input->next->flag &= ~GAL_DATA_FLAG_HASBLANK;
713 }
714
715 /* Clean up. */
716 gal_data_free(cond);
717 gal_data_free(blank);
718 }
719
720
721
722
723
724 /* Check if a sorted array is necessary and if so, then make a sorted
725 array. */
726 static void
ui_make_sorted_if_necessary(struct statisticsparams * p)727 ui_make_sorted_if_necessary(struct statisticsparams *p)
728 {
729 int is_necessary=0;
730 gal_list_i32_t *tmp;
731
732 /* Check in the one-row outputs. */
733 for(tmp=p->singlevalue; tmp!=NULL; tmp=tmp->next)
734 switch(tmp->v)
735 {
736 case UI_KEY_MODE:
737 case UI_KEY_MEDIAN:
738 case UI_KEY_QUANTILE:
739 case UI_KEY_QUANTFUNC:
740 case UI_KEY_SIGCLIPSTD:
741 case UI_KEY_QUANTOFMEAN:
742 case UI_KEY_SIGCLIPMEAN:
743 case UI_KEY_SIGCLIPNUMBER:
744 case UI_KEY_SIGCLIPMEDIAN:
745 is_necessary=1;
746 break;
747 }
748
749 /* Check in the rest of the outputs. */
750 if( is_necessary==0 && ( p->sigmaclip || !isnan(p->mirror) ) )
751 is_necessary=1;
752
753 /* Do the sorting, we will keep the sorted array in a separate space,
754 since the unsorted nature of the original dataset will help decrease
755 floating point errors. If the input is already sorted, we'll just
756 point it to the input.*/
757 if(is_necessary)
758 {
759 if( gal_statistics_is_sorted(p->input, 1) )
760 p->sorted=p->input;
761 else
762 {
763 p->sorted=gal_data_copy(p->input);
764 gal_statistics_sort_increasing(p->sorted);
765 }
766 }
767 }
768
769
770
771
772
773 /* Merge all given columns into one list. This is because people may call
774 for different columns in one call to '--column' ('-c', separated by a
775 comma), or multiple calls to '-c'. */
776 gal_list_str_t *
ui_read_columns_in_one(gal_list_str_t * incolumns)777 ui_read_columns_in_one(gal_list_str_t *incolumns)
778 {
779 size_t i;
780 gal_data_t *strs;
781 char *c, **strarr;
782 gal_list_str_t *tmp, *final=NULL;
783
784 /* Go over the separate calls to the '-c' option, see the explaination in
785 Table's 'ui_columns_prepare' function for more on every step. */
786 for(tmp=incolumns;tmp!=NULL;tmp=tmp->next)
787 {
788 /* Remove any new-line character. */
789 for(c=tmp->v;*c!='\0';++c)
790 if(*c=='\\' && *(c+1)=='\n') { *c=' '; *(++c)=' '; }
791
792 /* Read the different comma-separated strings into an array (within a
793 'gal_data_t'). */
794 strs=gal_options_parse_csv_strings_raw(tmp->v, NULL, 0);
795 strarr=strs->array;
796
797 /* Add each array element to the final list of columns. */
798 for(i=0;i<strs->size;++i)
799 gal_list_str_add(&final, strarr[i], 1);
800
801 /* Clean up. */
802 gal_data_free(strs);
803 }
804
805 /* Reverse the list to be in the same order. */
806 gal_list_str_reverse(&final);
807
808 /* For a check.
809 for(tmp=final;tmp!=NULL;tmp=tmp->next)
810 printf("%s\n", tmp->v);
811 */
812
813 /* Return the final unified list. */
814 return final;
815 }
816
817
818
819
820
821 void
ui_read_columns(struct statisticsparams * p)822 ui_read_columns(struct statisticsparams *p)
823 {
824 int toomanycols=0, tformat;
825 gal_data_t *cols, *tmp, *cinfo;
826 size_t size, ncols, nrows, counter=0;
827 gal_list_str_t *incols, *columnlist=NULL;
828 gal_list_str_t *lines=gal_options_check_stdin(p->inputname,
829 p->cp.stdintimeout,
830 "input");
831
832 /* Merge possibly multiple calls to '-c' (each possibly separated by a
833 coma) into one list. */
834 incols=ui_read_columns_in_one(p->columns);
835
836 /* If any columns are specified, make sure there is a maximum of two
837 columns. */
838 if(gal_list_str_number(incols)>2)
839 error(EXIT_FAILURE, 0, "%zu input columns were given but currently a "
840 "maximum of two columns are supported (two columns only for "
841 "special operations, the majority of operations are on a single "
842 "column)", gal_list_str_number(incols));
843
844 /* If a reference column is also given, add it to the list of columns to
845 read. */
846 if(p->refcol)
847 gal_list_str_add(&columnlist, p->refcol, 0);
848
849 /* If no column is specified, Statistics will abort and an error will be
850 printed when the table has more than one column. If there is only one
851 column, there is no need to specify any, so Statistics will use it. */
852 if(incols==NULL)
853 {
854 /* Get the basic table information. */
855 cinfo=gal_table_info(p->inputname, p->cp.hdu, lines, &ncols, &nrows,
856 &tformat);
857 gal_data_array_free(cinfo, ncols, 1);
858
859 /* See how many columns it has and take the proper action. */
860 switch(ncols)
861 {
862 case 0:
863 error(EXIT_FAILURE, 0, "%s contains no usable columns",
864 ( p->inputname
865 ? gal_checkset_dataset_name(p->inputname, p->cp.hdu)
866 : "Standard input" ));
867 case 1:
868 gal_list_str_add(&incols, "1", 1);
869 break;
870 default:
871 error(EXIT_FAILURE, 0, "%s is a table containing more than one "
872 "column. However, the specific column to work on isn't "
873 "specified.\n\n"
874 "Please use the '--column' ('-c') option to specify a "
875 "column. You can either give it the column number "
876 "(couting from 1), or a match/search in its meta-data (e.g., "
877 "column names).\n\n"
878 "For more information, please run the following command "
879 "(press the 'SPACE' key to go down and 'q' to return to the "
880 "command-line):\n\n"
881 " $ info gnuastro \"Selecting table columns\"\n",
882 ( p->inputname
883 ? gal_checkset_dataset_name(p->inputname, p->cp.hdu)
884 : "Standard input" ));
885 }
886
887 }
888 if(columnlist) columnlist->next=incols;
889 else columnlist=incols;
890
891 /* Read the desired column(s). */
892 cols=gal_table_read(p->inputname, p->cp.hdu, lines, columnlist,
893 p->cp.searchin, p->cp.ignorecase, p->cp.minmapsize,
894 p->cp.quietmmap, NULL);
895 gal_list_str_free(lines, 1);
896
897 /* If the input was from standard input, we'll set the input name to be
898 'stdin' (for future reporting). */
899 if(p->inputname==NULL)
900 gal_checkset_allocate_copy("stdin", &p->inputname);
901
902 /* Put the columns into the proper gal_data_t. */
903 size=cols->size;
904 while(cols!=NULL)
905 {
906 /* Pop out the top column. */
907 tmp=gal_list_data_pop(&cols);
908
909 /* Make sure it has the proper size. */
910 if(tmp->size!=size)
911 error(EXIT_FAILURE, 0, " read column number %zu has a %zu elements, "
912 "while previous column(s) had %zu", counter, tmp->size, size);
913
914 /* Make sure it is a usable datatype. */
915 switch(tmp->type)
916 {
917 case GAL_TYPE_BIT:
918 case GAL_TYPE_STRLL:
919 case GAL_TYPE_STRING:
920 case GAL_TYPE_COMPLEX32:
921 case GAL_TYPE_COMPLEX64:
922 error(EXIT_FAILURE, 0, " read column number %zu has a %s type, "
923 "which is not currently supported by %s", counter,
924 gal_type_name(tmp->type, 1), PROGRAM_NAME);
925 }
926
927 /* Put the column into the proper pointer. */
928 switch(++counter)
929 {
930 case 1: p->input=tmp; break;
931 case 2:
932 if(gal_list_str_number(incols)==2)
933 p->input->next=tmp;
934 else
935 if(p->refcol) p->reference=tmp; else toomanycols=1;
936 break;
937 case 3:
938 if(p->refcol) p->reference=tmp; else toomanycols=1; break;
939 default: toomanycols=1;
940 }
941
942 /* Print an error if there are too many columns: */
943 if(toomanycols)
944 gal_tableintern_error_col_selection(p->inputname, p->cp.hdu, "too "
945 "many columns were selected by "
946 "the given values to the "
947 "'--column' and/or '--refcol' "
948 "options. Only one is "
949 "acceptable for each.");
950 }
951
952 /* Clean up. */
953 gal_list_str_free(incols, 1);
954 if(columnlist!=incols)
955 {
956 columnlist->next=NULL;
957 gal_list_str_free(columnlist, 0);
958 }
959
960 /* For a check:
961 printf("Number of input columns: %zu\n", gal_list_data_number(p->input));
962 exit(0);
963 */
964 }
965
966
967
968
969
970 void
ui_preparations(struct statisticsparams * p)971 ui_preparations(struct statisticsparams *p)
972 {
973 int keepinputdir=p->cp.keepinputdir;
974 gal_data_t *check, *flag1, *flag2, *flag;
975 struct gal_options_common_params *cp=&p->cp;
976 struct gal_tile_two_layer_params *tl=&cp->tl;
977 unsigned char flagsor = GAL_ARITHMETIC_FLAGS_BASIC;
978 char *checkbasename = p->cp.output ? p->cp.output : p->inputname;
979
980 /* Change 'keepinputdir' based on if an output name was given. */
981 p->cp.keepinputdir = p->cp.output ? 1 : 0;
982
983 /* Read the input. */
984 if(p->isfits && p->hdu_type==IMAGE_HDU)
985 {
986 p->inputformat=INPUT_FORMAT_IMAGE;
987 p->input=gal_array_read_one_ch(p->inputname, cp->hdu, NULL,
988 cp->minmapsize, p->cp.quietmmap);
989 p->input->wcs=gal_wcs_read(p->inputname, cp->hdu,
990 p->cp.wcslinearmatrix, 0, 0,
991 &p->input->nwcs);
992 p->input->ndim=gal_dimension_remove_extra(p->input->ndim,
993 p->input->dsize,
994 p->input->wcs);
995 }
996 else
997 {
998 /* Read the table columns. */
999 ui_read_columns(p);
1000 p->inputformat=INPUT_FORMAT_TABLE;
1001
1002 /* Two columns can only be given with 2D histogram mode. */
1003 if(p->histogram2d==0 && p->input->next!=NULL)
1004 error(EXIT_FAILURE, 0, "two column input is currently only "
1005 "supported for 2D histogram mode");
1006 }
1007
1008 /* Read the convolution kernel if necessary. */
1009 if(p->sky && p->kernelname)
1010 {
1011 p->kernel=gal_fits_img_read_kernel(p->kernelname, p->khdu,
1012 cp->minmapsize, p->cp.quietmmap);
1013 p->kernel->ndim=gal_dimension_remove_extra(p->kernel->ndim,
1014 p->kernel->dsize, NULL);
1015 }
1016
1017 /* Tile and channel sanity checks and preparations. */
1018 if(p->ontile || p->sky)
1019 {
1020 /* Check the tiles and make the tile structure. */
1021 gal_tile_full_sanity_check(p->inputname, p->cp.hdu, p->input, tl);
1022 gal_tile_full_two_layers(p->input, tl);
1023 gal_tile_full_permutation(tl);
1024
1025 /* Make the tile check image if requested. */
1026 if(tl->checktiles)
1027 {
1028 tl->tilecheckname=gal_checkset_automatic_output(cp, checkbasename,
1029 "_tiled.fits");
1030 check=gal_tile_block_check_tiles(tl->tiles);
1031 if(p->inputformat==INPUT_FORMAT_IMAGE)
1032 gal_fits_img_write(check, tl->tilecheckname, NULL, PROGRAM_NAME);
1033 else
1034 {
1035 gal_checkset_writable_remove(tl->tilecheckname, 0,
1036 cp->dontdelete);
1037 gal_table_write(check, NULL, NULL, cp->tableformat,
1038 tl->tilecheckname, "TABLE", 0);
1039 }
1040 gal_data_free(check);
1041 }
1042
1043 /* Set the steps image name. */
1044 if(p->sky && p->checksky)
1045 p->checkskyname=gal_checkset_automatic_output(cp, checkbasename,
1046 "_sky_steps.fits");
1047 }
1048
1049 /* Set the out-of-range values in the input to blank. */
1050 ui_out_of_range_to_blank(p);
1051
1052 /* If we are not to work on tiles, then re-order and change the input. */
1053 if(p->ontile==0 && p->sky==0 && p->contour==NULL)
1054 {
1055 /* Only keep the elements we want. Note that if we have more than one
1056 column, we need to move the same rows in both (otherwise their
1057 widths won't be equal). */
1058 if(p->input->next)
1059 {
1060 flag1=gal_blank_flag(p->input);
1061 flag2=gal_blank_flag(p->input->next);
1062 flag=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, flagsor, flag1, flag2);
1063
1064 gal_blank_flag_apply(p->input, flag);
1065 gal_blank_flag_apply(p->input->next, flag);
1066
1067 gal_blank_remove(p->input);
1068 gal_blank_remove(p->input->next);
1069
1070 gal_data_free(flag);
1071 p->input->next->flag &= ~GAL_DATA_FLAG_HASBLANK ;
1072 p->input->next->flag |= GAL_DATA_FLAG_BLANK_CH ;
1073 }
1074 else
1075 gal_blank_remove(p->input);
1076 p->input->flag &= ~GAL_DATA_FLAG_HASBLANK ;
1077 p->input->flag |= GAL_DATA_FLAG_BLANK_CH ;
1078
1079 /* Make sure there actually are any (non-blank) elements left. */
1080 if(p->input->size==0)
1081 error(EXIT_FAILURE, 0, "%s: all elements are blank",
1082 gal_fits_name_save_as_string(p->inputname, cp->hdu));
1083
1084 /* Make sure there is data remaining: */
1085 if(p->input->size==0)
1086 error(EXIT_FAILURE, 0, "%s: no data, maybe the '--greaterequal' or "
1087 "'--lessthan' options need to be adjusted",
1088 gal_fits_name_save_as_string(p->inputname, cp->hdu) );
1089
1090 /* Make the sorted array if necessary. */
1091 ui_make_sorted_if_necessary(p);
1092
1093 /* Set the number of output files. */
1094 if( !isnan(p->mirror) ) ++p->numoutfiles;
1095 if( p->histogram || p->cumulative ) ++p->numoutfiles;
1096 }
1097
1098 /* Reset 'keepinputdir' to what it originally was. */
1099 p->cp.keepinputdir=keepinputdir;
1100 }
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120 /**************************************************************/
1121 /************ Set the parameters *************/
1122 /**************************************************************/
1123
1124 void
ui_read_check_inputs_setup(int argc,char * argv[],struct statisticsparams * p)1125 ui_read_check_inputs_setup(int argc, char *argv[], struct statisticsparams *p)
1126 {
1127 struct gal_options_common_params *cp=&p->cp;
1128
1129
1130 /* Include the parameters necessary for argp from this program ('args.h')
1131 and for the common options to all Gnuastro ('commonopts.h'). We want
1132 to directly put the pointers to the fields in 'p' and 'cp', so we are
1133 simply including the header here to not have to use long macros in
1134 those headers which make them hard to read and modify. This also helps
1135 in having a clean environment: everything in those headers is only
1136 available within the scope of this function. */
1137 #include <gnuastro-internal/commonopts.h>
1138 #include "args.h"
1139
1140
1141 /* Initialize the options and necessary information. */
1142 ui_initialize_options(p, program_options, gal_commonopts_options);
1143
1144
1145 /* Read the command-line options and arguments. */
1146 errno=0;
1147 if(argp_parse(&thisargp, argc, argv, 0, 0, p))
1148 error(EXIT_FAILURE, errno, "parsing arguments");
1149
1150
1151 /* Read the configuration files and set the common values. */
1152 gal_options_read_config_set(&p->cp);
1153
1154
1155 /* Read the options into the program's structure, and check them and
1156 their relations prior to printing. */
1157 ui_read_check_only_options(p);
1158
1159
1160 /* Print the option values if asked. Note that this needs to be done
1161 after the option checks so un-sane values are not printed in the
1162 output state. */
1163 gal_options_print_state(&p->cp);
1164
1165
1166 /* Check that the options and arguments fit well with each other. Note
1167 that arguments don't go in a configuration file. So this test should
1168 be done after (possibly) printing the option values. */
1169 ui_check_options_and_arguments(p);
1170
1171
1172 /* Read/allocate all the necessary starting arrays. */
1173 ui_preparations(p);
1174
1175
1176 /* Prepare all the options as FITS keywords to write in output
1177 later. Note that in some modes, there is no output file, and
1178 'ui_add_to_single_value' isn't yet prepared. */
1179 if( (p->singlevalue && p->ontile) || p->sky || p->histogram \
1180 || p->cumulative)
1181 gal_options_as_fits_keywords(&p->cp);
1182 }
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203 /**************************************************************/
1204 /************ Free allocated, report *************/
1205 /**************************************************************/
1206 void
ui_free_report(struct statisticsparams * p)1207 ui_free_report(struct statisticsparams *p)
1208 {
1209 /* Free the allocated arrays: */
1210 free(p->cp.hdu);
1211 free(p->cp.output);
1212 gal_data_free(p->reference);
1213 gal_list_data_free(p->input);
1214 gal_list_f64_free(p->tp_args);
1215 gal_list_i32_free(p->singlevalue);
1216 gal_tile_full_free_contents(&p->cp.tl);
1217 if(p->sorted!=p->input) gal_data_free(p->sorted);
1218 }
1219