1 /*********************************************************************
2 table -- Functions for I/O on tables.
3 This is part of GNU Astronomy Utilities (Gnuastro) package.
4 
5 Original author:
6      Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2016-2021, Free Software Foundation, Inc.
9 
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14 
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24 
25 #include <stdio.h>
26 #include <errno.h>
27 #include <error.h>
28 #include <regex.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include <gnuastro/git.h>
33 #include <gnuastro/txt.h>
34 #include <gnuastro/blank.h>
35 #include <gnuastro/table.h>
36 #include <gnuastro/pointer.h>
37 
38 #include <gnuastro-internal/timing.h>
39 #include <gnuastro-internal/checkset.h>
40 #include <gnuastro-internal/tableintern.h>
41 
42 
43 
44 
45 
46 
47 /************************************************************************/
48 /***************              Error messages              ***************/
49 /************************************************************************/
/* Abort the program with a message saying that a column selection failed,
   followed by instructions on how to learn more about column selection
   and how to list the column meta-data of this table.

   filename:    Name of the file containing the table. When it is NULL,
                the table is reported as coming from 'stdin'.
   hdu:         HDU of the table; only used when 'filename' is a FITS
                file. When it is NULL, the HDU is left out of the message.
   errorstring: Description of what went wrong with the selection.

   This function does not return: it always ends with a call to 'error'
   with 'EXIT_FAILURE'. For that reason the strings allocated here are
   never freed. */
void
gal_tableintern_error_col_selection(char *filename, char *hdu,
                                    char *errorstring)
{
  char *name, *command;

  /* Set the proper pointers. The file name is checked for NULL before
     being inspected because a NULL file name is valid here (it means the
     standard input). */
  if( filename && hdu && gal_fits_name_is_fits(filename) )
    {
      if( asprintf(&name, "%s (hdu: %s)", filename, hdu)<0 )
        error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);

      /* If there is a white-space character ANYWHERE in the HDU
         (including its last character), it has to be quoted in the
         suggested command, otherwise the shell will split it. */
      if( asprintf(&command,
                   ( strpbrk(hdu, " \t\n\v\f\r")==NULL
                     ? "%s --hdu=%s" : "%s --hdu=\"%s\"" ),
                   filename, hdu)<0 )
        error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
    }
  else command=name=filename?filename:"stdin";

  /* Abort with the proper error. */
  error(EXIT_FAILURE, 0, "%s: %s\n\nFor more information on selecting "
        "columns in Gnuastro, please run the following command (press "
        "'SPACE' to go down and 'q' to return to the command-line):\n\n"
        "    $ info gnuastro \"Selecting table columns\"\n\n"
        "To define a better column selection criteria, you can see "
        "the list of column meta-data in this table, with the following "
        "command:\n\n"
        "    $ asttable %s --info\n", name, errorstring, command);
}
78 
79 
80 
81 
82 
83 
84 
85 
86 
87 
88 
89 
90 
91 
92 
93 
94 
95 
96 
97 
98 /************************************************************************/
99 /***************                 Formats                  ***************/
100 /************************************************************************/
101 /* Return the type of desired table based on a standard string. */
102 uint8_t
gal_tableintern_string_to_format(char * string)103 gal_tableintern_string_to_format(char *string)
104 {
105   if(string)                    /* Its not NULL. */
106     {
107       if(!strcmp(string, "txt"))              return GAL_TABLE_FORMAT_TXT;
108       else if(!strcmp(string,"fits-ascii"))   return GAL_TABLE_FORMAT_AFITS;
109       else if(!strcmp(string, "fits-binary")) return GAL_TABLE_FORMAT_BFITS;
110       else                                    return GAL_TABLE_FORMAT_INVALID;
111     }
112   else                                        return GAL_TABLE_FORMAT_INVALID;
113 }
114 
115 
116 
117 
118 
119 char *
gal_tableintern_format_as_string(uint8_t tableformat)120 gal_tableintern_format_as_string(uint8_t tableformat)
121 {
122   switch(tableformat)
123     {
124     case GAL_TABLE_FORMAT_TXT:    return "txt";
125     case GAL_TABLE_FORMAT_AFITS:  return "fits-ascii";
126     case GAL_TABLE_FORMAT_BFITS:  return "fits-binary";
127     default:
128       error(EXIT_FAILURE, 0, "%s: code %d not recognized", __func__,
129             tableformat);
130       return NULL;
131     }
132 }
133 
134 
135 
136 
137 
138 
139 /* In programs, the 'searchin' variable is much more easier to format in as
140    a description than an integer (which is what 'gal_table_read_cols'
141    needs). This function will check the string value and give the
142    corresponding integer value.*/
143 uint8_t
gal_tableintern_string_to_searchin(char * string)144 gal_tableintern_string_to_searchin(char *string)
145 {
146   if(string)                    /* Its not NULL. */
147     {
148       if(!strcmp(string, "name"))          return GAL_TABLE_SEARCH_NAME;
149       else if(!strcmp(string, "unit"))     return GAL_TABLE_SEARCH_UNIT;
150       else if(!strcmp(string, "comment"))  return GAL_TABLE_SEARCH_COMMENT;
151       else                                 return GAL_TABLE_SEARCH_INVALID;
152     }
153   else                                     return GAL_TABLE_SEARCH_INVALID;
154 }
155 
156 
157 
158 
159 
160 char *
gal_tableintern_searchin_as_string(uint8_t searchin)161 gal_tableintern_searchin_as_string(uint8_t searchin)
162 {
163   switch(searchin)
164     {
165     case GAL_TABLE_SEARCH_NAME:    return "name";
166     case GAL_TABLE_SEARCH_UNIT:    return "unit";
167     case GAL_TABLE_SEARCH_COMMENT: return "comment";
168     default:
169       error(EXIT_FAILURE, 0, "%s: code %d not recognized as a valid search "
170             "field", __func__, searchin);
171       return NULL;
172     }
173 }
174 
175 
176 
177 
178 
179 /* For programs that output tables, the '--tableformat' option will be used
180    to specify what format the output table should be in. When the output
181    file is a FITS file, there are multiple formats, so to simplify the
182    coding in each program, this function will do a sanity check on the
183    value given to the '--tableformat' parameter. */
184 void
gal_tableintern_check_fits_format(char * filename,int tableformat)185 gal_tableintern_check_fits_format(char *filename, int tableformat)
186 {
187   if( filename && gal_fits_name_is_fits(filename) )
188     {
189       /* When '--tableformat' was not given. */
190       if(tableformat==GAL_TABLE_FORMAT_INVALID)
191         error(EXIT_FAILURE, 0, "'%s' (output file) is a FITS file but the "
192               "desired format of the FITS table has not been specified with "
193               "the '--tableformat' option. For FITS tables, this option can "
194               "take two values: 'fits-ascii', or 'fits-binary'", filename);
195 
196       /* When '--tableformat' didn't have the correct value. */
197       if( tableformat != GAL_TABLE_FORMAT_AFITS
198           && tableformat != GAL_TABLE_FORMAT_BFITS )
199         error(EXIT_FAILURE, 0, "'%s' (output file) is a FITS file but "
200               "is not a recognized FITS table format. For FITS tables, "
201               "'--tableformat' can take two values: 'fits-ascii', or "
202               "'fits-binary'", filename);
203     }
204 }
205 
206 
207 
208 
209 
210 
211 
212 
213 
214 
215 
216 
217 
218 
219 
220 
221 
222 
223 
224 
225 /************************************************************************/
226 /***************          Printing information            ***************/
227 /************************************************************************/
228 /* Fill in/adjust the basic information necessary to print a column. This
229    information can be used for printing a plain text file or for FITS ASCII
230    tables. The 'fmt' and 'lng' should point to pre-allocated arrays. The
231    best way is: 'char fmt[2], lng[3];' in the same function calling this.
232 
233    The width and precision, which are also necessary for printing, are
234    updated in the data structure's 'disp_width' and 'disp_precision'
235    elements. */
236 void
gal_tableintern_col_print_info(gal_data_t * col,int tableformat,char * fmt,char * lng)237 gal_tableintern_col_print_info(gal_data_t *col, int tableformat,
238                                char *fmt, char *lng)
239 {
240   size_t j;
241   char **strarr;
242   int maxstrlen, width=0, precision=0;
243 
244 
245   /* First do a sanity check, so we can safly stop checking in the steps
246      below. */
247   switch(tableformat)
248     {
249     case GAL_TABLE_FORMAT_TXT:
250     case GAL_TABLE_FORMAT_AFITS:
251       break;
252     default:
253       error(EXIT_FAILURE, 0, "%s: is only for plain text or FITS ASCII "
254             "tables. The input 'tableformat' code %d not recognized",
255             __func__, tableformat);
256     }
257 
258 
259 
260   /* Set the formats and widths based on the type of the column. Initialize
261      the characters and blank pointer. The long prefix is not necessary for
262      most types, so just initialize it once up here.*/
263   fmt[0]=fmt[1]=lng[0]=lng[1]=lng[2]='\0';
264   switch(col->type)
265     {
266     case GAL_TYPE_BIT:
267       error(EXIT_FAILURE, 0, "%s: printing of bit types is currently "
268             "not supported", __func__);
269       break;
270 
271 
272 
273 
274     case GAL_TYPE_STRING:
275 
276       /* Set the basic information. */
277       fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 's' : 'A';
278 
279       /* Go through all the strings in the column and find the maximum
280          length to use as printing. If the user asked for a larger width
281          (through the data structure's disp_width element), then set
282          that. */
283       maxstrlen=0;
284       strarr=col->array;
285       for(j=0;j<col->size;++j)
286         maxstrlen = ( (int)strlen(strarr[j]) > maxstrlen
287                       ? (int)strlen(strarr[j]) : maxstrlen );
288       width = col->disp_width>maxstrlen ? col->disp_width : maxstrlen;
289       break;
290 
291 
292 
293 
294     case GAL_TYPE_UINT8:
295     case GAL_TYPE_UINT16:
296     case GAL_TYPE_UINT32:
297     case GAL_TYPE_UINT64:
298 
299       /* For the FITS ASCII table, there is only one format for all
300          integers.  */
301       if(tableformat==GAL_TABLE_FORMAT_AFITS)
302         fmt[0]='I';
303       else
304         switch(col->disp_fmt)
305           {
306           case GAL_TABLE_DISPLAY_FMT_UDECIMAL: fmt[0]='u'; break;
307           case GAL_TABLE_DISPLAY_FMT_OCTAL:    fmt[0]='o'; break;
308           case GAL_TABLE_DISPLAY_FMT_HEX:      fmt[0]='X'; break;
309           default:                             fmt[0]='u';
310           }
311 
312       /* If we have a long type, then make changes. */
313       if(col->type==GAL_TYPE_UINT64)
314         {
315           lng[0]='l';
316           width=( col->disp_width<=0 ? GAL_TABLE_DEF_WIDTH_LINT
317                   : col->disp_width );
318         }
319       else width=( col->disp_width<=0 ? GAL_TABLE_DEF_WIDTH_INT
320                     : col->disp_width );
321       precision=( col->disp_precision<=0 ? GAL_TABLE_DEF_PRECISION_INT
322                   : col->disp_precision );
323       break;
324 
325 
326 
327 
328     case GAL_TYPE_INT8:
329     case GAL_TYPE_INT16:
330     case GAL_TYPE_INT32:
331       fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 'd' : 'I';
332       width = ( col->disp_width<=0 ? GAL_TABLE_DEF_WIDTH_INT
333                 : col->disp_width );
334       precision = ( col->disp_precision<=0 ? GAL_TABLE_DEF_PRECISION_INT
335                     : col->disp_precision );
336       break;
337 
338 
339 
340 
341     case GAL_TYPE_INT64:
342       lng[0] = 'l';
343       fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 'd' : 'I';
344       width=( col->disp_width<=0 ? GAL_TABLE_DEF_WIDTH_LINT
345               : col->disp_width );
346       precision=( col->disp_precision<=0 ? GAL_TABLE_DEF_PRECISION_INT
347                   : col->disp_precision );
348       break;
349 
350 
351 
352     /* We need a default value (because in most cases, it won't be set. */
353     case GAL_TYPE_FLOAT32:
354     case GAL_TYPE_FLOAT64:
355       /* Set the format. */
356       switch(col->disp_fmt)
357         {
358         case GAL_TABLE_DISPLAY_FMT_FLOAT:
359           fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 'f' : 'F'; break;
360         case GAL_TABLE_DISPLAY_FMT_EXP:
361           fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 'e' : 'E'; break;
362         case GAL_TABLE_DISPLAY_FMT_GENERAL:
363           fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 'g' : 'E'; break;
364         default:
365           fmt[0] = tableformat==GAL_TABLE_FORMAT_TXT ? 'g' : 'E'; break;
366         }
367 
368       /* Set the width and precision */
369       switch(col->type)
370         {
371         case GAL_TYPE_FLOAT32:
372           width     = ( col->disp_width<=0
373                         ? GAL_TABLE_DEF_WIDTH_FLT : col->disp_width );
374           precision = ( col->disp_precision<=0
375                         ? GAL_TABLE_DEF_PRECISION_FLT : col->disp_precision );
376           break;
377         case GAL_TYPE_FLOAT64:
378           width     = ( col->disp_width<=0
379                         ? GAL_TABLE_DEF_WIDTH_DBL : col->disp_width );
380 
381           /* CFITSIO doesn't recognize the double precision defined here
382              for ASCII FITS tables. */
383           precision = ( col->disp_precision<=0
384                         ? ( tableformat==GAL_TABLE_FORMAT_TXT
385                             ? GAL_TABLE_DEF_PRECISION_DBL
386                             : GAL_TABLE_DEF_PRECISION_FLT )
387                         : col->disp_precision );
388           break;
389         }
390       break;
391 
392 
393 
394     default:
395       error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
396             __func__, col->type);
397     }
398 
399   /* Write the final width and precision into the column's data structure. */
400   col->disp_width=width;
401   col->disp_precision=precision;
402 }
403 
404 
405 
406 
407 
408 /* Use the input 'blank' string and the input column to put the blank value
409    in the column's array. If the string cannot be interpretted as a blank
410    of that type, then store it in the 'mmapname' element of the data
411    structure. */
412 void
gal_tableintern_read_blank(gal_data_t * col,char * blank)413 gal_tableintern_read_blank(gal_data_t *col, char *blank)
414 {
415   /* If there is nothing to use as blank, then don't continue, note that
416      the column data structure was initialized to mean that there is no
417      blank value. */
418   if(blank==NULL) return;
419 
420   /* Just for a sanity check, the ndim and array elements should be zero. */
421   if(col->ndim || col->array)
422     error(EXIT_FAILURE, 0, "%s: the number of dimensions, and the "
423           "'array' element of 'col' must be zero", __func__);
424 
425   /* Read the blank value as the given type. If successful, then
426      'gal_data_string_to_type' will return 0. In that case, we need to
427      initialize the necessary parameters to read this data structure
428      correctly. If it isn't successful, then  */
429   if( gal_type_from_string((void **)(&col->array), blank, col->type) )
430     {
431       col->flag=GAL_TABLEINTERN_FLAG_ARRAY_IS_BLANK_STRING;
432       gal_checkset_allocate_copy(blank, (char **)(&col->array));
433     }
434   else
435     {
436       col->flag=0;
437       col->dsize=gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
438                                       "col->dsize");
439       col->dsize[0]=col->ndim=col->size=1;
440     }
441 }
442