1 /*********************************************************************
2 table -- Functions for I/O on tables.
3 This is part of GNU Astronomy Utilities (Gnuastro) package.
4
5 Original author:
6 Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2016-2021, Free Software Foundation, Inc.
9
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24
25 #include <stdio.h>
26 #include <errno.h>
27 #include <error.h>
28 #include <regex.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #include <gnuastro/git.h>
33 #include <gnuastro/txt.h>
34 #include <gnuastro/blank.h>
35 #include <gnuastro/table.h>
36
37 #include <gnuastro-internal/timing.h>
38 #include <gnuastro-internal/checkset.h>
39 #include <gnuastro-internal/tableintern.h>
40
41
42
43
44
45
46
47
48
49 /************************************************************************/
50 /*************** Information about a table ***************/
51 /************************************************************************/
52 /* Store the information of each column in a table (either as a text file
53 or as a FITS table) into an array of data structures with 'numcols'
54 structures (one data structure for each column). The number of rows is
55 stored in 'numrows'. The type of the table (e.g., ascii text file, or
56 FITS binary or ASCII table) will be put in 'tableformat' (macros defined
57 in 'gnuastro/table.h'.
58
59 Note that other than the character strings (column name, units and
60 comments), nothing in the data structure(s) will be allocated by this
61 function for the actual data (e.g., the 'array' or 'dsize' elements). */
62 gal_data_t *
gal_table_info(char * filename,char * hdu,gal_list_str_t * lines,size_t * numcols,size_t * numrows,int * tableformat)63 gal_table_info(char *filename, char *hdu, gal_list_str_t *lines,
64 size_t *numcols, size_t *numrows, int *tableformat)
65 {
66 /* Get the table format and size (number of columns and rows). */
67 if(filename && gal_fits_file_recognized(filename))
68 return gal_fits_tab_info(filename, hdu, numcols, numrows, tableformat);
69 else
70 {
71 *tableformat=GAL_TABLE_FORMAT_TXT;
72 return gal_txt_table_info(filename, lines, numcols, numrows);
73 }
74
75 /* Abort with an error if we get to this point. */
76 error(EXIT_FAILURE, 0, "%s: a bug! please contact us at %s so we can fix "
77 "the problem. Control must not have reached the end of this function",
78 __func__, PACKAGE_BUGREPORT);
79 return NULL;
80 }
81
82
83
84
85
86 void
gal_table_print_info(gal_data_t * allcols,size_t numcols,size_t numrows)87 gal_table_print_info(gal_data_t *allcols, size_t numcols, size_t numrows)
88 {
89 size_t i;
90 int Nw=3, nw=4, uw=5, tw=4; /* Initial width from label's width */
91 char *name, *unit, *comment;
92
93 /* If there aren't any columns, there is no need to print anything. */
94 if(numcols==0) return;
95
96 /* Set the widths to print the column information. The width for the
97 column number can easily be identified from the logarithm of the
98 number of columns. */
99 Nw=log10(numcols)+1;
100 for(i=0;i<numcols;++i)
101 {
102 if(allcols[i].name && strlen(allcols[i].name)>nw)
103 nw=strlen(allcols[i].name);
104 if(allcols[i].unit && strlen(allcols[i].unit)>uw)
105 uw=strlen(allcols[i].unit);
106 if(allcols[i].type
107 && strlen(gal_type_name(allcols[i].type, 1))>tw)
108 tw=strlen(gal_type_name(allcols[i].type, 1));
109 }
110
111 /* We want one column space between the columns for readability, not the
112 exact length, so increment all the numbers. */
113 Nw+=2; nw+=2; uw+=2; tw+=2;
114
115 /* Print these column names. */
116 printf("%-*s%-*s%-*s%-*s%s\n", Nw, "---", nw, "----", uw,
117 "-----", tw, "----", "-------");
118 printf("%-*s%-*s%-*s%-*s%s\n", Nw, "No.", nw, "Name", uw,
119 "Units", tw, "Type", "Comment");
120 printf("%-*s%-*s%-*s%-*s%s\n", Nw, "---", nw, "----", uw,
121 "-----", tw, "----", "-------");
122
123 /* For each column, print the information, then free them. */
124 for(i=0;i<numcols;++i)
125 {
126 name = allcols[i].name; /* Just defined for easier */
127 unit = allcols[i].unit; /* readability. The compiler */
128 comment = allcols[i].comment; /* optimizer will remove them. */
129 printf("%-*zu%-*s%-*s%-*s%s\n", Nw, i+1,
130 nw, name ? name : GAL_BLANK_STRING ,
131 uw, unit ? unit : GAL_BLANK_STRING ,
132 tw,
133 allcols[i].type ? gal_type_name(allcols[i].type, 1) : "--",
134 comment ? comment : GAL_BLANK_STRING);
135 }
136
137 /* Print the number of rows. */
138 if(numrows!=GAL_BLANK_SIZE_T)
139 printf("--------\nNumber of rows: %zu\n--------\n", numrows);
140 }
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161 /************************************************************************/
162 /*************** Read a table ***************/
163 /************************************************************************/
164
/* Report a regular expression error and abort the program. This is taken
   from the GNU C Library manual, with small modifications to fit our
   style. 'errcode' is the non-zero value returned by the regex function,
   'compiled' is the regex structure it was called with and 'input' is
   the user's string (only used in the message). */
static void
table_regexerrorexit(int errcode, regex_t *compiled, char *input)
{
  size_t needed;
  char *message=NULL;

  /* With a NULL buffer of zero length, 'regerror' only reports how many
     bytes the full message needs. */
  needed=regerror(errcode, compiled, NULL, 0);

  /* Allocate the space and let 'regerror' fill it. */
  errno=0;
  message=malloc(needed);
  if(!message)
    error(EXIT_FAILURE, errno, "%s: allocating %zu bytes for regexerrbuf",
          __func__, needed);
  regerror(errcode, compiled, message, needed);

  /* Abort with the message (so 'message' is never freed here). */
  error(EXIT_FAILURE, 0, "%s: regular expression error: %s in value to "
        "'--column' ('-c'): '%s'", __func__, message, input);
}
183
184
185
186
187
188 /* Macro to set the string to search in */
189 static char *
table_set_strcheck(gal_data_t * col,int searchin)190 table_set_strcheck(gal_data_t *col, int searchin)
191 {
192 switch(searchin)
193 {
194 case GAL_TABLE_SEARCH_NAME:
195 return col->name;
196
197 case GAL_TABLE_SEARCH_UNIT:
198 return col->unit;
199
200 case GAL_TABLE_SEARCH_COMMENT:
201 return col->comment;
202
203 default:
204 error(EXIT_FAILURE, 0, "%s: the code %d to searchin was not "
205 "recognized", __func__, searchin);
206 }
207
208 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s so we can "
209 "address the problem. Control must not have reached the end of "
210 "this function", __func__, PACKAGE_BUGREPORT);
211 return NULL;
212 }
213
214
215
216
217
218 gal_list_sizet_t *
gal_table_list_of_indexs(gal_list_str_t * cols,gal_data_t * allcols,size_t numcols,int searchin,int ignorecase,char * filename,char * hdu,size_t * colmatch)219 gal_table_list_of_indexs(gal_list_str_t *cols, gal_data_t *allcols,
220 size_t numcols, int searchin, int ignorecase,
221 char *filename, char *hdu, size_t *colmatch)
222 {
223 long tlong;
224 int regreturn;
225 regex_t *regex;
226 gal_list_str_t *tmp;
227 gal_list_sizet_t *indexll=NULL;
228 size_t i, nummatch, colcount=0, len;
229 char *str, *strcheck, *tailptr, *errorstring;
230
231 /* Go over the given columns. */
232 if(cols)
233 for(tmp=cols; tmp!=NULL; tmp=tmp->next)
234 {
235 /* Counter for number of columns matched, and length of name. */
236 nummatch=0;
237 len=strlen(tmp->v);
238
239 /* REGULAR EXPRESSION: the first and last characters are '/'. */
240 if( tmp->v[0]=='/' && tmp->v[len-1]=='/' )
241 {
242 /* Remove the slashes, note that we don't want to change
243 'tmp->v' (because it should be freed later). So first we set
244 the last character to '\0', then define a new string from
245 the first element. */
246 tmp->v[len-1]='\0';
247 str = tmp->v + 1;
248
249 /* Allocate the regex_t structure: */
250 errno=0;
251 regex=malloc(sizeof *regex);
252 if(regex==NULL)
253 error(EXIT_FAILURE, errno, "%s: allocating %zu bytes for regex",
254 __func__, sizeof *regex);
255
256 /* First we have to "compile" the string into the regular
257 expression, see the "POSIX Regular Expression Compilation"
258 section of the GNU C Library.
259
260 About the case of the string: the FITS standard says: "It is
261 _strongly recommended_ that every field of the table be
262 assigned a unique, case insensitive name with this
263 keyword..." So the column names can be case-sensitive.
264
265 Here, we don't care about the details of a match, the only
266 important thing is a match, so we are using the REG_NOSUB
267 flag.*/
268 regreturn=0;
269 regreturn=regcomp(regex, str, ( ignorecase
270 ? RE_SYNTAX_AWK | REG_ICASE
271 : RE_SYNTAX_AWK ) );
272 if(regreturn)
273 table_regexerrorexit(regreturn, regex, str);
274
275
276 /* With the regex structure "compile"d you can go through all
277 the column names. Just note that column names are not
278 mandatory in the FITS standard, so some (or all) columns
279 might not have names, if so 'p->tname[i]' will be NULL. */
280 for(i=0;i<numcols;++i)
281 {
282 strcheck=table_set_strcheck(&allcols[i], searchin);
283 if(strcheck && regexec(regex, strcheck, 0, 0, 0)==0)
284 {
285 ++nummatch;
286 gal_list_sizet_add(&indexll, i);
287 }
288 }
289
290 /* Free the regex_t structure: */
291 regfree(regex);
292
293 /* Put the '/' back into the input string. This is done because
294 after this function, the calling program might want to
295 inform the user of their exact input string. */
296 tmp->v[len-1]='/';
297 }
298
299
300 /* Not regular expression. */
301 else
302 {
303 tlong=strtol(tmp->v, &tailptr, 0);
304
305 /* INTEGER: If the string is an integer, then tailptr should
306 point to the null character. If it points to anything else,
307 it shows that we are not dealing with an integer (usable as
308 a column number). So floating point values are also not
309 acceptable. Since it is possible for the users to give zero
310 for the column number, we need to read the string as a
311 number first, then check it here. */
312 if(*tailptr=='\0')
313 {
314 /* Make sure the number is larger than zero! */
315 if(tlong<=0)
316 error(EXIT_FAILURE, 0, "%s: column numbers must be "
317 "positive (not zero or negative). You have asked "
318 "for column number %ld", __func__, tlong);
319
320 /* Check if the given value is not larger than the number
321 of columns in the input catalog (note that the user is
322 counting from 1, not 0!) */
323 if(tlong>numcols)
324 error(EXIT_FAILURE, 0, "%s: has %zu columns, but you "
325 "have asked for column number %ld",
326 gal_fits_name_save_as_string(filename, hdu),
327 numcols, tlong);
328
329 /* Everything seems to be fine, put this column number in
330 the output column numbers linked list. Note that
331 internally, the column numbers start from 0, not 1.*/
332 gal_list_sizet_add(&indexll, tlong-1);
333 ++nummatch;
334 }
335
336
337
338 /* EXACT MATCH: */
339 else
340 {
341 /* Go through all the desired column information and add
342 the column number when there is a match. */
343 for(i=0;i<numcols;++i)
344 {
345 /* Check if this column actually has any
346 information. Then do a case-sensitive or insensitive
347 comparison of the strings. */
348 strcheck=table_set_strcheck(&allcols[i], searchin);
349 if(strcheck && ( ignorecase
350 ? !strcasecmp(tmp->v, strcheck)
351 : !strcmp(tmp->v, strcheck) ) )
352 {
353 ++nummatch;
354 gal_list_sizet_add(&indexll, i);
355 }
356 }
357 }
358 }
359
360
361 /* If there was no match, then report an error. This can only happen
362 for string matches, not column numbers, for numbers, the checks
363 are done (and program is aborted) before this step. */
364 if(nummatch==0)
365 {
366 if( asprintf(&errorstring, "'%s' didn't match any of the "
367 "column %ss.", tmp->v,
368 gal_tableintern_searchin_as_string(searchin))<0 )
369 error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
370 gal_tableintern_error_col_selection(filename, hdu, errorstring);
371 }
372
373
374 /* Keep the value of 'nummatch' if the user requested it. */
375 if(colmatch) colmatch[colcount++]=nummatch;
376 }
377
378 /* cols==NULL */
379 else
380 for(i=0;i<numcols;++i)
381 gal_list_sizet_add(&indexll, i);
382
383 /* Reverse the list. */
384 gal_list_sizet_reverse(&indexll);
385
386 /* For a check.
387 gal_list_sizet_print(indexll);
388 exit(0);
389 */
390
391 /* Return the list. */
392 return indexll;
393 }
394
395
396
397
398
399 /* Read the specified columns in a table (named 'filename') into a linked
400 list of data structures. If the file is FITS, then 'hdu' will also be
401 used, otherwise, 'hdu' is ignored. The information to search for columns
402 should be specified by the 'cols' linked list as string values in each
403 node of the list, the strings in each node can be a number, an exact
404 match to a column name, or a regular expression (in GNU AWK format)
405 enclosed in '/ /'. The 'searchin' value comes from the
406 'gal_table_where_to_search' enumerator and has to be one of its given
407 types. If 'cols' is NULL, then this function will read the full table.
408
409 The output is a linked list with the same order of the cols linked
410 list. Note that one column node in the 'cols' list might give multiple
411 columns, in this case, the order of output columns that correspond to
412 that one input, are in order of the table (which column was read first).
413 So the first requested column is the first popped data structure and so
414 on. */
415 gal_data_t *
gal_table_read(char * filename,char * hdu,gal_list_str_t * lines,gal_list_str_t * cols,int searchin,int ignorecase,size_t minmapsize,int quietmmap,size_t * colmatch)416 gal_table_read(char *filename, char *hdu, gal_list_str_t *lines,
417 gal_list_str_t *cols, int searchin, int ignorecase,
418 size_t minmapsize, int quietmmap, size_t *colmatch)
419 {
420 int tableformat;
421 gal_list_sizet_t *indexll;
422 size_t i, numcols, numrows;
423 gal_data_t *allcols, *out=NULL;
424
425 /* First get the information of all the columns. */
426 allcols=gal_table_info(filename, hdu, lines, &numcols, &numrows,
427 &tableformat);
428
429 /* If there was no actual data in the file, then return NULL. */
430 if(allcols==NULL) return NULL;
431
432 /* Get the list of indexs in the same order as the input list. */
433 indexll=gal_table_list_of_indexs(cols, allcols, numcols, searchin,
434 ignorecase, filename, hdu, colmatch);
435
436 /* Depending on the table format, read the columns into the output
437 structure. Note that the functions here pop each index, read/store the
438 desired column and pop the next, so after these functions, the output
439 linked list will have the opposite order of its input 'indexll'
440 list. So before calling any of them, we will first reverse the
441 'indexll' list, so the output data structure list will have the same
442 order as the input list of desired columns. Also note that after these
443 functions, the 'indexll' will be all freed (each popped element is
444 actually freed).*/
445 gal_list_sizet_reverse(&indexll);
446 switch(tableformat)
447 {
448 case GAL_TABLE_FORMAT_TXT:
449 out=gal_txt_table_read(filename, lines, numrows, allcols, indexll,
450 minmapsize, quietmmap);
451 break;
452
453 case GAL_TABLE_FORMAT_AFITS:
454 case GAL_TABLE_FORMAT_BFITS:
455 out=gal_fits_tab_read(filename, hdu, numrows, allcols, indexll,
456 minmapsize, quietmmap);
457 break;
458
459 default:
460 error(EXIT_FAILURE, 0, "%s: table format code %d not recognized for "
461 "'tableformat'", __func__, tableformat);
462 }
463
464 /* Clean up. */
465 for(i=0;i<numcols;++i)
466 gal_data_free_contents(&allcols[i]);
467 free(allcols);
468 gal_list_sizet_free(indexll);
469
470 /* Return the final linked list. */
471 return out;
472 }
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493 /************************************************************************/
494 /*************** Write a table ***************/
495 /************************************************************************/
496 /* Write the basic information that is necessary by each program into the
497 comments field. Note that the 'comments' has to be already sorted in the
498 proper order. */
499 void
gal_table_comments_add_intro(gal_list_str_t ** comments,char * program_string,time_t * rawtime)500 gal_table_comments_add_intro(gal_list_str_t **comments, char *program_string,
501 time_t *rawtime)
502 {
503 char gitdescribe[100], *tmp;
504
505 /* Get the Git description in the running folder. */
506 tmp=gal_git_describe();
507 if(tmp) { sprintf(gitdescribe, " from %s,", tmp); free(tmp); }
508 else gitdescribe[0]='\0';
509
510 /* Git version and time of program's starting, this will be the second
511 line. Note that ctime puts a '\n' at the end of its string, so we'll
512 have to remove that. Also, note that since we are allocating 'msg', we
513 are setting the allocate flag of 'gal_list_str_add' to 0. */
514 if( asprintf(&tmp, "Created%s on %s", gitdescribe, ctime(rawtime))<0 )
515 error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
516 tmp[ strlen(tmp)-1 ]='\0';
517 gal_list_str_add(comments, tmp, 0);
518
519 /* Program name: this will be the top of the list (first line). We will
520 need to set the allocation flag for this one, because program_string
521 is usually statically allocated.*/
522 if(program_string)
523 gal_list_str_add(comments, program_string, 1);
524 }
525
526
527
528
529
530 /* The input is a linked list of data structures and some comments. The
531 table will then be written into 'filename' with a format that is
532 specified by 'tableformat'. */
533 void
gal_table_write(gal_data_t * cols,struct gal_fits_list_key_t ** keylist,gal_list_str_t * comments,int tableformat,char * filename,char * extname,uint8_t colinfoinstdout)534 gal_table_write(gal_data_t *cols, struct gal_fits_list_key_t **keylist,
535 gal_list_str_t *comments, int tableformat, char *filename,
536 char *extname, uint8_t colinfoinstdout)
537 {
538 /* If a filename was given, then the tableformat is relevant and must be
539 used. When the filename is empty, a text table must be printed on the
540 standard output (on the command-line). */
541 if(filename)
542 {
543 if(gal_fits_name_is_fits(filename))
544 gal_fits_tab_write(cols, comments, tableformat, filename, extname,
545 keylist);
546 else
547 gal_txt_write(cols, keylist, comments, filename, colinfoinstdout);
548 }
549 else
550 /* Write to standard output. */
551 gal_txt_write(cols, keylist, comments, filename, colinfoinstdout);
552 }
553
554
555
556
557
558 void
gal_table_write_log(gal_data_t * logll,char * program_string,time_t * rawtime,gal_list_str_t * comments,char * filename,int quiet)559 gal_table_write_log(gal_data_t *logll, char *program_string,
560 time_t *rawtime, gal_list_str_t *comments,
561 char *filename, int quiet)
562 {
563 char *msg;
564
565 /* Write all the comments into */
566 gal_table_comments_add_intro(&comments, program_string, rawtime);
567
568 /* Write the log file to disk */
569 gal_table_write(logll, NULL, comments, GAL_TABLE_FORMAT_TXT,
570 filename, "LOG", 0);
571
572 /* In verbose mode, print the information. */
573 if(!quiet)
574 {
575 if( asprintf(&msg, "%s created.", filename)<0 )
576 error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
577 gal_timing_report(NULL, msg, 1);
578 free(msg);
579 }
580 }
581