1 /*********************************************************************
2 Table - View and manipulate a FITS table structures.
3 Table is part of GNU Astronomy Utilities (Gnuastro) package.
4 
5 Original author:
6      Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2016-2021, Free Software Foundation, Inc.
9 
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14 
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24 
25 #include <errno.h>
26 #include <error.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 
32 #include <gsl/gsl_rng.h>
33 #include <gsl/gsl_heapsort.h>
34 
35 #include <gnuastro/txt.h>
36 #include <gnuastro/wcs.h>
37 #include <gnuastro/fits.h>
38 #include <gnuastro/table.h>
39 #include <gnuastro/qsort.h>
40 #include <gnuastro/pointer.h>
41 #include <gnuastro/polygon.h>
42 #include <gnuastro/arithmetic.h>
43 #include <gnuastro/statistics.h>
44 #include <gnuastro/permutation.h>
45 
46 #include <gnuastro-internal/checkset.h>
47 
48 #include "main.h"
49 
50 #include "ui.h"
51 #include "arithmetic.h"
52 
53 
54 
55 /**************************************************************/
56 /********     Selecting and ordering of columns      **********/
57 /**************************************************************/
58 static void
table_apply_permutation(gal_data_t * table,size_t * permutation,size_t newsize,int inverse)59 table_apply_permutation(gal_data_t *table, size_t *permutation,
60                         size_t newsize, int inverse)
61 {
62   gal_data_t *tmp;
63 
64   for(tmp=table;tmp!=NULL;tmp=tmp->next)
65     {
66       /* Apply the permutation. */
67       if(inverse)
68         gal_permutation_apply_inverse(tmp, permutation);
69       else
70         gal_permutation_apply(tmp, permutation);
71 
72       /* Correct the size. */
73       tmp->size=tmp->dsize[0]=newsize;
74     }
75 }
76 
77 
78 
79 
80 
81 static void
table_bring_to_top(gal_data_t * table,gal_data_t * rowids)82 table_bring_to_top(gal_data_t *table, gal_data_t *rowids)
83 {
84   char **strarr;
85   gal_data_t *col;
86   size_t i, *ids=rowids->array;
87 
88   /* Make sure the rowids are sorted by increasing index. */
89   gal_statistics_sort_increasing(rowids);
90 
91   /* Go over each column and move the desired rows to the top. */
92   for(col=table;col!=NULL;col=col->next)
93     {
94       /* For easy operation if the column is a string. */
95       strarr = col->type==GAL_TYPE_STRING ? col->array : NULL;
96 
97       /* Move the desired rows up to the top. */
98       for(i=0;i<rowids->size;++i)
99         if( i != ids[i] )
100           {
101             /* Copy the contents. For strings, its just about freeing and
102                copying pointers. */
103             if(col->type==GAL_TYPE_STRING)
104               {
105                 free(strarr[i]);
106                 strarr[i]=strarr[ ids[i] ];
107                 strarr[ ids[i] ]=NULL;
108               }
109             else
110               memcpy(gal_pointer_increment(col->array, i,      col->type),
111                      gal_pointer_increment(col->array, ids[i], col->type),
112                      gal_type_sizeof(col->type));
113           }
114 
115       /* For string arrays, free the pointers of the remaining rows. */
116       if(col->type==GAL_TYPE_STRING)
117         for(i=rowids->size;i<col->size;++i)
118           if(strarr[i]) free(strarr[i]);
119 
120       /* Correct the size (this should be after freeing of the string
121          pointers. */
122       col->size = col->dsize[0] = rowids->size;
123     }
124 
125 }
126 
127 
128 
129 
130 
131 static gal_data_t *
table_selection_range(struct tableparams * p,gal_data_t * col)132 table_selection_range(struct tableparams *p, gal_data_t *col)
133 {
134   size_t one=1;
135   double *darr;
136   int numok=GAL_ARITHMETIC_FLAG_NUMOK;
137   int inplace=GAL_ARITHMETIC_FLAG_INPLACE;
138   gal_data_t *min=NULL, *max=NULL, *tmp, *ltmin, *gemax=NULL;
139 
140   /* First, make sure everything is OK. */
141   if(p->range==NULL)
142     error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the "
143           "problem at %s. 'p->range' should not be NULL at this point",
144           __func__, PACKAGE_BUGREPORT);
145 
146   /* Allocations. */
147   min=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
148                      NULL, NULL, NULL);
149   max=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
150                      NULL, NULL, NULL);
151 
152   /* Read the range of values for this column. */
153   darr=p->range->array;
154   ((double *)(min->array))[0] = darr[0];
155   ((double *)(max->array))[0] = darr[1];
156 
157   /* Move 'p->range' to the next element in the list and free the current
158      one (we have already read its values and don't need it any more). */
159   tmp=p->range;
160   p->range=p->range->next;
161   gal_data_free(tmp);
162 
163   /* Find all the elements outside this range (smaller than the minimum,
164      larger than the maximum or blank) as separate binary flags.. */
165   ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, numok, col, min);
166   gemax=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, numok, col, max);
167 
168   /* Merge them both into one array. */
169   ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, ltmin, gemax);
170 
171   /* For a check.
172   {
173     size_t i;
174     uint8_t *u=ltmin->array;
175     for(i=0;i<ltmin->size;++i) printf("%zu: %u\n", i, u[i]);
176     exit(0);
177   }
178   */
179 
180   /* Clean up and return. */
181   gal_data_free(gemax);
182   gal_data_free(min);
183   gal_data_free(max);
184   return ltmin;
185 }
186 
187 
188 
189 
190 
191 /* Read column value of any type as a double for the polygon options. */
192 static double
selection_polygon_read_point(gal_data_t * col,size_t i)193 selection_polygon_read_point(gal_data_t *col, size_t i)
194 {
195   /* Check and assign the points to the points array. */
196   switch(col->type)
197     {
198     case GAL_TYPE_INT8:    return (( int8_t   *)col->array)[i];
199     case GAL_TYPE_UINT8:   return (( uint8_t  *)col->array)[i];
200     case GAL_TYPE_UINT16:  return (( uint16_t *)col->array)[i];
201     case GAL_TYPE_INT16:   return (( int16_t  *)col->array)[i];
202     case GAL_TYPE_UINT32:  return (( uint32_t *)col->array)[i];
203     case GAL_TYPE_INT32:   return (( int32_t  *)col->array)[i];
204     case GAL_TYPE_UINT64:  return (( uint64_t *)col->array)[i];
205     case GAL_TYPE_INT64:   return (( int64_t  *)col->array)[i];
206     case GAL_TYPE_FLOAT32: return (( float    *)col->array)[i];
207     case GAL_TYPE_FLOAT64: return (( double   *)col->array)[i];
208     default:
209       error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
210             __func__, col->type);
211     }
212 
213   /* Control should not reach here. */
214   error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
215         "problem. Control should not reach the end of this function",
216         __func__, PACKAGE_BUGREPORT);
217   return NAN;
218 }
219 
220 
221 
222 
223 
224 /* Mask the rows that are not in the given polygon. */
225 static gal_data_t *
table_selection_polygon(struct tableparams * p,gal_data_t * col1,gal_data_t * col2,int in1out0)226 table_selection_polygon(struct tableparams *p, gal_data_t *col1,
227                         gal_data_t *col2, int in1out0)
228 {
229   uint8_t *oarr;
230   double point[2];
231   gal_data_t *out=NULL;
232   size_t i, psize=p->polygon->size/2;
233 
234   /* Allocate the output array: This array will have a '0' for the points
235      which are inside the polygon and '1' for those that are outside of it
236      (to be masked/removed from the input). */
237   out=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, col1->dsize, NULL, 0, -1, 1,
238                      NULL, NULL, NULL);
239   oarr=out->array;
240 
241   /* Loop through all the rows in the given columns and check the points.*/
242   for(i=0; i<col1->size; i++)
243     {
244       /* Read the column values as a double. */
245       point[0]=selection_polygon_read_point(col1, i);
246       point[1]=selection_polygon_read_point(col2, i);
247 
248       /* For '--inpolygon', if point is inside polygon, put 0, otherwise
249          1. Note that we are building a mask for the rows that must be
250          discarded, so we want '1' for the points we don't want. */
251       oarr[i] = (in1out0
252                  ? !gal_polygon_is_inside(p->polygon->array, point, psize)
253                  :  gal_polygon_is_inside(p->polygon->array, point, psize));
254 
255       /* For a check
256       printf("(%f,%f): %s, %u\n", point[0], point[1], oarr[i]);
257       */
258     }
259 
260   /* Return the output column. */
261   return out;
262 }
263 
264 
265 
266 
267 
268 /* Given a string dataset and a single string, return a 'uint8_t' array
269    with the same size as the string dataset that has a '1' for all the
270    elements that are equal. */
271 static gal_data_t *
table_selection_string_eq_ne(gal_data_t * column,char * reference,int e0n1)272 table_selection_string_eq_ne(gal_data_t *column, char *reference, int e0n1)
273 {
274   gal_data_t *out;
275   uint8_t *oarr, comp;
276   size_t i, size=column->size;
277   char **strarr=column->array;
278 
279   /* Allocate the output binary dataset. */
280   out=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, &size, NULL, 0, -1, 1,
281                      NULL, NULL, NULL);
282   oarr=out->array;
283 
284   /* Parse the values and mark the outputs IN THE OPPOSITE manner (we are
285      marking the ones that must be removed). */
286   for(i=0;i<size;++i)
287     {
288       comp=strcmp(strarr[i], reference);
289       oarr[i] = e0n1 ? (comp==0) : (comp!=0);
290     }
291 
292   /* Return. */
293   return out;
294 }
295 
296 
297 
298 
299 
300 static gal_data_t *
table_selection_equal_or_notequal(struct tableparams * p,gal_data_t * col,int e0n1)301 table_selection_equal_or_notequal(struct tableparams *p, gal_data_t *col,
302                                   int e0n1)
303 {
304   void *varr;
305   char **strarr;
306   size_t i, one=1;
307   int numok=GAL_ARITHMETIC_FLAG_NUMOK;
308   int inplace=GAL_ARITHMETIC_FLAG_INPLACE;
309   gal_data_t *eq, *out=NULL, *value=NULL;
310   gal_data_t *arg = e0n1 ? p->notequal : p->equal;
311 
312   /* Note that this operator is used to make the "masked" array, so when
313      'e0n1==0' the operator should be 'GAL_ARITHMETIC_OP_NE' and
314      vice-versa.
315 
316      For the merging with other elements, when 'e0n1==0', we need the
317      'GAL_ARITHMETIC_OP_AND', but for 'e0n1==1', it should be 'OR'. */
318   int mergeop  = e0n1 ? GAL_ARITHMETIC_OP_OR : GAL_ARITHMETIC_OP_AND;
319   int operator = e0n1 ? GAL_ARITHMETIC_OP_EQ : GAL_ARITHMETIC_OP_NE;
320 
321   /* First, make sure everything is OK. */
322   if(arg==NULL)
323     error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the "
324           "problem at %s. 'p->range' should not be NULL at this point",
325           __func__, PACKAGE_BUGREPORT);
326 
327   /* To easily parse the given values. */
328   strarr=arg->array;
329 
330   /* Go through the values given to this call of the option and flag the
331      elements. */
332   for(i=0;i<arg->size;++i)
333     {
334       /* Write the value  */
335       if(col->type==GAL_TYPE_STRING)
336         eq=table_selection_string_eq_ne(col, strarr[i], e0n1);
337       else
338         {
339           /* Allocate the value dataset. */
340           value=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
341                                NULL, NULL, NULL);
342           varr=value->array;
343 
344           /* Read the stored string as a float64. */
345           if( gal_type_from_string(&varr, strarr[i], GAL_TYPE_FLOAT64) )
346             {
347               fprintf(stderr, "%s couldn't be read as a number.\n", strarr[i]);
348               exit(EXIT_FAILURE);
349             }
350 
351           /* Mark the rows that are equal (irrespective of the column's
352              original numerical datatype). */
353           eq=gal_arithmetic(operator, 1, numok, col, value);
354         }
355 
356       /* Merge the results with (possible) previous results. */
357       if(out)
358         {
359           out=gal_arithmetic(mergeop, 1, inplace, out, eq);
360           gal_data_free(eq);
361         }
362       else
363         out=eq;
364     }
365 
366   /* For a check.
367   {
368     uint8_t *u=out->array;
369     for(i=0;i<out->size;++i) printf("%zu: %u\n", i, u[i]);
370     exit(0);
371   }
372   */
373 
374 
375   /* Move the main pointer to the next possible call of the given
376      option. Note that 'arg' already points to 'p->equal' or 'p->notequal',
377      so it will automatically be freed with the next step.*/
378   if(e0n1) p->notequal=p->notequal->next;
379   else     p->equal=p->equal->next;
380 
381   /* Clean up and return. */
382   gal_data_free(value);
383   gal_data_free(arg);
384   return out;
385 }
386 
387 
388 
389 
390 
391 static void
table_select_by_value(struct tableparams * p)392 table_select_by_value(struct tableparams *p)
393 {
394   gal_data_t *rowids;
395   struct list_select *tmp;
396   uint8_t *u, *uf, *ustart;
397   size_t i, *s, ngood=0;
398   int inplace=GAL_ARITHMETIC_FLAG_INPLACE;
399   gal_data_t *mask, *blmask, *addmask=NULL;
400 
401   /* It may happen that the input table is empty! In such cases, just
402      return and don't bother with this step. */
403   if(p->table->dsize==NULL) return;
404 
405   /* Allocate datasets for the necessary numbers and write them in. */
406   mask=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, p->table->dsize, NULL, 1,
407                       p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
408 
409   /* Go over each selection criteria and remove the necessary elements. */
410   for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
411     {
412       switch(tmp->type)
413         {
414         case SELECT_TYPE_RANGE:
415           addmask=table_selection_range(p, tmp->col);
416           break;
417 
418         /* '--inpolygon' and '--outpolygon' need two columns. */
419         case SELECT_TYPE_INPOLYGON:
420         case SELECT_TYPE_OUTPOLYGON:
421           addmask=table_selection_polygon(p, tmp->col, tmp->next->col,
422                                           tmp->type==SELECT_TYPE_INPOLYGON);
423           tmp=tmp->next;
424           break;
425 
426         case SELECT_TYPE_EQUAL:
427           addmask=table_selection_equal_or_notequal(p, tmp->col, 0);
428           break;
429 
430         case SELECT_TYPE_NOTEQUAL:
431           addmask=table_selection_equal_or_notequal(p, tmp->col, 1);
432           break;
433 
434         default:
435           error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s "
436                 "to fix the problem. The code %d is not a recognized "
437                 "range identifier", __func__, PACKAGE_BUGREPORT,
438                 tmp->type);
439         }
440 
441       /* Remove any blank elements. */
442       if(gal_blank_present(tmp->col, 1))
443         {
444           blmask = gal_arithmetic(GAL_ARITHMETIC_OP_ISBLANK, 1, 0, tmp->col);
445           addmask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace,
446                                  addmask, blmask);
447           gal_data_free(blmask);
448         }
449 
450       /* Add this mask array to the cumulative mask array (of all
451          selections). */
452       mask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, mask, addmask);
453 
454       /* For a check.
455          {
456            float *f=ref->array;
457            uint8_t *m=mask->array;
458            uint8_t *u=addmask->array, *uf=u+addmask->size;
459            printf("\n\nInput column: %s\n", ref->name ? ref->name : "No Name");
460            printf("Range: %g, %g\n", rarr[0], rarr[1]);
461            printf("%-20s%-20s%-20s\n", "Value", "This mask",
462            "Including previous");
463            do printf("%-20f%-20u%-20u\n", *f++, *u++, *m++); while(u<uf);
464            exit(0);
465            }
466         */
467 
468       /* Final clean up. */
469       gal_data_free(addmask);
470     }
471 
472   /* Find the final number of elements to print and allocate the array to
473      keep them. */
474   uf=(u=mask->array)+mask->size;
475   ngood=0; do if(*u==0) ++ngood; while(++u<uf);
476   rowids=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &ngood, NULL, 0,
477                         p->cp.minmapsize, p->cp.quietmmap, NULL,
478                         NULL, NULL);
479 
480   /* Fill-in the finally desired row-IDs. */
481   s=rowids->array;
482   ustart=mask->array;
483   uf=(u=mask->array)+mask->size;
484   do if(*u==0) *s++ = u-ustart; while(++u<uf);
485 
486   /* Move the desired rows to the top of the table. */
487   table_bring_to_top(p->table, rowids);
488 
489   /* If the sort column is not in the table (the proper range has already
490      been applied to it), and we need to sort the resulting columns
491      afterwards, we should also apply the permutation on the sort
492      column. */
493   if(p->sortcol && p->sortin==0) table_bring_to_top(p->sortcol, rowids);
494 
495   /* Clean up. */
496   i=0;
497   for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
498     { if(p->freeselect[i]) {gal_data_free(tmp->col); tmp->col=NULL;} ++i; }
499   ui_list_select_free(p->selectcol, 0);
500   free(p->freeselect);
501   gal_data_free(mask);
502   gal_data_free(rowids);
503 }
504 
505 
506 
507 
508 
509 static void
table_sort(struct tableparams * p)510 table_sort(struct tableparams *p)
511 {
512   gal_data_t *perm;
513   size_t c=0, *s, *sf;
514   int (*qsortfn)(const void *, const void *)=NULL;
515 
516   /* In case there are no columns to sort, skip this function. */
517   if(p->table->size==0) return;
518 
519   /* Allocate the permutation array and fill it. */
520   perm=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, p->table->dsize, NULL, 0,
521                       p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
522   sf=(s=perm->array)+perm->size; do *s=c++; while(++s<sf);
523 
524   /* For string columns, print a descriptive message. Note that some FITS
525      tables were found that do actually have numbers stored in string
526      types! */
527   if(p->sortcol->type==GAL_TYPE_STRING)
528     error(EXIT_FAILURE, 0, "sort column has a string type, but it can "
529           "(currently) only work on numbers.\n\n"
530           "TIP: if you know the columns contents are all numbers that are "
531           "just stored as strings, you can use this program to save the "
532           "table as a text file, modify the column meta-data (for example "
533           "to type 'i32' or 'f32' instead of 'strN'), then use this "
534           "program again to save it as a FITS table.\n\n"
535           "For more on column metadata in plain text format, please run "
536           "the following command (or see the 'Gnuastro text table format "
537           "section of the book/manual):\n\n"
538           "    $ info gnuastro \"gnuastro text table format\"");
539 
540   /* Set the proper qsort function. */
541   if(p->descending)
542     switch(p->sortcol->type)
543       {
544       case GAL_TYPE_UINT8:   qsortfn=gal_qsort_index_single_uint8_d;   break;
545       case GAL_TYPE_INT8:    qsortfn=gal_qsort_index_single_int8_d;    break;
546       case GAL_TYPE_UINT16:  qsortfn=gal_qsort_index_single_uint16_d;  break;
547       case GAL_TYPE_INT16:   qsortfn=gal_qsort_index_single_int16_d;   break;
548       case GAL_TYPE_UINT32:  qsortfn=gal_qsort_index_single_uint32_d;  break;
549       case GAL_TYPE_INT32:   qsortfn=gal_qsort_index_single_int32_d;   break;
550       case GAL_TYPE_UINT64:  qsortfn=gal_qsort_index_single_uint64_d;  break;
551       case GAL_TYPE_INT64:   qsortfn=gal_qsort_index_single_int64_d;   break;
552       case GAL_TYPE_FLOAT32: qsortfn=gal_qsort_index_single_float32_d; break;
553       case GAL_TYPE_FLOAT64: qsortfn=gal_qsort_index_single_float64_d; break;
554       default:
555         error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
556               "the problem. The code '%u' wasn't recognized as a data type",
557               __func__, PACKAGE_BUGREPORT, p->sortcol->type);
558       }
559   else
560     switch(p->sortcol->type)
561       {
562       case GAL_TYPE_UINT8:   qsortfn=gal_qsort_index_single_uint8_i;   break;
563       case GAL_TYPE_INT8:    qsortfn=gal_qsort_index_single_int8_i;    break;
564       case GAL_TYPE_UINT16:  qsortfn=gal_qsort_index_single_uint16_i;  break;
565       case GAL_TYPE_INT16:   qsortfn=gal_qsort_index_single_int16_i;   break;
566       case GAL_TYPE_UINT32:  qsortfn=gal_qsort_index_single_uint32_i;  break;
567       case GAL_TYPE_INT32:   qsortfn=gal_qsort_index_single_int32_i;   break;
568       case GAL_TYPE_UINT64:  qsortfn=gal_qsort_index_single_uint64_i;  break;
569       case GAL_TYPE_INT64:   qsortfn=gal_qsort_index_single_int64_i;   break;
570       case GAL_TYPE_FLOAT32: qsortfn=gal_qsort_index_single_float32_i; break;
571       case GAL_TYPE_FLOAT64: qsortfn=gal_qsort_index_single_float64_i; break;
572       default:
573         error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
574               "the problem. The code '%u' wasn't recognized as a data type",
575               __func__, PACKAGE_BUGREPORT, p->sortcol->type);
576       }
577 
578   /* Sort the indexs from the values. */
579   gal_qsort_index_single=p->sortcol->array;
580   qsort(perm->array, perm->size, sizeof *s, qsortfn);
581 
582   /* For a check (only on float32 type 'sortcol'):
583   {
584     float *f=p->sortcol->array;
585     sf=(s=perm->array)+perm->size;
586     do printf("%f\n", f[*s]); while(++s<sf);
587     exit(0);
588   }
589   */
590 
591   /* Sort all the output columns with this permutation. */
592   table_apply_permutation(p->table, perm->array, perm->size, 0);
593 
594   /* Clean up. */
595   gal_data_free(perm);
596   if(p->freesort) gal_data_free(p->sortcol);
597 }
598 
599 
600 
601 
602 
603 /* Apply random row selection. If the returned value is 'EXIT_SUCCESS',
604    then, it was successful. Otherwise, it will return 'EXIT_FAILURE' and
605    the input won't be touched. */
606 static int
table_random_rows(gal_data_t * table,gsl_rng * rng,size_t numrandom,size_t minmapsize,int quietmmap)607 table_random_rows(gal_data_t *table, gsl_rng *rng, size_t numrandom,
608                   size_t minmapsize, int quietmmap)
609 {
610   int bad;
611   gal_data_t *rowids;
612   size_t i, j, *ids, ind;
613 
614   /* Sanity check. */
615   if(numrandom>table->size)
616     return EXIT_FAILURE;
617 
618   /* Allocate space for the list of rows to use. */
619   rowids=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &numrandom, NULL, 0,
620                         minmapsize, quietmmap, NULL, NULL, NULL);
621 
622   /* Select the row numbers. */
623   ids=rowids->array;
624   for(i=0;i<numrandom;++i)
625     {
626       /* Select a random index and make sure its new. */
627       bad=1;
628       while(bad)
629         {
630           ind = gsl_rng_uniform(rng) * table->size;
631           for(j=0;j<i;++j) if(ids[j]==ind) break;
632           if(i==j) bad=0;
633         }
634       ids[i]=ind;
635     }
636 
637   /* Move the desired rows to the top. */
638   table_bring_to_top(table, rowids);
639 
640   /* Clean up and return. */
641   gal_data_free(rowids);
642   return EXIT_SUCCESS;
643 }
644 
645 
646 
647 
648 
649 static void
table_select_by_position(struct tableparams * p)650 table_select_by_position(struct tableparams *p)
651 {
652   char **strarr;
653   gal_data_t *col;
654   size_t i, start, end;
655   double *darr = p->rowlimit ? p->rowlimit->array : NULL;
656 
657   /* Random row selection (by position, not value). This step is
658      independent of the other operations of this function, so as soon as
659      its finished return. */
660   if(p->rowrandom)
661     {
662       if( table_random_rows(p->table, p->rng, p->rowrandom,
663                             p->cp.minmapsize, p->cp.quietmmap)
664           == EXIT_FAILURE && p->cp.quiet==0 )
665         error(EXIT_SUCCESS, 0, "'--rowrandom' not activated because "
666               "the number of rows in the table at this stage (%zu) "
667               "is smaller than the number of requested random rows "
668               "(%zu). You can suppress this message with '--quiet'",
669               p->table->size, p->rowrandom);
670       return;
671     }
672 
673   /* Sanity check  */
674   if(p->rowlimit)
675     {
676       if(darr[0]>=p->table->size)
677         error(EXIT_FAILURE, 0, "the first value to '--rowlimit' (%g) "
678               "is larger than the number of rows (%zu)",
679               darr[0]+1, p->table->size);
680       else if( darr[1]>=p->table->size )
681         error(EXIT_FAILURE, 0, "the second value to '--rowlimit' (%g) "
682               "is larger than the number of rows (%zu)",
683               darr[1]+1, p->table->size);
684     }
685 
686   /* Go over all the columns and make the necessary corrections. */
687   for(col=p->table;col!=NULL;col=col->next)
688     {
689       /* FOR STRING: we'll need to free the individual strings that will
690          not be used (outside the allocated array directly
691          'gal_data_t'). We don't have to worry about the space for the
692          actual pointers (they will be free'd by 'free' in any case, since
693          they are in the initially allocated array).*/
694       if(col->type==GAL_TYPE_STRING)
695         {
696           /* Parse the rows and free extra pointers. */
697           strarr=col->array;
698           if(p->rowlimit)
699             {
700               /* Note that the given values to '--rowlimit' started from 1,
701                  but in 'ui.c' we subtracted one from it (so at this stage,
702                  it starts from 0). */
703               start = darr[0];
704               end   = darr[1];
705               for(i=0;i<p->table->size;++i)
706                 if(i<start || i>end) { free(strarr[i]); strarr[i]=NULL; }
707             }
708           else
709             {
710               /* Free their allocated spaces. */
711               start = p->head!=GAL_BLANK_SIZE_T ? p->head        : 0;
712               end   = p->head!=GAL_BLANK_SIZE_T ? p->table->size : p->tail;
713               for(i=start; i<end; ++i) { free(strarr[i]); strarr[i]=NULL; }
714             }
715         }
716 
717       /* Make the final adjustment. */
718       if(p->rowlimit)
719         {
720           /* Move the values up to the top and correct the size. */
721           col->size=darr[1]-darr[0]+1;
722           memmove(col->array,
723                   gal_pointer_increment(col->array, darr[0], col->type),
724                   (darr[1]-darr[0]+1)*gal_type_sizeof(col->type));
725         }
726       else
727         {
728           /* For '--tail', we'll need to bring the last columns to the
729              start. Note that we are using 'memmove' because we want to be
730              safe with overlap. */
731           if(p->tail!=GAL_BLANK_SIZE_T)
732             memmove(col->array,
733                     gal_pointer_increment(col->array, col->size - p->tail,
734                                           col->type),
735                     p->tail*gal_type_sizeof(col->type));
736 
737           /* In any case (head or tail), the new number of column elements
738              is the given value. */
739           col->size = col->dsize[0] = ( p->head!=GAL_BLANK_SIZE_T
740                                         ? p->head
741                                         : p->tail );
742         }
743     }
744 }
745 
746 
747 
748 
749 
750 /* Import columns from another file/table into the working table. */
751 static void
table_catcolumn(struct tableparams * p)752 table_catcolumn(struct tableparams *p)
753 {
754   size_t counter=1;
755   gal_list_str_t *filell, *hdull;
756   gal_data_t *tocat, *final, *newcol;
757   char *tmpname, *hdu=NULL, cstr[100];
758   struct gal_options_common_params *cp=&p->cp;
759 
760   /* Go over all the given files. */
761   hdull=p->catcolumnhdu;
762   for(filell=p->catcolumnfile; filell!=NULL; filell=filell->next)
763     {
764       /* Set the HDU (not necessary for non-FITS tables). */
765       if(gal_fits_file_recognized(filell->v))
766         {
767           if(hdull) { hdu=hdull->v; hdull=hdull->next; }
768           else
769             error(EXIT_FAILURE, 0, "not enough '--catcolumnhdu's (or '-u'). "
770                   "For every FITS table given to '--catcolumnfile'. A call to "
771                   "'--catcolumnhdu' is necessary to identify its "
772                   "HDU/extension");
773         }
774       else hdu=NULL;
775 
776       /* Read the catcolumn table. */
777       tocat=gal_table_read(filell->v, hdu, NULL, p->catcolumns, cp->searchin,
778                            cp->ignorecase, cp->minmapsize, p->cp.quietmmap,
779                            NULL);
780 
781       /* Check the number of rows. */
782       if(tocat->dsize[0]!=p->table->dsize[0])
783         error(EXIT_FAILURE, 0, "%s: incorrect number of rows. The table given "
784               "to '--catcolumn' must have the same number of rows as the "
785               "main argument (after all row-selections have been applied), "
786               "but they have %zu and %zu rows respectively",
787               gal_fits_name_save_as_string(filell->v, hdu), tocat->dsize[0],
788               p->table->dsize[0]);
789 
790       /* Append a counter to the column names because this option is most
791          often used with columns that have a similar name and it would help
792          the user if the output doesn't have multiple columns with same
793          name. */
794       if(p->catcolumnrawname==0)
795         for(newcol=tocat; newcol!=NULL; newcol=newcol->next)
796           if(newcol->name)
797             {
798               /* Add the counter suffix to the column name. */
799               sprintf(cstr, "-%zu", counter);
800               tmpname=gal_checkset_malloc_cat(newcol->name, cstr);
801 
802               /* Free the old name and put in the new one. */
803               free(newcol->name);
804               newcol->name=tmpname;
805             }
806 
807       /* Find the final column of the main table and add this table.*/
808       final=gal_list_data_last(p->table);
809       final->next=tocat;
810       ++counter;
811     }
812 }
813 
814 
815 
816 
817 
818 void
table_colmetadata(struct tableparams * p)819 table_colmetadata(struct tableparams *p)
820 {
821   char **strarr;
822   gal_data_t *meta, *col;
823   size_t counter, *colnum;
824 
825   /* Loop through all the given updates and implement them. */
826   for(meta=p->colmetadata;meta!=NULL;meta=meta->next)
827     {
828       /* If the given column specifier is a name (not parse-able as a
829          number), then this condition will fail. */
830       colnum=NULL;
831       if( gal_type_from_string((void **)(&colnum), meta->name,
832                                GAL_TYPE_SIZE_T) )
833         {
834           /* We have been given a string, so find the first column that has
835              the same name. */
836           for(col=p->table; col!=NULL; col=col->next)
837             if(!strcmp(col->name, meta->name)) break;
838         }
839       /* The column specifier is a number. */
840       else
841         {
842           /* Go over the columns and find the one with this counter. */
843           counter=1;
844           for(col=p->table; col!=NULL; col=col->next)
845             if(counter++==colnum[0]) break;
846 
847           /* Clean up the space that was allocated for 'colnum' (its not
848              allocated when the given value was a string). */
849           free(colnum);
850         }
851 
852       /* If a match was found, then 'col' should not be NULL. */
853       if(col==NULL)
854         error(EXIT_FAILURE, 0, "no column found for '%s' (given to "
855               "'--colmetadata'). Columns can either be specified by "
856               "their position in the output table (integer counter, "
857               "starting from 1), or their name (the first column "
858               "found with the given name will be used)", meta->name);
859 
860       /* The matching column is found and we know that atleast one value is
861          already given (otherwise 'gal_options_parse_name_and_values' would
862          abort the program). The first given string is the new name. */
863       strarr=meta->array;
864       if(col->name) free(col->name);
865       gal_checkset_allocate_copy(strarr[0], &col->name);
866 
867       /* If more than one string is given, the second one is the new
868          unit. */
869       if(meta->size>1)
870         {
871           /* Replace the unit. */
872           if(col->unit) free(col->unit);
873           gal_checkset_allocate_copy(strarr[1], &col->unit);
874 
875           /* The next element is the comment of the column. */
876           if(meta->size>2)
877             {
878               if(col->comment) free(col->comment);
879               gal_checkset_allocate_copy(strarr[2], &col->comment);
880             }
881         }
882     }
883 }
884 
885 
886 
887 
888 
889 void
table_noblank(struct tableparams * p)890 table_noblank(struct tableparams *p)
891 {
892   int found;
893   size_t i, j, *index;
894   gal_data_t *tcol, *flag;
895   char **strarr=p->noblank->array;
896   gal_list_sizet_t *column_indexs=NULL;
897 
898   /* See if all columns should be checked, or just a select few. */
899   if( p->noblank->size==1 && !strcmp(strarr[0],"_all") )
900     {
901       for(i=0;i<gal_list_data_number(p->table);++i)
902         gal_list_sizet_add(&column_indexs, i);
903     }
904 
905   /* Only certain columns should be checked, so find/add their index. */
906   else
907     for(i=0;i<p->noblank->size;++i)
908       {
909         /* First go through the column names and if they match, add
910            them. Note that we don't want to stop once a name is found, in
911            this scenario, if multiple columns have the same name, we should
912            use all.*/
913         j=0;
914         found=0;
915         for(tcol=p->table; tcol!=NULL; tcol=tcol->next)
916           {
917             if( tcol->name && !strcmp(tcol->name, strarr[i]) )
918               {
919                 found=1;
920                 gal_list_sizet_add(&column_indexs, j);
921               }
922             ++j;
923           }
924 
925         /* If the given string didn't match any column name, it must be a
926            number, so parse it as a number and use that number. */
927         if(found==0)
928           {
929             /* Parse the given index. */
930             index=NULL;
931             if( gal_type_from_string((void **)(&index), strarr[i],
932                                      GAL_TYPE_SIZE_T) )
933               error(EXIT_FAILURE, 0, "column '%s' didn't match any of the "
934                     "final column names and can't be parsed as a column "
935                     "counter (starting from 1) either", strarr[i]);
936 
937             /* Make sure its not zero (the user counts from 1). */
938             if(*index==0)
939               error(EXIT_FAILURE, 0, "the column number (given to the "
940                     "'--noblank' option) should start from 1, but you have "
941                     "given 0.");
942 
943             /* Make sure that the index falls within the number (note that
944                it still counts from 1).  */
945             if(*index > gal_list_data_number(p->table))
946               error(EXIT_FAILURE, 0, "the final output table only has %zu "
947                     "columns, but you have given column %zu to '--noblank'. "
948                     "Recall that '--noblank' operates on the output columns "
949                     "and that you can also use output column names (if they "
950                     "have any)",
951                     gal_list_data_number(p->table), *index);
952 
953             /* Everything is fine, add the index to the list of columns to
954                check. */
955             gal_list_sizet_add(&column_indexs, *index-1);
956 
957             /* Clean up. */
958             free(index);
959           }
960 
961         /* For a check.
962            printf("%zu\n", column_indexs->v);
963         */
964       }
965 
966   /* Remove all blank rows from the output table, note that we don't need
967      the flags of the removed columns here. So we can just free it up. */
968   flag=gal_blank_remove_rows(p->table, column_indexs);
969   gal_data_free(flag);
970 }
971 
972 
973 
974 
975 
976 
977 
978 
979 
980 
981 
982 
983 
984 
985 
986 
987 
988 
989 /**************************************************************/
990 /***************       Top function         *******************/
991 /**************************************************************/
992 void
table(struct tableparams * p)993 table(struct tableparams *p)
994 {
995   /* Concatenate the columns of tables (if required)*/
996   if(p->catcolumnfile) table_catcolumn(p);
997 
998   /* Apply ranges based on row values (if required). */
999   if(p->selection) table_select_by_value(p);
1000 
1001   /* Sort it (if required). */
1002   if(p->sort) table_sort(p);
1003 
1004   /* If the output number of rows is limited, apply them. */
1005   if( p->rowlimit
1006       || p->rowrandom
1007       || p->head!=GAL_BLANK_SIZE_T
1008       || p->tail!=GAL_BLANK_SIZE_T )
1009     table_select_by_position(p);
1010 
1011   /* If any arithmetic operations are needed, do them. */
1012   if(p->outcols)
1013     arithmetic_operate(p);
1014 
1015   /* When column metadata should be updated. */
1016   if(p->colmetadata) table_colmetadata(p);
1017 
1018   /* When any columns with blanks should be removed. */
1019   if(p->noblank) table_noblank(p);
1020 
1021   /* Write the output. */
1022   gal_table_write(p->table, NULL, NULL, p->cp.tableformat, p->cp.output,
1023                   "TABLE", p->colinfoinstdout);
1024 }
1025