1 /*********************************************************************
2 Table - View and manipulate a FITS table structures.
3 Table is part of GNU Astronomy Utilities (Gnuastro) package.
4
5 Original author:
6 Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2016-2021, Free Software Foundation, Inc.
9
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24
25 #include <errno.h>
26 #include <error.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31
32 #include <gsl/gsl_rng.h>
33 #include <gsl/gsl_heapsort.h>
34
35 #include <gnuastro/txt.h>
36 #include <gnuastro/wcs.h>
37 #include <gnuastro/fits.h>
38 #include <gnuastro/table.h>
39 #include <gnuastro/qsort.h>
40 #include <gnuastro/pointer.h>
41 #include <gnuastro/polygon.h>
42 #include <gnuastro/arithmetic.h>
43 #include <gnuastro/statistics.h>
44 #include <gnuastro/permutation.h>
45
46 #include <gnuastro-internal/checkset.h>
47
48 #include "main.h"
49
50 #include "ui.h"
51 #include "arithmetic.h"
52
53
54
55 /**************************************************************/
56 /******** Selecting and ordering of columns **********/
57 /**************************************************************/
58 static void
table_apply_permutation(gal_data_t * table,size_t * permutation,size_t newsize,int inverse)59 table_apply_permutation(gal_data_t *table, size_t *permutation,
60 size_t newsize, int inverse)
61 {
62 gal_data_t *tmp;
63
64 for(tmp=table;tmp!=NULL;tmp=tmp->next)
65 {
66 /* Apply the permutation. */
67 if(inverse)
68 gal_permutation_apply_inverse(tmp, permutation);
69 else
70 gal_permutation_apply(tmp, permutation);
71
72 /* Correct the size. */
73 tmp->size=tmp->dsize[0]=newsize;
74 }
75 }
76
77
78
79
80
81 static void
table_bring_to_top(gal_data_t * table,gal_data_t * rowids)82 table_bring_to_top(gal_data_t *table, gal_data_t *rowids)
83 {
84 char **strarr;
85 gal_data_t *col;
86 size_t i, *ids=rowids->array;
87
88 /* Make sure the rowids are sorted by increasing index. */
89 gal_statistics_sort_increasing(rowids);
90
91 /* Go over each column and move the desired rows to the top. */
92 for(col=table;col!=NULL;col=col->next)
93 {
94 /* For easy operation if the column is a string. */
95 strarr = col->type==GAL_TYPE_STRING ? col->array : NULL;
96
97 /* Move the desired rows up to the top. */
98 for(i=0;i<rowids->size;++i)
99 if( i != ids[i] )
100 {
101 /* Copy the contents. For strings, its just about freeing and
102 copying pointers. */
103 if(col->type==GAL_TYPE_STRING)
104 {
105 free(strarr[i]);
106 strarr[i]=strarr[ ids[i] ];
107 strarr[ ids[i] ]=NULL;
108 }
109 else
110 memcpy(gal_pointer_increment(col->array, i, col->type),
111 gal_pointer_increment(col->array, ids[i], col->type),
112 gal_type_sizeof(col->type));
113 }
114
115 /* For string arrays, free the pointers of the remaining rows. */
116 if(col->type==GAL_TYPE_STRING)
117 for(i=rowids->size;i<col->size;++i)
118 if(strarr[i]) free(strarr[i]);
119
120 /* Correct the size (this should be after freeing of the string
121 pointers. */
122 col->size = col->dsize[0] = rowids->size;
123 }
124
125 }
126
127
128
129
130
131 static gal_data_t *
table_selection_range(struct tableparams * p,gal_data_t * col)132 table_selection_range(struct tableparams *p, gal_data_t *col)
133 {
134 size_t one=1;
135 double *darr;
136 int numok=GAL_ARITHMETIC_FLAG_NUMOK;
137 int inplace=GAL_ARITHMETIC_FLAG_INPLACE;
138 gal_data_t *min=NULL, *max=NULL, *tmp, *ltmin, *gemax=NULL;
139
140 /* First, make sure everything is OK. */
141 if(p->range==NULL)
142 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the "
143 "problem at %s. 'p->range' should not be NULL at this point",
144 __func__, PACKAGE_BUGREPORT);
145
146 /* Allocations. */
147 min=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
148 NULL, NULL, NULL);
149 max=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
150 NULL, NULL, NULL);
151
152 /* Read the range of values for this column. */
153 darr=p->range->array;
154 ((double *)(min->array))[0] = darr[0];
155 ((double *)(max->array))[0] = darr[1];
156
157 /* Move 'p->range' to the next element in the list and free the current
158 one (we have already read its values and don't need it any more). */
159 tmp=p->range;
160 p->range=p->range->next;
161 gal_data_free(tmp);
162
163 /* Find all the elements outside this range (smaller than the minimum,
164 larger than the maximum or blank) as separate binary flags.. */
165 ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_LT, 1, numok, col, min);
166 gemax=gal_arithmetic(GAL_ARITHMETIC_OP_GE, 1, numok, col, max);
167
168 /* Merge them both into one array. */
169 ltmin=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, ltmin, gemax);
170
171 /* For a check.
172 {
173 size_t i;
174 uint8_t *u=ltmin->array;
175 for(i=0;i<ltmin->size;++i) printf("%zu: %u\n", i, u[i]);
176 exit(0);
177 }
178 */
179
180 /* Clean up and return. */
181 gal_data_free(gemax);
182 gal_data_free(min);
183 gal_data_free(max);
184 return ltmin;
185 }
186
187
188
189
190
191 /* Read column value of any type as a double for the polygon options. */
192 static double
selection_polygon_read_point(gal_data_t * col,size_t i)193 selection_polygon_read_point(gal_data_t *col, size_t i)
194 {
195 /* Check and assign the points to the points array. */
196 switch(col->type)
197 {
198 case GAL_TYPE_INT8: return (( int8_t *)col->array)[i];
199 case GAL_TYPE_UINT8: return (( uint8_t *)col->array)[i];
200 case GAL_TYPE_UINT16: return (( uint16_t *)col->array)[i];
201 case GAL_TYPE_INT16: return (( int16_t *)col->array)[i];
202 case GAL_TYPE_UINT32: return (( uint32_t *)col->array)[i];
203 case GAL_TYPE_INT32: return (( int32_t *)col->array)[i];
204 case GAL_TYPE_UINT64: return (( uint64_t *)col->array)[i];
205 case GAL_TYPE_INT64: return (( int64_t *)col->array)[i];
206 case GAL_TYPE_FLOAT32: return (( float *)col->array)[i];
207 case GAL_TYPE_FLOAT64: return (( double *)col->array)[i];
208 default:
209 error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
210 __func__, col->type);
211 }
212
213 /* Control should not reach here. */
214 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
215 "problem. Control should not reach the end of this function",
216 __func__, PACKAGE_BUGREPORT);
217 return NAN;
218 }
219
220
221
222
223
224 /* Mask the rows that are not in the given polygon. */
225 static gal_data_t *
table_selection_polygon(struct tableparams * p,gal_data_t * col1,gal_data_t * col2,int in1out0)226 table_selection_polygon(struct tableparams *p, gal_data_t *col1,
227 gal_data_t *col2, int in1out0)
228 {
229 uint8_t *oarr;
230 double point[2];
231 gal_data_t *out=NULL;
232 size_t i, psize=p->polygon->size/2;
233
234 /* Allocate the output array: This array will have a '0' for the points
235 which are inside the polygon and '1' for those that are outside of it
236 (to be masked/removed from the input). */
237 out=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, col1->dsize, NULL, 0, -1, 1,
238 NULL, NULL, NULL);
239 oarr=out->array;
240
241 /* Loop through all the rows in the given columns and check the points.*/
242 for(i=0; i<col1->size; i++)
243 {
244 /* Read the column values as a double. */
245 point[0]=selection_polygon_read_point(col1, i);
246 point[1]=selection_polygon_read_point(col2, i);
247
248 /* For '--inpolygon', if point is inside polygon, put 0, otherwise
249 1. Note that we are building a mask for the rows that must be
250 discarded, so we want '1' for the points we don't want. */
251 oarr[i] = (in1out0
252 ? !gal_polygon_is_inside(p->polygon->array, point, psize)
253 : gal_polygon_is_inside(p->polygon->array, point, psize));
254
255 /* For a check
256 printf("(%f,%f): %s, %u\n", point[0], point[1], oarr[i]);
257 */
258 }
259
260 /* Return the output column. */
261 return out;
262 }
263
264
265
266
267
268 /* Given a string dataset and a single string, return a 'uint8_t' array
269 with the same size as the string dataset that has a '1' for all the
270 elements that are equal. */
271 static gal_data_t *
table_selection_string_eq_ne(gal_data_t * column,char * reference,int e0n1)272 table_selection_string_eq_ne(gal_data_t *column, char *reference, int e0n1)
273 {
274 gal_data_t *out;
275 uint8_t *oarr, comp;
276 size_t i, size=column->size;
277 char **strarr=column->array;
278
279 /* Allocate the output binary dataset. */
280 out=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, &size, NULL, 0, -1, 1,
281 NULL, NULL, NULL);
282 oarr=out->array;
283
284 /* Parse the values and mark the outputs IN THE OPPOSITE manner (we are
285 marking the ones that must be removed). */
286 for(i=0;i<size;++i)
287 {
288 comp=strcmp(strarr[i], reference);
289 oarr[i] = e0n1 ? (comp==0) : (comp!=0);
290 }
291
292 /* Return. */
293 return out;
294 }
295
296
297
298
299
300 static gal_data_t *
table_selection_equal_or_notequal(struct tableparams * p,gal_data_t * col,int e0n1)301 table_selection_equal_or_notequal(struct tableparams *p, gal_data_t *col,
302 int e0n1)
303 {
304 void *varr;
305 char **strarr;
306 size_t i, one=1;
307 int numok=GAL_ARITHMETIC_FLAG_NUMOK;
308 int inplace=GAL_ARITHMETIC_FLAG_INPLACE;
309 gal_data_t *eq, *out=NULL, *value=NULL;
310 gal_data_t *arg = e0n1 ? p->notequal : p->equal;
311
312 /* Note that this operator is used to make the "masked" array, so when
313 'e0n1==0' the operator should be 'GAL_ARITHMETIC_OP_NE' and
314 vice-versa.
315
316 For the merging with other elements, when 'e0n1==0', we need the
317 'GAL_ARITHMETIC_OP_AND', but for 'e0n1==1', it should be 'OR'. */
318 int mergeop = e0n1 ? GAL_ARITHMETIC_OP_OR : GAL_ARITHMETIC_OP_AND;
319 int operator = e0n1 ? GAL_ARITHMETIC_OP_EQ : GAL_ARITHMETIC_OP_NE;
320
321 /* First, make sure everything is OK. */
322 if(arg==NULL)
323 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us to fix the "
324 "problem at %s. 'p->range' should not be NULL at this point",
325 __func__, PACKAGE_BUGREPORT);
326
327 /* To easily parse the given values. */
328 strarr=arg->array;
329
330 /* Go through the values given to this call of the option and flag the
331 elements. */
332 for(i=0;i<arg->size;++i)
333 {
334 /* Write the value */
335 if(col->type==GAL_TYPE_STRING)
336 eq=table_selection_string_eq_ne(col, strarr[i], e0n1);
337 else
338 {
339 /* Allocate the value dataset. */
340 value=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &one, NULL, 0, -1, 1,
341 NULL, NULL, NULL);
342 varr=value->array;
343
344 /* Read the stored string as a float64. */
345 if( gal_type_from_string(&varr, strarr[i], GAL_TYPE_FLOAT64) )
346 {
347 fprintf(stderr, "%s couldn't be read as a number.\n", strarr[i]);
348 exit(EXIT_FAILURE);
349 }
350
351 /* Mark the rows that are equal (irrespective of the column's
352 original numerical datatype). */
353 eq=gal_arithmetic(operator, 1, numok, col, value);
354 }
355
356 /* Merge the results with (possible) previous results. */
357 if(out)
358 {
359 out=gal_arithmetic(mergeop, 1, inplace, out, eq);
360 gal_data_free(eq);
361 }
362 else
363 out=eq;
364 }
365
366 /* For a check.
367 {
368 uint8_t *u=out->array;
369 for(i=0;i<out->size;++i) printf("%zu: %u\n", i, u[i]);
370 exit(0);
371 }
372 */
373
374
375 /* Move the main pointer to the next possible call of the given
376 option. Note that 'arg' already points to 'p->equal' or 'p->notequal',
377 so it will automatically be freed with the next step.*/
378 if(e0n1) p->notequal=p->notequal->next;
379 else p->equal=p->equal->next;
380
381 /* Clean up and return. */
382 gal_data_free(value);
383 gal_data_free(arg);
384 return out;
385 }
386
387
388
389
390
391 static void
table_select_by_value(struct tableparams * p)392 table_select_by_value(struct tableparams *p)
393 {
394 gal_data_t *rowids;
395 struct list_select *tmp;
396 uint8_t *u, *uf, *ustart;
397 size_t i, *s, ngood=0;
398 int inplace=GAL_ARITHMETIC_FLAG_INPLACE;
399 gal_data_t *mask, *blmask, *addmask=NULL;
400
401 /* It may happen that the input table is empty! In such cases, just
402 return and don't bother with this step. */
403 if(p->table->dsize==NULL) return;
404
405 /* Allocate datasets for the necessary numbers and write them in. */
406 mask=gal_data_alloc(NULL, GAL_TYPE_UINT8, 1, p->table->dsize, NULL, 1,
407 p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
408
409 /* Go over each selection criteria and remove the necessary elements. */
410 for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
411 {
412 switch(tmp->type)
413 {
414 case SELECT_TYPE_RANGE:
415 addmask=table_selection_range(p, tmp->col);
416 break;
417
418 /* '--inpolygon' and '--outpolygon' need two columns. */
419 case SELECT_TYPE_INPOLYGON:
420 case SELECT_TYPE_OUTPOLYGON:
421 addmask=table_selection_polygon(p, tmp->col, tmp->next->col,
422 tmp->type==SELECT_TYPE_INPOLYGON);
423 tmp=tmp->next;
424 break;
425
426 case SELECT_TYPE_EQUAL:
427 addmask=table_selection_equal_or_notequal(p, tmp->col, 0);
428 break;
429
430 case SELECT_TYPE_NOTEQUAL:
431 addmask=table_selection_equal_or_notequal(p, tmp->col, 1);
432 break;
433
434 default:
435 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s "
436 "to fix the problem. The code %d is not a recognized "
437 "range identifier", __func__, PACKAGE_BUGREPORT,
438 tmp->type);
439 }
440
441 /* Remove any blank elements. */
442 if(gal_blank_present(tmp->col, 1))
443 {
444 blmask = gal_arithmetic(GAL_ARITHMETIC_OP_ISBLANK, 1, 0, tmp->col);
445 addmask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace,
446 addmask, blmask);
447 gal_data_free(blmask);
448 }
449
450 /* Add this mask array to the cumulative mask array (of all
451 selections). */
452 mask=gal_arithmetic(GAL_ARITHMETIC_OP_OR, 1, inplace, mask, addmask);
453
454 /* For a check.
455 {
456 float *f=ref->array;
457 uint8_t *m=mask->array;
458 uint8_t *u=addmask->array, *uf=u+addmask->size;
459 printf("\n\nInput column: %s\n", ref->name ? ref->name : "No Name");
460 printf("Range: %g, %g\n", rarr[0], rarr[1]);
461 printf("%-20s%-20s%-20s\n", "Value", "This mask",
462 "Including previous");
463 do printf("%-20f%-20u%-20u\n", *f++, *u++, *m++); while(u<uf);
464 exit(0);
465 }
466 */
467
468 /* Final clean up. */
469 gal_data_free(addmask);
470 }
471
472 /* Find the final number of elements to print and allocate the array to
473 keep them. */
474 uf=(u=mask->array)+mask->size;
475 ngood=0; do if(*u==0) ++ngood; while(++u<uf);
476 rowids=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &ngood, NULL, 0,
477 p->cp.minmapsize, p->cp.quietmmap, NULL,
478 NULL, NULL);
479
480 /* Fill-in the finally desired row-IDs. */
481 s=rowids->array;
482 ustart=mask->array;
483 uf=(u=mask->array)+mask->size;
484 do if(*u==0) *s++ = u-ustart; while(++u<uf);
485
486 /* Move the desired rows to the top of the table. */
487 table_bring_to_top(p->table, rowids);
488
489 /* If the sort column is not in the table (the proper range has already
490 been applied to it), and we need to sort the resulting columns
491 afterwards, we should also apply the permutation on the sort
492 column. */
493 if(p->sortcol && p->sortin==0) table_bring_to_top(p->sortcol, rowids);
494
495 /* Clean up. */
496 i=0;
497 for(tmp=p->selectcol;tmp!=NULL;tmp=tmp->next)
498 { if(p->freeselect[i]) {gal_data_free(tmp->col); tmp->col=NULL;} ++i; }
499 ui_list_select_free(p->selectcol, 0);
500 free(p->freeselect);
501 gal_data_free(mask);
502 gal_data_free(rowids);
503 }
504
505
506
507
508
509 static void
table_sort(struct tableparams * p)510 table_sort(struct tableparams *p)
511 {
512 gal_data_t *perm;
513 size_t c=0, *s, *sf;
514 int (*qsortfn)(const void *, const void *)=NULL;
515
516 /* In case there are no columns to sort, skip this function. */
517 if(p->table->size==0) return;
518
519 /* Allocate the permutation array and fill it. */
520 perm=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, p->table->dsize, NULL, 0,
521 p->cp.minmapsize, p->cp.quietmmap, NULL, NULL, NULL);
522 sf=(s=perm->array)+perm->size; do *s=c++; while(++s<sf);
523
524 /* For string columns, print a descriptive message. Note that some FITS
525 tables were found that do actually have numbers stored in string
526 types! */
527 if(p->sortcol->type==GAL_TYPE_STRING)
528 error(EXIT_FAILURE, 0, "sort column has a string type, but it can "
529 "(currently) only work on numbers.\n\n"
530 "TIP: if you know the columns contents are all numbers that are "
531 "just stored as strings, you can use this program to save the "
532 "table as a text file, modify the column meta-data (for example "
533 "to type 'i32' or 'f32' instead of 'strN'), then use this "
534 "program again to save it as a FITS table.\n\n"
535 "For more on column metadata in plain text format, please run "
536 "the following command (or see the 'Gnuastro text table format "
537 "section of the book/manual):\n\n"
538 " $ info gnuastro \"gnuastro text table format\"");
539
540 /* Set the proper qsort function. */
541 if(p->descending)
542 switch(p->sortcol->type)
543 {
544 case GAL_TYPE_UINT8: qsortfn=gal_qsort_index_single_uint8_d; break;
545 case GAL_TYPE_INT8: qsortfn=gal_qsort_index_single_int8_d; break;
546 case GAL_TYPE_UINT16: qsortfn=gal_qsort_index_single_uint16_d; break;
547 case GAL_TYPE_INT16: qsortfn=gal_qsort_index_single_int16_d; break;
548 case GAL_TYPE_UINT32: qsortfn=gal_qsort_index_single_uint32_d; break;
549 case GAL_TYPE_INT32: qsortfn=gal_qsort_index_single_int32_d; break;
550 case GAL_TYPE_UINT64: qsortfn=gal_qsort_index_single_uint64_d; break;
551 case GAL_TYPE_INT64: qsortfn=gal_qsort_index_single_int64_d; break;
552 case GAL_TYPE_FLOAT32: qsortfn=gal_qsort_index_single_float32_d; break;
553 case GAL_TYPE_FLOAT64: qsortfn=gal_qsort_index_single_float64_d; break;
554 default:
555 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
556 "the problem. The code '%u' wasn't recognized as a data type",
557 __func__, PACKAGE_BUGREPORT, p->sortcol->type);
558 }
559 else
560 switch(p->sortcol->type)
561 {
562 case GAL_TYPE_UINT8: qsortfn=gal_qsort_index_single_uint8_i; break;
563 case GAL_TYPE_INT8: qsortfn=gal_qsort_index_single_int8_i; break;
564 case GAL_TYPE_UINT16: qsortfn=gal_qsort_index_single_uint16_i; break;
565 case GAL_TYPE_INT16: qsortfn=gal_qsort_index_single_int16_i; break;
566 case GAL_TYPE_UINT32: qsortfn=gal_qsort_index_single_uint32_i; break;
567 case GAL_TYPE_INT32: qsortfn=gal_qsort_index_single_int32_i; break;
568 case GAL_TYPE_UINT64: qsortfn=gal_qsort_index_single_uint64_i; break;
569 case GAL_TYPE_INT64: qsortfn=gal_qsort_index_single_int64_i; break;
570 case GAL_TYPE_FLOAT32: qsortfn=gal_qsort_index_single_float32_i; break;
571 case GAL_TYPE_FLOAT64: qsortfn=gal_qsort_index_single_float64_i; break;
572 default:
573 error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix "
574 "the problem. The code '%u' wasn't recognized as a data type",
575 __func__, PACKAGE_BUGREPORT, p->sortcol->type);
576 }
577
578 /* Sort the indexs from the values. */
579 gal_qsort_index_single=p->sortcol->array;
580 qsort(perm->array, perm->size, sizeof *s, qsortfn);
581
582 /* For a check (only on float32 type 'sortcol'):
583 {
584 float *f=p->sortcol->array;
585 sf=(s=perm->array)+perm->size;
586 do printf("%f\n", f[*s]); while(++s<sf);
587 exit(0);
588 }
589 */
590
591 /* Sort all the output columns with this permutation. */
592 table_apply_permutation(p->table, perm->array, perm->size, 0);
593
594 /* Clean up. */
595 gal_data_free(perm);
596 if(p->freesort) gal_data_free(p->sortcol);
597 }
598
599
600
601
602
603 /* Apply random row selection. If the returned value is 'EXIT_SUCCESS',
604 then, it was successful. Otherwise, it will return 'EXIT_FAILURE' and
605 the input won't be touched. */
606 static int
table_random_rows(gal_data_t * table,gsl_rng * rng,size_t numrandom,size_t minmapsize,int quietmmap)607 table_random_rows(gal_data_t *table, gsl_rng *rng, size_t numrandom,
608 size_t minmapsize, int quietmmap)
609 {
610 int bad;
611 gal_data_t *rowids;
612 size_t i, j, *ids, ind;
613
614 /* Sanity check. */
615 if(numrandom>table->size)
616 return EXIT_FAILURE;
617
618 /* Allocate space for the list of rows to use. */
619 rowids=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &numrandom, NULL, 0,
620 minmapsize, quietmmap, NULL, NULL, NULL);
621
622 /* Select the row numbers. */
623 ids=rowids->array;
624 for(i=0;i<numrandom;++i)
625 {
626 /* Select a random index and make sure its new. */
627 bad=1;
628 while(bad)
629 {
630 ind = gsl_rng_uniform(rng) * table->size;
631 for(j=0;j<i;++j) if(ids[j]==ind) break;
632 if(i==j) bad=0;
633 }
634 ids[i]=ind;
635 }
636
637 /* Move the desired rows to the top. */
638 table_bring_to_top(table, rowids);
639
640 /* Clean up and return. */
641 gal_data_free(rowids);
642 return EXIT_SUCCESS;
643 }
644
645
646
647
648
649 static void
table_select_by_position(struct tableparams * p)650 table_select_by_position(struct tableparams *p)
651 {
652 char **strarr;
653 gal_data_t *col;
654 size_t i, start, end;
655 double *darr = p->rowlimit ? p->rowlimit->array : NULL;
656
657 /* Random row selection (by position, not value). This step is
658 independent of the other operations of this function, so as soon as
659 its finished return. */
660 if(p->rowrandom)
661 {
662 if( table_random_rows(p->table, p->rng, p->rowrandom,
663 p->cp.minmapsize, p->cp.quietmmap)
664 == EXIT_FAILURE && p->cp.quiet==0 )
665 error(EXIT_SUCCESS, 0, "'--rowrandom' not activated because "
666 "the number of rows in the table at this stage (%zu) "
667 "is smaller than the number of requested random rows "
668 "(%zu). You can suppress this message with '--quiet'",
669 p->table->size, p->rowrandom);
670 return;
671 }
672
673 /* Sanity check */
674 if(p->rowlimit)
675 {
676 if(darr[0]>=p->table->size)
677 error(EXIT_FAILURE, 0, "the first value to '--rowlimit' (%g) "
678 "is larger than the number of rows (%zu)",
679 darr[0]+1, p->table->size);
680 else if( darr[1]>=p->table->size )
681 error(EXIT_FAILURE, 0, "the second value to '--rowlimit' (%g) "
682 "is larger than the number of rows (%zu)",
683 darr[1]+1, p->table->size);
684 }
685
686 /* Go over all the columns and make the necessary corrections. */
687 for(col=p->table;col!=NULL;col=col->next)
688 {
689 /* FOR STRING: we'll need to free the individual strings that will
690 not be used (outside the allocated array directly
691 'gal_data_t'). We don't have to worry about the space for the
692 actual pointers (they will be free'd by 'free' in any case, since
693 they are in the initially allocated array).*/
694 if(col->type==GAL_TYPE_STRING)
695 {
696 /* Parse the rows and free extra pointers. */
697 strarr=col->array;
698 if(p->rowlimit)
699 {
700 /* Note that the given values to '--rowlimit' started from 1,
701 but in 'ui.c' we subtracted one from it (so at this stage,
702 it starts from 0). */
703 start = darr[0];
704 end = darr[1];
705 for(i=0;i<p->table->size;++i)
706 if(i<start || i>end) { free(strarr[i]); strarr[i]=NULL; }
707 }
708 else
709 {
710 /* Free their allocated spaces. */
711 start = p->head!=GAL_BLANK_SIZE_T ? p->head : 0;
712 end = p->head!=GAL_BLANK_SIZE_T ? p->table->size : p->tail;
713 for(i=start; i<end; ++i) { free(strarr[i]); strarr[i]=NULL; }
714 }
715 }
716
717 /* Make the final adjustment. */
718 if(p->rowlimit)
719 {
720 /* Move the values up to the top and correct the size. */
721 col->size=darr[1]-darr[0]+1;
722 memmove(col->array,
723 gal_pointer_increment(col->array, darr[0], col->type),
724 (darr[1]-darr[0]+1)*gal_type_sizeof(col->type));
725 }
726 else
727 {
728 /* For '--tail', we'll need to bring the last columns to the
729 start. Note that we are using 'memmove' because we want to be
730 safe with overlap. */
731 if(p->tail!=GAL_BLANK_SIZE_T)
732 memmove(col->array,
733 gal_pointer_increment(col->array, col->size - p->tail,
734 col->type),
735 p->tail*gal_type_sizeof(col->type));
736
737 /* In any case (head or tail), the new number of column elements
738 is the given value. */
739 col->size = col->dsize[0] = ( p->head!=GAL_BLANK_SIZE_T
740 ? p->head
741 : p->tail );
742 }
743 }
744 }
745
746
747
748
749
750 /* Import columns from another file/table into the working table. */
751 static void
table_catcolumn(struct tableparams * p)752 table_catcolumn(struct tableparams *p)
753 {
754 size_t counter=1;
755 gal_list_str_t *filell, *hdull;
756 gal_data_t *tocat, *final, *newcol;
757 char *tmpname, *hdu=NULL, cstr[100];
758 struct gal_options_common_params *cp=&p->cp;
759
760 /* Go over all the given files. */
761 hdull=p->catcolumnhdu;
762 for(filell=p->catcolumnfile; filell!=NULL; filell=filell->next)
763 {
764 /* Set the HDU (not necessary for non-FITS tables). */
765 if(gal_fits_file_recognized(filell->v))
766 {
767 if(hdull) { hdu=hdull->v; hdull=hdull->next; }
768 else
769 error(EXIT_FAILURE, 0, "not enough '--catcolumnhdu's (or '-u'). "
770 "For every FITS table given to '--catcolumnfile'. A call to "
771 "'--catcolumnhdu' is necessary to identify its "
772 "HDU/extension");
773 }
774 else hdu=NULL;
775
776 /* Read the catcolumn table. */
777 tocat=gal_table_read(filell->v, hdu, NULL, p->catcolumns, cp->searchin,
778 cp->ignorecase, cp->minmapsize, p->cp.quietmmap,
779 NULL);
780
781 /* Check the number of rows. */
782 if(tocat->dsize[0]!=p->table->dsize[0])
783 error(EXIT_FAILURE, 0, "%s: incorrect number of rows. The table given "
784 "to '--catcolumn' must have the same number of rows as the "
785 "main argument (after all row-selections have been applied), "
786 "but they have %zu and %zu rows respectively",
787 gal_fits_name_save_as_string(filell->v, hdu), tocat->dsize[0],
788 p->table->dsize[0]);
789
790 /* Append a counter to the column names because this option is most
791 often used with columns that have a similar name and it would help
792 the user if the output doesn't have multiple columns with same
793 name. */
794 if(p->catcolumnrawname==0)
795 for(newcol=tocat; newcol!=NULL; newcol=newcol->next)
796 if(newcol->name)
797 {
798 /* Add the counter suffix to the column name. */
799 sprintf(cstr, "-%zu", counter);
800 tmpname=gal_checkset_malloc_cat(newcol->name, cstr);
801
802 /* Free the old name and put in the new one. */
803 free(newcol->name);
804 newcol->name=tmpname;
805 }
806
807 /* Find the final column of the main table and add this table.*/
808 final=gal_list_data_last(p->table);
809 final->next=tocat;
810 ++counter;
811 }
812 }
813
814
815
816
817
818 void
table_colmetadata(struct tableparams * p)819 table_colmetadata(struct tableparams *p)
820 {
821 char **strarr;
822 gal_data_t *meta, *col;
823 size_t counter, *colnum;
824
825 /* Loop through all the given updates and implement them. */
826 for(meta=p->colmetadata;meta!=NULL;meta=meta->next)
827 {
828 /* If the given column specifier is a name (not parse-able as a
829 number), then this condition will fail. */
830 colnum=NULL;
831 if( gal_type_from_string((void **)(&colnum), meta->name,
832 GAL_TYPE_SIZE_T) )
833 {
834 /* We have been given a string, so find the first column that has
835 the same name. */
836 for(col=p->table; col!=NULL; col=col->next)
837 if(!strcmp(col->name, meta->name)) break;
838 }
839 /* The column specifier is a number. */
840 else
841 {
842 /* Go over the columns and find the one with this counter. */
843 counter=1;
844 for(col=p->table; col!=NULL; col=col->next)
845 if(counter++==colnum[0]) break;
846
847 /* Clean up the space that was allocated for 'colnum' (its not
848 allocated when the given value was a string). */
849 free(colnum);
850 }
851
852 /* If a match was found, then 'col' should not be NULL. */
853 if(col==NULL)
854 error(EXIT_FAILURE, 0, "no column found for '%s' (given to "
855 "'--colmetadata'). Columns can either be specified by "
856 "their position in the output table (integer counter, "
857 "starting from 1), or their name (the first column "
858 "found with the given name will be used)", meta->name);
859
860 /* The matching column is found and we know that atleast one value is
861 already given (otherwise 'gal_options_parse_name_and_values' would
862 abort the program). The first given string is the new name. */
863 strarr=meta->array;
864 if(col->name) free(col->name);
865 gal_checkset_allocate_copy(strarr[0], &col->name);
866
867 /* If more than one string is given, the second one is the new
868 unit. */
869 if(meta->size>1)
870 {
871 /* Replace the unit. */
872 if(col->unit) free(col->unit);
873 gal_checkset_allocate_copy(strarr[1], &col->unit);
874
875 /* The next element is the comment of the column. */
876 if(meta->size>2)
877 {
878 if(col->comment) free(col->comment);
879 gal_checkset_allocate_copy(strarr[2], &col->comment);
880 }
881 }
882 }
883 }
884
885
886
887
888
889 void
table_noblank(struct tableparams * p)890 table_noblank(struct tableparams *p)
891 {
892 int found;
893 size_t i, j, *index;
894 gal_data_t *tcol, *flag;
895 char **strarr=p->noblank->array;
896 gal_list_sizet_t *column_indexs=NULL;
897
898 /* See if all columns should be checked, or just a select few. */
899 if( p->noblank->size==1 && !strcmp(strarr[0],"_all") )
900 {
901 for(i=0;i<gal_list_data_number(p->table);++i)
902 gal_list_sizet_add(&column_indexs, i);
903 }
904
905 /* Only certain columns should be checked, so find/add their index. */
906 else
907 for(i=0;i<p->noblank->size;++i)
908 {
909 /* First go through the column names and if they match, add
910 them. Note that we don't want to stop once a name is found, in
911 this scenario, if multiple columns have the same name, we should
912 use all.*/
913 j=0;
914 found=0;
915 for(tcol=p->table; tcol!=NULL; tcol=tcol->next)
916 {
917 if( tcol->name && !strcmp(tcol->name, strarr[i]) )
918 {
919 found=1;
920 gal_list_sizet_add(&column_indexs, j);
921 }
922 ++j;
923 }
924
925 /* If the given string didn't match any column name, it must be a
926 number, so parse it as a number and use that number. */
927 if(found==0)
928 {
929 /* Parse the given index. */
930 index=NULL;
931 if( gal_type_from_string((void **)(&index), strarr[i],
932 GAL_TYPE_SIZE_T) )
933 error(EXIT_FAILURE, 0, "column '%s' didn't match any of the "
934 "final column names and can't be parsed as a column "
935 "counter (starting from 1) either", strarr[i]);
936
937 /* Make sure its not zero (the user counts from 1). */
938 if(*index==0)
939 error(EXIT_FAILURE, 0, "the column number (given to the "
940 "'--noblank' option) should start from 1, but you have "
941 "given 0.");
942
943 /* Make sure that the index falls within the number (note that
944 it still counts from 1). */
945 if(*index > gal_list_data_number(p->table))
946 error(EXIT_FAILURE, 0, "the final output table only has %zu "
947 "columns, but you have given column %zu to '--noblank'. "
948 "Recall that '--noblank' operates on the output columns "
949 "and that you can also use output column names (if they "
950 "have any)",
951 gal_list_data_number(p->table), *index);
952
953 /* Everything is fine, add the index to the list of columns to
954 check. */
955 gal_list_sizet_add(&column_indexs, *index-1);
956
957 /* Clean up. */
958 free(index);
959 }
960
961 /* For a check.
962 printf("%zu\n", column_indexs->v);
963 */
964 }
965
966 /* Remove all blank rows from the output table, note that we don't need
967 the flags of the removed columns here. So we can just free it up. */
968 flag=gal_blank_remove_rows(p->table, column_indexs);
969 gal_data_free(flag);
970 }
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989 /**************************************************************/
990 /*************** Top function *******************/
991 /**************************************************************/
992 void
table(struct tableparams * p)993 table(struct tableparams *p)
994 {
995 /* Concatenate the columns of tables (if required)*/
996 if(p->catcolumnfile) table_catcolumn(p);
997
998 /* Apply ranges based on row values (if required). */
999 if(p->selection) table_select_by_value(p);
1000
1001 /* Sort it (if required). */
1002 if(p->sort) table_sort(p);
1003
1004 /* If the output number of rows is limited, apply them. */
1005 if( p->rowlimit
1006 || p->rowrandom
1007 || p->head!=GAL_BLANK_SIZE_T
1008 || p->tail!=GAL_BLANK_SIZE_T )
1009 table_select_by_position(p);
1010
1011 /* If any arithmetic operations are needed, do them. */
1012 if(p->outcols)
1013 arithmetic_operate(p);
1014
1015 /* When column metadata should be updated. */
1016 if(p->colmetadata) table_colmetadata(p);
1017
1018 /* When any columns with blanks should be removed. */
1019 if(p->noblank) table_noblank(p);
1020
1021 /* Write the output. */
1022 gal_table_write(p->table, NULL, NULL, p->cp.tableformat, p->cp.output,
1023 "TABLE", p->colinfoinstdout);
1024 }
1025