1 /*********************************************************************
2 blank -- Deal with blank values in a datasets
3 This is part of GNU Astronomy Utilities (Gnuastro) package.
4 
5 Original author:
6      Mohammad Akhlaghi <mohammad@akhlaghi.org>
7 Contributing author(s):
8 Copyright (C) 2017-2021, Free Software Foundation, Inc.
9 
10 Gnuastro is free software: you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation, either version 3 of the License, or (at your
13 option) any later version.
14 
15 Gnuastro is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with Gnuastro. If not, see <http://www.gnu.org/licenses/>.
22 **********************************************************************/
23 #include <config.h>
24 
25 #include <errno.h>
26 #include <error.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <inttypes.h>
31 
32 #include <gnuastro/data.h>
33 #include <gnuastro/tile.h>
34 #include <gnuastro/blank.h>
35 #include <gnuastro/pointer.h>
36 #include <gnuastro/statistics.h>
37 
38 #include <gnuastro-internal/checkset.h>
39 
40 
41 
42 
43 /* Write the blank value of the type into an already allocate space. Note
44    that for STRINGS, pointer should actually be 'char **'. */
45 void
gal_blank_write(void * ptr,uint8_t type)46 gal_blank_write(void *ptr, uint8_t type)
47 {
48   switch(type)
49     {
50     /* Numeric types */
51     case GAL_TYPE_UINT8:   *(uint8_t  *)ptr = GAL_BLANK_UINT8;    break;
52     case GAL_TYPE_INT8:    *(int8_t   *)ptr = GAL_BLANK_INT8;     break;
53     case GAL_TYPE_UINT16:  *(uint16_t *)ptr = GAL_BLANK_UINT16;   break;
54     case GAL_TYPE_INT16:   *(int16_t  *)ptr = GAL_BLANK_INT16;    break;
55     case GAL_TYPE_UINT32:  *(uint32_t *)ptr = GAL_BLANK_UINT32;   break;
56     case GAL_TYPE_INT32:   *(int32_t  *)ptr = GAL_BLANK_INT32;    break;
57     case GAL_TYPE_UINT64:  *(uint64_t *)ptr = GAL_BLANK_UINT64;   break;
58     case GAL_TYPE_INT64:   *(int64_t  *)ptr = GAL_BLANK_INT64;    break;
59     case GAL_TYPE_FLOAT32: *(float    *)ptr = GAL_BLANK_FLOAT32;  break;
60     case GAL_TYPE_FLOAT64: *(double   *)ptr = GAL_BLANK_FLOAT64;  break;
61 
62     /* String type. */
63     case GAL_TYPE_STRING:
64       gal_checkset_allocate_copy(GAL_BLANK_STRING, ptr);
65       break;
66 
67     /* Complex types */
68     case GAL_TYPE_COMPLEX32:
69     case GAL_TYPE_COMPLEX64:
70       error(EXIT_FAILURE, 0, "%s: complex types are not yet supported",
71             __func__);
72 
73     /* Unrecognized type. */
74     default:
75       error(EXIT_FAILURE, 0, "%s: type code %d not recognized", __func__,
76             type);
77     }
78 }
79 
80 
81 
82 
83 
84 /* Allocate some space for the given type and put the blank value into
85    it. */
86 void *
gal_blank_alloc_write(uint8_t type)87 gal_blank_alloc_write(uint8_t type)
88 {
89   void *out;
90 
91   /* Allocate the space to keep the blank value. */
92   out=gal_pointer_allocate(type, 1, 0, __func__, "out");
93 
94   /* Put the blank value in the allcated space. */
95   gal_blank_write(out, type);
96 
97   /* Return the allocated space. */
98   return out;
99 }
100 
101 
102 
103 
104 
105 /* Initialize (set all the values in the array) with the blank value of the
106    given type. */
107 void
gal_blank_initialize(gal_data_t * input)108 gal_blank_initialize(gal_data_t *input)
109 {
110   GAL_TILE_PARSE_OPERATE(input, NULL, 0, 0, {*i=b;});
111 }
112 
113 
114 
115 
116 
117 /* Initialize an array to the given type's blank values.*/
118 void
gal_blank_initialize_array(void * array,size_t size,uint8_t type)119 gal_blank_initialize_array(void *array, size_t size, uint8_t type)
120 {
121   size_t i, w=gal_type_sizeof(type);
122   void *b=gal_blank_alloc_write(type);
123 
124   /* Set all the elements to blank. */
125   for(i=0;i<size;++i)
126     memcpy(gal_pointer_increment(array, i, type), b, w);
127 
128   /* Clean up. */
129   free(b);
130 }
131 
132 
133 
134 
135 
136 /* Print the blank value as a string. For the integer types, we'll use the
137    PRIxNN keywords of 'inttypes.h' (which is imported into Gnuastro from
138    Gnulib, so we don't necessarily rely on the host system having it). */
139 char *
gal_blank_as_string(uint8_t type,int width)140 gal_blank_as_string(uint8_t type, int width)
141 {
142   char *blank=NULL, *fmt;
143 
144   /* Print the given value. */
145   switch(type)
146     {
147     case GAL_TYPE_BIT:
148       error(EXIT_FAILURE, 0, "%s: bit types are not implemented yet",
149             __func__);
150       break;
151 
152     case GAL_TYPE_STRING:
153       if(width)
154         {
155           if( asprintf(&blank, "%*s", width,  GAL_BLANK_STRING)<0 )
156             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
157         }
158       else
159         {
160           if( asprintf(&blank, "%s", GAL_BLANK_STRING)<0 )
161             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
162         }
163       break;
164 
165     case GAL_TYPE_UINT8:
166       fmt = width ? "%*"PRIu8 : "%"PRIu8;
167       if(width)
168         {
169           if( asprintf(&blank, fmt, width, (uint8_t)GAL_BLANK_UINT8)<0 )
170             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
171         }
172       else
173         {
174           if( asprintf(&blank, fmt, (uint8_t)GAL_BLANK_UINT8)<0 )
175             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
176         }
177       break;
178 
179     case GAL_TYPE_INT8:
180       fmt = width ? "%*"PRId8 : "%"PRId8;
181       if(width)
182         {
183           if( asprintf(&blank, fmt, width, (int8_t)GAL_BLANK_INT8)<0 )
184             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
185         }
186       else
187         {
188           if( asprintf(&blank, fmt, (int8_t)GAL_BLANK_INT8)<0 )
189             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
190         }
191       break;
192 
193     case GAL_TYPE_UINT16:
194       fmt = width ? "%*"PRIu16 : "%"PRIu16;
195       if(width)
196         {
197           if( asprintf(&blank, fmt, width, (uint16_t)GAL_BLANK_UINT16)<0 )
198             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
199         }
200       else
201         {
202           if( asprintf(&blank, fmt, (uint16_t)GAL_BLANK_UINT16)<0 )
203             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
204         }
205       break;
206 
207     case GAL_TYPE_INT16:
208       fmt = width ? "%*"PRId16 : "%"PRId16;
209       if(width)
210         {
211           if( asprintf(&blank, fmt, width, (int16_t)GAL_BLANK_INT16)<0 )
212             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
213         }
214       else
215         {
216           if( asprintf(&blank, fmt, (int16_t)GAL_BLANK_INT16)<0 )
217             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
218         }
219       break;
220 
221     case GAL_TYPE_UINT32:
222       fmt = width ? "%*"PRIu32 : "%"PRIu32;
223       if(width)
224         {
225           if( asprintf(&blank, fmt, width, (uint32_t)GAL_BLANK_UINT32)<0 )
226             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
227         }
228       else
229         {
230           if( asprintf(&blank, fmt, (uint32_t)GAL_BLANK_UINT32)<0 )
231             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
232         }
233       break;
234 
235     case GAL_TYPE_INT32:
236       fmt = width ? "%*"PRId32 : "%"PRId32;
237       if(width)
238         {
239           if( asprintf(&blank, fmt, width, (int32_t)GAL_BLANK_INT32)<0 )
240             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
241         }
242       else
243         {
244           if( asprintf(&blank, fmt, (int32_t)GAL_BLANK_INT32)<0 )
245             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
246         }
247       break;
248 
249     case GAL_TYPE_UINT64:
250       fmt = width ? "%*"PRIu64 : "%"PRIu64;
251       if(width)
252         {
253           if( asprintf(&blank, fmt, width, (uint64_t)GAL_BLANK_UINT64)<0 )
254             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
255         }
256       else
257         {
258           if( asprintf(&blank, fmt, (uint64_t)GAL_BLANK_UINT64)<0 )
259             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
260         }
261       break;
262 
263     case GAL_TYPE_INT64:
264       fmt = width ? "%*"PRId64 : "%"PRId64;
265       if(width)
266         {
267           if( asprintf(&blank, fmt, width, (int64_t)GAL_BLANK_INT64)<0 )
268             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
269         }
270       else
271         {
272           if( asprintf(&blank, fmt, (int64_t)GAL_BLANK_INT64)<0 )
273             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
274         }
275       break;
276 
277     case GAL_TYPE_FLOAT32:
278       if(width)
279         {
280           if( asprintf(&blank, "%*f", width,  (float)GAL_BLANK_FLOAT32)<0 )
281             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
282         }
283       else
284         {
285           if( asprintf(&blank, "%f", (float)GAL_BLANK_FLOAT32)<0 )
286             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
287         }
288       break;
289 
290     case GAL_TYPE_FLOAT64:
291       if(width)
292         {
293           if( asprintf(&blank, "%*f", width,  (double)GAL_BLANK_FLOAT64)<0 )
294             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
295         }
296       else
297         {
298           if( asprintf(&blank, "%f", (double)GAL_BLANK_FLOAT64)<0 )
299             error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
300         }
301       break;
302 
303     default:
304       error(EXIT_FAILURE, 0, "%s: type code %d not recognized", __func__,
305             type);
306     }
307   return blank;
308 }
309 
310 
311 
312 
313 
314 
315 /* Return 1 if the contents of the pointer (with the given type) is
316    blank. */
317 int
gal_blank_is(void * pointer,uint8_t type)318 gal_blank_is(void *pointer, uint8_t type)
319 {
320   switch(type)
321     {
322     /* Numeric types */
323     case GAL_TYPE_UINT8:     return *(uint8_t  *)pointer==GAL_BLANK_UINT8;
324     case GAL_TYPE_INT8:      return *(int8_t   *)pointer==GAL_BLANK_INT8;
325     case GAL_TYPE_UINT16:    return *(uint16_t *)pointer==GAL_BLANK_UINT16;
326     case GAL_TYPE_INT16:     return *(int16_t  *)pointer==GAL_BLANK_INT16;
327     case GAL_TYPE_UINT32:    return *(uint32_t *)pointer==GAL_BLANK_UINT32;
328     case GAL_TYPE_INT32:     return *(int32_t  *)pointer==GAL_BLANK_INT32;
329     case GAL_TYPE_UINT64:    return *(uint64_t *)pointer==GAL_BLANK_UINT64;
330     case GAL_TYPE_INT64:     return *(int64_t  *)pointer==GAL_BLANK_INT64;
331     case GAL_TYPE_FLOAT32:   return isnan( *(float *)(pointer) );
332     case GAL_TYPE_FLOAT64:   return isnan( *(double *)(pointer) );
333 
334     /* String. */
335     case GAL_TYPE_STRING:    if(!strcmp(pointer,GAL_BLANK_STRING)) return 1;
336 
337     /* Complex types */
338     case GAL_TYPE_COMPLEX32:
339     case GAL_TYPE_COMPLEX64:
340       error(EXIT_FAILURE, 0, "%s: complex types are not yet supported",
341             __func__);
342 
343     /* Bit. */
344     case GAL_TYPE_BIT:
345       error(EXIT_FAILURE, 0, "%s: bit type datasets are not yet supported",
346             __func__);
347 
348     default:
349       error(EXIT_FAILURE, 0, "%s: type value (%d) not recognized",
350             __func__, type);
351     }
352 
353   /* Control should not reach here, so print an error if it does, then
354      return a 0 (just to avoid compiler warnings). */
355   error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to address the "
356         "problem. Control should not reach the end of this function",
357         __func__, PACKAGE_BUGREPORT);
358   return 0;
359 }
360 
361 
362 
363 
364 
365 /* Return 1 if the dataset has a blank value and zero if it doesn't. Before
366    checking the dataset, this function will look at its flags. If the
367    'GAL_DATA_FLAG_HASBLANK' or 'GAL_DATA_FLAG_DONT_CHECK_ZERO' bits of
368    'input->flag' are set to 1, this function will not do any check and will
369    just use the information in the flags.
370 
371    If you want to re-check a dataset which has non-zero flags, then
372    explicitly set the appropriate flag to zero before calling this
373    function. When there are no other flags, you can just set 'input->flags'
374    to zero, otherwise you can use this expression:
375 
376        input->flags &= ~ (GAL_DATA_FLAG_HASBLANK | GAL_DATA_FLAG_USE_ZERO);
377 
378    This function has no side-effects on the dataset: it will not toggle the
379    flags, to avoid repeating parsing of the full dataset multiple times
380    (when it occurs), please toggle the flags your self after the first
381    check. */
382 #define HAS_BLANK(IT) {                                                 \
383     IT b, *a=input->array, *af=a+input->size, *start;                   \
384     gal_blank_write(&b, block->type);                                   \
385                                                                         \
386     /* If this is a tile, not a full block. */                          \
387     if(input!=block)                                                    \
388       start=gal_tile_start_end_ind_inclusive(input, block, start_end_inc); \
389                                                                         \
390     /* Go over all the elements. */                                     \
391     while( start_end_inc[0] + increment <= start_end_inc[1] )           \
392       {                                                                 \
393         /* Necessary when we are on a tile. */                          \
394         if(input!=block)                                                \
395           af = ( a = start + increment ) + input->dsize[input->ndim-1]; \
396                                                                         \
397         /* Check for blank values. */                                   \
398         if(b==b) do if(*a==b)  { hasblank=1; break; } while(++a<af);    \
399         else     do if(*a!=*a) { hasblank=1; break; } while(++a<af);    \
400                                                                         \
401         /* Necessary when we are on a tile. */                          \
402         if(input!=block)                                                \
403           increment += gal_tile_block_increment(block, input->dsize,    \
404                                                 num_increment++, NULL); \
405         else break;                                                     \
406       }                                                                 \
407   }
408 int
gal_blank_present(gal_data_t * input,int updateflag)409 gal_blank_present(gal_data_t *input, int updateflag)
410 {
411   int hasblank=0;
412   char **str, **strf;
413   size_t increment=0, num_increment=1;
414   gal_data_t *block=gal_tile_block(input);
415   size_t start_end_inc[2]={0,block->size-1}; /* -1: this is INCLUSIVE. */
416 
417   /* If there is nothing in the array (its size is zero), then return 0 (no
418      blank is present. */
419   if(input->size==0) return 0;
420 
421   /* From the user's flags, you can tell if the dataset has already been
422      checked for blank values or not. If it has, then just return the
423      checked result. */
424   if( input->flag & GAL_DATA_FLAG_BLANK_CH )
425     return input->flag & GAL_DATA_FLAG_HASBLANK;
426 
427   /* Go over the pixels and check: */
428   switch(block->type)
429     {
430     /* Numeric types */
431     case GAL_TYPE_UINT8:     HAS_BLANK( uint8_t  );    break;
432     case GAL_TYPE_INT8:      HAS_BLANK( int8_t   );    break;
433     case GAL_TYPE_UINT16:    HAS_BLANK( uint16_t );    break;
434     case GAL_TYPE_INT16:     HAS_BLANK( int16_t  );    break;
435     case GAL_TYPE_UINT32:    HAS_BLANK( uint32_t );    break;
436     case GAL_TYPE_INT32:     HAS_BLANK( int32_t  );    break;
437     case GAL_TYPE_UINT64:    HAS_BLANK( uint64_t );    break;
438     case GAL_TYPE_INT64:     HAS_BLANK( int64_t  );    break;
439     case GAL_TYPE_FLOAT32:   HAS_BLANK( float    );    break;
440     case GAL_TYPE_FLOAT64:   HAS_BLANK( double   );    break;
441 
442     /* String. */
443     case GAL_TYPE_STRING:
444       if(input!=block)
445         error(EXIT_FAILURE, 0, "%s: tile mode is currently not supported for "
446               "strings", __func__);
447       strf = (str=input->array) + input->size;
448       do
449         if(*str==NULL || !strcmp(*str,GAL_BLANK_STRING)) return 1;
450       while(++str<strf);
451       break;
452 
453     /* Complex types */
454     case GAL_TYPE_COMPLEX32:
455     case GAL_TYPE_COMPLEX64:
456       error(EXIT_FAILURE, 0, "%s: complex types are not yet supported",
457             __func__);
458 
459     /* Bit */
460     case GAL_TYPE_BIT:
461       error(EXIT_FAILURE, 0, "%s: bit type datasets are not yet supported",
462             __func__);
463 
464     default:
465       error(EXIT_FAILURE, 0, "%s: type value (%d) not recognized",
466             __func__, block->type);
467     }
468 
469   /* Update the flag if requested. */
470   if(updateflag)
471     {
472       input->flag |= GAL_DATA_FLAG_BLANK_CH;
473       if(hasblank) input->flag |= GAL_DATA_FLAG_HASBLANK;
474       else         input->flag &= ~GAL_DATA_FLAG_HASBLANK;
475     }
476 
477   /* If there was a blank value, then the function would have returned with
478      a value of 1. So if it reaches here, then we can be sure that there
479      was no blank values, hence, return 0. */
480   return hasblank;
481 }
482 
483 
484 
485 
486 
487 /* Return the number of blank elements in the dataset. */
488 size_t
gal_blank_number(gal_data_t * input,int updateflag)489 gal_blank_number(gal_data_t *input, int updateflag)
490 {
491   size_t nblank;
492   char **strarr;
493   gal_data_t *number;
494   size_t i, num_not_blank;
495 
496   if(input)
497     {
498       if( gal_blank_present(input, updateflag) )
499         {
500           if(input->type==GAL_TYPE_STRING)
501             {
502               nblank=0;
503               strarr=input->array;
504               for(i=0;i<input->size;++i)
505                 {
506                   if( strarr[i]==NULL
507                       || strcmp(strarr[i], GAL_BLANK_STRING)==0 )
508                     ++nblank;
509                 }
510               return nblank;
511             }
512           else
513             {
514               number=gal_statistics_number(input);
515               num_not_blank=((size_t *)(number->array))[0];
516               gal_data_free(number);
517               return input->size - num_not_blank;
518             }
519         }
520       else
521         return 0;
522     }
523   else
524     return GAL_BLANK_SIZE_T;
525 }
526 
527 
528 
529 
530 
531 
532 
533 /* Create a dataset of the the same size as the input, but with an uint8_t
534    type that has a value of 1 for data that are blank and 0 for those that
535    aren't. */
536 #define FLAG_BLANK(IT) {                                                \
537     IT b, *a=input->array;                                              \
538     gal_blank_write(&b, input->type);                                   \
539     if(b==b) /* Blank value can be checked with the equal comparison */ \
540       do { *o = *a==b;  ++a; } while(++o<of);                           \
541     else     /* Blank value will fail with the equal comparison */      \
542       do { *o = *a!=*a; ++a; } while(++o<of);                           \
543   }
544 gal_data_t *
gal_blank_flag(gal_data_t * input)545 gal_blank_flag(gal_data_t *input)
546 {
547   uint8_t *o, *of;
548   gal_data_t *out;
549   char **str=input->array, **strf=str+input->size;
550 
551   if( gal_blank_present(input, 0) )
552     {
553       /* Allocate a non-cleared output array, we are going to parse the
554          input and fill in each element. */
555       out=gal_data_alloc(NULL, GAL_TYPE_UINT8, input->ndim, input->dsize,
556                          input->wcs, 0, input->minmapsize, input->quietmmap,
557                          NULL, "bool", NULL);
558 
559       /* Set the pointers for easy looping. */
560       of=(o=out->array)+input->size;
561 
562       /* Go over the pixels and set the output values. */
563       switch(input->type)
564         {
565         /* Numeric types */
566         case GAL_TYPE_UINT8:     FLAG_BLANK( uint8_t  );    break;
567         case GAL_TYPE_INT8:      FLAG_BLANK( int8_t   );    break;
568         case GAL_TYPE_UINT16:    FLAG_BLANK( uint16_t );    break;
569         case GAL_TYPE_INT16:     FLAG_BLANK( int16_t  );    break;
570         case GAL_TYPE_UINT32:    FLAG_BLANK( uint32_t );    break;
571         case GAL_TYPE_INT32:     FLAG_BLANK( int32_t  );    break;
572         case GAL_TYPE_UINT64:    FLAG_BLANK( uint64_t );    break;
573         case GAL_TYPE_INT64:     FLAG_BLANK( int64_t  );    break;
574         case GAL_TYPE_FLOAT32:   FLAG_BLANK( float    );    break;
575         case GAL_TYPE_FLOAT64:   FLAG_BLANK( double   );    break;
576 
577         /* String. */
578         case GAL_TYPE_STRING:
579           do *o++ = !strcmp(*str,GAL_BLANK_STRING); while(++str<strf);
580           break;
581 
582         /* Currently unsupported types. */
583         case GAL_TYPE_BIT:
584         case GAL_TYPE_COMPLEX32:
585         case GAL_TYPE_COMPLEX64:
586           error(EXIT_FAILURE, 0, "%s: %s type not yet supported",
587                 __func__, gal_type_name(input->type, 1));
588 
589         /* Bad input. */
590         default:
591           error(EXIT_FAILURE, 0, "%s: type value (%d) not recognized",
592                 __func__, input->type);
593         }
594     }
595   else
596     /* Allocate a CLEAR data structure (all zeros). */
597     out=gal_data_alloc(NULL, GAL_TYPE_UINT8, input->ndim, input->dsize,
598                        input->wcs, 1, input->minmapsize, input->quietmmap,
599                        NULL, "bool", NULL);
600 
601   /* Return */
602   return out;
603 }
604 
605 
606 
607 
608 
609 /* Write a blank value in the input anywhere that the flag dataset is not
610    zero or not blank. */
611 #define BLANK_FLAG_APPLY(IT) {                                          \
612     IT *i=input->array, b;                                              \
613     gal_blank_write(&b, input->type);                                   \
614     do {if(*f && *f!=GAL_BLANK_UINT8) *i=b; ++i;} while(++f<ff);        \
615   }
616 void
gal_blank_flag_apply(gal_data_t * input,gal_data_t * flag)617 gal_blank_flag_apply(gal_data_t *input, gal_data_t *flag)
618 {
619   size_t j;
620   char **strarr=input->array;
621   uint8_t *f=flag->array, *ff=f+flag->size;
622 
623   /* Sanity check. */
624   if(flag->type!=GAL_TYPE_UINT8)
625     error(EXIT_FAILURE, 0, "%s: the 'flag' argument has a '%s' type, it "
626           "must have an unsigned 8-bit type", __func__,
627           gal_type_name(flag->type, 1));
628   if(gal_dimension_is_different(input, flag))
629     error(EXIT_FAILURE, 0, "%s: the 'flag' argument doesn't have the same "
630           "size as the 'input' argument", __func__);
631 
632   /* Write the blank values. */
633   switch(input->type)
634     {
635     /* Basic numeric types. */
636     case GAL_TYPE_UINT8:     BLANK_FLAG_APPLY( uint8_t  );    break;
637     case GAL_TYPE_INT8:      BLANK_FLAG_APPLY( int8_t   );    break;
638     case GAL_TYPE_UINT16:    BLANK_FLAG_APPLY( uint16_t );    break;
639     case GAL_TYPE_INT16:     BLANK_FLAG_APPLY( int16_t  );    break;
640     case GAL_TYPE_UINT32:    BLANK_FLAG_APPLY( uint32_t );    break;
641     case GAL_TYPE_INT32:     BLANK_FLAG_APPLY( int32_t  );    break;
642     case GAL_TYPE_UINT64:    BLANK_FLAG_APPLY( uint64_t );    break;
643     case GAL_TYPE_INT64:     BLANK_FLAG_APPLY( int64_t  );    break;
644     case GAL_TYPE_FLOAT32:   BLANK_FLAG_APPLY( float    );    break;
645     case GAL_TYPE_FLOAT64:   BLANK_FLAG_APPLY( double   );    break;
646 
647     /* Strings. */
648     case GAL_TYPE_STRING:
649       for(j=0; j<input->size; ++j)
650         {
651           if(*f && *f!=GAL_BLANK_UINT8)
652             {
653               free(strarr[j]);
654               gal_checkset_allocate_copy(GAL_BLANK_STRING, &strarr[j]);
655             }
656           ++f;
657         }
658       break;
659 
660     /* Currently unsupported types. */
661     case GAL_TYPE_BIT:
662     case GAL_TYPE_COMPLEX32:
663     case GAL_TYPE_COMPLEX64:
664       error(EXIT_FAILURE, 0, "%s: %s type not yet supported",
665             __func__, gal_type_name(input->type, 1));
666 
667     /* Bad input. */
668     default:
669       error(EXIT_FAILURE, 0, "%s: type value (%d) not recognized",
670             __func__, input->type);
671     }
672 
673   /* Update the blank flags. */
674   gal_blank_present(input, 1);
675 }
676 
677 
678 
679 
680 
681 /* Remove flagged elements from a dataset (which may not necessarily
682    blank), convert it to a 1D dataset and adjust the size properly. In
683    practice this function doesn't 'realloc' the input array, all it does is
684    to shift the blank eleemnts to the end and adjust the size elements of
685    the 'gal_data_t'. */
686 #define BLANK_FLAG_REMOVE(IT) {                                         \
687     IT *a=input->array, *af=a+input->size, *o=input->array;             \
688     do {                                                                \
689       if(*f==0) {++num; *o++=*a; }                                      \
690       ++f;                                                              \
691     }                                                                   \
692     while(++a<af);                                                      \
693   }
694 void
gal_blank_flag_remove(gal_data_t * input,gal_data_t * flag)695 gal_blank_flag_remove(gal_data_t *input, gal_data_t *flag)
696 {
697   char **strarr;
698   size_t i, num=0;
699   uint8_t *f=flag->array;
700 
701   /* Sanity check. */
702   if(flag->type!=GAL_TYPE_UINT8)
703     error(EXIT_FAILURE, 0, "%s: the 'flag' argument has a '%s' type, it "
704           "must have an unsigned 8-bit type", __func__,
705           gal_type_name(flag->type, 1));
706   if(gal_dimension_is_different(input, flag))
707     error(EXIT_FAILURE, 0, "%s: the 'flag' argument doesn't have the same "
708           "size as the 'input' argument", __func__);
709 
710   /* Shift all non-blank elements to the start of the array. */
711   switch(input->type)
712     {
713     case GAL_TYPE_UINT8:    BLANK_FLAG_REMOVE( uint8_t  );    break;
714     case GAL_TYPE_INT8:     BLANK_FLAG_REMOVE( int8_t   );    break;
715     case GAL_TYPE_UINT16:   BLANK_FLAG_REMOVE( uint16_t );    break;
716     case GAL_TYPE_INT16:    BLANK_FLAG_REMOVE( int16_t  );    break;
717     case GAL_TYPE_UINT32:   BLANK_FLAG_REMOVE( uint32_t );    break;
718     case GAL_TYPE_INT32:    BLANK_FLAG_REMOVE( int32_t  );    break;
719     case GAL_TYPE_UINT64:   BLANK_FLAG_REMOVE( uint64_t );    break;
720     case GAL_TYPE_INT64:    BLANK_FLAG_REMOVE( int64_t  );    break;
721     case GAL_TYPE_FLOAT32:  BLANK_FLAG_REMOVE( float    );    break;
722     case GAL_TYPE_FLOAT64:  BLANK_FLAG_REMOVE( double   );    break;
723     case GAL_TYPE_STRING:
724       strarr=input->array;
725       for(i=0;i<input->size;++i)
726         {
727           if( *f && *f!=GAL_BLANK_UINT8 )        /* Flagged to be removed */
728             { free(strarr[i]); strarr[i]=NULL; }
729           else strarr[num++]=strarr[i];          /* Keep. */
730           ++f;
731         }
732       break;
733     default:
734       error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
735             __func__, input->type);
736     }
737 
738   /* Adjust the size elements of the dataset. */
739   input->ndim=1;
740   input->dsize[0]=input->size=num;
741 }
742 
743 
744 
745 
746 
747 /* Remove blank elements from a dataset, convert it to a 1D dataset and
748    adjust the size properly. In practice this function doesn't 'realloc'
749    the input array, all it does is to shift the blank eleemnts to the end
750    and adjust the size elements of the 'gal_data_t'. */
751 #define BLANK_REMOVE(IT) {                                              \
752     IT b, *a=input->array, *af=a+input->size, *o=input->array;          \
753     gal_blank_write(&b, input->type);                                   \
754     if(b==b)       /* Blank value can be be checked with equal. */      \
755       do if(*a!=b)  { ++num; *o++=*a; } while(++a<af);                  \
756     else           /* Blank value will fail on equal comparison. */     \
757       do if(*a==*a) { ++num; *o++=*a; } while(++a<af);                  \
758   }
759 void
gal_blank_remove(gal_data_t * input)760 gal_blank_remove(gal_data_t *input)
761 {
762   char **strarr;
763   size_t i, num=0;
764 
765   /* This function currently assumes a contiguous patch of memory. */
766   if(input->block && input->ndim!=1 )
767     error(EXIT_FAILURE, 0, "%s: tiles in datasets with more dimensions than "
768           "1 are not yet supported. Your input has %zu dimensions",
769           __func__, input->ndim);
770 
771   /* If the dataset doesn't have blank values, then just get the size. */
772   if( gal_blank_present(input, 0) )
773     {
774       /* Shift all non-blank elements to the start of the array. */
775       switch(input->type)
776         {
777         case GAL_TYPE_UINT8:    BLANK_REMOVE( uint8_t  );    break;
778         case GAL_TYPE_INT8:     BLANK_REMOVE( int8_t   );    break;
779         case GAL_TYPE_UINT16:   BLANK_REMOVE( uint16_t );    break;
780         case GAL_TYPE_INT16:    BLANK_REMOVE( int16_t  );    break;
781         case GAL_TYPE_UINT32:   BLANK_REMOVE( uint32_t );    break;
782         case GAL_TYPE_INT32:    BLANK_REMOVE( int32_t  );    break;
783         case GAL_TYPE_UINT64:   BLANK_REMOVE( uint64_t );    break;
784         case GAL_TYPE_INT64:    BLANK_REMOVE( int64_t  );    break;
785         case GAL_TYPE_FLOAT32:  BLANK_REMOVE( float    );    break;
786         case GAL_TYPE_FLOAT64:  BLANK_REMOVE( double   );    break;
787         case GAL_TYPE_STRING:
788           strarr=input->array;
789           for(i=0;i<input->size;++i)
790             if( strcmp(strarr[i], GAL_BLANK_STRING) ) /* Not blank. */
791               { strarr[num++]=strarr[i]; }
792             else { free(strarr[i]); strarr[i]=NULL; }  /* Is blank. */
793           break;
794         default:
795           error(EXIT_FAILURE, 0, "%s: type code %d not recognized",
796                 __func__, input->type);
797         }
798     }
799   else num=input->size;
800 
801   /* Adjust the size elements of the dataset. */
802   input->ndim=1;
803   input->dsize[0]=input->size=num;
804 
805   /* Set the flags to mark that there is no blanks. */
806   input->flag |= GAL_DATA_FLAG_BLANK_CH;
807   input->flag &= ~GAL_DATA_FLAG_HASBLANK;
808 }
809 
810 
811 
812 
813 
814 /* Similar to 'gal_blank_remove', but also reallocates/frees the extra
815    space. */
816 void
gal_blank_remove_realloc(gal_data_t * input)817 gal_blank_remove_realloc(gal_data_t *input)
818 {
819   /* Remove the blanks and fix the size of the dataset. */
820   gal_blank_remove(input);
821 
822   /* Run realloc to shrink the allocated space. */
823   input->array=realloc(input->array,
824                        input->size*gal_type_sizeof(input->type));
825   if(input->array==NULL)
826     error(EXIT_FAILURE, 0, "%s: couldn't reallocate memory", __func__);
827 }
828 
829 
830 
831 
832 
833 static gal_data_t *
blank_remove_in_list_merge_flags(gal_data_t * thisdata,gal_data_t * flag)834 blank_remove_in_list_merge_flags(gal_data_t *thisdata, gal_data_t *flag)
835 {
836   size_t i;
837   uint8_t *u, *tu;
838   gal_data_t *flagtmp;
839 
840   /* Build the flag of blank elements for this column. */
841   flagtmp=gal_blank_flag(thisdata);
842 
843   /* If this is the first column, then use flagtmp as flag. */
844   if(flag)
845     {
846       u=flag->array;
847       tu=flagtmp->array;
848       for(i=0;i<flag->size;++i) u[i] = u[i] || tu[i];
849       gal_data_free(flagtmp);
850     }
851   else
852     flag=flagtmp;
853 
854   /* For a check.
855   u=flag->array;
856   double *d=thisdata->array;
857   for(i=0;i<flag->size;++i) printf("%f, %u\n", d[i], u[i]);
858   printf("\n");
859   */
860 
861   /* Return the flag dataset. */
862   return flag;
863 }
864 
865 
866 
867 
868 
869 /* Remove any row that has a blank in any of the given columns. */
870 gal_data_t *
gal_blank_remove_rows(gal_data_t * columns,gal_list_sizet_t * column_indexs)871 gal_blank_remove_rows(gal_data_t *columns, gal_list_sizet_t *column_indexs)
872 {
873   size_t i;
874   gal_list_sizet_t *tcol;
875   gal_data_t *tmp, *flag=NULL;
876 
877   /* If any columns are requested, only use the given columns for the
878      flags, otherwise use all the input columns. */
879   if(column_indexs)
880     for(tcol=column_indexs; tcol!=NULL; tcol=tcol->next)
881       {
882         /* Find the correct column in the full list. */
883         i=0;
884         for(tmp=columns; tmp!=NULL; tmp=tmp->next)
885           if(i++==tcol->v) break;
886 
887         /* If the given index is larger than the number of elements in the
888            input list, then print an error. */
889         if(tmp==NULL)
890           error(EXIT_FAILURE, 0, "%s: input list has %zu elements, but the "
891                 "column %zu (counting from zero) has been requested",
892                 __func__, gal_list_data_number(columns), tcol->v);
893 
894         /* Build the flag of blank elements for this column. */
895         flag=blank_remove_in_list_merge_flags(tmp, flag);
896       }
897   else
898     for(tmp=columns; tmp!=NULL; tmp=tmp->next)
899       flag=blank_remove_in_list_merge_flags(tmp, flag);
900 
901   /* Now that the flags have been set, remove the rows. */
902   for(tmp=columns; tmp!=NULL; tmp=tmp->next)
903     gal_blank_flag_remove(tmp, flag);
904 
905   /* For a check.
906   double *d1=columns->array, *d2=columns->next->array;
907   for(i=0;i<columns->size;++i)
908     printf("%f, %f\n", d2[i], d1[i]);
909   printf("\n");
910   */
911 
912   /* Return the flag. */
913   return flag;
914 }
915