1 /*
2  # This file is part of the Astrometry.net suite.
3  # Licensed under a 3-clause BSD style license - see LICENSE
4  */
5 
6 #include <unistd.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <math.h>
10 #include <string.h>
11 #include <arpa/inet.h>
12 #include <assert.h>
13 
14 #include "os-features.h"
15 #include "healpix.h"
16 #include "healpix-utils.h"
17 #include "starutil.h"
18 #include "errors.h"
19 #include "log.h"
20 #include "boilerplate.h"
21 #include "fitsioutils.h"
22 #include "bl.h"
23 #include "fitstable.h"
24 #include "ioutils.h"
25 #include "mathutil.h"
26 
27 /**
28  Accepts a list of input FITS tables, all with exactly the same
29  structure, and including RA,Dec columns.
30 
31  Accepts a big-healpix Nside, and a margin in degrees.
32 
33  Writes an output file for each of the big-healpixes, containing those
34  rows that are within (or within range) of the healpix.
35  */
36 
37 const char* OPTIONS = "hvn:r:d:m:o:gc:e:t:b:RC";
38 
printHelp(char * progname)39 void printHelp(char* progname) {
40     BOILERPLATE_HELP_HEADER(stdout);
41     printf("\nUsage: %s [options] <input-FITS-catalog> [...]\n"
42            "    -o <output-filename-pattern>  with %%i printf-pattern\n"
43            "    [-r <ra-column-name>]: name of RA in FITS table (default RA)\n"
44            "    [-d <dec-column-name>]: name of DEC in FITS table (default DEC)\n"
45            "    [-n <healpix Nside>]: default is 1\n"
46            "    [-R]: use ring indexing, rather than xy indexing, for healpixes\n"
47            "    [-m <margin in deg>]: add a margin of this many degrees around the healpixes; default 0\n"
48            "    [-g]: gzip'd inputs\n"
49            "    [-c <name>]: copy given column name to the output files\n"
50            "    [-e <name>]: copy given column name to the output files, converting to FITS type E (float)\n"
51            "    [-C]: close output files after each input file has been read\n"
52            "    [-t <temp-dir>]: use the given temp dir; default is /tmp\n"
53            "    [-b <backref-file>]: save the filenumber->filename map in this file; enables writing backreferences too\n"
54            "    [-v]: +verbose\n"
55            "\n\n\n"
56            "WARNING: The input FITS files MUST have EXACTLY the same format!!",
57            progname);
58 }
59 
60 
61 struct cap_s {
62     double xyz[3];
63     double r2;
64 };
65 typedef struct cap_s cap_t;
66 
refill_rowbuffer(void * baton,void * buffer,unsigned int offset,unsigned int nelems)67 static int refill_rowbuffer(void* baton, void* buffer,
68                             unsigned int offset, unsigned int nelems) {
69     fitstable_t* table = baton;
70     //printf("refill_rowbuffer: offset %i, n %i\n", offset, nelems);
71     return fitstable_read_nrows_data(table, offset, nelems, buffer);
72 }
73 
74 
main(int argc,char * argv[])75 int main(int argc, char *argv[]) {
76     int argchar;
77     char* progname = argv[0];
78     sl* infns = sl_new(16);
79     char* outfnpat = NULL;
80     char* racol = "RA";
81     char* deccol = "DEC";
82     char* tempdir = "/tmp";
83     anbool gzip = FALSE;
84     sl* cols = sl_new(16);
85     sl* e_cols = sl_new(16);
86     int loglvl = LOG_MSG;
87     int nside = 1;
88     double margin = 0.0;
89     int NHP;
90     double md;
91     char* backref = NULL;
92     anbool ringindex = FALSE;
93     anbool closefiles = FALSE;
94     off_t* resume_offsets = NULL;
95 
96     fitstable_t* intable;
97     fitstable_t* intable2;
98     fitstable_t** outtables;
99 
100     anbool anycols = FALSE;
101 
102     char** myargs;
103     int nmyargs;
104     int i;
105 
106     while ((argchar = getopt (argc, argv, OPTIONS)) != -1)
107         switch (argchar) {
108         case 'C':
109             closefiles = TRUE;
110             break;
111         case 'R':
112             ringindex = TRUE;
113             break;
114         case 'b':
115             backref = optarg;
116             break;
117         case 't':
118             tempdir = optarg;
119             break;
120         case 'c':
121             sl_append(cols, optarg);
122             break;
123         case 'e':
124             sl_append(e_cols, optarg);
125             break;
126         case 'g':
127             gzip = TRUE;
128             break;
129         case 'o':
130             outfnpat = optarg;
131             break;
132         case 'r':
133             racol = optarg;
134             break;
135         case 'd':
136             deccol = optarg;
137             break;
138         case 'n':
139             nside = atoi(optarg);
140             break;
141         case 'm':
142             margin = atof(optarg);
143             break;
144         case 'v':
145             loglvl++;
146             break;
147         case '?':
148             fprintf(stderr, "Unknown option `-%c'.\n", optopt);
149         case 'h':
150             printHelp(progname);
151             return 0;
152         default:
153             return -1;
154         }
155 
156     if (sl_size(cols) == 0) {
157         sl_free2(cols);
158         cols = NULL;
159     }
160     if (sl_size(e_cols) == 0) {
161         sl_free2(e_cols);
162         e_cols = NULL;
163     }
164     anycols = (cols || e_cols);
165 
166     nmyargs = argc - optind;
167     myargs = argv + optind;
168 
169     for (i=0; i<nmyargs; i++)
170         sl_append(infns, myargs[i]);
171 
172     if (!sl_size(infns)) {
173         printHelp(progname);
174         printf("Need input filenames!\n");
175         exit(-1);
176     }
177     log_init(loglvl);
178     fits_use_error_system();
179 
180     NHP = 12 * nside * nside;
181     logmsg("%i output healpixes\n", NHP);
182     outtables = calloc(NHP, sizeof(fitstable_t*));
183     assert(outtables);
184 
185     if (closefiles) {
186         // In order to reduce the number of open output files, we're
187         // going to close output files after we finished reading each
188         // input file.  When we close a file, we'll remember where we
189         // left off if we need to re-open it in the future.  (We do
190         // this rather than just using the file size because FITS
191         // files are always padded out to fill an integer number of
192         // FITS blocks of 2880 bytes).
193         resume_offsets = calloc(NHP, sizeof(off_t));
194         assert(resume_offsets);
195     }
196 
197     md = deg2dist(margin);
198 
199     /**
200      About the mincaps/maxcaps:
201 
202      These have a center and radius-squared, describing the region
203      inside a small circle on the sphere.
204 
205      The "mincaps" describe the regions that are definitely owned by a
206      single healpix -- ie, more than MARGIN distance from any edge.
207      That is, the mincap is the small circle centered at (0.5, 0.5) in
208      the healpix and with radius = the distance to the closest healpix
209      boundary, MINUS the margin distance.
210 
211      Below, we first check whether a new star is within the "mincap"
212      of any healpix.  If so, we stick it in that healpix and continue.
213 
214      Otherwise, we check all the "maxcaps" -- these are the healpixes
215      it could *possibly* be in.  We then refine with
216      healpix_within_range_of_xyz.  The maxcap distance is the distance
217      to the furthest boundary point, PLUS the margin distance.
218      */
219 
220 
221     cap_t* mincaps = malloc(NHP * sizeof(cap_t));
222     cap_t* maxcaps = malloc(NHP * sizeof(cap_t));
223     for (i=0; i<NHP; i++) {
224         // center
225         double r2;
226         double xyz[3];
227         double* cxyz;
228         double step = 1e-3;
229         double v;
230         double r2b, r2a;
231 
232         cxyz = mincaps[i].xyz;
233         healpix_to_xyzarr(i, nside, 0.5, 0.5, mincaps[i].xyz);
234         memcpy(maxcaps[i].xyz, cxyz, 3 * sizeof(double));
235         logverb("Center of HP %i: (%.3f, %.3f, %.3f)\n", i, cxyz[0], cxyz[1], cxyz[2]);
236 
237         // radius-squared:
238         // max is the easy one: max of the four corners (I assume)
239         r2 = 0.0;
240         healpix_to_xyzarr(i, nside, 0.0, 0.0, xyz);
241         logverb("  HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
242         r2 = MAX(r2, distsq(xyz, cxyz, 3));
243         healpix_to_xyzarr(i, nside, 1.0, 0.0, xyz);
244         logverb("  HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
245         r2 = MAX(r2, distsq(xyz, cxyz, 3));
246         healpix_to_xyzarr(i, nside, 0.0, 1.0, xyz);
247         logverb("  HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
248         r2 = MAX(r2, distsq(xyz, cxyz, 3));
249         healpix_to_xyzarr(i, nside, 1.0, 1.0, xyz);
250         logverb("  HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
251         r2 = MAX(r2, distsq(xyz, cxyz, 3));
252         logverb("  max distsq: %.3f\n", r2);
253         logverb("  margin dist: %.3f\n", md);
254         maxcaps[i].r2 = square(sqrt(r2) + md);
255         logverb("  max cap distsq: %.3f\n", maxcaps[i].r2);
256         r2a = r2;
257 
258         r2 = 1.0;
259         r2b = 0.0;
260         for (v=0; v<=1.0; v+=step) {
261             healpix_to_xyzarr(i, nside, 0.0, v, xyz);
262             r2 = MIN(r2, distsq(xyz, cxyz, 3));
263             r2b = MAX(r2b, distsq(xyz, cxyz, 3));
264             healpix_to_xyzarr(i, nside, 1.0, v, xyz);
265             r2 = MIN(r2, distsq(xyz, cxyz, 3));
266             r2b = MAX(r2b, distsq(xyz, cxyz, 3));
267             healpix_to_xyzarr(i, nside, v, 0.0, xyz);
268             r2 = MIN(r2, distsq(xyz, cxyz, 3));
269             r2b = MAX(r2b, distsq(xyz, cxyz, 3));
270             healpix_to_xyzarr(i, nside, v, 1.0, xyz);
271             r2 = MIN(r2, distsq(xyz, cxyz, 3));
272             r2b = MAX(r2b, distsq(xyz, cxyz, 3));
273         }
274         mincaps[i].r2 = square(MAX(0, sqrt(r2) - md));
275         logverb("\nhealpix %i: min rad    %g\n", i, sqrt(r2));
276         logverb("healpix %i: max rad    %g\n", i, sqrt(r2a));
277         logverb("healpix %i: max rad(b) %g\n", i, sqrt(r2b));
278         assert(r2a >= r2b);
279     }
280 
281     if (backref) {
282         fitstable_t* tab = fitstable_open_for_writing(backref);
283         int maxlen = 0;
284         char* buf;
285         for (i=0; i<sl_size(infns); i++) {
286             char* infn = sl_get(infns, i);
287             maxlen = MAX(maxlen, strlen(infn));
288         }
289         fitstable_add_write_column_array(tab, fitscolumn_char_type(), maxlen,
290                                          "filename", NULL);
291         fitstable_add_write_column(tab, fitscolumn_i16_type(), "index", NULL);
292         if (fitstable_write_primary_header(tab) ||
293             fitstable_write_header(tab)) {
294             ERROR("Failed to write header of backref table \"%s\"", backref);
295             exit(-1);
296         }
297         buf = malloc(maxlen+1);
298         assert(buf);
299 
300         for (i=0; i<sl_size(infns); i++) {
301             char* infn = sl_get(infns, i);
302             int16_t ind;
303             memset(buf, 0, maxlen);
304             strcpy(buf, infn);
305             ind = i;
306             if (fitstable_write_row(tab, buf, &ind)) {
307                 ERROR("Failed to write row %i of backref table: %s = %i",
308                       i, buf, ind);
309                 exit(-1);
310             }
311         }
312         if (fitstable_fix_header(tab) ||
313             fitstable_close(tab)) {
314             ERROR("Failed to fix header & close backref table");
315             exit(-1);
316         }
317         logmsg("Wrote backref table %s\n", backref);
318         free(buf);
319     }
320 
321     for (i=0; i<sl_size(infns); i++) {
322         char* infn = sl_get(infns, i);
323         char* originfn = infn;
324         int r, NR;
325         tfits_type any, dubl;
326         il* hps = NULL;
327         bread_t* rowbuf;
328         int R;
329         char* tempfn = NULL;
330         char* padrowdata = NULL;
331         int ii;
332 
333         logmsg("Reading input \"%s\"...\n", infn);
334 
335         if (gzip) {
336             char* cmd;
337             int rtn;
338             tempfn = create_temp_file("hpsplit", tempdir);
339             asprintf_safe(&cmd, "gunzip -cd %s > %s", infn, tempfn);
340             logmsg("Running: \"%s\"\n", cmd);
341             rtn = run_command_get_outputs(cmd, NULL, NULL);
342             if (rtn) {
343                 ERROR("Failed to run command: \"%s\"", cmd);
344                 exit(-1);
345             }
346             free(cmd);
347             infn = tempfn;
348         }
349 
350         intable = fitstable_open(infn);
351         if (!intable) {
352             ERROR("Couldn't read catalog %s", infn);
353             exit(-1);
354         }
355         NR = fitstable_nrows(intable);
356         logmsg("Got %i rows\n", NR);
357 
358         // For '-e', we need to endian-flip the input rows, which requires
359         // knowing the columns... we use 'intable2' just for that.
360         intable2 = fitstable_open(infn);
361         if (!intable2) {
362             ERROR("Couldn't read catalog %s", infn);
363             exit(-1);
364         }
365         fitstable_add_fits_columns_as_struct(intable2);
366 
367         any = fitscolumn_any_type();
368         dubl = fitscolumn_double_type();
369 
370         fitstable_add_read_column_struct(intable, dubl, 1, 0, any, racol, TRUE);
371         fitstable_add_read_column_struct(intable, dubl, 1, sizeof(double), any, deccol, TRUE);
372 
373         fitstable_use_buffered_reading(intable, 2*sizeof(double), 1000);
374 
375         R = fitstable_row_size(intable);
376         rowbuf = buffered_read_new(R, 1000, NR, refill_rowbuffer, intable);
377 
378         if (fitstable_read_extension(intable, 1)) {
379             ERROR("Failed to find RA and DEC columns (called \"%s\" and \"%s\" in the FITS file)", racol, deccol);
380             exit(-1);
381         }
382 
383         for (r=0; r<NR; r++) {
384             int hp = -1;
385             double ra, dec;
386             int j;
387             double* rd;
388             void* rowdata;
389             void* rdata;
390             anbool flipped;
391 
392             if (r && ((r % 100000) == 0)) {
393                 logmsg("Reading row %i of %i\n", r, NR);
394             }
395 
396             //printf("reading RA,Dec for row %i\n", r);
397             rd = fitstable_next_struct(intable);
398             ra = rd[0];
399             dec = rd[1];
400 
401             logverb("row %i: ra,dec %g,%g\n", r, ra, dec);
402             if (margin == 0) {
403                 hp = radecdegtohealpix(ra, dec, nside);
404                 logverb("  --> healpix %i\n", hp);
405             } else {
406 
407                 double xyz[3];
408                 anbool gotit = FALSE;
409                 double d2;
410                 if (!hps)
411                     hps = il_new(4);
412                 radecdeg2xyzarr(ra, dec, xyz);
413                 for (j=0; j<NHP; j++) {
414                     d2 = distsq(xyz, mincaps[j].xyz, 3);
415                     if (d2 <= mincaps[j].r2) {
416                         logverb("  -> in mincap %i  (dist %g vs %g)\n", j, sqrt(d2), sqrt(mincaps[j].r2));
417                         il_append(hps, j);
418                         gotit = TRUE;
419                         break;
420                     }
421                 }
422                 if (!gotit) {
423                     for (j=0; j<NHP; j++) {
424                         d2 = distsq(xyz, maxcaps[j].xyz, 3);
425                         if (d2 <= maxcaps[j].r2) {
426                             logverb("  -> in maxcap %i  (dist %g vs %g)\n", j, sqrt(d2), sqrt(maxcaps[j].r2));
427                             if (healpix_within_range_of_xyz(j, nside, xyz, margin)) {
428                                 logverb("  -> and within range.\n");
429                                 il_append(hps, j);
430                             }
431                         }
432                     }
433                 }
434 
435                 //hps = healpix_rangesearch_radec(ra, dec, margin, nside, hps);
436 
437                 logverb("  --> healpixes: [");
438                 for (j=0; j<il_size(hps); j++)
439                     logverb(" %i", il_get(hps, j));
440                 logverb(" ]\n");
441             }
442 
443             //printf("Reading rowdata for row %i\n", r);
444             rowdata = buffered_read(rowbuf);
445             assert(rowdata);
446 
447             flipped = FALSE;
448             j=0;
449             while (1) {
450                 if (hps) {
451                     if (j >= il_size(hps))
452                         break;
453                     hp = il_get(hps, j);
454                     j++;
455                 }
456                 assert(hp < NHP);
457                 assert(hp >= 0);
458 
459                 // Open output file if necessary
460                 if (!outtables[hp]) {
461                     char* outfn;
462                     fitstable_t* out;
463 
464                     // MEMLEAK the output filename.  You'll live.
465                     if (ringindex) {
466                         int ringhp = healpix_xy_to_ring(hp, nside);
467                         logverb("Ring-indexed healpix: %i (xy index: %i)\n", ringhp,hp);
468                         asprintf_safe(&outfn, outfnpat, ringhp);
469                     } else {
470                         asprintf_safe(&outfn, outfnpat, hp);
471                     }
472 
473                     logmsg("Opening output file \"%s\"...\n", outfn);
474                     out = fitstable_open_for_writing(outfn);
475                     if (!out) {
476                         ERROR("Failed to open output table \"%s\"", outfn);
477                         exit(-1);
478                     }
479                     // Set the output table structure.
480                     if (anycols) {
481                         if (cols)
482                             fitstable_add_fits_columns_as_struct3(intable, out, cols, 0);
483                         if (e_cols)
484                             fitstable_add_fits_columns_as_struct4(intable, out, e_cols, 0, TFITS_BIN_TYPE_E);
485 
486                     } else
487                         fitstable_add_fits_columns_as_struct2(intable, out);
488 
489                     if (backref) {
490                         tfits_type i16type;
491                         tfits_type i32type;
492                         // R = fitstable_row_size(intable);
493                         int off = R;
494                         i16type = fitscolumn_i16_type();
495                         i32type = fitscolumn_i32_type();
496                         fitstable_add_read_column_struct(out, i16type, 1, off,
497                                                          i16type, "backref_file", TRUE);
498                         off += sizeof(int16_t);
499                         fitstable_add_read_column_struct(out, i32type, 1, off,
500                                                          i32type, "backref_index", TRUE);
501                     }
502 
503                     if (fitstable_write_primary_header(out) ||
504                         fitstable_write_header(out)) {
505                         ERROR("Failed to write output file headers for \"%s\"", outfn);
506                         exit(-1);
507                     }
508 
509                     outtables[hp] = out;
510                 }
511 
512                 if (backref) {
513                     int16_t brfile;
514                     int32_t brind;
515                     if (!padrowdata) {
516                         padrowdata = malloc(R + sizeof(int16_t) + sizeof(int32_t));
517                         assert(padrowdata);
518                     }
519                     // convert to FITS endian
520                     brfile = htons(i);
521                     brind  = htonl(r);
522                     // add backref data to rowdata
523                     memcpy(padrowdata, rowdata, R);
524                     memcpy(padrowdata + R, &brfile, sizeof(int16_t));
525                     memcpy(padrowdata + R + sizeof(int16_t), &brind, sizeof(int32_t));
526                     rdata = padrowdata;
527                 } else {
528                     rdata = rowdata;
529                 }
530 
531                 if (closefiles && (outtables[hp]->fid == NULL)) {
532                     char* outfn = outtables[hp]->fn;
533                     logverb("Re-opening healpix %i file %s at offset %lu\n",
534                             hp, outfn, (long)resume_offsets[hp]);
535                     outtables[hp]->fid = fopen(outfn, "r+b");
536                     fseeko(outtables[hp]->fid, resume_offsets[hp], SEEK_SET);
537                 }
538 
539                 if (anycols) {
540                     if (!flipped) {
541                         // if we're writing to multiple output
542                         // healpixes, only flip once!
543                         flipped = TRUE;
544                         fitstable_endian_flip_row_data(intable2, rdata);
545                     }
546                     if (fitstable_write_struct(outtables[hp], rdata)) {
547                         ERROR("Failed to copy a row of data from input table \"%s\" to output healpix %i", infn, hp);
548                     }
549 
550                 } else {
551                     if (fitstable_write_row_data(outtables[hp], rdata)) {
552                         ERROR("Failed to copy a row of data from input table \"%s\" to output healpix %i", infn, hp);
553                     }
554                 }
555 
556                 if (!hps)
557                     break;
558             }
559             if (hps)
560                 il_remove_all(hps);
561 
562         }
563         buffered_read_free(rowbuf);
564         // wack... buffered_read_free() just frees its internal buffer,
565         // not the "rowbuf" struct itself.
566         // who wrote this crazy code?  Oh, me of 5 years ago.  Jerk.
567         free(rowbuf);
568 
569         fitstable_close(intable);
570         il_free(hps);
571 
572         if (tempfn) {
573             logverb("Removing temp file %s\n", tempfn);
574             if (unlink(tempfn)) {
575                 SYSERROR("Failed to unlink() temp file \"%s\"", tempfn);
576             }
577             tempfn = NULL;
578         }
579 
580         // fix headers so that the files are valid at this point.
581         for (ii=0; ii<NHP; ii++) {
582             if (!outtables[ii])
583                 continue;
584             if (closefiles && (outtables[ii]->fid == NULL))
585                 continue;
586 
587             off_t offset = ftello(outtables[ii]->fid);
588             if (closefiles) {
589                 resume_offsets[ii] = offset;
590                 logverb("Closing healpix %i (saving offset %lu)\n", ii, (long)offset);
591                 if (fitstable_fix_header(outtables[ii])) {
592                     ERROR("Failed to fix header for healpix %i after reading input file \"%s\"", ii, originfn);
593                     exit(-1);
594                 }
595                 if (fclose(outtables[ii]->fid)) {
596                     SYSERROR("Failed to close file %s\n", outtables[ii]->fn);
597                     exit(-1);
598                 }
599                 outtables[ii]->fid = NULL;
600             } else {
601                 // the "fitstable_fix_header" call (via
602                 // fitsfile_fix_header) adds padding to the file to
603                 // bring it up to a FITS block size, so we ftell and
604                 // fseek afterward.
605                 if (fitstable_fix_header(outtables[ii])) {
606                     ERROR("Failed to fix header for healpix %i after reading input file \"%s\"", ii, originfn);
607                     exit(-1);
608                 }
609                 fseeko(outtables[ii]->fid, offset, SEEK_SET);
610             }
611         }
612 
613         if (padrowdata) {
614             free(padrowdata);
615             padrowdata = NULL;
616         }
617 
618     }
619 
620     for (i=0; i<NHP; i++) {
621         if (!outtables[i])
622             continue;
623         if (closefiles && (outtables[i]->fid == NULL)) {
624             if (fitstable_close(outtables[i])) {
625                 ERROR("Failed to close output table for healpix %i", i);
626                 exit(-1);
627             }
628             continue;
629         }
630         if (fitstable_fix_header(outtables[i]) ||
631             fitstable_fix_primary_header(outtables[i]) ||
632             fitstable_close(outtables[i])) {
633             ERROR("Failed to close output table for healpix %i", i);
634             exit(-1);
635         }
636     }
637 
638     free(outtables);
639     sl_free2(infns);
640     sl_free2(cols);
641     sl_free2(e_cols);
642 
643     free(mincaps);
644     free(maxcaps);
645 
646     free(resume_offsets);
647 
648     return 0;
649 }
650 
651 
652 
653