1 /*
2 # This file is part of the Astrometry.net suite.
3 # Licensed under a 3-clause BSD style license - see LICENSE
4 */
5
6 #include <unistd.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <math.h>
10 #include <string.h>
11 #include <arpa/inet.h>
12 #include <assert.h>
13
14 #include "os-features.h"
15 #include "healpix.h"
16 #include "healpix-utils.h"
17 #include "starutil.h"
18 #include "errors.h"
19 #include "log.h"
20 #include "boilerplate.h"
21 #include "fitsioutils.h"
22 #include "bl.h"
23 #include "fitstable.h"
24 #include "ioutils.h"
25 #include "mathutil.h"
26
27 /**
28 Accepts a list of input FITS tables, all with exactly the same
29 structure, and including RA,Dec columns.
30
31 Accepts a big-healpix Nside, and a margin in degrees.
32
33 Writes an output file for each of the big-healpixes, containing those
34 rows that are within (or within range) of the healpix.
35 */
36
37 const char* OPTIONS = "hvn:r:d:m:o:gc:e:t:b:RC";
38
printHelp(char * progname)39 void printHelp(char* progname) {
40 BOILERPLATE_HELP_HEADER(stdout);
41 printf("\nUsage: %s [options] <input-FITS-catalog> [...]\n"
42 " -o <output-filename-pattern> with %%i printf-pattern\n"
43 " [-r <ra-column-name>]: name of RA in FITS table (default RA)\n"
44 " [-d <dec-column-name>]: name of DEC in FITS table (default DEC)\n"
45 " [-n <healpix Nside>]: default is 1\n"
46 " [-R]: use ring indexing, rather than xy indexing, for healpixes\n"
47 " [-m <margin in deg>]: add a margin of this many degrees around the healpixes; default 0\n"
48 " [-g]: gzip'd inputs\n"
49 " [-c <name>]: copy given column name to the output files\n"
50 " [-e <name>]: copy given column name to the output files, converting to FITS type E (float)\n"
51 " [-C]: close output files after each input file has been read\n"
52 " [-t <temp-dir>]: use the given temp dir; default is /tmp\n"
53 " [-b <backref-file>]: save the filenumber->filename map in this file; enables writing backreferences too\n"
54 " [-v]: +verbose\n"
55 "\n\n\n"
56 "WARNING: The input FITS files MUST have EXACTLY the same format!!",
57 progname);
58 }
59
60
61 struct cap_s {
62 double xyz[3];
63 double r2;
64 };
65 typedef struct cap_s cap_t;
66
refill_rowbuffer(void * baton,void * buffer,unsigned int offset,unsigned int nelems)67 static int refill_rowbuffer(void* baton, void* buffer,
68 unsigned int offset, unsigned int nelems) {
69 fitstable_t* table = baton;
70 //printf("refill_rowbuffer: offset %i, n %i\n", offset, nelems);
71 return fitstable_read_nrows_data(table, offset, nelems, buffer);
72 }
73
74
main(int argc,char * argv[])75 int main(int argc, char *argv[]) {
76 int argchar;
77 char* progname = argv[0];
78 sl* infns = sl_new(16);
79 char* outfnpat = NULL;
80 char* racol = "RA";
81 char* deccol = "DEC";
82 char* tempdir = "/tmp";
83 anbool gzip = FALSE;
84 sl* cols = sl_new(16);
85 sl* e_cols = sl_new(16);
86 int loglvl = LOG_MSG;
87 int nside = 1;
88 double margin = 0.0;
89 int NHP;
90 double md;
91 char* backref = NULL;
92 anbool ringindex = FALSE;
93 anbool closefiles = FALSE;
94 off_t* resume_offsets = NULL;
95
96 fitstable_t* intable;
97 fitstable_t* intable2;
98 fitstable_t** outtables;
99
100 anbool anycols = FALSE;
101
102 char** myargs;
103 int nmyargs;
104 int i;
105
106 while ((argchar = getopt (argc, argv, OPTIONS)) != -1)
107 switch (argchar) {
108 case 'C':
109 closefiles = TRUE;
110 break;
111 case 'R':
112 ringindex = TRUE;
113 break;
114 case 'b':
115 backref = optarg;
116 break;
117 case 't':
118 tempdir = optarg;
119 break;
120 case 'c':
121 sl_append(cols, optarg);
122 break;
123 case 'e':
124 sl_append(e_cols, optarg);
125 break;
126 case 'g':
127 gzip = TRUE;
128 break;
129 case 'o':
130 outfnpat = optarg;
131 break;
132 case 'r':
133 racol = optarg;
134 break;
135 case 'd':
136 deccol = optarg;
137 break;
138 case 'n':
139 nside = atoi(optarg);
140 break;
141 case 'm':
142 margin = atof(optarg);
143 break;
144 case 'v':
145 loglvl++;
146 break;
147 case '?':
148 fprintf(stderr, "Unknown option `-%c'.\n", optopt);
149 case 'h':
150 printHelp(progname);
151 return 0;
152 default:
153 return -1;
154 }
155
156 if (sl_size(cols) == 0) {
157 sl_free2(cols);
158 cols = NULL;
159 }
160 if (sl_size(e_cols) == 0) {
161 sl_free2(e_cols);
162 e_cols = NULL;
163 }
164 anycols = (cols || e_cols);
165
166 nmyargs = argc - optind;
167 myargs = argv + optind;
168
169 for (i=0; i<nmyargs; i++)
170 sl_append(infns, myargs[i]);
171
172 if (!sl_size(infns)) {
173 printHelp(progname);
174 printf("Need input filenames!\n");
175 exit(-1);
176 }
177 log_init(loglvl);
178 fits_use_error_system();
179
180 NHP = 12 * nside * nside;
181 logmsg("%i output healpixes\n", NHP);
182 outtables = calloc(NHP, sizeof(fitstable_t*));
183 assert(outtables);
184
185 if (closefiles) {
186 // In order to reduce the number of open output files, we're
187 // going to close output files after we finished reading each
188 // input file. When we close a file, we'll remember where we
189 // left off if we need to re-open it in the future. (We do
190 // this rather than just using the file size because FITS
191 // files are always padded out to fill an integer number of
192 // FITS blocks of 2880 bytes).
193 resume_offsets = calloc(NHP, sizeof(off_t));
194 assert(resume_offsets);
195 }
196
197 md = deg2dist(margin);
198
199 /**
200 About the mincaps/maxcaps:
201
202 These have a center and radius-squared, describing the region
203 inside a small circle on the sphere.
204
205 The "mincaps" describe the regions that are definitely owned by a
206 single healpix -- ie, more than MARGIN distance from any edge.
207 That is, the mincap is the small circle centered at (0.5, 0.5) in
208 the healpix and with radius = the distance to the closest healpix
209 boundary, MINUS the margin distance.
210
211 Below, we first check whether a new star is within the "mincap"
212 of any healpix. If so, we stick it in that healpix and continue.
213
214 Otherwise, we check all the "maxcaps" -- these are the healpixes
215 it could *possibly* be in. We then refine with
216 healpix_within_range_of_xyz. The maxcap distance is the distance
217 to the furthest boundary point, PLUS the margin distance.
218 */
219
220
221 cap_t* mincaps = malloc(NHP * sizeof(cap_t));
222 cap_t* maxcaps = malloc(NHP * sizeof(cap_t));
223 for (i=0; i<NHP; i++) {
224 // center
225 double r2;
226 double xyz[3];
227 double* cxyz;
228 double step = 1e-3;
229 double v;
230 double r2b, r2a;
231
232 cxyz = mincaps[i].xyz;
233 healpix_to_xyzarr(i, nside, 0.5, 0.5, mincaps[i].xyz);
234 memcpy(maxcaps[i].xyz, cxyz, 3 * sizeof(double));
235 logverb("Center of HP %i: (%.3f, %.3f, %.3f)\n", i, cxyz[0], cxyz[1], cxyz[2]);
236
237 // radius-squared:
238 // max is the easy one: max of the four corners (I assume)
239 r2 = 0.0;
240 healpix_to_xyzarr(i, nside, 0.0, 0.0, xyz);
241 logverb(" HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
242 r2 = MAX(r2, distsq(xyz, cxyz, 3));
243 healpix_to_xyzarr(i, nside, 1.0, 0.0, xyz);
244 logverb(" HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
245 r2 = MAX(r2, distsq(xyz, cxyz, 3));
246 healpix_to_xyzarr(i, nside, 0.0, 1.0, xyz);
247 logverb(" HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
248 r2 = MAX(r2, distsq(xyz, cxyz, 3));
249 healpix_to_xyzarr(i, nside, 1.0, 1.0, xyz);
250 logverb(" HP %i corner 1: (%.3f, %.3f, %.3f), distsq %.3f\n", i, xyz[0], xyz[1], xyz[2], distsq(xyz, cxyz, 3));
251 r2 = MAX(r2, distsq(xyz, cxyz, 3));
252 logverb(" max distsq: %.3f\n", r2);
253 logverb(" margin dist: %.3f\n", md);
254 maxcaps[i].r2 = square(sqrt(r2) + md);
255 logverb(" max cap distsq: %.3f\n", maxcaps[i].r2);
256 r2a = r2;
257
258 r2 = 1.0;
259 r2b = 0.0;
260 for (v=0; v<=1.0; v+=step) {
261 healpix_to_xyzarr(i, nside, 0.0, v, xyz);
262 r2 = MIN(r2, distsq(xyz, cxyz, 3));
263 r2b = MAX(r2b, distsq(xyz, cxyz, 3));
264 healpix_to_xyzarr(i, nside, 1.0, v, xyz);
265 r2 = MIN(r2, distsq(xyz, cxyz, 3));
266 r2b = MAX(r2b, distsq(xyz, cxyz, 3));
267 healpix_to_xyzarr(i, nside, v, 0.0, xyz);
268 r2 = MIN(r2, distsq(xyz, cxyz, 3));
269 r2b = MAX(r2b, distsq(xyz, cxyz, 3));
270 healpix_to_xyzarr(i, nside, v, 1.0, xyz);
271 r2 = MIN(r2, distsq(xyz, cxyz, 3));
272 r2b = MAX(r2b, distsq(xyz, cxyz, 3));
273 }
274 mincaps[i].r2 = square(MAX(0, sqrt(r2) - md));
275 logverb("\nhealpix %i: min rad %g\n", i, sqrt(r2));
276 logverb("healpix %i: max rad %g\n", i, sqrt(r2a));
277 logverb("healpix %i: max rad(b) %g\n", i, sqrt(r2b));
278 assert(r2a >= r2b);
279 }
280
281 if (backref) {
282 fitstable_t* tab = fitstable_open_for_writing(backref);
283 int maxlen = 0;
284 char* buf;
285 for (i=0; i<sl_size(infns); i++) {
286 char* infn = sl_get(infns, i);
287 maxlen = MAX(maxlen, strlen(infn));
288 }
289 fitstable_add_write_column_array(tab, fitscolumn_char_type(), maxlen,
290 "filename", NULL);
291 fitstable_add_write_column(tab, fitscolumn_i16_type(), "index", NULL);
292 if (fitstable_write_primary_header(tab) ||
293 fitstable_write_header(tab)) {
294 ERROR("Failed to write header of backref table \"%s\"", backref);
295 exit(-1);
296 }
297 buf = malloc(maxlen+1);
298 assert(buf);
299
300 for (i=0; i<sl_size(infns); i++) {
301 char* infn = sl_get(infns, i);
302 int16_t ind;
303 memset(buf, 0, maxlen);
304 strcpy(buf, infn);
305 ind = i;
306 if (fitstable_write_row(tab, buf, &ind)) {
307 ERROR("Failed to write row %i of backref table: %s = %i",
308 i, buf, ind);
309 exit(-1);
310 }
311 }
312 if (fitstable_fix_header(tab) ||
313 fitstable_close(tab)) {
314 ERROR("Failed to fix header & close backref table");
315 exit(-1);
316 }
317 logmsg("Wrote backref table %s\n", backref);
318 free(buf);
319 }
320
321 for (i=0; i<sl_size(infns); i++) {
322 char* infn = sl_get(infns, i);
323 char* originfn = infn;
324 int r, NR;
325 tfits_type any, dubl;
326 il* hps = NULL;
327 bread_t* rowbuf;
328 int R;
329 char* tempfn = NULL;
330 char* padrowdata = NULL;
331 int ii;
332
333 logmsg("Reading input \"%s\"...\n", infn);
334
335 if (gzip) {
336 char* cmd;
337 int rtn;
338 tempfn = create_temp_file("hpsplit", tempdir);
339 asprintf_safe(&cmd, "gunzip -cd %s > %s", infn, tempfn);
340 logmsg("Running: \"%s\"\n", cmd);
341 rtn = run_command_get_outputs(cmd, NULL, NULL);
342 if (rtn) {
343 ERROR("Failed to run command: \"%s\"", cmd);
344 exit(-1);
345 }
346 free(cmd);
347 infn = tempfn;
348 }
349
350 intable = fitstable_open(infn);
351 if (!intable) {
352 ERROR("Couldn't read catalog %s", infn);
353 exit(-1);
354 }
355 NR = fitstable_nrows(intable);
356 logmsg("Got %i rows\n", NR);
357
358 // For '-e', we need to endian-flip the input rows, which requires
359 // knowing the columns... we use 'intable2' just for that.
360 intable2 = fitstable_open(infn);
361 if (!intable2) {
362 ERROR("Couldn't read catalog %s", infn);
363 exit(-1);
364 }
365 fitstable_add_fits_columns_as_struct(intable2);
366
367 any = fitscolumn_any_type();
368 dubl = fitscolumn_double_type();
369
370 fitstable_add_read_column_struct(intable, dubl, 1, 0, any, racol, TRUE);
371 fitstable_add_read_column_struct(intable, dubl, 1, sizeof(double), any, deccol, TRUE);
372
373 fitstable_use_buffered_reading(intable, 2*sizeof(double), 1000);
374
375 R = fitstable_row_size(intable);
376 rowbuf = buffered_read_new(R, 1000, NR, refill_rowbuffer, intable);
377
378 if (fitstable_read_extension(intable, 1)) {
379 ERROR("Failed to find RA and DEC columns (called \"%s\" and \"%s\" in the FITS file)", racol, deccol);
380 exit(-1);
381 }
382
383 for (r=0; r<NR; r++) {
384 int hp = -1;
385 double ra, dec;
386 int j;
387 double* rd;
388 void* rowdata;
389 void* rdata;
390 anbool flipped;
391
392 if (r && ((r % 100000) == 0)) {
393 logmsg("Reading row %i of %i\n", r, NR);
394 }
395
396 //printf("reading RA,Dec for row %i\n", r);
397 rd = fitstable_next_struct(intable);
398 ra = rd[0];
399 dec = rd[1];
400
401 logverb("row %i: ra,dec %g,%g\n", r, ra, dec);
402 if (margin == 0) {
403 hp = radecdegtohealpix(ra, dec, nside);
404 logverb(" --> healpix %i\n", hp);
405 } else {
406
407 double xyz[3];
408 anbool gotit = FALSE;
409 double d2;
410 if (!hps)
411 hps = il_new(4);
412 radecdeg2xyzarr(ra, dec, xyz);
413 for (j=0; j<NHP; j++) {
414 d2 = distsq(xyz, mincaps[j].xyz, 3);
415 if (d2 <= mincaps[j].r2) {
416 logverb(" -> in mincap %i (dist %g vs %g)\n", j, sqrt(d2), sqrt(mincaps[j].r2));
417 il_append(hps, j);
418 gotit = TRUE;
419 break;
420 }
421 }
422 if (!gotit) {
423 for (j=0; j<NHP; j++) {
424 d2 = distsq(xyz, maxcaps[j].xyz, 3);
425 if (d2 <= maxcaps[j].r2) {
426 logverb(" -> in maxcap %i (dist %g vs %g)\n", j, sqrt(d2), sqrt(maxcaps[j].r2));
427 if (healpix_within_range_of_xyz(j, nside, xyz, margin)) {
428 logverb(" -> and within range.\n");
429 il_append(hps, j);
430 }
431 }
432 }
433 }
434
435 //hps = healpix_rangesearch_radec(ra, dec, margin, nside, hps);
436
437 logverb(" --> healpixes: [");
438 for (j=0; j<il_size(hps); j++)
439 logverb(" %i", il_get(hps, j));
440 logverb(" ]\n");
441 }
442
443 //printf("Reading rowdata for row %i\n", r);
444 rowdata = buffered_read(rowbuf);
445 assert(rowdata);
446
447 flipped = FALSE;
448 j=0;
449 while (1) {
450 if (hps) {
451 if (j >= il_size(hps))
452 break;
453 hp = il_get(hps, j);
454 j++;
455 }
456 assert(hp < NHP);
457 assert(hp >= 0);
458
459 // Open output file if necessary
460 if (!outtables[hp]) {
461 char* outfn;
462 fitstable_t* out;
463
464 // MEMLEAK the output filename. You'll live.
465 if (ringindex) {
466 int ringhp = healpix_xy_to_ring(hp, nside);
467 logverb("Ring-indexed healpix: %i (xy index: %i)\n", ringhp,hp);
468 asprintf_safe(&outfn, outfnpat, ringhp);
469 } else {
470 asprintf_safe(&outfn, outfnpat, hp);
471 }
472
473 logmsg("Opening output file \"%s\"...\n", outfn);
474 out = fitstable_open_for_writing(outfn);
475 if (!out) {
476 ERROR("Failed to open output table \"%s\"", outfn);
477 exit(-1);
478 }
479 // Set the output table structure.
480 if (anycols) {
481 if (cols)
482 fitstable_add_fits_columns_as_struct3(intable, out, cols, 0);
483 if (e_cols)
484 fitstable_add_fits_columns_as_struct4(intable, out, e_cols, 0, TFITS_BIN_TYPE_E);
485
486 } else
487 fitstable_add_fits_columns_as_struct2(intable, out);
488
489 if (backref) {
490 tfits_type i16type;
491 tfits_type i32type;
492 // R = fitstable_row_size(intable);
493 int off = R;
494 i16type = fitscolumn_i16_type();
495 i32type = fitscolumn_i32_type();
496 fitstable_add_read_column_struct(out, i16type, 1, off,
497 i16type, "backref_file", TRUE);
498 off += sizeof(int16_t);
499 fitstable_add_read_column_struct(out, i32type, 1, off,
500 i32type, "backref_index", TRUE);
501 }
502
503 if (fitstable_write_primary_header(out) ||
504 fitstable_write_header(out)) {
505 ERROR("Failed to write output file headers for \"%s\"", outfn);
506 exit(-1);
507 }
508
509 outtables[hp] = out;
510 }
511
512 if (backref) {
513 int16_t brfile;
514 int32_t brind;
515 if (!padrowdata) {
516 padrowdata = malloc(R + sizeof(int16_t) + sizeof(int32_t));
517 assert(padrowdata);
518 }
519 // convert to FITS endian
520 brfile = htons(i);
521 brind = htonl(r);
522 // add backref data to rowdata
523 memcpy(padrowdata, rowdata, R);
524 memcpy(padrowdata + R, &brfile, sizeof(int16_t));
525 memcpy(padrowdata + R + sizeof(int16_t), &brind, sizeof(int32_t));
526 rdata = padrowdata;
527 } else {
528 rdata = rowdata;
529 }
530
531 if (closefiles && (outtables[hp]->fid == NULL)) {
532 char* outfn = outtables[hp]->fn;
533 logverb("Re-opening healpix %i file %s at offset %lu\n",
534 hp, outfn, (long)resume_offsets[hp]);
535 outtables[hp]->fid = fopen(outfn, "r+b");
536 fseeko(outtables[hp]->fid, resume_offsets[hp], SEEK_SET);
537 }
538
539 if (anycols) {
540 if (!flipped) {
541 // if we're writing to multiple output
542 // healpixes, only flip once!
543 flipped = TRUE;
544 fitstable_endian_flip_row_data(intable2, rdata);
545 }
546 if (fitstable_write_struct(outtables[hp], rdata)) {
547 ERROR("Failed to copy a row of data from input table \"%s\" to output healpix %i", infn, hp);
548 }
549
550 } else {
551 if (fitstable_write_row_data(outtables[hp], rdata)) {
552 ERROR("Failed to copy a row of data from input table \"%s\" to output healpix %i", infn, hp);
553 }
554 }
555
556 if (!hps)
557 break;
558 }
559 if (hps)
560 il_remove_all(hps);
561
562 }
563 buffered_read_free(rowbuf);
564 // wack... buffered_read_free() just frees its internal buffer,
565 // not the "rowbuf" struct itself.
566 // who wrote this crazy code? Oh, me of 5 years ago. Jerk.
567 free(rowbuf);
568
569 fitstable_close(intable);
570 il_free(hps);
571
572 if (tempfn) {
573 logverb("Removing temp file %s\n", tempfn);
574 if (unlink(tempfn)) {
575 SYSERROR("Failed to unlink() temp file \"%s\"", tempfn);
576 }
577 tempfn = NULL;
578 }
579
580 // fix headers so that the files are valid at this point.
581 for (ii=0; ii<NHP; ii++) {
582 if (!outtables[ii])
583 continue;
584 if (closefiles && (outtables[ii]->fid == NULL))
585 continue;
586
587 off_t offset = ftello(outtables[ii]->fid);
588 if (closefiles) {
589 resume_offsets[ii] = offset;
590 logverb("Closing healpix %i (saving offset %lu)\n", ii, (long)offset);
591 if (fitstable_fix_header(outtables[ii])) {
592 ERROR("Failed to fix header for healpix %i after reading input file \"%s\"", ii, originfn);
593 exit(-1);
594 }
595 if (fclose(outtables[ii]->fid)) {
596 SYSERROR("Failed to close file %s\n", outtables[ii]->fn);
597 exit(-1);
598 }
599 outtables[ii]->fid = NULL;
600 } else {
601 // the "fitstable_fix_header" call (via
602 // fitsfile_fix_header) adds padding to the file to
603 // bring it up to a FITS block size, so we ftell and
604 // fseek afterward.
605 if (fitstable_fix_header(outtables[ii])) {
606 ERROR("Failed to fix header for healpix %i after reading input file \"%s\"", ii, originfn);
607 exit(-1);
608 }
609 fseeko(outtables[ii]->fid, offset, SEEK_SET);
610 }
611 }
612
613 if (padrowdata) {
614 free(padrowdata);
615 padrowdata = NULL;
616 }
617
618 }
619
620 for (i=0; i<NHP; i++) {
621 if (!outtables[i])
622 continue;
623 if (closefiles && (outtables[i]->fid == NULL)) {
624 if (fitstable_close(outtables[i])) {
625 ERROR("Failed to close output table for healpix %i", i);
626 exit(-1);
627 }
628 continue;
629 }
630 if (fitstable_fix_header(outtables[i]) ||
631 fitstable_fix_primary_header(outtables[i]) ||
632 fitstable_close(outtables[i])) {
633 ERROR("Failed to close output table for healpix %i", i);
634 exit(-1);
635 }
636 }
637
638 free(outtables);
639 sl_free2(infns);
640 sl_free2(cols);
641 sl_free2(e_cols);
642
643 free(mincaps);
644 free(maxcaps);
645
646 free(resume_offsets);
647
648 return 0;
649 }
650
651
652
653