1 /*
2  # This file is part of the Astrometry.net suite.
3  # Licensed under a 3-clause BSD style license - see LICENSE
4  */
5 
6 /**
7  Builds a kdtree from the Henry Draper catalog.
8 
9  I downloaded the text file from:
10  http://cdsarc.u-strasbg.fr/viz-bin/VizieR?-meta.foot&-source=III/135A
11  with the options:
12  - "|-separated values",
13  - only the "HD" column selected
14 
15  The result is a header including this:
16  #Column _RAJ2000        (F6.2)  Right ascension (FK5) Equinox=J2000.0 Epoch=J1900. (computed by VizieR, not part of the original data)  [ucd=pos.eq.ra;meta.main]
17  #Column _DEJ2000        (F6.2)  Declination (FK5) Equinox=J2000.0 Epoch=J1900. (computed by VizieR, not part of the original data)      [ucd=pos.eq.dec;meta.main]
18  #Column HD      (I6)    [1/272150]+ Henry Draper Catalog (HD) number    [ucd=meta.id;meta.main]
19  _RAJ2000|_DEJ2000|HD
20  deg|deg|
21  ------|------|------
22 
23  And then data like this:
24 
25  001.30|+67.84|     1
26  001.29|+57.77|     2
27  001.29|+45.22|     3
28  001.28|+30.32|     4
29  001.28| +2.37|     5
30 
31  It turns out that the HD numbers are all in sequence and contiguous, so there's no
32  point storing them.
33 
34  A copy of this input file is available at
35  http://trac.astrometry.net/browser/binary/henry-draper/henry-draper.tsv
36 
37  (note, the Tycho-2 cross-ref file is also there, as are hd.fits and hd2.fits (with bounding-boxes))
38 
39  HOWEVER, the positions aren't very accurate, so this program has the capability of
40  reading a Tycho2-to-HD cross-reference catalog:
41  http://cdsarc.u-strasbg.fr/viz-bin/Cat?IV/25
42 
43  */
44 
45 #include <unistd.h>
46 #include <stdlib.h>
47 #include <stdio.h>
48 #include <math.h>
49 #include <string.h>
50 
51 #include "kdtree.h"
52 #include "kdtree_fits_io.h"
53 #include "starutil.h"
54 #include "fitsioutils.h"
55 #include "starkd.h"
56 #include "boilerplate.h"
57 #include "errors.h"
58 #include "bl.h"
59 #include "tycho2.h"
60 #include "tycho2-fits.h"
61 
62 static const char* OPTIONS = "hR:d:t:bsST:X:";
63 
64 #define HD_NENTRIES 272150
65 
printHelp(char * progname)66 void printHelp(char* progname) {
67     BOILERPLATE_HELP_HEADER(stdout);
68     printf("\nUsage: %s\n"
69            "   (   [-b]: build bounding boxes\n"
70            "    OR [-s]: build splitting planes   )\n"
71            "    [-R Nleaf]: number of points in a kdtree leaf node (default 25)\n"
72            "    [-t  <tree type>]:  {double,float,u32,u16}, default u32.\n"
73            "    [-d  <data type>]:  {double,float,u32,u16}, default u32.\n"
74            "    [-S]: include separate splitdim array\n"
75            "    [-T <Tycho-2 catalog>]: path to Tycho-2 catalog.\n"
76            "    [-X <Cross-ref file>]: path to Tycho-2 - to - HD cross-ref file.\n"
77            "\n"
78            "   <input.tsv>  <output.fits>\n"
79            "\n", progname);
80 }
81 
82 
83 // the Tycho-2 and cross-ref fields we care about...
84 struct tyc {
85     // from Tycho-2
86     int16_t tyc1;
87     int16_t tyc2;
88     int8_t  tyc3;
89     double ra;
90     double dec;
91 
92     float mag_BT;
93     float mag_VT;
94     float mag_HP;
95 
96     // From X-ref
97     int hd;
98     // number of Tycho-2 cross-refs for this star.
99     uint8_t ntyc;
100 };
101 typedef struct tyc tycstar_t;
102 
compare_tycs(const void * v1,const void * v2)103 static int compare_tycs(const void* v1, const void* v2) {
104     const tycstar_t* s1 = v1;
105     const tycstar_t* s2 = v2;
106     int d;
107     d = s1->tyc1 - s2->tyc1;
108     if (d > 0) return 1;
109     if (d < 0) return -1;
110     d = s1->tyc2 - s2->tyc2;
111     if (d > 0) return 1;
112     if (d < 0) return -1;
113     d = s1->tyc3 - s2->tyc3;
114     if (d > 0) return 1;
115     if (d < 0) return -1;
116     return 0;
117 }
118 
find_tycho(tycstar_t * stars,int N,int tyc1,int tyc2,int tyc3)119 static tycstar_t* find_tycho(tycstar_t* stars, int N,
120                              int tyc1, int tyc2, int tyc3) {
121     tycstar_t key;
122     key.tyc1 = tyc1;
123     key.tyc2 = tyc2;
124     key.tyc3 = tyc3;
125     return bsearch(&key, stars, N, sizeof(tycstar_t), compare_tycs);
126 }
127 
compare_hds(const void * v1,const void * v2)128 static int compare_hds(const void* v1, const void* v2) {
129     const tycstar_t* s1 = v1;
130     const tycstar_t* s2 = v2;
131     int d;
132     d = s1->hd - s2->hd;
133     if (d > 0) return 1;
134     if (d < 0) return -1;
135     return 0;
136 }
137 
find_hd(tycstar_t * stars,int N,int hd)138 static tycstar_t* find_hd(tycstar_t* stars, int N,
139                           int hd) {
140     tycstar_t key;
141     key.hd = hd;
142     return bsearch(&key, stars, N, sizeof(tycstar_t), compare_hds);
143 }
144 
main(int argc,char ** args)145 int main(int argc, char** args) {
146     int argchar;
147     kdtree_t* kd;
148     int Nleaf = 25;
149     char* infn = NULL;
150     char* outfn = NULL;
151     char* tychofn = NULL;
152     char* crossfn = NULL;
153     char* progname = args[0];
154     FILE* f;
155 
156     tycstar_t* tycstars = NULL;
157     int Ntyc = 0;
158 
159     int exttype  = KDT_EXT_DOUBLE;
160     int datatype = KDT_DATA_U32;
161     int treetype = KDT_TREE_U32;
162     int tt;
163     int buildopts = 0;
164     int i, N, D;
165 
166     dl* ras;
167     dl* decs;
168     dl* hds;
169 
170     fl* mag1s;
171     fl* mag2s;
172     fl* mag3s;
173 
174     int nbad = 0;
175     int nox = 0;
176 
177     int* hd;
178     double* xyz;
179 
180     qfits_header* hdr;
181 
182     while ((argchar = getopt (argc, args, OPTIONS)) != -1)
183         switch (argchar) {
184         case 'T':
185             tychofn = optarg;
186             break;
187         case 'X':
188             crossfn = optarg;
189             break;
190         case 'R':
191             Nleaf = (int)strtoul(optarg, NULL, 0);
192             break;
193         case 't':
194             treetype = kdtree_kdtype_parse_tree_string(optarg);
195             break;
196         case 'd':
197             datatype = kdtree_kdtype_parse_data_string(optarg);
198             break;
199         case 'b':
200             buildopts |= KD_BUILD_BBOX;
201             break;
202         case 's':
203             buildopts |= KD_BUILD_SPLIT;
204             break;
205         case 'S':
206             buildopts |= KD_BUILD_SPLITDIM;
207             break;
208         case '?':
209             fprintf(stderr, "Unknown option `-%c'.\n", optopt);
210         case 'h':
211             printHelp(progname);
212             return 0;
213         default:
214             return -1;
215         }
216 
217     if (optind != argc - 2) {
218         printHelp(progname);
219         exit(-1);
220     }
221 
222     infn = args[optind];
223     outfn = args[optind+1];
224 
225     if (!(buildopts & (KD_BUILD_BBOX | KD_BUILD_SPLIT))) {
226         printf("You need bounding-boxes or splitting planes!\n");
227         printHelp(progname);
228         exit(-1);
229     }
230 
231     if (tychofn || crossfn) {
232         if (!(tychofn && crossfn)) {
233             printf("You need both -T <Tycho2> and -X <Crossref> to do cross-referencing.\n");
234             exit(-1);
235         }
236     }
237 
238     if (tychofn) {
239         int i, N;
240         tycho2_fits* tyc;
241         FILE* f;
242         int nx, nox;
243         int lastgrass = 0;
244 
245         tyc = tycho2_fits_open(tychofn);
246         if (!tyc) {
247             ERROR("Failed to open Tycho-2 catalog.");
248             exit(-1);
249         }
250         printf("Reading Tycho-2 catalog...\n");
251 
252         N = tycho2_fits_count_entries(tyc);
253         tycstars = calloc(N, sizeof(tycstar_t));
254 
255         for (i=0; i<N; i++) {
256             tycho2_entry* te;
257             int grass = (i*80 / N);
258             if (grass != lastgrass) {
259                 printf(".");
260                 fflush(stdout);
261                 lastgrass = grass;
262             }
263             te = tycho2_fits_read_entry(tyc);
264             tycstars[i].tyc1 = te->tyc1;
265             tycstars[i].tyc2 = te->tyc2;
266             tycstars[i].tyc3 = te->tyc3;
267             tycstars[i].ra   = te->ra;
268             tycstars[i].dec  = te->dec;
269             tycstars[i].mag_BT = te->mag_BT;
270             tycstars[i].mag_VT = te->mag_VT;
271             tycstars[i].mag_HP = te->mag_HP;
272         }
273         tycho2_fits_close(tyc);
274 
275         printf("Sorting...\n");
276         qsort(tycstars, N, sizeof(tycstar_t), compare_tycs);
277         Ntyc = N;
278 
279         f = fopen(crossfn, "rb");
280         if (!f) {
281             SYSERROR("Failed to open cross-reference file %s", crossfn);
282             exit(-1);
283         }
284 
285         nx = 0;
286         nox = 0;
287         while (TRUE) {
288             char buf[1024];
289             int tyc1, tyc2, tyc3, hd, nhd, ntyc;
290             char ftyc, sptype0, sptype1, sptype2;
291             tycstar_t* s;
292 
293             if (!fgets(buf, sizeof(buf), f)) {
294                 if (ferror(f)) {
295                     SYSERROR("Failed to read a line of text from the cross-reference file");
296                     exit(-1);
297                 }
298                 break;
299             }
300 
301             if (sscanf(buf, " %d %d %d%c %d %c%c%c %d %d",
302                        &tyc1, &tyc2, &tyc3, &ftyc, &hd,
303                        &sptype0, &sptype1, &sptype2, &nhd, &ntyc) != 10) {
304                 ERROR("Failed to parse line: \"%s\"", buf);
305             }
306 
307             //printf("%i %i %i %i %i %i\n", tyc1, tyc2, tyc3, hd, nhd, ntyc);
308             s = find_tycho(tycstars, Ntyc, tyc1, tyc2, tyc3);
309             if (!s) {
310                 ERROR("Failed to find Tycho-2 star %i-%i-%i", tyc1, tyc2, tyc3);
311                 nox++;
312             } else {
313                 s->hd = hd;
314                 s->ntyc = ntyc;
315             }
316             nx++;
317         }
318         fclose(f);
319 
320         printf("Read %i cross-references.\n", nx);
321         printf("Failed to find %i cross-referenced Tycho-2 stars.\n", nox);
322 
323         printf("Sorting...\n");
324         qsort(tycstars, N, sizeof(tycstar_t), compare_hds);
325     }
326 
327     f = fopen(infn, "rb");
328     if (!f) {
329         SYSERROR("Failed to open input file %s", infn);
330         exit(-1);
331     }
332 
333     ras = dl_new(1024);
334     decs = dl_new(1024);
335     hds = il_new(1024);
336 
337     mag1s = fl_new(1024);
338     mag2s = fl_new(1024);
339     mag3s = fl_new(1024);
340 
341     printf("Reading HD catalog...\n");
342     for (;;) {
343         char buf[1024];
344         double ra, dec;
345         int hd;
346         float mag1, mag2, mag3;
347 
348         mag1 = mag2 = mag3 = 0.0;
349 
350         if (!fgets(buf, sizeof(buf), f)) {
351             if (ferror(f)) {
352                 SYSERROR("Failed to read a line of text from the input file");
353                 exit(-1);
354             }
355             break;
356         }
357 
358         if (buf[0] == '#')
359             continue;
360         if (buf[0] == '\n')
361             continue;
362 
363         if (sscanf(buf, " %lf| %lf| %d", &ra, &dec, &hd) < 3) {
364             // ignore three invalid lines
365             if (nbad > 3) {
366                 ERROR("Failed to parse line: \"%s\"", buf);
367             }
368             nbad++;
369         } else {
370 
371             if (tycstars) {
372                 tycstar_t* s = find_hd(tycstars, Ntyc, hd);
373                 if (!s) {
374                     //printf("Failed to find cross-ref for HD %i\n", hd);
375                     nox++;
376                 } else {
377                     ra = s->ra;
378                     dec = s->dec;
379 
380                     mag1 = s->mag_VT;
381                     mag2 = s->mag_BT;
382                     mag3 = s->mag_HP;
383                 }
384             }
385 
386             dl_append(ras, ra);
387             dl_append(decs, dec);
388             il_append(hds, hd);
389             fl_append(mag1s, mag1);
390             fl_append(mag2s, mag2);
391             fl_append(mag3s, mag3);
392         }
393     }
394     fclose(f);
395 
396     N = dl_size(ras);
397     printf("Read %i entries and %i bad lines.\n", N, nbad);
398 
399     if (dl_size(ras) != HD_NENTRIES) {
400         printf("WARNING: expected %i Henry Draper catalog entries.\n", HD_NENTRIES);
401     }
402 
403     if (nox) {
404         printf("Found %i HD entries with no cross-reference (expect this to be about 1%%)\n", nox);
405     }
406 
407     hd = malloc(sizeof(int) * N);
408     il_copy(hds, 0, N, hd);
409     il_free(hds);
410     for (i=0; i<N; i++)
411         if (hd[i] != i+1) {
412             printf("Line %i is HD %i\n", i+1, hd[i]);
413             break;
414         }
415     // HACK  - don't allocate 'em in the first place...
416     free(hd);
417 
418     xyz = malloc(sizeof(double) * 3 * N);
419     for (i=0; i<N; i++) {
420         radecdeg2xyzarr(dl_get(ras, i), dl_get(decs, i), xyz + 3*i);
421     }
422 
423     dl_free(ras);
424     dl_free(decs);
425 
426     tt = kdtree_kdtypes_to_treetype(exttype, treetype, datatype);
427     D = 3;
428     {
429         // limits of the kdtree...
430         double lo[] = {-1.0, -1.0, -1.0};
431         double hi[] = { 1.0,  1.0,  1.0};
432         kd = kdtree_new(N, D, Nleaf);
433         kdtree_set_limits(kd, lo, hi);
434     }
435     printf("Building tree...\n");
436     kd = kdtree_build(kd, xyz, N, D, Nleaf, tt, buildopts);
437 
438     hdr = qfits_header_default();
439     qfits_header_add(hdr, "AN_FILE", "HDTREE", "Henry Draper catalog kdtree", NULL);
440     BOILERPLATE_ADD_FITS_HEADERS(hdr);
441     fits_add_long_history(hdr, "This file was created by the following command-line:");
442     fits_add_args(hdr, args, argc);
443 
444     if (kdtree_fits_write(kd, outfn, hdr)) {
445         ERROR("Failed to write kdtree");
446         exit(-1);
447     }
448 
449     // Write mags as tag-along table.
450     {
451         fitstable_t* tag;
452         tag = fitstable_open_for_appending(outfn);
453         if (!tag) {
454             ERROR("Failed to open kd-tree file for appending");
455             exit(-1);
456         }
457 
458         fitstable_add_write_column(tag, fitscolumn_float_type(), "MAG_VT", "");
459         fitstable_add_write_column(tag, fitscolumn_float_type(), "MAG_BT", "");
460         fitstable_add_write_column(tag, fitscolumn_float_type(), "MAG_HP", "");
461 
462         if (fitstable_write_header(tag)) {
463             ERROR("Failed to write tag-along header");
464             exit(-1);
465         }
466 
467         for (i=0; i<N; i++) {
468             fitstable_write_row(tag, fl_get(mag1s, i), fl_get(mag2s, i),
469                                 fl_get(mag3s, i));
470         }
471         if (fitstable_fix_header(tag)) {
472             ERROR("Failed to fix tag-along header");
473             exit(-1);
474         }
475         if (fitstable_close(tag)) {
476             ERROR("Failed to close tag-along data");
477             exit(-1);
478         }
479     }
480     fl_free(mag1s);
481     fl_free(mag2s);
482     fl_free(mag3s);
483 
484     printf("Done.\n");
485 
486     qfits_header_destroy(hdr);
487     free(xyz);
488     kdtree_free(kd);
489     free(tycstars);
490 
491     return 0;
492 }
493 
494