1 /*
2 # This file is part of the Astrometry.net suite.
3 # Licensed under a 3-clause BSD style license - see LICENSE
4 */
5
6 /**
7 Builds a kdtree from the Henry Draper catalog.
8
9 I downloaded the text file from:
10 http://cdsarc.u-strasbg.fr/viz-bin/VizieR?-meta.foot&-source=III/135A
11 with the options:
12 - "|-separated values",
13 - only the "HD" column selected
14
15 The result is a header including this:
16 #Column _RAJ2000 (F6.2) Right ascension (FK5) Equinox=J2000.0 Epoch=J1900. (computed by VizieR, not part of the original data) [ucd=pos.eq.ra;meta.main]
17 #Column _DEJ2000 (F6.2) Declination (FK5) Equinox=J2000.0 Epoch=J1900. (computed by VizieR, not part of the original data) [ucd=pos.eq.dec;meta.main]
18 #Column HD (I6) [1/272150]+ Henry Draper Catalog (HD) number [ucd=meta.id;meta.main]
19 _RAJ2000|_DEJ2000|HD
20 deg|deg|
21 ------|------|------
22
23 And then data like this:
24
25 001.30|+67.84| 1
26 001.29|+57.77| 2
27 001.29|+45.22| 3
28 001.28|+30.32| 4
29 001.28| +2.37| 5
30
31 It turns out that the HD numbers are all in sequence and contiguous, so there's no
32 point storing them.
33
34 A copy of this input file is available at
35 http://trac.astrometry.net/browser/binary/henry-draper/henry-draper.tsv
36
37 (note, the Tycho-2 cross-ref file is also there, as are hd.fits and hd2.fits (with bounding-boxes))
38
39 HOWEVER, the positions aren't very accurate, so this program has the capability of
40 reading a Tycho2-to-HD cross-reference catalog:
41 http://cdsarc.u-strasbg.fr/viz-bin/Cat?IV/25
42
43 */
44
45 #include <unistd.h>
46 #include <stdlib.h>
47 #include <stdio.h>
48 #include <math.h>
49 #include <string.h>
50
51 #include "kdtree.h"
52 #include "kdtree_fits_io.h"
53 #include "starutil.h"
54 #include "fitsioutils.h"
55 #include "starkd.h"
56 #include "boilerplate.h"
57 #include "errors.h"
58 #include "bl.h"
59 #include "tycho2.h"
60 #include "tycho2-fits.h"
61
62 static const char* OPTIONS = "hR:d:t:bsST:X:";
63
64 #define HD_NENTRIES 272150
65
printHelp(char * progname)66 void printHelp(char* progname) {
67 BOILERPLATE_HELP_HEADER(stdout);
68 printf("\nUsage: %s\n"
69 " ( [-b]: build bounding boxes\n"
70 " OR [-s]: build splitting planes )\n"
71 " [-R Nleaf]: number of points in a kdtree leaf node (default 25)\n"
72 " [-t <tree type>]: {double,float,u32,u16}, default u32.\n"
73 " [-d <data type>]: {double,float,u32,u16}, default u32.\n"
74 " [-S]: include separate splitdim array\n"
75 " [-T <Tycho-2 catalog>]: path to Tycho-2 catalog.\n"
76 " [-X <Cross-ref file>]: path to Tycho-2 - to - HD cross-ref file.\n"
77 "\n"
78 " <input.tsv> <output.fits>\n"
79 "\n", progname);
80 }
81
82
83 // the Tycho-2 and cross-ref fields we care about...
84 struct tyc {
85 // from Tycho-2
86 int16_t tyc1;
87 int16_t tyc2;
88 int8_t tyc3;
89 double ra;
90 double dec;
91
92 float mag_BT;
93 float mag_VT;
94 float mag_HP;
95
96 // From X-ref
97 int hd;
98 // number of Tycho-2 cross-refs for this star.
99 uint8_t ntyc;
100 };
101 typedef struct tyc tycstar_t;
102
compare_tycs(const void * v1,const void * v2)103 static int compare_tycs(const void* v1, const void* v2) {
104 const tycstar_t* s1 = v1;
105 const tycstar_t* s2 = v2;
106 int d;
107 d = s1->tyc1 - s2->tyc1;
108 if (d > 0) return 1;
109 if (d < 0) return -1;
110 d = s1->tyc2 - s2->tyc2;
111 if (d > 0) return 1;
112 if (d < 0) return -1;
113 d = s1->tyc3 - s2->tyc3;
114 if (d > 0) return 1;
115 if (d < 0) return -1;
116 return 0;
117 }
118
find_tycho(tycstar_t * stars,int N,int tyc1,int tyc2,int tyc3)119 static tycstar_t* find_tycho(tycstar_t* stars, int N,
120 int tyc1, int tyc2, int tyc3) {
121 tycstar_t key;
122 key.tyc1 = tyc1;
123 key.tyc2 = tyc2;
124 key.tyc3 = tyc3;
125 return bsearch(&key, stars, N, sizeof(tycstar_t), compare_tycs);
126 }
127
compare_hds(const void * v1,const void * v2)128 static int compare_hds(const void* v1, const void* v2) {
129 const tycstar_t* s1 = v1;
130 const tycstar_t* s2 = v2;
131 int d;
132 d = s1->hd - s2->hd;
133 if (d > 0) return 1;
134 if (d < 0) return -1;
135 return 0;
136 }
137
find_hd(tycstar_t * stars,int N,int hd)138 static tycstar_t* find_hd(tycstar_t* stars, int N,
139 int hd) {
140 tycstar_t key;
141 key.hd = hd;
142 return bsearch(&key, stars, N, sizeof(tycstar_t), compare_hds);
143 }
144
main(int argc,char ** args)145 int main(int argc, char** args) {
146 int argchar;
147 kdtree_t* kd;
148 int Nleaf = 25;
149 char* infn = NULL;
150 char* outfn = NULL;
151 char* tychofn = NULL;
152 char* crossfn = NULL;
153 char* progname = args[0];
154 FILE* f;
155
156 tycstar_t* tycstars = NULL;
157 int Ntyc = 0;
158
159 int exttype = KDT_EXT_DOUBLE;
160 int datatype = KDT_DATA_U32;
161 int treetype = KDT_TREE_U32;
162 int tt;
163 int buildopts = 0;
164 int i, N, D;
165
166 dl* ras;
167 dl* decs;
168 dl* hds;
169
170 fl* mag1s;
171 fl* mag2s;
172 fl* mag3s;
173
174 int nbad = 0;
175 int nox = 0;
176
177 int* hd;
178 double* xyz;
179
180 qfits_header* hdr;
181
182 while ((argchar = getopt (argc, args, OPTIONS)) != -1)
183 switch (argchar) {
184 case 'T':
185 tychofn = optarg;
186 break;
187 case 'X':
188 crossfn = optarg;
189 break;
190 case 'R':
191 Nleaf = (int)strtoul(optarg, NULL, 0);
192 break;
193 case 't':
194 treetype = kdtree_kdtype_parse_tree_string(optarg);
195 break;
196 case 'd':
197 datatype = kdtree_kdtype_parse_data_string(optarg);
198 break;
199 case 'b':
200 buildopts |= KD_BUILD_BBOX;
201 break;
202 case 's':
203 buildopts |= KD_BUILD_SPLIT;
204 break;
205 case 'S':
206 buildopts |= KD_BUILD_SPLITDIM;
207 break;
208 case '?':
209 fprintf(stderr, "Unknown option `-%c'.\n", optopt);
210 case 'h':
211 printHelp(progname);
212 return 0;
213 default:
214 return -1;
215 }
216
217 if (optind != argc - 2) {
218 printHelp(progname);
219 exit(-1);
220 }
221
222 infn = args[optind];
223 outfn = args[optind+1];
224
225 if (!(buildopts & (KD_BUILD_BBOX | KD_BUILD_SPLIT))) {
226 printf("You need bounding-boxes or splitting planes!\n");
227 printHelp(progname);
228 exit(-1);
229 }
230
231 if (tychofn || crossfn) {
232 if (!(tychofn && crossfn)) {
233 printf("You need both -T <Tycho2> and -X <Crossref> to do cross-referencing.\n");
234 exit(-1);
235 }
236 }
237
238 if (tychofn) {
239 int i, N;
240 tycho2_fits* tyc;
241 FILE* f;
242 int nx, nox;
243 int lastgrass = 0;
244
245 tyc = tycho2_fits_open(tychofn);
246 if (!tyc) {
247 ERROR("Failed to open Tycho-2 catalog.");
248 exit(-1);
249 }
250 printf("Reading Tycho-2 catalog...\n");
251
252 N = tycho2_fits_count_entries(tyc);
253 tycstars = calloc(N, sizeof(tycstar_t));
254
255 for (i=0; i<N; i++) {
256 tycho2_entry* te;
257 int grass = (i*80 / N);
258 if (grass != lastgrass) {
259 printf(".");
260 fflush(stdout);
261 lastgrass = grass;
262 }
263 te = tycho2_fits_read_entry(tyc);
264 tycstars[i].tyc1 = te->tyc1;
265 tycstars[i].tyc2 = te->tyc2;
266 tycstars[i].tyc3 = te->tyc3;
267 tycstars[i].ra = te->ra;
268 tycstars[i].dec = te->dec;
269 tycstars[i].mag_BT = te->mag_BT;
270 tycstars[i].mag_VT = te->mag_VT;
271 tycstars[i].mag_HP = te->mag_HP;
272 }
273 tycho2_fits_close(tyc);
274
275 printf("Sorting...\n");
276 qsort(tycstars, N, sizeof(tycstar_t), compare_tycs);
277 Ntyc = N;
278
279 f = fopen(crossfn, "rb");
280 if (!f) {
281 SYSERROR("Failed to open cross-reference file %s", crossfn);
282 exit(-1);
283 }
284
285 nx = 0;
286 nox = 0;
287 while (TRUE) {
288 char buf[1024];
289 int tyc1, tyc2, tyc3, hd, nhd, ntyc;
290 char ftyc, sptype0, sptype1, sptype2;
291 tycstar_t* s;
292
293 if (!fgets(buf, sizeof(buf), f)) {
294 if (ferror(f)) {
295 SYSERROR("Failed to read a line of text from the cross-reference file");
296 exit(-1);
297 }
298 break;
299 }
300
301 if (sscanf(buf, " %d %d %d%c %d %c%c%c %d %d",
302 &tyc1, &tyc2, &tyc3, &ftyc, &hd,
303 &sptype0, &sptype1, &sptype2, &nhd, &ntyc) != 10) {
304 ERROR("Failed to parse line: \"%s\"", buf);
305 }
306
307 //printf("%i %i %i %i %i %i\n", tyc1, tyc2, tyc3, hd, nhd, ntyc);
308 s = find_tycho(tycstars, Ntyc, tyc1, tyc2, tyc3);
309 if (!s) {
310 ERROR("Failed to find Tycho-2 star %i-%i-%i", tyc1, tyc2, tyc3);
311 nox++;
312 } else {
313 s->hd = hd;
314 s->ntyc = ntyc;
315 }
316 nx++;
317 }
318 fclose(f);
319
320 printf("Read %i cross-references.\n", nx);
321 printf("Failed to find %i cross-referenced Tycho-2 stars.\n", nox);
322
323 printf("Sorting...\n");
324 qsort(tycstars, N, sizeof(tycstar_t), compare_hds);
325 }
326
327 f = fopen(infn, "rb");
328 if (!f) {
329 SYSERROR("Failed to open input file %s", infn);
330 exit(-1);
331 }
332
333 ras = dl_new(1024);
334 decs = dl_new(1024);
335 hds = il_new(1024);
336
337 mag1s = fl_new(1024);
338 mag2s = fl_new(1024);
339 mag3s = fl_new(1024);
340
341 printf("Reading HD catalog...\n");
342 for (;;) {
343 char buf[1024];
344 double ra, dec;
345 int hd;
346 float mag1, mag2, mag3;
347
348 mag1 = mag2 = mag3 = 0.0;
349
350 if (!fgets(buf, sizeof(buf), f)) {
351 if (ferror(f)) {
352 SYSERROR("Failed to read a line of text from the input file");
353 exit(-1);
354 }
355 break;
356 }
357
358 if (buf[0] == '#')
359 continue;
360 if (buf[0] == '\n')
361 continue;
362
363 if (sscanf(buf, " %lf| %lf| %d", &ra, &dec, &hd) < 3) {
364 // ignore three invalid lines
365 if (nbad > 3) {
366 ERROR("Failed to parse line: \"%s\"", buf);
367 }
368 nbad++;
369 } else {
370
371 if (tycstars) {
372 tycstar_t* s = find_hd(tycstars, Ntyc, hd);
373 if (!s) {
374 //printf("Failed to find cross-ref for HD %i\n", hd);
375 nox++;
376 } else {
377 ra = s->ra;
378 dec = s->dec;
379
380 mag1 = s->mag_VT;
381 mag2 = s->mag_BT;
382 mag3 = s->mag_HP;
383 }
384 }
385
386 dl_append(ras, ra);
387 dl_append(decs, dec);
388 il_append(hds, hd);
389 fl_append(mag1s, mag1);
390 fl_append(mag2s, mag2);
391 fl_append(mag3s, mag3);
392 }
393 }
394 fclose(f);
395
396 N = dl_size(ras);
397 printf("Read %i entries and %i bad lines.\n", N, nbad);
398
399 if (dl_size(ras) != HD_NENTRIES) {
400 printf("WARNING: expected %i Henry Draper catalog entries.\n", HD_NENTRIES);
401 }
402
403 if (nox) {
404 printf("Found %i HD entries with no cross-reference (expect this to be about 1%%)\n", nox);
405 }
406
407 hd = malloc(sizeof(int) * N);
408 il_copy(hds, 0, N, hd);
409 il_free(hds);
410 for (i=0; i<N; i++)
411 if (hd[i] != i+1) {
412 printf("Line %i is HD %i\n", i+1, hd[i]);
413 break;
414 }
415 // HACK - don't allocate 'em in the first place...
416 free(hd);
417
418 xyz = malloc(sizeof(double) * 3 * N);
419 for (i=0; i<N; i++) {
420 radecdeg2xyzarr(dl_get(ras, i), dl_get(decs, i), xyz + 3*i);
421 }
422
423 dl_free(ras);
424 dl_free(decs);
425
426 tt = kdtree_kdtypes_to_treetype(exttype, treetype, datatype);
427 D = 3;
428 {
429 // limits of the kdtree...
430 double lo[] = {-1.0, -1.0, -1.0};
431 double hi[] = { 1.0, 1.0, 1.0};
432 kd = kdtree_new(N, D, Nleaf);
433 kdtree_set_limits(kd, lo, hi);
434 }
435 printf("Building tree...\n");
436 kd = kdtree_build(kd, xyz, N, D, Nleaf, tt, buildopts);
437
438 hdr = qfits_header_default();
439 qfits_header_add(hdr, "AN_FILE", "HDTREE", "Henry Draper catalog kdtree", NULL);
440 BOILERPLATE_ADD_FITS_HEADERS(hdr);
441 fits_add_long_history(hdr, "This file was created by the following command-line:");
442 fits_add_args(hdr, args, argc);
443
444 if (kdtree_fits_write(kd, outfn, hdr)) {
445 ERROR("Failed to write kdtree");
446 exit(-1);
447 }
448
449 // Write mags as tag-along table.
450 {
451 fitstable_t* tag;
452 tag = fitstable_open_for_appending(outfn);
453 if (!tag) {
454 ERROR("Failed to open kd-tree file for appending");
455 exit(-1);
456 }
457
458 fitstable_add_write_column(tag, fitscolumn_float_type(), "MAG_VT", "");
459 fitstable_add_write_column(tag, fitscolumn_float_type(), "MAG_BT", "");
460 fitstable_add_write_column(tag, fitscolumn_float_type(), "MAG_HP", "");
461
462 if (fitstable_write_header(tag)) {
463 ERROR("Failed to write tag-along header");
464 exit(-1);
465 }
466
467 for (i=0; i<N; i++) {
468 fitstable_write_row(tag, fl_get(mag1s, i), fl_get(mag2s, i),
469 fl_get(mag3s, i));
470 }
471 if (fitstable_fix_header(tag)) {
472 ERROR("Failed to fix tag-along header");
473 exit(-1);
474 }
475 if (fitstable_close(tag)) {
476 ERROR("Failed to close tag-along data");
477 exit(-1);
478 }
479 }
480 fl_free(mag1s);
481 fl_free(mag2s);
482 fl_free(mag3s);
483
484 printf("Done.\n");
485
486 qfits_header_destroy(hdr);
487 free(xyz);
488 kdtree_free(kd);
489 free(tycstars);
490
491 return 0;
492 }
493
494