1 /*
2  # This file is part of the Astrometry.net suite.
3  # Licensed under a 3-clause BSD style license - see LICENSE
4  */
5 
6 #include <stdint.h>
7 #include <assert.h>
8 
9 #include "os-features.h"
10 #include "uniformize-catalog.h"
11 #include "intmap.h"
12 #include "healpix.h"
13 #include "healpix-utils.h"
14 #include "permutedsort.h"
15 #include "starutil.h"
16 #include "an-bool.h"
17 #include "mathutil.h"
18 #include "errors.h"
19 #include "log.h"
20 #include "boilerplate.h"
21 #include "fitsioutils.h"
22 
23 struct oh_token {
24     int hp;
25     int nside;
26     int finenside;
27 };
28 
29 // Return 1 if the given "hp" is outside the healpix described in "oh_token".
outside_healpix(int hp,void * vtoken)30 static int outside_healpix(int hp, void* vtoken) {
31     struct oh_token* token = vtoken;
32     int bighp;
33     healpix_convert_nside(hp, token->finenside, token->nside, &bighp);
34     return (bighp == token->hp ? 0 : 1);
35 }
36 
is_duplicate(int hp,double ra,double dec,int Nside,intmap_t * starlists,double * ras,double * decs,double dedupr2)37 static anbool is_duplicate(int hp, double ra, double dec, int Nside,
38                            intmap_t* starlists,
39                            double* ras, double* decs, double dedupr2) {
40     double xyz[3];
41     int neigh[9];
42     int nn;
43     double xyz2[3];
44     int k;
45     size_t j;
46     radecdeg2xyzarr(ra, dec, xyz);
47     // Check this healpix...
48     neigh[0] = hp;
49     // Check neighbouring healpixes... (+1 is to skip over this hp)
50     nn = 1 + healpix_get_neighbours(hp, neigh+1, Nside);
51     for (k=0; k<nn; k++) {
52         int otherhp = neigh[k];
53         bl* lst = intmap_find(starlists, otherhp, FALSE);
54         if (!lst)
55             continue;
56         for (j=0; j<bl_size(lst); j++) {
57             int otherindex;
58             bl_get(lst, j, &otherindex);
59             radecdeg2xyzarr(ras[otherindex], decs[otherindex], xyz2);
60             if (!distsq_exceeds(xyz, xyz2, 3, dedupr2))
61                 return TRUE;
62         }
63     }
64     return FALSE;
65 }
66 
uniformize_catalog(fitstable_t * intable,fitstable_t * outtable,const char * racol,const char * deccol,const char * sortcol,anbool sort_ascending,double sort_min_cut,int bighp,int bignside,int nmargin,int Nside,double dedup_radius,int nsweeps,char ** args,int argc)67 int uniformize_catalog(fitstable_t* intable, fitstable_t* outtable,
68                        const char* racol, const char* deccol,
69                        const char* sortcol, anbool sort_ascending,
70                        double sort_min_cut,
71                        // ?  Or do this cut in a separate process?
72                        int bighp, int bignside,
73                        int nmargin,
74                        // uniformization nside.
75                        int Nside,
76                        double dedup_radius,
77                        int nsweeps,
78                        char** args, int argc) {
79     anbool allsky;
80     intmap_t* starlists;
81     int NHP;
82     anbool dense = FALSE;
83     double dedupr2 = 0.0;
84     tfits_type dubl;
85     int N;
86     int* inorder = NULL;
87     int* outorder = NULL;
88     int outi;
89     double *ra = NULL, *dec = NULL;
90     il* myhps = NULL;
91     int i,j,k;
92     int nkeep = nsweeps;
93     int noob = 0;
94     int ndup = 0;
95     struct oh_token token;
96     int* npersweep = NULL;
97     qfits_header* outhdr = NULL;
98     double *sortval = NULL;
99 
100     if (bignside == 0)
101         bignside = 1;
102     allsky = (bighp == -1);
103 
104     if (Nside % bignside) {
105         ERROR("Fine healpixelization Nside must be a multiple of the coarse healpixelization Nside");
106         return -1;
107     }
108     if (Nside > HP_MAX_INT_NSIDE) {
109         ERROR("Error: maximum healpix Nside = %i", HP_MAX_INT_NSIDE);
110         return -1;
111     }
112 
113     NHP = 12 * Nside * Nside;
114     logverb("Healpix Nside: %i, # healpixes on the whole sky: %i\n", Nside, NHP);
115     if (!allsky) {
116         logverb("Creating index for healpix %i, nside %i\n", bighp, bignside);
117         logverb("Number of healpixes: %i\n", ((Nside/bignside)*(Nside/bignside)));
118     }
119     logverb("Healpix side length: %g arcmin.\n", healpix_side_length_arcmin(Nside));
120 
121     dubl = fitscolumn_double_type();
122     if (!racol)
123         racol = "RA";
124     ra = fitstable_read_column(intable, racol, dubl);
125     if (!ra) {
126         ERROR("Failed to find RA column (%s) in table", racol);
127         return -1;
128     }
129     if (!deccol)
130         deccol = "DEC";
131     dec = fitstable_read_column(intable, deccol, dubl);
132     if (!dec) {
133         ERROR("Failed to find DEC column (%s) in table", deccol);
134         free(ra);
135         return -1;
136     }
137 
138     N = fitstable_nrows(intable);
139     logverb("Have %i objects\n", N);
140 
141     // FIXME -- argsort and seek around the input table, and append to
142     // starlists in order; OR read from the input table in sequence and
143     // sort in the starlists?
144     if (sortcol) {
145         logverb("Sorting by %s...\n", sortcol);
146         sortval = fitstable_read_column(intable, sortcol, dubl);
147         if (!sortval) {
148             ERROR("Failed to read sorting column \"%s\"", sortcol);
149             free(ra);
150             free(dec);
151             return -1;
152         }
153         inorder = permuted_sort(sortval, sizeof(double),
154                                 sort_ascending ? compare_doubles_asc : compare_doubles_desc,
155                                 NULL, N);
156         if (sort_min_cut > -HUGE_VAL) {
157             logverb("Cutting to %s > %g...\n", sortcol, sort_min_cut);
158             // Cut objects with sortval < sort_min_cut.
159             if (sort_ascending) {
160                 // skipped objects are at the front -- find the first obj
161                 // to keep
162                 for (i=0; i<N; i++)
163                     if (sortval[inorder[i]] > sort_min_cut)
164                         break;
165                 // move the "inorder" indices down.
166                 if (i)
167                     memmove(inorder, inorder+i, (N-i)*sizeof(int));
168                 N -= i;
169             } else {
170                 // skipped objects are at the end -- find the last obj to keep.
171                 for (i=N-1; i>=0; i--)
172                     if (sortval[inorder[i]] > sort_min_cut)
173                         break;
174                 N = i+1;
175             }
176             logverb("Cut to %i objects\n", N);
177         }
178         //free(sortval);
179     }
180 
181     token.nside = bignside;
182     token.finenside = Nside;
183     token.hp = bighp;
184 
185     if (!allsky && nmargin) {
186         int bigbighp, bighpx, bighpy;
187         //int ninside;
188         il* seeds = il_new(256);
189         logverb("Finding healpixes in range...\n");
190         healpix_decompose_xy(bighp, &bigbighp, &bighpx, &bighpy, bignside);
191         //ninside = (Nside/bignside)*(Nside/bignside);
192         // Prime the queue with the fine healpixes that are on the
193         // boundary of the big healpix.
194         for (i=0; i<((Nside / bignside) - 1); i++) {
195             // add (i,0), (i,max), (0,i), and (0,max) healpixes
196             int xx = i + bighpx * (Nside / bignside);
197             int yy = i + bighpy * (Nside / bignside);
198             int y0 =     bighpy * (Nside / bignside);
199             // -1 prevents us from double-adding the corners.
200             int y1 =(1 + bighpy)* (Nside / bignside) - 1;
201             int x0 =     bighpx * (Nside / bignside);
202             int x1 =(1 + bighpx)* (Nside / bignside) - 1;
203             assert(xx < Nside);
204             assert(yy < Nside);
205             assert(x0 < Nside);
206             assert(x1 < Nside);
207             assert(y0 < Nside);
208             assert(y1 < Nside);
209             il_append(seeds, healpix_compose_xy(bigbighp, xx, y0, Nside));
210             il_append(seeds, healpix_compose_xy(bigbighp, xx, y1, Nside));
211             il_append(seeds, healpix_compose_xy(bigbighp, x0, yy, Nside));
212             il_append(seeds, healpix_compose_xy(bigbighp, x1, yy, Nside));
213         }
214         logmsg("Number of boundary healpixes: %zu (Nside/bignside = %i)\n", il_size(seeds), Nside/bignside);
215 
216         myhps = healpix_region_search(-1, seeds, Nside, NULL, NULL,
217                                       outside_healpix, &token, nmargin);
218         logmsg("Number of margin healpixes: %zu\n", il_size(myhps));
219         il_free(seeds);
220 
221         il_sort(myhps, TRUE);
222         // DEBUG
223         il_check_consistency(myhps);
224         il_check_sorted_ascending(myhps, TRUE);
225     }
226 
227     dedupr2 = arcsec2distsq(dedup_radius);
228     starlists = intmap_new(sizeof(int32_t), nkeep, 0, dense);
229 
230     logverb("Placing stars in grid cells...\n");
231     for (i=0; i<N; i++) {
232         int hp;
233         bl* lst;
234         int32_t j32;
235         anbool oob;
236         if (inorder) {
237             j = inorder[i];
238             //printf("Placing star %i (%i): sort value %s = %g, RA,Dec=%g,%g\n", i, j, sortcol, sortval[j], ra[j], dec[j]);
239         } else
240             j = i;
241 
242         hp = radecdegtohealpix(ra[j], dec[j], Nside);
243         //printf("HP %i\n", hp);
244         // in bounds?
245         oob = FALSE;
246         if (myhps) {
247             oob = (outside_healpix(hp, &token) && !il_sorted_contains(myhps, hp));
248         } else if (!allsky) {
249             oob = (outside_healpix(hp, &token));
250         }
251         if (oob) {
252             //printf("out of bounds.\n");
253             noob++;
254             continue;
255         }
256 
257         lst = intmap_find(starlists, hp, TRUE);
258         /*
259          printf("list has %i existing entries.\n", bl_size(lst));
260          for (k=0; k<bl_size(lst); k++) {
261          bl_get(lst, k, &j32);
262          printf("  %i: index %i, %s = %g\n", k, j32, sortcol, sortval[j32]);
263          }
264          */
265 
266         // is this list full?
267         if (nkeep && (bl_size(lst) >= nkeep)) {
268             // Here we assume we're working in sorted order: once the list is full we're done.
269             //printf("Skipping: list is full.\n");
270             continue;
271         }
272 
273         if ((dedupr2 > 0.0) &&
274             is_duplicate(hp, ra[j], dec[j], Nside, starlists, ra, dec, dedupr2)) {
275             //printf("Skipping: duplicate\n");
276             ndup++;
277             continue;
278         }
279 
280         // Add the new star (by index)
281         j32 = j;
282         bl_append(lst, &j32);
283     }
284     logverb("%i outside the healpix\n", noob);
285     logverb("%i duplicates\n", ndup);
286 
287     il_free(myhps);
288     myhps = NULL;
289     free(inorder);
290     inorder = NULL;
291     free(ra);
292     ra = NULL;
293     free(dec);
294     dec = NULL;
295 
296     outorder = malloc(N * sizeof(int));
297     outi = 0;
298 
299     npersweep = calloc(nsweeps, sizeof(int));
300 
301     for (k=0; k<nsweeps; k++) {
302         int starti = outi;
303         int32_t j32;
304         for (i=0;; i++) {
305             bl* lst;
306             int hp;
307             if (!intmap_get_entry(starlists, i, &hp, &lst))
308                 break;
309             if (bl_size(lst) <= k)
310                 continue;
311             bl_get(lst, k, &j32);
312             outorder[outi] = j32;
313             //printf("sweep %i, cell #%i, hp %i, star %i, %s = %g\n", k, i, hp, j32, sortcol, sortval[j32]);
314             outi++;
315         }
316         logmsg("Sweep %i: %i stars\n", k+1, outi - starti);
317         npersweep[k] = outi - starti;
318 
319         if (sortcol) {
320             // Re-sort within this sweep.
321             permuted_sort(sortval, sizeof(double),
322                           sort_ascending ? compare_doubles_asc : compare_doubles_desc,
323                           outorder + starti, npersweep[k]);
324             /*
325              for (i=0; i<npersweep[k]; i++) {
326              printf("  within sweep %i: star %i, j=%i, %s=%g\n",
327              k, i, outorder[starti + i], sortcol, sortval[outorder[starti + i]]);
328              }
329              */
330         }
331 
332     }
333     intmap_free(starlists);
334     starlists = NULL;
335 
336     //////
337     free(sortval);
338     sortval = NULL;
339 
340     logmsg("Total: %i stars\n", outi);
341     N = outi;
342 
343     outhdr = fitstable_get_primary_header(outtable);
344     if (allsky)
345         qfits_header_add(outhdr, "ALLSKY", "T", "All-sky catalog.", NULL);
346     BOILERPLATE_ADD_FITS_HEADERS(outhdr);
347     qfits_header_add(outhdr, "HISTORY", "This file was generated by the command-line:", NULL, NULL);
348     fits_add_args(outhdr, args, argc);
349     qfits_header_add(outhdr, "HISTORY", "(end of command line)", NULL, NULL);
350     fits_add_long_history(outhdr, "uniformize-catalog args:");
351     fits_add_long_history(outhdr, "  RA,Dec columns: %s,%s", racol, deccol);
352     fits_add_long_history(outhdr, "  sort column: %s", sortcol);
353     fits_add_long_history(outhdr, "  sort direction: %s", sort_ascending ? "ascending" : "descending");
354     if (sort_ascending)
355         fits_add_long_history(outhdr, "    (ie, for mag-like sort columns)");
356     else
357         fits_add_long_history(outhdr, "    (ie, for flux-like sort columns)");
358     fits_add_long_history(outhdr, "  uniformization nside: %i", Nside);
359     fits_add_long_history(outhdr, "    (ie, side length ~ %g arcmin)", healpix_side_length_arcmin(Nside));
360     fits_add_long_history(outhdr, "  deduplication scale: %g arcsec", dedup_radius);
361     fits_add_long_history(outhdr, "  number of sweeps: %i", nsweeps);
362 
363     fits_header_add_int(outhdr, "NSTARS", N, "Number of stars.");
364     fits_header_add_int(outhdr, "HEALPIX", bighp, "Healpix covered by this catalog, with Nside=HPNSIDE");
365     fits_header_add_int(outhdr, "HPNSIDE", bignside, "Nside of HEALPIX.");
366     fits_header_add_int(outhdr, "CUTNSIDE", Nside, "uniformization scale (healpix nside)");
367     fits_header_add_int(outhdr, "CUTMARG", nmargin, "margin size, in healpixels");
368     //qfits_header_add(outhdr, "CUTBAND", cutband, "band on which the cut was made", NULL);
369     fits_header_add_double(outhdr, "CUTDEDUP", dedup_radius, "deduplication radius [arcsec]");
370     fits_header_add_int(outhdr, "CUTNSWEP", nsweeps, "number of sweeps");
371     //fits_header_add_double(outhdr, "CUTMINMG", minmag, "minimum magnitude");
372     //fits_header_add_double(outhdr, "CUTMAXMG", maxmag, "maximum magnitude");
373     for (k=0; k<nsweeps; k++) {
374         char key[64];
375         sprintf(key, "SWEEP%i", (k+1));
376         fits_header_add_int(outhdr, key, npersweep[k], "# stars added");
377     }
378     free(npersweep);
379 
380     if (fitstable_write_primary_header(outtable)) {
381         ERROR("Failed to write primary header");
382         return -1;
383     }
384 
385     // Write output.
386     fitstable_add_fits_columns_as_struct2(intable, outtable);
387     if (fitstable_write_header(outtable)) {
388         ERROR("Failed to write output table header");
389         return -1;
390     }
391     logmsg("Writing output...\n");
392     logverb("Row size: %i\n", fitstable_row_size(intable));
393     if (fitstable_copy_rows_data(intable, outorder, N, outtable)) {
394         ERROR("Failed to copy rows from input table to output");
395         return -1;
396     }
397     if (fitstable_fix_header(outtable)) {
398         ERROR("Failed to fix output table header");
399         return -1;
400     }
401     free(outorder);
402     return 0;
403 }
404 
405