1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 This software was developed in the APRIL Robotics Lab under the
4 direction of Edwin Olson, ebolson@umich.edu. This software may be
5 available under alternative licensing terms; contact the address above.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright notice, this
9    list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11    this list of conditions and the following disclaimer in the documentation
12    and/or other materials provided with the distribution.
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 The views and conclusions contained in the software and documentation are those
24 of the authors and should not be interpreted as representing official policies,
25 either expressed or implied, of the Regents of The University of Michigan.
26 */
27 
28 // limitation: image size must be <32768 in width and height. This is
29 // because we use a fixed-point 16 bit integer representation with one
30 // fractional bit.
31 #include <math.h>
32 #include <assert.h>
33 // To ensure UINT32_MAX, INT32_MX are defined on centos, ubuntu 12.04 we define __STDC_LIMIT_MACROS
34 #define __STDC_LIMIT_MACROS
35 #include <string.h>
36 #include <stdio.h>
37 #include <stdint.h>
38 #include <algorithm>
39 
40 #include "apriltag.h"
41 #include "common/image_u8x3.h"
42 #include "common/zarray.h"
43 #include "common/zhash.h"
44 #include "common/unionfind.h"
45 #include "common/timeprofile.h"
46 #include "common/zmaxheap.h"
47 #include "common/postscript_utils.h"
48 #include "common/math_util.h"
49 
50 #ifdef _WIN32
random(void)51 static inline long int random(void)
52 {
53         return rand();
54 }
55 #endif
56 
u64hash_2(uint64_t x)57 static inline uint32_t u64hash_2(uint64_t x) {
58     return (2654435761U * x) >> 32;
59 }
60 
61 struct uint64_zarray_entry
62 {
63     uint64_t id;
64     zarray_t *cluster;
65 
66     struct uint64_zarray_entry *next;
67 };
68 
69 #ifndef M_PI
70 # define M_PI 3.141592653589793238462643383279502884196
71 #endif
72 
73 struct pt
74 {
75     // Note: these represent 2*actual value.
76     uint16_t x, y;
77     int16_t gx, gy;
78 
79     float slope;
80 };
81 
82 struct unionfind_task
83 {
84     int y0, y1;
85     int w, h, s;
86     unionfind_t *uf;
87     image_u8_t *im;
88 };
89 
90 struct quad_task
91 {
92     zarray_t *clusters;
93     int cidx0, cidx1; // [cidx0, cidx1)
94     zarray_t *quads;
95     apriltag_detector_t *td;
96     int w, h;
97 
98     image_u8_t *im;
99     int tag_width;
100     bool normal_border;
101     bool reversed_border;
102 };
103 
104 
105 struct cluster_task
106 {
107     int y0;
108     int y1;
109     int w;
110     int s;
111     int nclustermap;
112     unionfind_t* uf;
113     image_u8_t* im;
114     zarray_t* clusters;
115 };
116 
117 struct remove_vertex
118 {
119     int i;           // which vertex to remove?
120     int left, right; // left vertex, right vertex
121 
122     double err;
123 };
124 
125 struct segment
126 {
127     int is_vertex;
128 
129     // always greater than zero, but right can be > size, which denotes
130     // a wrap around back to the beginning of the points. and left < right.
131     int left, right;
132 };
133 
134 struct line_fit_pt
135 {
136     double Mx, My;
137     double Mxx, Myy, Mxy;
138     double W; // total weight
139 };
140 
141 struct cluster_hash
142 {
143     uint32_t hash;
144     uint64_t id;
145     zarray_t* data;
146 };
147 
148 
149 // lfps contains *cumulative* moments for N points, with
150 // index j reflecting points [0,j] (inclusive).
151 //
152 // fit a line to the points [i0, i1] (inclusive). i0, i1 are both [0,
153 // sz) if i1 < i0, we treat this as a wrap around.
fit_line(struct line_fit_pt * lfps,int sz,int i0,int i1,double * lineparm,double * err,double * mse)154 void fit_line(struct line_fit_pt *lfps, int sz, int i0, int i1, double *lineparm, double *err, double *mse)
155 {
156     assert(i0 != i1);
157     assert(i0 >= 0 && i1 >= 0 && i0 < sz && i1 < sz);
158 
159     double Mx, My, Mxx, Myy, Mxy, W;
160     int N; // how many points are included in the set?
161 
162     if (i0 < i1) {
163         N = i1 - i0 + 1;
164 
165         Mx  = lfps[i1].Mx;
166         My  = lfps[i1].My;
167         Mxx = lfps[i1].Mxx;
168         Mxy = lfps[i1].Mxy;
169         Myy = lfps[i1].Myy;
170         W   = lfps[i1].W;
171 
172         if (i0 > 0) {
173             Mx  -= lfps[i0-1].Mx;
174             My  -= lfps[i0-1].My;
175             Mxx -= lfps[i0-1].Mxx;
176             Mxy -= lfps[i0-1].Mxy;
177             Myy -= lfps[i0-1].Myy;
178             W   -= lfps[i0-1].W;
179         }
180 
181     } else {
182         // i0 > i1, e.g. [15, 2]. Wrap around.
183         assert(i0 > 0);
184 
185         Mx  = lfps[sz-1].Mx   - lfps[i0-1].Mx;
186         My  = lfps[sz-1].My   - lfps[i0-1].My;
187         Mxx = lfps[sz-1].Mxx  - lfps[i0-1].Mxx;
188         Mxy = lfps[sz-1].Mxy  - lfps[i0-1].Mxy;
189         Myy = lfps[sz-1].Myy  - lfps[i0-1].Myy;
190         W   = lfps[sz-1].W    - lfps[i0-1].W;
191 
192         Mx  += lfps[i1].Mx;
193         My  += lfps[i1].My;
194         Mxx += lfps[i1].Mxx;
195         Mxy += lfps[i1].Mxy;
196         Myy += lfps[i1].Myy;
197         W   += lfps[i1].W;
198 
199         N = sz - i0 + i1 + 1;
200     }
201 
202     assert(N >= 2);
203 
204     double Ex = Mx / W;
205     double Ey = My / W;
206     double Cxx = Mxx / W - Ex*Ex;
207     double Cxy = Mxy / W - Ex*Ey;
208     double Cyy = Myy / W - Ey*Ey;
209 
210     //if (1) {
211     //    // on iOS about 5% of total CPU spent in these trig functions.
212     //    // 85 ms per frame on 5S, example.pnm
213     //    //
214     //    // XXX this was using the double-precision atan2. Was there a case where
215     //    // we needed that precision? Seems doubtful.
216     //    double normal_theta = .5 * atan2f(-2*Cxy, (Cyy - Cxx));
217     //    nx_old = cosf(normal_theta);
218     //    ny_old = sinf(normal_theta);
219     //}
220 
221     // Instead of using the above cos/sin method, pose it as an eigenvalue problem.
222     double eig_small = 0.5*(Cxx + Cyy - sqrtf((Cxx - Cyy)*(Cxx - Cyy) + 4*Cxy*Cxy));
223 
224     if (lineparm) {
225         lineparm[0] = Ex;
226         lineparm[1] = Ey;
227 
228         double eig = 0.5*(Cxx + Cyy + sqrtf((Cxx - Cyy)*(Cxx - Cyy) + 4*Cxy*Cxy));
229         double nx1 = Cxx - eig;
230         double ny1 = Cxy;
231         double M1 = nx1*nx1 + ny1*ny1;
232         double nx2 = Cxy;
233         double ny2 = Cyy - eig;
234         double M2 = nx2*nx2 + ny2*ny2;
235 
236         double nx, ny, M;
237         if (M1 > M2) {
238             nx = nx1;
239             ny = ny1;
240             M = M1;
241         } else {
242             nx = nx2;
243             ny = ny2;
244             M = M2;
245         }
246 
247         double length = sqrtf(M);
248         lineparm[2] = nx/length;
249         lineparm[3] = ny/length;
250     }
251 
252     // sum of squared errors =
253     //
254     // SUM_i ((p_x - ux)*nx + (p_y - uy)*ny)^2
255     // SUM_i  nx*nx*(p_x - ux)^2 + 2nx*ny(p_x -ux)(p_y-uy) + ny*ny*(p_y-uy)*(p_y-uy)
256     //  nx*nx*SUM_i((p_x -ux)^2) + 2nx*ny*SUM_i((p_x-ux)(p_y-uy)) + ny*ny*SUM_i((p_y-uy)^2)
257     //
258     //  nx*nx*N*Cxx + 2nx*ny*N*Cxy + ny*ny*N*Cyy
259 
260     // sum of squared errors
261     if (err)
262         *err = N*eig_small;
263 
264     // mean squared error
265     if (mse)
266         *mse = eig_small;
267 }
268 
pt_compare_angle(struct pt * a,struct pt * b)269 float pt_compare_angle(struct pt *a, struct pt *b) {
270     return a->slope - b->slope;
271 }
272 
err_compare_descending(const void * _a,const void * _b)273 int err_compare_descending(const void *_a, const void *_b)
274 {
275     const double *a =  (double *)_a;
276     const double *b =  (double *)_b;
277 
278     return ((*a) < (*b)) ? 1 : -1;
279 }
280 
281 /*
282 
283   1. Identify A) white points near a black point and B) black points near a white point.
284 
285   2. Find the connected components within each of the classes above,
286   yielding clusters of "white-near-black" and
287   "black-near-white". (These two classes are kept separate). Each
288   segment has a unique id.
289 
290   3. For every pair of "white-near-black" and "black-near-white"
291   clusters, find the set of points that are in one and adjacent to the
292   other. In other words, a "boundary" layer between the two
293   clusters. (This is actually performed by iterating over the pixels,
294   rather than pairs of clusters.) Critically, this helps keep nearby
295   edges from becoming connected.
296 */
quad_segment_maxima(apriltag_detector_t * td,zarray_t * cluster,struct line_fit_pt * lfps,int indices[4])297 int quad_segment_maxima(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
298 {
299     int sz = zarray_size(cluster);
300 
301     // ksz: when fitting points, how many points on either side do we consider?
302     // (actual "kernel" width is 2ksz).
303     //
304     // This value should be about: 0.5 * (points along shortest edge).
305     //
306     // If all edges were equally-sized, that would give a value of
307     // sz/8. We make it somewhat smaller to account for tags at high
308     // aspects.
309 
310     // XXX Tunable. Maybe make a multiple of JPEG block size to increase robustness
311     // to JPEG compression artifacts?
312     int ksz = imin(20, sz / 12);
313 
314     // can't fit a quad if there are too few points.
315     if (ksz < 2)
316         return 0;
317 
318     double *errs = (double *)malloc(sizeof(double)*sz);
319 
320     for (int i = 0; i < sz; i++) {
321         fit_line(lfps, sz, (i + sz - ksz) % sz, (i + ksz) % sz, NULL, &errs[i], NULL);
322     }
323 
324     // apply a low-pass filter to errs
325     if (1) {
326         double *y = (double *)malloc(sizeof(double)*sz);
327 
328         // how much filter to apply?
329 
330         // XXX Tunable
331         double sigma = 1; // was 3
332 
333         // cutoff = exp(-j*j/(2*sigma*sigma));
334         // log(cutoff) = -j*j / (2*sigma*sigma)
335         // log(cutoff)*2*sigma*sigma = -j*j;
336 
337         // how big a filter should we use? We make our kernel big
338         // enough such that we represent any values larger than
339         // 'cutoff'.
340 
341         // XXX Tunable (though not super useful to change)
342         double cutoff = 0.05;
343         int fsz = sqrt(-log(cutoff)*2*sigma*sigma) + 1;
344         fsz = 2*fsz + 1;
345 
346         // For default values of cutoff = 0.05, sigma = 3,
347         // we have fsz = 17.
348         float *f = (float *)malloc(sizeof(float)*fsz);
349 
350         for (int i = 0; i < fsz; i++) {
351             int j = i - fsz / 2;
352             f[i] = exp(-j*j/(2*sigma*sigma));
353         }
354 
355         for (int iy = 0; iy < sz; iy++) {
356             double acc = 0;
357 
358             for (int i = 0; i < fsz; i++) {
359                 acc += errs[(iy + i - fsz / 2 + sz) % sz] * f[i];
360             }
361             y[iy] = acc;
362         }
363 
364         memcpy(errs, y, sizeof(double)*sz);
365         free(y);
366         free(f);
367     }
368 
369     int *maxima = (int *)malloc(sizeof(int)*sz);
370     double *maxima_errs = (double *)malloc(sizeof(double)*sz);
371     int nmaxima = 0;
372 
373     for (int i = 0; i < sz; i++) {
374         if (errs[i] > errs[(i+1)%sz] && errs[i] > errs[(i+sz-1)%sz]) {
375             maxima[nmaxima] = i;
376             maxima_errs[nmaxima] = errs[i];
377             nmaxima++;
378         }
379     }
380     free(errs);
381 
382     // if we didn't get at least 4 maxima, we can't fit a quad.
383     if (nmaxima < 4)
384         return 0;
385 
386     // select only the best maxima if we have too many
387     int max_nmaxima = td->qtp.max_nmaxima;
388 
389     if (nmaxima > max_nmaxima) {
390         double *maxima_errs_copy = (double *)malloc(sizeof(double)*nmaxima);
391         memcpy(maxima_errs_copy, maxima_errs, sizeof(double)*nmaxima);
392 
393         // throw out all but the best handful of maxima. Sorts descending.
394         qsort(maxima_errs_copy, nmaxima, sizeof(double), err_compare_descending);
395 
396         double maxima_thresh = maxima_errs_copy[max_nmaxima];
397         int out = 0;
398         for (int in = 0; in < nmaxima; in++) {
399             if (maxima_errs[in] <= maxima_thresh)
400                 continue;
401             maxima[out++] = maxima[in];
402         }
403         nmaxima = out;
404         free(maxima_errs_copy);
405     }
406     free(maxima_errs);
407 
408     int best_indices[4];
409     double best_error = HUGE_VALF;
410 
411     double err01, err12, err23, err30;
412     double mse01, mse12, mse23, mse30;
413     double params01[4], params12[4], params23[4], params30[4];
414 
415     // disallow quads where the angle is less than a critical value.
416     double max_dot = td->qtp.cos_critical_rad; //25*M_PI/180);
417 
418     for (int m0 = 0; m0 < nmaxima - 3; m0++) {
419         int i0 = maxima[m0];
420 
421         for (int m1 = m0+1; m1 < nmaxima - 2; m1++) {
422             int i1 = maxima[m1];
423 
424             fit_line(lfps, sz, i0, i1, params01, &err01, &mse01);
425 
426             if (mse01 > td->qtp.max_line_fit_mse)
427                 continue;
428 
429             for (int m2 = m1+1; m2 < nmaxima - 1; m2++) {
430                 int i2 = maxima[m2];
431 
432                 fit_line(lfps, sz, i1, i2, params12, &err12, &mse12);
433                 if (mse12 > td->qtp.max_line_fit_mse)
434                     continue;
435 
436                 double dot = params01[2]*params12[2] + params01[3]*params12[3];
437                 if (fabs(dot) > max_dot)
438                     continue;
439 
440                 for (int m3 = m2+1; m3 < nmaxima; m3++) {
441                     int i3 = maxima[m3];
442 
443                     fit_line(lfps, sz, i2, i3, params23, &err23, &mse23);
444                     if (mse23 > td->qtp.max_line_fit_mse)
445                         continue;
446 
447                     fit_line(lfps, sz, i3, i0, params30, &err30, &mse30);
448                     if (mse30 > td->qtp.max_line_fit_mse)
449                         continue;
450 
451                     double err = err01 + err12 + err23 + err30;
452                     if (err < best_error) {
453                         best_error = err;
454                         best_indices[0] = i0;
455                         best_indices[1] = i1;
456                         best_indices[2] = i2;
457                         best_indices[3] = i3;
458                     }
459                 }
460             }
461         }
462     }
463 
464     free(maxima);
465 
466     if (best_error == HUGE_VALF)
467         return 0;
468 
469     for (int i = 0; i < 4; i++)
470         indices[i] = best_indices[i];
471 
472     if (best_error / sz < td->qtp.max_line_fit_mse)
473         return 1;
474     return 0;
475 }
476 
477 // returns 0 if the cluster looks bad.
quad_segment_agg(apriltag_detector_t * td,zarray_t * cluster,struct line_fit_pt * lfps,int indices[4])478 int quad_segment_agg(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
479 {
480     int sz = zarray_size(cluster);
481 
482     zmaxheap_t *heap = zmaxheap_create(sizeof(struct remove_vertex*));
483 
484     // We will initially allocate sz rvs. We then have two types of
485     // iterations: some iterations that are no-ops in terms of
486     // allocations, and those that remove a vertex and allocate two
487     // more children.  This will happen at most (sz-4) times.  Thus we
488     // need: sz + 2*(sz-4) entries.
489 
490     int rvalloc_pos = 0;
491     int rvalloc_size = 3*sz;
492     struct remove_vertex *rvalloc = (struct remove_vertex *)calloc(rvalloc_size, sizeof(struct remove_vertex));
493 
494     struct segment *segs = (struct segment *)calloc(sz, sizeof(struct segment));
495 
496     // populate with initial entries
497     for (int i = 0; i < sz; i++) {
498         struct remove_vertex *rv = &rvalloc[rvalloc_pos++];
499         rv->i = i;
500         if (i == 0) {
501             rv->left = sz-1;
502             rv->right = 1;
503         } else {
504             rv->left  = i-1;
505             rv->right = (i+1) % sz;
506         }
507 
508         fit_line(lfps, sz, rv->left, rv->right, NULL, NULL, &rv->err);
509 
510         zmaxheap_add(heap, &rv, -rv->err);
511 
512         segs[i].left = rv->left;
513         segs[i].right = rv->right;
514         segs[i].is_vertex = 1;
515     }
516 
517     int nvertices = sz;
518 
519     while (nvertices > 4) {
520         assert(rvalloc_pos < rvalloc_size);
521 
522         struct remove_vertex *rv;
523         float err;
524 
525         int res = zmaxheap_remove_max(heap, &rv, &err);
526         if (!res)
527             return 0;
528         assert(res);
529 
530         // is this remove_vertex valid? (Or has one of the left/right
531         // vertices changes since we last looked?)
532         if (!segs[rv->i].is_vertex ||
533             !segs[rv->left].is_vertex ||
534             !segs[rv->right].is_vertex) {
535             continue;
536         }
537 
538         // we now merge.
539         assert(segs[rv->i].is_vertex);
540 
541         segs[rv->i].is_vertex = 0;
542         segs[rv->left].right = rv->right;
543         segs[rv->right].left = rv->left;
544 
545         // create the join to the left
546         if (1) {
547             struct remove_vertex *child = &rvalloc[rvalloc_pos++];
548             child->i = rv->left;
549             child->left = segs[rv->left].left;
550             child->right = rv->right;
551 
552             fit_line(lfps, sz, child->left, child->right, NULL, NULL, &child->err);
553 
554             zmaxheap_add(heap, &child, -child->err);
555         }
556 
557         // create the join to the right
558         if (1) {
559             struct remove_vertex *child = &rvalloc[rvalloc_pos++];
560             child->i = rv->right;
561             child->left = rv->left;
562             child->right = segs[rv->right].right;
563 
564             fit_line(lfps, sz, child->left, child->right, NULL, NULL, &child->err);
565 
566             zmaxheap_add(heap, &child, -child->err);
567         }
568 
569         // we now have one less vertex
570         nvertices--;
571     }
572 
573     free(rvalloc);
574     zmaxheap_destroy(heap);
575 
576     int idx = 0;
577     for (int i = 0; i < sz; i++) {
578         if (segs[i].is_vertex) {
579             indices[idx++] = i;
580         }
581     }
582 
583     free(segs);
584 
585     return 1;
586 }
587 
588 /**
589  * Compute statistics that allow line fit queries to be
590  * efficiently computed for any contiguous range of indices.
591  */
compute_lfps(int sz,zarray_t * cluster,image_u8_t * im)592 struct line_fit_pt* compute_lfps(int sz, zarray_t* cluster, image_u8_t* im) {
593     struct line_fit_pt *lfps = (struct line_fit_pt *)calloc(sz, sizeof(struct line_fit_pt));
594 
595     for (int i = 0; i < sz; i++) {
596         struct pt *p;
597         zarray_get_volatile(cluster, i, &p);
598 
599         if (i > 0) {
600             memcpy(&lfps[i], &lfps[i-1], sizeof(struct line_fit_pt));
601         }
602 
603         {
604             // we now undo our fixed-point arithmetic.
605             double delta = 0.5; // adjust for pixel center bias
606             double x = p->x * .5 + delta;
607             double y = p->y * .5 + delta;
608             int ix = x, iy = y;
609             double W = 1;
610 
611             if (ix > 0 && ix+1 < im->width && iy > 0 && iy+1 < im->height) {
612                 int grad_x = im->buf[iy * im->stride + ix + 1] -
613                     im->buf[iy * im->stride + ix - 1];
614 
615                 int grad_y = im->buf[(iy+1) * im->stride + ix] -
616                     im->buf[(iy-1) * im->stride + ix];
617 
618                 // XXX Tunable. How to shape the gradient magnitude?
619                 W = sqrt(grad_x*grad_x + grad_y*grad_y) + 1;
620             }
621 
622             double fx = x, fy = y;
623             lfps[i].Mx  += W * fx;
624             lfps[i].My  += W * fy;
625             lfps[i].Mxx += W * fx * fx;
626             lfps[i].Mxy += W * fx * fy;
627             lfps[i].Myy += W * fy * fy;
628             lfps[i].W   += W;
629         }
630     }
631     return lfps;
632 }
633 
ptsort(struct pt * pts,int sz)634 static inline void ptsort(struct pt *pts, int sz)
635 {
636 #define MAYBE_SWAP(arr,apos,bpos)                                   \
637     if (pt_compare_angle(&(arr[apos]), &(arr[bpos])) > 0) {                        \
638         tmp = arr[apos]; arr[apos] = arr[bpos]; arr[bpos] = tmp;    \
639     };
640 
641     if (sz <= 1)
642         return;
643 
644     if (sz == 2) {
645         struct pt tmp;
646         MAYBE_SWAP(pts, 0, 1);
647         return;
648     }
649 
650     // NB: Using less-branch-intensive sorting networks here on the
651     // hunch that it's better for performance.
652     if (sz == 3) { // 3 element bubble sort is optimal
653         struct pt tmp;
654         MAYBE_SWAP(pts, 0, 1);
655         MAYBE_SWAP(pts, 1, 2);
656         MAYBE_SWAP(pts, 0, 1);
657         return;
658     }
659 
660     if (sz == 4) { // 4 element optimal sorting network.
661         struct pt tmp;
662         MAYBE_SWAP(pts, 0, 1); // sort each half, like a merge sort
663         MAYBE_SWAP(pts, 2, 3);
664         MAYBE_SWAP(pts, 0, 2); // minimum value is now at 0.
665         MAYBE_SWAP(pts, 1, 3); // maximum value is now at end.
666         MAYBE_SWAP(pts, 1, 2); // that only leaves the middle two.
667         return;
668     }
669     if (sz == 5) {
670         // this 9-step swap is optimal for a sorting network, but two
671         // steps slower than a generic sort.
672         struct pt tmp;
673         MAYBE_SWAP(pts, 0, 1); // sort each half (3+2), like a merge sort
674         MAYBE_SWAP(pts, 3, 4);
675         MAYBE_SWAP(pts, 1, 2);
676         MAYBE_SWAP(pts, 0, 1);
677         MAYBE_SWAP(pts, 0, 3); // minimum element now at 0
678         MAYBE_SWAP(pts, 2, 4); // maximum element now at end
679         MAYBE_SWAP(pts, 1, 2); // now resort the three elements 1-3.
680         MAYBE_SWAP(pts, 2, 3);
681         MAYBE_SWAP(pts, 1, 2);
682         return;
683     }
684 
685 #undef MAYBE_SWAP
686 
687     // a merge sort with temp storage.
688 
689     struct pt *tmp = (struct pt *)malloc(sizeof(struct pt) * sz);
690 
691     memcpy(tmp, pts, sizeof(struct pt) * sz);
692 
693     int asz = sz/2;
694     int bsz = sz - asz;
695 
696     struct pt *as = &tmp[0];
697     struct pt *bs = &tmp[asz];
698 
699     ptsort(as, asz);
700     ptsort(bs, bsz);
701 
702     #define MERGE(apos,bpos)                        \
703     if (pt_compare_angle(&(as[apos]), &(bs[bpos])) < 0)        \
704         pts[outpos++] = as[apos++];             \
705     else                                        \
706         pts[outpos++] = bs[bpos++];
707 
708     int apos = 0, bpos = 0, outpos = 0;
709     while (apos + 8 < asz && bpos + 8 < bsz) {
710         MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos);
711         MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos);
712     }
713 
714     while (apos < asz && bpos < bsz) {
715         MERGE(apos,bpos);
716     }
717 
718     if (apos < asz)
719         memcpy(&pts[outpos], &as[apos], (asz-apos)*sizeof(struct pt));
720     if (bpos < bsz)
721         memcpy(&pts[outpos], &bs[bpos], (bsz-bpos)*sizeof(struct pt));
722 
723     free(tmp);
724 
725 #undef MERGE
726 }
727 
728 // return 1 if the quad looks okay, 0 if it should be discarded
fit_quad(apriltag_detector_t * td,image_u8_t * im,zarray_t * cluster,struct quad * quad,int tag_width,bool normal_border,bool reversed_border)729 int fit_quad(
730         apriltag_detector_t *td,
731         image_u8_t *im,
732         zarray_t *cluster,
733         struct quad *quad,
734         int tag_width,
735         bool normal_border,
736         bool reversed_border) {
737     int res = 0;
738 
739     int sz = zarray_size(cluster);
740     if (sz < 24) // Synchronize with later check.
741         return 0;
742 
743     /////////////////////////////////////////////////////////////
744     // Step 1. Sort points so they wrap around the center of the
745     // quad. We will constrain our quad fit to simply partition this
746     // ordered set into 4 groups.
747 
748     // compute a bounding box so that we can order the points
749     // according to their angle WRT the center.
750     struct pt *p1;
751     zarray_get_volatile(cluster, 0, &p1);
752     uint16_t xmax = p1->x;
753     uint16_t xmin = p1->x;
754     uint16_t ymax = p1->y;
755     uint16_t ymin = p1->y;
756     for (int pidx = 1; pidx < zarray_size(cluster); pidx++) {
757         struct pt *p;
758         zarray_get_volatile(cluster, pidx, &p);
759 
760         if (p->x > xmax) {
761             xmax = p->x;
762         } else if (p->x < xmin) {
763             xmin = p->x;
764         }
765 
766         if (p->y > ymax) {
767             ymax = p->y;
768         } else if (p->y < ymin) {
769             ymin = p->y;
770         }
771     }
772 
773     if ((xmax - xmin)*(ymax - ymin) < tag_width) {
774         return 0;
775     }
776 
777     // add some noise to (cx,cy) so that pixels get a more diverse set
778     // of theta estimates. This will help us remove more points.
779     // (Only helps a small amount. The actual noise values here don't
780     // matter much at all, but we want them [-1, 1]. (XXX with
781     // fixed-point, should range be bigger?)
782     float cx = (xmin + xmax) * 0.5 + 0.05118;
783     float cy = (ymin + ymax) * 0.5 + -0.028581;
784 
785     float dot = 0;
786 
787     float quadrants[2][2] = {{-1*(2 << 15), 0}, {2*(2 << 15), 2 << 15}};
788 
789     for (int pidx = 0; pidx < zarray_size(cluster); pidx++) {
790         struct pt *p;
791         zarray_get_volatile(cluster, pidx, &p);
792 
793         float dx = p->x - cx;
794         float dy = p->y - cy;
795 
796         dot += dx*p->gx + dy*p->gy;
797 
798         float quadrant = quadrants[dy > 0][dx > 0];
799         if (dy < 0) {
800             dy = -dy;
801             dx = -dx;
802         }
803 
804         if (dx < 0) {
805             float tmp = dx;
806             dx = dy;
807             dy = -tmp;
808         }
809         p->slope = quadrant + dy/dx;
810     }
811 
812     // Ensure that the black border is inside the white border.
813     quad->reversed_border = dot < 0;
814     if (!reversed_border && quad->reversed_border) {
815         return 0;
816     }
817     if (!normal_border && !quad->reversed_border) {
818         return 0;
819     }
820 
821     // we now sort the points according to theta. This is a prepatory
822     // step for segmenting them into four lines.
823     if (1) {
824         ptsort((struct pt*) cluster->data, zarray_size(cluster));
825 
826         // remove duplicate points. (A byproduct of our segmentation system.)
827         if (1) {
828             int outpos = 1;
829 
830             struct pt *last;
831             zarray_get_volatile(cluster, 0, &last);
832 
833             for (int i = 1; i < sz; i++) {
834 
835                 struct pt *p;
836                 zarray_get_volatile(cluster, i, &p);
837 
838                 if (p->x != last->x || p->y != last->y) {
839 
840                     if (i != outpos)  {
841                         struct pt *out;
842                         zarray_get_volatile(cluster, outpos, &out);
843                         memcpy(out, p, sizeof(struct pt));
844                     }
845 
846                     outpos++;
847                 }
848 
849                 last = p;
850             }
851 
852             cluster->size = outpos;
853             sz = outpos;
854         }
855 
856     }
857 
858     if (sz < 24)
859         return 0;
860 
861 
862     struct line_fit_pt *lfps = compute_lfps(sz, cluster, im);
863 
864     int indices[4];
865     if (1) {
866         if (!quad_segment_maxima(td, cluster, lfps, indices))
867             goto finish;
868     } else {
869         if (!quad_segment_agg(td, cluster, lfps, indices))
870             goto finish;
871     }
872 
873 
874     double lines[4][4];
875 
876     for (int i = 0; i < 4; i++) {
877         int i0 = indices[i];
878         int i1 = indices[(i+1)&3];
879 
880         double err;
881         fit_line(lfps, sz, i0, i1, lines[i], NULL, &err);
882 
883         if (err > td->qtp.max_line_fit_mse) {
884             res = 0;
885             goto finish;
886         }
887     }
888 
889     for (int i = 0; i < 4; i++) {
890         // solve for the intersection of lines (i) and (i+1)&3.
891         // p0 + lambda0*u0 = p1 + lambda1*u1, where u0 and u1
892         // are the line directions.
893         //
894         // lambda0*u0 - lambda1*u1 = (p1 - p0)
895         //
896         // rearrange (solve for lambdas)
897         //
898         // [u0_x   -u1_x ] [lambda0] = [ p1_x - p0_x ]
899         // [u0_y   -u1_y ] [lambda1]   [ p1_y - p0_y ]
900         //
901         // remember that lines[i][0,1] = p, lines[i][2,3] = NORMAL vector.
902         // We want the unit vector, so we need the perpendiculars. Thus, below
903         // we have swapped the x and y components and flipped the y components.
904 
905         double A00 =  lines[i][3],  A01 = -lines[(i+1)&3][3];
906         double A10 =  -lines[i][2],  A11 = lines[(i+1)&3][2];
907         double B0 = -lines[i][0] + lines[(i+1)&3][0];
908         double B1 = -lines[i][1] + lines[(i+1)&3][1];
909 
910         double det = A00 * A11 - A10 * A01;
911 
912         // inverse.
913         double W00 = A11 / det, W01 = -A01 / det;
914         if (fabs(det) < 0.001) {
915             res = 0;
916             goto finish;
917         }
918 
919         // solve
920         double L0 = W00*B0 + W01*B1;
921 
922         // compute intersection
923         quad->p[i][0] = lines[i][0] + L0*A00;
924         quad->p[i][1] = lines[i][1] + L0*A10;
925 
926         res = 1;
927     }
928 
929     // reject quads that are too small
930     if (1) {
931         double area = 0;
932 
933         // get area of triangle formed by points 0, 1, 2, 0
934         double length[3], p;
935         for (int i = 0; i < 3; i++) {
936             int idxa = i; // 0, 1, 2,
937             int idxb = (i+1) % 3; // 1, 2, 0
938             length[i] = sqrt(sq(quad->p[idxb][0] - quad->p[idxa][0]) +
939                              sq(quad->p[idxb][1] - quad->p[idxa][1]));
940         }
941         p = (length[0] + length[1] + length[2]) / 2;
942 
943         area += sqrt(p*(p-length[0])*(p-length[1])*(p-length[2]));
944 
945         // get area of triangle formed by points 2, 3, 0, 2
946         for (int i = 0; i < 3; i++) {
947             int idxs[] = { 2, 3, 0, 2 };
948             int idxa = idxs[i];
949             int idxb = idxs[i+1];
950             length[i] = sqrt(sq(quad->p[idxb][0] - quad->p[idxa][0]) +
951                              sq(quad->p[idxb][1] - quad->p[idxa][1]));
952         }
953         p = (length[0] + length[1] + length[2]) / 2;
954 
955         area += sqrt(p*(p-length[0])*(p-length[1])*(p-length[2]));
956 
957         if (area < tag_width*tag_width) {
958             res = 0;
959             goto finish;
960         }
961     }
962 
963     // reject quads whose cumulative angle change isn't equal to 2PI
964     if (1) {
965         for (int i = 0; i < 4; i++) {
966             int i0 = i, i1 = (i+1)&3, i2 = (i+2)&3;
967 
968             double dx1 = quad->p[i1][0] - quad->p[i0][0];
969             double dy1 = quad->p[i1][1] - quad->p[i0][1];
970             double dx2 = quad->p[i2][0] - quad->p[i1][0];
971             double dy2 = quad->p[i2][1] - quad->p[i1][1];
972             double cos_dtheta = (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2));
973 
974             if ((cos_dtheta > td->qtp.cos_critical_rad || cos_dtheta < -td->qtp.cos_critical_rad) || dx1*dy2 < dy1*dx2) {
975                 res = 0;
976                 goto finish;
977             }
978         }
979     }
980 
981   finish:
982 
983     free(lfps);
984 
985     return res;
986 }
987 
988 #define DO_UNIONFIND2(dx, dy) if (im->buf[(y + dy)*s + x + dx] == v) unionfind_connect(uf, y*w + x, (y + dy)*w + x + dx);
989 
do_unionfind_first_line(unionfind_t * uf,image_u8_t * im,int h,int w,int s)990 static void do_unionfind_first_line(unionfind_t *uf, image_u8_t *im, int h, int w, int s)
991 {
992     int y = 0;
993     uint8_t v;
994 
995     for (int x = 1; x < w - 1; x++) {
996         v = im->buf[y*s + x];
997 
998         if (v == 127)
999             continue;
1000 
1001         DO_UNIONFIND2(-1, 0);
1002     }
1003 }
1004 
do_unionfind_line2(unionfind_t * uf,image_u8_t * im,int h,int w,int s,int y)1005 static void do_unionfind_line2(unionfind_t *uf, image_u8_t *im, int h, int w, int s, int y)
1006 {
1007     assert(y > 0);
1008 
1009     uint8_t v_m1_m1;
1010     uint8_t v_0_m1 = im->buf[(y - 1)*s];
1011     uint8_t v_1_m1 = im->buf[(y - 1)*s + 1];
1012     uint8_t v_m1_0;
1013     uint8_t v = im->buf[y*s];
1014 
1015     for (int x = 1; x < w - 1; x++) {
1016         v_m1_m1 = v_0_m1;
1017         v_0_m1 = v_1_m1;
1018         v_1_m1 = im->buf[(y - 1)*s + x + 1];
1019         v_m1_0 = v;
1020         v = im->buf[y*s + x];
1021 
1022         if (v == 127)
1023             continue;
1024 
1025         // (dx,dy) pairs for 8 connectivity:
1026         // (-1, -1)    (0, -1)    (1, -1)
1027         // (-1, 0)    (REFERENCE)
1028         DO_UNIONFIND2(-1, 0);
1029 
1030         if (x == 1 || !((v_m1_0 == v_m1_m1) && (v_m1_m1 == v_0_m1))) {
1031             DO_UNIONFIND2(0, -1);
1032         }
1033 
1034         if (v == 255) {
1035             if (x == 1 || !(v_m1_0 == v_m1_m1 || v_0_m1 == v_m1_m1) ) {
1036                 DO_UNIONFIND2(-1, -1);
1037             }
1038             if (!(v_0_m1 == v_1_m1)) {
1039                 DO_UNIONFIND2(1, -1);
1040             }
1041         }
1042     }
1043 }
1044 #undef DO_UNIONFIND2
1045 
do_unionfind_task2(void * p)1046 static void do_unionfind_task2(void *p)
1047 {
1048     struct unionfind_task *task = (struct unionfind_task*) p;
1049 
1050     for (int y = task->y0; y < task->y1; y++) {
1051         do_unionfind_line2(task->uf, task->im, task->h, task->w, task->s, y);
1052     }
1053 }
1054 
do_quad_task(void * p)1055 static void do_quad_task(void *p)
1056 {
1057     struct quad_task *task = (struct quad_task*) p;
1058 
1059     zarray_t *clusters = task->clusters;
1060     zarray_t *quads = task->quads;
1061     apriltag_detector_t *td = task->td;
1062     int w = task->w, h = task->h;
1063 
1064     for (int cidx = task->cidx0; cidx < task->cidx1; cidx++) {
1065 
1066         zarray_t *cluster;
1067         zarray_get(clusters, cidx, &cluster);
1068 
1069         if (zarray_size(cluster) < td->qtp.min_cluster_pixels)
1070             continue;
1071 
1072         // a cluster should contain only boundary points around the
1073         // tag. it cannot be bigger than the whole screen. (Reject
1074         // large connected blobs that will be prohibitively slow to
1075         // fit quads to.) A typical point along an edge is added three
1076         // times (because it has 3 neighbors). The maximum perimeter
1077         // is 2w+2h.
1078         if (zarray_size(cluster) > 3*(2*w+2*h)) {
1079             continue;
1080         }
1081 
1082         struct quad quad;
1083         memset(&quad, 0, sizeof(struct quad));
1084 
1085         if (fit_quad(td, task->im, cluster, &quad, task->tag_width, task->normal_border, task->reversed_border)) {
1086             pthread_mutex_lock(&td->mutex);
1087             zarray_add(quads, &quad);
1088             pthread_mutex_unlock(&td->mutex);
1089         }
1090     }
1091 }
1092 
threshold(apriltag_detector_t * td,image_u8_t * im)1093 image_u8_t *threshold(apriltag_detector_t *td, image_u8_t *im)
1094 {
1095     int w = im->width, h = im->height, s = im->stride;
1096     assert(w < 32768);
1097     assert(h < 32768);
1098 
1099     image_u8_t *threshim = image_u8_create_alignment(w, h, s);
1100     assert(threshim->stride == s);
1101 
1102     // The idea is to find the maximum and minimum values in a
1103     // window around each pixel. If it's a contrast-free region
1104     // (max-min is small), don't try to binarize. Otherwise,
1105     // threshold according to (max+min)/2.
1106     //
1107     // Mark low-contrast regions with value 127 so that we can skip
1108     // future work on these areas too.
1109 
1110     // however, computing max/min around every pixel is needlessly
1111     // expensive. We compute max/min for tiles. To avoid artifacts
1112     // that arise when high-contrast features appear near a tile
1113     // edge (and thus moving from one tile to another results in a
1114     // large change in max/min value), the max/min values used for
1115     // any pixel are computed from all 3x3 surrounding tiles. Thus,
1116     // the max/min sampling area for nearby pixels overlap by at least
1117     // one tile.
1118     //
1119     // The important thing is that the windows be large enough to
1120     // capture edge transitions; the tag does not need to fit into
1121     // a tile.
1122 
1123     // XXX Tunable. Generally, small tile sizes--- so long as they're
1124     // large enough to span a single tag edge--- seem to be a winner.
1125     const int tilesz = 4;
1126 
1127     // the last (possibly partial) tiles along each row and column will
1128     // just use the min/max value from the last full tile.
1129     int tw = w / tilesz;
1130     int th = h / tilesz;
1131 
1132     uint8_t *im_max = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1133     uint8_t *im_min = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1134 
1135     // first, collect min/max statistics for each tile
1136     for (int ty = 0; ty < th; ty++) {
1137         for (int tx = 0; tx < tw; tx++) {
1138             uint8_t max = 0, min = 255;
1139 
1140             for (int dy = 0; dy < tilesz; dy++) {
1141 
1142                 for (int dx = 0; dx < tilesz; dx++) {
1143 
1144                     uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx];
1145                     if (v < min)
1146                         min = v;
1147                     if (v > max)
1148                         max = v;
1149                 }
1150             }
1151 
1152             im_max[ty*tw+tx] = max;
1153             im_min[ty*tw+tx] = min;
1154         }
1155     }
1156 
1157     // second, apply 3x3 max/min convolution to "blur" these values
1158     // over larger areas. This reduces artifacts due to abrupt changes
1159     // in the threshold value.
1160     if (1) {
1161         uint8_t *im_max_tmp = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1162         uint8_t *im_min_tmp = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1163 
1164         for (int ty = 0; ty < th; ty++) {
1165             for (int tx = 0; tx < tw; tx++) {
1166                 uint8_t max = 0, min = 255;
1167 
1168                 for (int dy = -1; dy <= 1; dy++) {
1169                     if (ty+dy < 0 || ty+dy >= th)
1170                         continue;
1171                     for (int dx = -1; dx <= 1; dx++) {
1172                         if (tx+dx < 0 || tx+dx >= tw)
1173                             continue;
1174 
1175                         uint8_t m = im_max[(ty+dy)*tw+tx+dx];
1176                         if (m > max)
1177                             max = m;
1178                         m = im_min[(ty+dy)*tw+tx+dx];
1179                         if (m < min)
1180                             min = m;
1181                     }
1182                 }
1183 
1184                 im_max_tmp[ty*tw + tx] = max;
1185                 im_min_tmp[ty*tw + tx] = min;
1186             }
1187         }
1188         free(im_max);
1189         free(im_min);
1190         im_max = im_max_tmp;
1191         im_min = im_min_tmp;
1192     }
1193 
1194     for (int ty = 0; ty < th; ty++) {
1195         for (int tx = 0; tx < tw; tx++) {
1196 
1197             int min = im_min[ty*tw + tx];
1198             int max = im_max[ty*tw + tx];
1199 
1200             // low contrast region? (no edges)
1201             if (max - min < td->qtp.min_white_black_diff) {
1202                 for (int dy = 0; dy < tilesz; dy++) {
1203                     int y = ty*tilesz + dy;
1204 
1205                     for (int dx = 0; dx < tilesz; dx++) {
1206                         int x = tx*tilesz + dx;
1207 
1208                         threshim->buf[y*s+x] = 127;
1209                     }
1210                 }
1211                 continue;
1212             }
1213 
1214             // otherwise, actually threshold this tile.
1215 
1216             // argument for biasing towards dark; specular highlights
1217             // can be substantially brighter than white tag parts
1218             uint8_t thresh = min + (max - min) / 2;
1219 
1220             for (int dy = 0; dy < tilesz; dy++) {
1221                 int y = ty*tilesz + dy;
1222 
1223                 for (int dx = 0; dx < tilesz; dx++) {
1224                     int x = tx*tilesz + dx;
1225 
1226                     uint8_t v = im->buf[y*s+x];
1227                     if (v > thresh)
1228                         threshim->buf[y*s+x] = 255;
1229                     else
1230                         threshim->buf[y*s+x] = 0;
1231                 }
1232             }
1233         }
1234     }
1235 
1236     // we skipped over the non-full-sized tiles above. Fix those now.
1237     if (1) {
1238         for (int y = 0; y < h; y++) {
1239 
1240             // what is the first x coordinate we need to process in this row?
1241 
1242             int x0;
1243 
1244             if (y >= th*tilesz) {
1245                 x0 = 0; // we're at the bottom; do the whole row.
1246             } else {
1247                 x0 = tw*tilesz; // we only need to do the right most part.
1248             }
1249 
1250             // compute tile coordinates and clamp.
1251             int ty = y / tilesz;
1252             if (ty >= th)
1253                 ty = th - 1;
1254 
1255             for (int x = x0; x < w; x++) {
1256                 int tx = x / tilesz;
1257                 if (tx >= tw)
1258                     tx = tw - 1;
1259 
1260                 int max = im_max[ty*tw + tx];
1261                 int min = im_min[ty*tw + tx];
1262                 int thresh = min + (max - min) / 2;
1263 
1264                 uint8_t v = im->buf[y*s+x];
1265                 if (v > thresh)
1266                     threshim->buf[y*s+x] = 255;
1267                 else
1268                     threshim->buf[y*s+x] = 0;
1269             }
1270         }
1271     }
1272 
1273     free(im_min);
1274     free(im_max);
1275 
1276     // this is a dilate/erode deglitching scheme that does not improve
1277     // anything as far as I can tell.
1278     if (0 || td->qtp.deglitch) {
1279         image_u8_t *tmp = image_u8_create(w, h);
1280 
1281         for (int y = 1; y + 1 < h; y++) {
1282             for (int x = 1; x + 1 < w; x++) {
1283                 uint8_t max = 0;
1284                 for (int dy = -1; dy <= 1; dy++) {
1285                     for (int dx = -1; dx <= 1; dx++) {
1286                         uint8_t v = threshim->buf[(y+dy)*s + x + dx];
1287                         if (v > max)
1288                             max = v;
1289                     }
1290                 }
1291                 tmp->buf[y*s+x] = max;
1292             }
1293         }
1294 
1295         for (int y = 1; y + 1 < h; y++) {
1296             for (int x = 1; x + 1 < w; x++) {
1297                 uint8_t min = 255;
1298                 for (int dy = -1; dy <= 1; dy++) {
1299                     for (int dx = -1; dx <= 1; dx++) {
1300                         uint8_t v = tmp->buf[(y+dy)*s + x + dx];
1301                         if (v < min)
1302                             min = v;
1303                     }
1304                 }
1305                 threshim->buf[y*s+x] = min;
1306             }
1307         }
1308 
1309         image_u8_destroy(tmp);
1310     }
1311 
1312     timeprofile_stamp(td->tp, "threshold");
1313 
1314     return threshim;
1315 }
1316 
1317 // basically the same as threshold(), but assumes the input image is a
1318 // bayer image. It collects statistics separately for each 2x2 block
1319 // of pixels. NOT WELL TESTED.
threshold_bayer(apriltag_detector_t * td,image_u8_t * im)1320 image_u8_t *threshold_bayer(apriltag_detector_t *td, image_u8_t *im)
1321 {
1322     int w = im->width, h = im->height, s = im->stride;
1323 
1324     image_u8_t *threshim = image_u8_create_alignment(w, h, s);
1325     assert(threshim->stride == s);
1326 
1327     int tilesz = 32;
1328     assert((tilesz & 1) == 0); // must be multiple of 2
1329 
1330     int tw = w/tilesz + 1;
1331     int th = h/tilesz + 1;
1332 
1333     uint8_t *im_max[4], *im_min[4];
1334     for (int i = 0; i < 4; i++) {
1335         im_max[i] = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1336         im_min[i] = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1337     }
1338 
1339     for (int ty = 0; ty < th; ty++) {
1340         for (int tx = 0; tx < tw; tx++) {
1341 
1342             uint8_t max[4] = { 0, 0, 0, 0};
1343             uint8_t min[4] = { 255, 255, 255, 255 };
1344 
1345             for (int dy = 0; dy < tilesz; dy++) {
1346                 if (ty*tilesz+dy >= h)
1347                     continue;
1348 
1349                 for (int dx = 0; dx < tilesz; dx++) {
1350                     if (tx*tilesz+dx >= w)
1351                         continue;
1352 
1353                     // which bayer element is this pixel?
1354                     int idx = (2*(dy&1) + (dx&1));
1355 
1356                     uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx];
1357                     if (v < min[idx])
1358                         min[idx] = v;
1359                     if (v > max[idx])
1360                         max[idx] = v;
1361                 }
1362             }
1363 
1364             for (int i = 0; i < 4; i++) {
1365                 im_max[i][ty*tw+tx] = max[i];
1366                 im_min[i][ty*tw+tx] = min[i];
1367             }
1368         }
1369     }
1370 
1371     for (int ty = 0; ty < th; ty++) {
1372         for (int tx = 0; tx < tw; tx++) {
1373 
1374             uint8_t max[4] = { 0, 0, 0, 0};
1375             uint8_t min[4] = { 255, 255, 255, 255 };
1376 
1377             for (int dy = -1; dy <= 1; dy++) {
1378                 if (ty+dy < 0 || ty+dy >= th)
1379                     continue;
1380                 for (int dx = -1; dx <= 1; dx++) {
1381                     if (tx+dx < 0 || tx+dx >= tw)
1382                         continue;
1383 
1384                     for (int i = 0; i < 4; i++) {
1385                         uint8_t m = im_max[i][(ty+dy)*tw+tx+dx];
1386                         if (m > max[i])
1387                             max[i] = m;
1388                         m = im_min[i][(ty+dy)*tw+tx+dx];
1389                         if (m < min[i])
1390                             min[i] = m;
1391                     }
1392                 }
1393             }
1394 
1395             // XXX CONSTANT
1396 //            if (max - min < 30)
1397 //                continue;
1398 
1399             // argument for biasing towards dark: specular highlights
1400             // can be substantially brighter than white tag parts
1401             uint8_t thresh[4];
1402             for (int i = 0; i < 4; i++) {
1403                 thresh[i] = min[i] + (max[i] - min[i]) / 2;
1404             }
1405 
1406             for (int dy = 0; dy < tilesz; dy++) {
1407                 int y = ty*tilesz + dy;
1408                 if (y >= h)
1409                     continue;
1410 
1411                 for (int dx = 0; dx < tilesz; dx++) {
1412                     int x = tx*tilesz + dx;
1413                     if (x >= w)
1414                         continue;
1415 
1416                     // which bayer element is this pixel?
1417                     int idx = (2*(y&1) + (x&1));
1418 
1419                     uint8_t v = im->buf[y*s+x];
1420                     threshim->buf[y*s+x] = v > thresh[idx];
1421                 }
1422             }
1423         }
1424     }
1425 
1426     for (int i = 0; i < 4; i++) {
1427         free(im_min[i]);
1428         free(im_max[i]);
1429     }
1430 
1431     timeprofile_stamp(td->tp, "threshold");
1432 
1433     return threshim;
1434 }
1435 
connected_components(apriltag_detector_t * td,image_u8_t * threshim,int w,int h,int ts)1436 unionfind_t* connected_components(apriltag_detector_t *td, image_u8_t* threshim, int w, int h, int ts) {
1437     unionfind_t *uf = unionfind_create(w * h);
1438 
1439     if (td->nthreads <= 1) {
1440         do_unionfind_first_line(uf, threshim, h, w, ts);
1441         for (int y = 1; y < h; y++) {
1442             do_unionfind_line2(uf, threshim, h, w, ts, y);
1443         }
1444     } else {
1445         do_unionfind_first_line(uf, threshim, h, w, ts);
1446 
1447         int sz = h;
1448         int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
1449         struct unionfind_task *tasks = (struct unionfind_task *)malloc(sizeof(struct unionfind_task)*(sz / chunksize + 1));
1450 
1451         int ntasks = 0;
1452 
1453         for (int i = 1; i < sz; i += chunksize) {
1454             // each task will process [y0, y1). Note that this attaches
1455             // each cell to the right and down, so row y1 *is* potentially modified.
1456             //
1457             // for parallelization, make sure that each task doesn't touch rows
1458             // used by another thread.
1459             tasks[ntasks].y0 = i;
1460             tasks[ntasks].y1 = imin(sz, i + chunksize - 1);
1461             tasks[ntasks].h = h;
1462             tasks[ntasks].w = w;
1463             tasks[ntasks].s = ts;
1464             tasks[ntasks].uf = uf;
1465             tasks[ntasks].im = threshim;
1466 
1467             workerpool_add_task(td->wp, do_unionfind_task2, &tasks[ntasks]);
1468             ntasks++;
1469         }
1470 
1471         workerpool_run(td->wp);
1472 
1473         // XXX stitch together the different chunks.
1474         for (int i = 1; i < ntasks; i++) {
1475             do_unionfind_line2(uf, threshim, h, w, ts, tasks[i].y0 - 1);
1476         }
1477 
1478         free(tasks);
1479     }
1480     return uf;
1481 }
1482 
do_gradient_clusters(image_u8_t * threshim,int ts,int y0,int y1,int w,int nclustermap,unionfind_t * uf,zarray_t * clusters)1483 zarray_t* do_gradient_clusters(image_u8_t* threshim, int ts, int y0, int y1, int w, int nclustermap, unionfind_t* uf, zarray_t* clusters) {
1484     struct uint64_zarray_entry **clustermap = (struct uint64_zarray_entry **)calloc(nclustermap, sizeof(struct uint64_zarray_entry*));
1485 
1486     int mem_chunk_size = 2048;
1487     struct uint64_zarray_entry** mem_pools = (struct uint64_zarray_entry**)malloc(sizeof(struct uint64_zarray_entry *)*2*nclustermap/mem_chunk_size);
1488     int mem_pool_idx = 0;
1489     int mem_pool_loc = 0;
1490     mem_pools[mem_pool_idx] = (struct uint64_zarray_entry *)calloc(mem_chunk_size, sizeof(struct uint64_zarray_entry));
1491 
1492     for (int y = y0; y < y1; y++) {
1493         for (int x = 1; x < w-1; x++) {
1494 
1495             uint8_t v0 = threshim->buf[y*ts + x];
1496             if (v0 == 127)
1497                 continue;
1498 
1499             // XXX don't query this until we know we need it?
1500             uint64_t rep0 = unionfind_get_representative(uf, y*w + x);
1501             if (unionfind_get_set_size(uf, rep0) < 25) {
1502                 continue;
1503             }
1504 
1505             // whenever we find two adjacent pixels such that one is
1506             // white and the other black, we add the point half-way
1507             // between them to a cluster associated with the unique
1508             // ids of the white and black regions.
1509             //
1510             // We additionally compute the gradient direction (i.e., which
1511             // direction was the white pixel?) Note: if (v1-v0) == 255, then
1512             // (dx,dy) points towards the white pixel. if (v1-v0) == -255, then
1513             // (dx,dy) points towards the black pixel. p.gx and p.gy will thus
1514             // be -255, 0, or 255.
1515             //
1516             // Note that any given pixel might be added to multiple
1517             // different clusters. But in the common case, a given
1518             // pixel will be added multiple times to the same cluster,
1519             // which increases the size of the cluster and thus the
1520             // computational costs.
1521             //
1522             // A possible optimization would be to combine entries
1523             // within the same cluster.
1524 
1525 #define DO_CONN(dx, dy)                                                 \
1526             if (1) {                                                    \
1527                 uint8_t v1 = threshim->buf[(y + dy)*ts + x + dx];       \
1528                                                                         \
1529                 if (v0 + v1 == 255) {                                   \
1530                     uint64_t rep1 = unionfind_get_representative(uf, (y + dy)*w + x + dx); \
1531                     if (unionfind_get_set_size(uf, rep1) > 24) {        \
1532                         uint64_t clusterid;                                 \
1533                         if (rep0 < rep1)                                    \
1534                             clusterid = (rep1 << 32) + rep0;                \
1535                         else                                                \
1536                             clusterid = (rep0 << 32) + rep1;                \
1537                                                                             \
1538                         /* XXX lousy hash function */                       \
1539                         uint32_t clustermap_bucket = u64hash_2(clusterid) % nclustermap; \
1540                         struct uint64_zarray_entry *entry = clustermap[clustermap_bucket]; \
1541                         while (entry && entry->id != clusterid) {           \
1542                             entry = entry->next;                            \
1543                         }                                                   \
1544                                                                             \
1545                         if (!entry) {                                       \
1546                             if (mem_pool_loc == mem_chunk_size) {           \
1547                                 mem_pool_loc = 0;                           \
1548                                 mem_pool_idx++;                             \
1549                                 mem_pools[mem_pool_idx] = (struct uint64_zarray_entry *)calloc(mem_chunk_size, sizeof(struct uint64_zarray_entry)); \
1550                             }                                               \
1551                             entry = mem_pools[mem_pool_idx] + mem_pool_loc; \
1552                             mem_pool_loc++;                                 \
1553                                                                             \
1554                             entry->id = clusterid;                          \
1555                             entry->cluster = zarray_create(sizeof(struct pt)); \
1556                             entry->next = clustermap[clustermap_bucket];    \
1557                             clustermap[clustermap_bucket] = entry;          \
1558                         }                                                   \
1559  			    \
1560                         struct pt p = { (uint16_t)(2*x + dx), (uint16_t)(2*y + dy), (int16_t)(dx*((int) v1-v0)), (int16_t)(dy*((int) v1-v0)), 0.f}; \
1561                         zarray_add(entry->cluster, &p);                     \
1562                     }                                                   \
1563                 }                                                       \
1564             }
1565 
1566             // do 4 connectivity. NB: Arguments must be [-1, 1] or we'll overflow .gx, .gy
1567             DO_CONN(1, 0);
1568             DO_CONN(0, 1);
1569 
1570             // do 8 connectivity
1571             DO_CONN(-1, 1);
1572             DO_CONN(1, 1);
1573         }
1574     }
1575 #undef DO_CONN
1576 
1577     for (int i = 0; i < nclustermap; i++) {
1578         int start = zarray_size(clusters);
1579         for (struct uint64_zarray_entry *entry = clustermap[i]; entry; entry = entry->next) {
1580             struct cluster_hash* cluster_hash = (struct cluster_hash*)malloc(sizeof(struct cluster_hash));
1581             cluster_hash->hash = u64hash_2(entry->id) % nclustermap;
1582             cluster_hash->id = entry->id;
1583             cluster_hash->data = entry->cluster;
1584             zarray_add(clusters, &cluster_hash);
1585         }
1586         int end = zarray_size(clusters);
1587 
1588         // Do a quick bubblesort on the secondary key.
1589         int n = end - start;
1590         for (int j = 0; j < n - 1; j++) {
1591             for (int k = 0; k < n - j - 1; k++) {
1592                 struct cluster_hash* hash1;
1593                 struct cluster_hash* hash2;
1594                 zarray_get(clusters, start + k, &hash1);
1595                 zarray_get(clusters, start + k + 1, &hash2);
1596                 if (hash1->id > hash2->id) {
1597                     struct cluster_hash tmp = *hash2;
1598                     *hash2 = *hash1;
1599                     *hash1 = tmp;
1600                 }
1601             }
1602         }
1603     }
1604     for (int i = 0; i <= mem_pool_idx; i++) {
1605         free(mem_pools[i]);
1606     }
1607     free(mem_pools);
1608     free(clustermap);
1609 
1610     return clusters;
1611 }
1612 
do_cluster_task(void * p)1613 static void do_cluster_task(void *p)
1614 {
1615     struct cluster_task *task = (struct cluster_task*) p;
1616 
1617     do_gradient_clusters(task->im, task->s, task->y0, task->y1, task->w, task->nclustermap, task->uf, task->clusters);
1618 }
1619 
merge_clusters(zarray_t * c1,zarray_t * c2)1620 zarray_t* merge_clusters(zarray_t* c1, zarray_t* c2) {
1621     zarray_t* ret = zarray_create(sizeof(struct cluster_hash*));
1622     zarray_ensure_capacity(ret, zarray_size(c1) + zarray_size(c2));
1623 
1624     int i1 = 0;
1625     int i2 = 0;
1626     int l1 = zarray_size(c1);
1627     int l2 = zarray_size(c2);
1628 
1629     while (i1 < l1 && i2 < l2) {
1630         struct cluster_hash* h1;
1631         struct cluster_hash* h2;
1632         zarray_get(c1, i1, &h1);
1633         zarray_get(c2, i2, &h2);
1634 
1635         if (h1->hash == h2->hash && h1->id == h2->id) {
1636             zarray_add_all(h1->data, h2->data);
1637             zarray_add(ret, &h1);
1638             i1++;
1639             i2++;
1640             zarray_destroy(h2->data);
1641             free(h2);
1642         } else if (h2->hash < h1->hash || (h2->hash == h1->hash && h2->id < h1->id)) {
1643             zarray_add(ret, &h2);
1644             i2++;
1645         } else {
1646             zarray_add(ret, &h1);
1647             i1++;
1648         }
1649     }
1650 
1651     for (; i1 < l1; i1++) {
1652         struct cluster_hash* h1;
1653         zarray_get(c1, i1, &h1);
1654         zarray_add(ret, &h1);
1655     }
1656 
1657     for (; i2 < l2; i2++) {
1658         struct cluster_hash* h2;
1659         zarray_get(c2, i2, &h2);
1660         zarray_add(ret, &h2);
1661     }
1662 
1663     zarray_destroy(c1);
1664     zarray_destroy(c2);
1665 
1666     return ret;
1667 }
1668 
gradient_clusters(apriltag_detector_t * td,image_u8_t * threshim,int w,int h,int ts,unionfind_t * uf)1669 zarray_t* gradient_clusters(apriltag_detector_t *td, image_u8_t* threshim, int w, int h, int ts, unionfind_t* uf) {
1670     zarray_t* clusters;
1671     int nclustermap = 0.2*w*h;
1672 
1673     int sz = h - 1;
1674     int chunksize = 1 + sz / td->nthreads;
1675     struct cluster_task *tasks = (struct cluster_task *)malloc(sizeof(struct cluster_task)*(sz / chunksize + 1));
1676 
1677     int ntasks = 0;
1678 
1679     for (int i = 1; i < sz; i += chunksize) {
1680         // each task will process [y0, y1). Note that this processes
1681         // each cell to the right and down.
1682         tasks[ntasks].y0 = i;
1683         tasks[ntasks].y1 = imin(sz, i + chunksize);
1684         tasks[ntasks].w = w;
1685         tasks[ntasks].s = ts;
1686         tasks[ntasks].uf = uf;
1687         tasks[ntasks].im = threshim;
1688         tasks[ntasks].nclustermap = nclustermap/(sz / chunksize + 1);
1689         tasks[ntasks].clusters = zarray_create(sizeof(struct cluster_hash*));
1690 
1691         workerpool_add_task(td->wp, do_cluster_task, &tasks[ntasks]);
1692         ntasks++;
1693     }
1694 
1695     workerpool_run(td->wp);
1696 
1697     zarray_t** clusters_list = (zarray_t**)malloc(sizeof(zarray_t *)*ntasks);
1698     for (int i = 0; i < ntasks; i++) {
1699         clusters_list[i] = tasks[i].clusters;
1700     }
1701 
1702     int length = ntasks;
1703     while (length > 1) {
1704         int write = 0;
1705         for (int i = 0; i < length - 1; i += 2) {
1706             clusters_list[write] = merge_clusters(clusters_list[i], clusters_list[i + 1]);
1707             write++;
1708         }
1709 
1710         if (length % 2) {
1711             clusters_list[write] = clusters_list[length - 1];
1712         }
1713 
1714         length = (length >> 1) + length % 2;
1715     }
1716 
1717     clusters = zarray_create(sizeof(zarray_t*));
1718     zarray_ensure_capacity(clusters, zarray_size(clusters_list[0]));
1719     for (int i = 0; i < zarray_size(clusters_list[0]); i++) {
1720         struct cluster_hash* h;
1721         zarray_get(clusters_list[0], i, &h);
1722         zarray_add(clusters, &h->data);
1723         free(h);
1724     }
1725     zarray_destroy(clusters_list[0]);
1726     free(clusters_list);
1727     free(tasks);
1728     return clusters;
1729 }
1730 
fit_quads(apriltag_detector_t * td,int w,int h,zarray_t * clusters,image_u8_t * im)1731 zarray_t* fit_quads(apriltag_detector_t *td, int w, int h, zarray_t* clusters, image_u8_t* im) {
1732     zarray_t *quads = zarray_create(sizeof(struct quad));
1733 
1734     bool normal_border = false;
1735     bool reversed_border = false;
1736     int min_tag_width = 1000000;
1737     for (int i = 0; i < zarray_size(td->tag_families); i++) {
1738         apriltag_family_t* family;
1739         zarray_get(td->tag_families, i, &family);
1740         if (family->width_at_border < min_tag_width) {
1741             min_tag_width = family->width_at_border;
1742         }
1743         normal_border |= !family->reversed_border;
1744         reversed_border |= family->reversed_border;
1745     }
1746     min_tag_width /= td->quad_decimate;
1747     if (min_tag_width < 3) {
1748         min_tag_width = 3;
1749     }
1750 
1751     int sz = zarray_size(clusters);
1752     int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
1753     struct quad_task *tasks = (struct quad_task *)malloc(sizeof(struct quad_task)*(sz / chunksize + 1));
1754 
1755     int ntasks = 0;
1756     for (int i = 0; i < sz; i += chunksize) {
1757         tasks[ntasks].td = td;
1758         tasks[ntasks].cidx0 = i;
1759         tasks[ntasks].cidx1 = imin(sz, i + chunksize);
1760         tasks[ntasks].h = h;
1761         tasks[ntasks].w = w;
1762         tasks[ntasks].quads = quads;
1763         tasks[ntasks].clusters = clusters;
1764         tasks[ntasks].im = im;
1765         tasks[ntasks].tag_width = min_tag_width;
1766         tasks[ntasks].normal_border = normal_border;
1767         tasks[ntasks].reversed_border = reversed_border;
1768 
1769         workerpool_add_task(td->wp, do_quad_task, &tasks[ntasks]);
1770         ntasks++;
1771     }
1772 
1773     workerpool_run(td->wp);
1774 
1775     free(tasks);
1776 
1777     return quads;
1778 }
1779 
apriltag_quad_thresh(apriltag_detector_t * td,image_u8_t * im)1780 zarray_t *apriltag_quad_thresh(apriltag_detector_t *td, image_u8_t *im)
1781 {
1782     ////////////////////////////////////////////////////////
1783     // step 1. threshold the image, creating the edge image.
1784 
1785     int w = im->width, h = im->height;
1786 
1787     image_u8_t *threshim = threshold(td, im);
1788     int ts = threshim->stride;
1789 
1790     if (td->debug)
1791         image_u8_write_pnm(threshim, "debug_threshold.pnm");
1792 
1793 
1794     ////////////////////////////////////////////////////////
1795     // step 2. find connected components.
1796     unionfind_t* uf = connected_components(td, threshim, w, h, ts);
1797 
1798     // make segmentation image.
1799     if (td->debug) {
1800         image_u8x3_t *d = image_u8x3_create(w, h);
1801 
1802         uint32_t *colors = (uint32_t*) calloc(w*h, sizeof(*colors));
1803 
1804         for (int y = 0; y < h; y++) {
1805             for (int x = 0; x < w; x++) {
1806                 uint32_t v = unionfind_get_representative(uf, y*w+x);
1807 
1808                 if (unionfind_get_set_size(uf, v) < td->qtp.min_cluster_pixels)
1809                     continue;
1810 
1811                 uint32_t color = colors[v];
1812                 uint8_t r = color >> 16,
1813                     g = color >> 8,
1814                     b = color;
1815 
1816                 if (color == 0) {
1817                     const int bias = 50;
1818                     r = bias + (random() % (200-bias));
1819                     g = bias + (random() % (200-bias));
1820                     b = bias + (random() % (200-bias));
1821                     colors[v] = (r << 16) | (g << 8) | b;
1822                 }
1823 
1824                 d->buf[y*d->stride + 3*x + 0] = r;
1825                 d->buf[y*d->stride + 3*x + 1] = g;
1826                 d->buf[y*d->stride + 3*x + 2] = b;
1827             }
1828         }
1829 
1830         free(colors);
1831 
1832         image_u8x3_write_pnm(d, "debug_segmentation.pnm");
1833         image_u8x3_destroy(d);
1834     }
1835 
1836 
1837     timeprofile_stamp(td->tp, "unionfind");
1838 
1839     zarray_t* clusters = gradient_clusters(td, threshim, w, h, ts, uf);
1840 
1841     if (td->debug) {
1842         image_u8x3_t *d = image_u8x3_create(w, h);
1843 
1844         for (int i = 0; i < zarray_size(clusters); i++) {
1845             zarray_t *cluster;
1846             zarray_get(clusters, i, &cluster);
1847 
1848             uint32_t r, g, b;
1849 
1850             if (1) {
1851                 const int bias = 50;
1852                 r = bias + (random() % (200-bias));
1853                 g = bias + (random() % (200-bias));
1854                 b = bias + (random() % (200-bias));
1855             }
1856 
1857             for (int j = 0; j < zarray_size(cluster); j++) {
1858                 struct pt *p;
1859                 zarray_get_volatile(cluster, j, &p);
1860 
1861                 int x = p->x / 2;
1862                 int y = p->y / 2;
1863                 d->buf[y*d->stride + 3*x + 0] = r;
1864                 d->buf[y*d->stride + 3*x + 1] = g;
1865                 d->buf[y*d->stride + 3*x + 2] = b;
1866             }
1867         }
1868 
1869         image_u8x3_write_pnm(d, "debug_clusters.pnm");
1870         image_u8x3_destroy(d);
1871     }
1872 
1873 
1874     image_u8_destroy(threshim);
1875     timeprofile_stamp(td->tp, "make clusters");
1876 
1877     ////////////////////////////////////////////////////////
1878     // step 3. process each connected component.
1879 
1880     zarray_t* quads = fit_quads(td, w, h, clusters, im);
1881 
1882     if (td->debug) {
1883         FILE *f = fopen("debug_lines.ps", "w");
1884         fprintf(f, "%%!PS\n\n");
1885 
1886         image_u8_t *im2 = image_u8_copy(im);
1887         image_u8_darken(im2);
1888         image_u8_darken(im2);
1889 
1890         // assume letter, which is 612x792 points.
1891         double scale = (std::min)(612.0/im->width, 792.0/im2->height);
1892         fprintf(f, "%.15f %.15f scale\n", scale, scale);
1893         fprintf(f, "0 %d translate\n", im2->height);
1894         fprintf(f, "1 -1 scale\n");
1895 
1896         postscript_image(f, im2);
1897 
1898         image_u8_destroy(im2);
1899 
1900         for (int i = 0; i < zarray_size(quads); i++) {
1901             struct quad *q;
1902             zarray_get_volatile(quads, i, &q);
1903 
1904             float rgb[3];
1905             int bias = 100;
1906 
1907             for (int i = 0; i < 3; i++)
1908                 rgb[i] = bias + (random() % (255-bias));
1909 
1910             fprintf(f, "%f %f %f setrgbcolor\n", rgb[0]/255.0f, rgb[1]/255.0f, rgb[2]/255.0f);
1911             fprintf(f, "%.15f %.15f moveto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto stroke\n",
1912                     q->p[0][0], q->p[0][1],
1913                     q->p[1][0], q->p[1][1],
1914                     q->p[2][0], q->p[2][1],
1915                     q->p[3][0], q->p[3][1],
1916                     q->p[0][0], q->p[0][1]);
1917         }
1918 
1919         fclose(f);
1920     }
1921 
1922     timeprofile_stamp(td->tp, "fit quads to clusters");
1923 
1924     unionfind_destroy(uf);
1925 
1926     for (int i = 0; i < zarray_size(clusters); i++) {
1927         zarray_t *cluster;
1928         zarray_get(clusters, i, &cluster);
1929         zarray_destroy(cluster);
1930     }
1931     zarray_destroy(clusters);
1932 
1933     return quads;
1934 }
1935