1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 This software was developed in the APRIL Robotics Lab under the
4 direction of Edwin Olson, ebolson@umich.edu. This software may be
5 available under alternative licensing terms; contact the address above.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright notice, this
9 list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation
12 and/or other materials provided with the distribution.
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 The views and conclusions contained in the software and documentation are those
24 of the authors and should not be interpreted as representing official policies,
25 either expressed or implied, of the Regents of The University of Michigan.
26 */
27
28 // limitation: image size must be <32768 in width and height. This is
29 // because we use a fixed-point 16 bit integer representation with one
30 // fractional bit.
31 #include <math.h>
32 #include <assert.h>
// To ensure UINT32_MAX and INT32_MAX are defined on CentOS and Ubuntu 12.04,
// we define __STDC_LIMIT_MACROS before including <stdint.h>.
34 #define __STDC_LIMIT_MACROS
35 #include <string.h>
36 #include <stdio.h>
37 #include <stdint.h>
38 #include <algorithm>
39
40 #include "apriltag.h"
41 #include "common/image_u8x3.h"
42 #include "common/zarray.h"
43 #include "common/zhash.h"
44 #include "common/unionfind.h"
45 #include "common/timeprofile.h"
46 #include "common/zmaxheap.h"
47 #include "common/postscript_utils.h"
48 #include "common/math_util.h"
49
#ifdef _WIN32
// Windows has no POSIX random(); fall back to rand().
// NOTE(review): rand() may only produce values up to RAND_MAX (as low as
// 32767 on MSVC), a narrower range than POSIX random() -- confirm callers
// only need coarse randomness.
static inline long int random(void)
{
    return rand();
}
#endif
56
// Multiplicative (Knuth-style) hash of a 64-bit key: multiply by a large
// constant near 2^32/phi and keep the high 32 bits of the low 64-bit product.
static inline uint32_t u64hash_2(uint64_t x) {
    const uint64_t multiplier = 2654435761U;
    uint64_t product = multiplier * x;
    return (uint32_t)(product >> 32);
}
60
// Chained hash-table entry associating a 64-bit key with a zarray of points.
// NOTE(review): not referenced in this chunk -- presumably used by the
// clustering pass; confirm against the rest of the file.
struct uint64_zarray_entry
{
    uint64_t id;        // hash key
    zarray_t *cluster;  // associated point list

    struct uint64_zarray_entry *next;  // collision chain
};
68
69 #ifndef M_PI
70 # define M_PI 3.141592653589793238462643383279502884196
71 #endif
72
// A boundary point between a light and a dark region.
struct pt
{
    // Note: these represent 2*actual value.
    uint16_t x, y;    // fixed-point coordinates, one fractional bit

    int16_t gx, gy;   // local image gradient at this point (see fit_quad)

    float slope;      // pseudo-angle about the cluster center, set by fit_quad
};
81
// Work item for one thread of the connected-components pass:
// process image rows [y0, y1).
struct unionfind_task
{
    int y0, y1;       // row range, y1 exclusive (see do_unionfind_task2)
    int w, h, s;      // image width, height, stride
    unionfind_t *uf;  // shared union-find structure
    image_u8_t *im;   // input image; pixels equal to 127 are skipped
};
89
// Work item for one thread of the quad-fitting pass: fit quads to the
// clusters with indices [cidx0, cidx1).
struct quad_task
{
    zarray_t *clusters;       // all point clusters
    int cidx0, cidx1; // [cidx0, cidx1)
    zarray_t *quads;          // shared output list; guarded by td->mutex
    apriltag_detector_t *td;  // detector parameters (qtp thresholds)
    int w, h;                 // image dimensions, used to bound cluster size

    image_u8_t *im;           // source image for gradient-weighted line fits
    int tag_width;            // used to reject quads too small to be a tag
    bool normal_border;       // accept normal border polarity
    bool reversed_border;     // accept reversed border polarity
};
103
104
// Work item for one thread of the clustering pass over rows [y0, y1).
// NOTE(review): the consuming code is not in this chunk; field roles below
// are inferred from names -- verify against the caller.
struct cluster_task
{
    int y0;              // first row (inclusive)
    int y1;              // last row (presumably exclusive -- confirm)
    int w;               // image width
    int s;               // image stride
    int nclustermap;     // presumably the size of the cluster hash map
    unionfind_t* uf;     // shared union-find with connected components
    image_u8_t* im;      // input image
    zarray_t* clusters;  // output clusters
};
116
// Candidate operation for quad_segment_agg: remove vertex 'i', connecting
// its neighbors directly at line-fit cost 'err'.
struct remove_vertex
{
    int i;           // which vertex to remove?
    int left, right; // left vertex, right vertex

    double err;      // line-fit error of segment [left, right] if 'i' is removed
};
124
// One entry per boundary point; is_vertex marks points still serving as
// polygon corners, with left/right linking neighboring corners into a ring.
struct segment
{
    int is_vertex;

    // always greater than zero, but right can be > size, which denotes
    // a wrap around back to the beginning of the points. and left < right.
    int left, right;
};
133
// Cumulative weighted moments of the boundary points up to some index,
// enabling O(1) line fits over any contiguous range (see fit_line).
struct line_fit_pt
{
    double Mx, My;        // sums of W*x and W*y
    double Mxx, Myy, Mxy; // sums of W*x*x, W*y*y, W*x*y
    double W; // total weight
};
140
// Pairing of a precomputed hash, a 64-bit cluster id, and its point list.
// NOTE(review): not referenced in this chunk -- semantics inferred from
// field names; confirm against the clustering code.
struct cluster_hash
{
    uint32_t hash;
    uint64_t id;
    zarray_t* data;
};
147
148
// lfps contains *cumulative* moments for N points, with
// index j reflecting points [0,j] (inclusive).
//
// fit a line to the points [i0, i1] (inclusive). i0, i1 are both in
// [0, sz); if i1 < i0, we treat this as a wrap around.
//
// Outputs (each may be NULL if not wanted):
//   lineparm: {Ex, Ey, nx, ny} -- the weighted centroid and a unit
//             NORMAL vector of the fitted line.
//   err:      sum of squared perpendicular errors (N * smallest eigenvalue
//             of the covariance matrix).
//   mse:      mean squared error (the smallest eigenvalue itself).
void fit_line(struct line_fit_pt *lfps, int sz, int i0, int i1, double *lineparm, double *err, double *mse)
{
    assert(i0 != i1);
    assert(i0 >= 0 && i1 >= 0 && i0 < sz && i1 < sz);

    double Mx, My, Mxx, Myy, Mxy, W;
    int N; // how many points are included in the set?

    if (i0 < i1) {
        // contiguous range: cumulative sums at i1 minus everything before i0.
        N = i1 - i0 + 1;

        Mx = lfps[i1].Mx;
        My = lfps[i1].My;
        Mxx = lfps[i1].Mxx;
        Mxy = lfps[i1].Mxy;
        Myy = lfps[i1].Myy;
        W = lfps[i1].W;

        if (i0 > 0) {
            Mx -= lfps[i0-1].Mx;
            My -= lfps[i0-1].My;
            Mxx -= lfps[i0-1].Mxx;
            Mxy -= lfps[i0-1].Mxy;
            Myy -= lfps[i0-1].Myy;
            W -= lfps[i0-1].W;
        }

    } else {
        // i0 > i1, e.g. [15, 2]. Wrap around.
        assert(i0 > 0);

        // tail of the array: points [i0, sz-1]
        Mx = lfps[sz-1].Mx - lfps[i0-1].Mx;
        My = lfps[sz-1].My - lfps[i0-1].My;
        Mxx = lfps[sz-1].Mxx - lfps[i0-1].Mxx;
        Mxy = lfps[sz-1].Mxy - lfps[i0-1].Mxy;
        Myy = lfps[sz-1].Myy - lfps[i0-1].Myy;
        W = lfps[sz-1].W - lfps[i0-1].W;

        // plus the head of the array: points [0, i1]
        Mx += lfps[i1].Mx;
        My += lfps[i1].My;
        Mxx += lfps[i1].Mxx;
        Mxy += lfps[i1].Mxy;
        Myy += lfps[i1].Myy;
        W += lfps[i1].W;

        N = sz - i0 + i1 + 1;
    }

    assert(N >= 2);

    // weighted means and (co)variances
    double Ex = Mx / W;
    double Ey = My / W;
    double Cxx = Mxx / W - Ex*Ex;
    double Cxy = Mxy / W - Ex*Ey;
    double Cyy = Myy / W - Ey*Ey;

    //if (1) {
    //    // on iOS about 5% of total CPU spent in these trig functions.
    //    // 85 ms per frame on 5S, example.pnm
    //    //
    //    // XXX this was using the double-precision atan2. Was there a case where
    //    // we needed that precision? Seems doubtful.
    //    double normal_theta = .5 * atan2f(-2*Cxy, (Cyy - Cxx));
    //    nx_old = cosf(normal_theta);
    //    ny_old = sinf(normal_theta);
    //}

    // Instead of using the above cos/sin method, pose it as an eigenvalue problem.
    // NOTE(review): sqrtf is applied to double operands here and below,
    // deliberately trading precision for speed (see comment block above).
    double eig_small = 0.5*(Cxx + Cyy - sqrtf((Cxx - Cyy)*(Cxx - Cyy) + 4*Cxy*Cxy));

    if (lineparm) {
        lineparm[0] = Ex;
        lineparm[1] = Ey;

        // eigenvector of the larger eigenvalue = normal to the fitted line.
        // Two candidate (unnormalized) eigenvectors; pick the one with the
        // larger magnitude for numerical stability.
        double eig = 0.5*(Cxx + Cyy + sqrtf((Cxx - Cyy)*(Cxx - Cyy) + 4*Cxy*Cxy));
        double nx1 = Cxx - eig;
        double ny1 = Cxy;
        double M1 = nx1*nx1 + ny1*ny1;
        double nx2 = Cxy;
        double ny2 = Cyy - eig;
        double M2 = nx2*nx2 + ny2*ny2;

        double nx, ny, M;
        if (M1 > M2) {
            nx = nx1;
            ny = ny1;
            M = M1;
        } else {
            nx = nx2;
            ny = ny2;
            M = M2;
        }

        double length = sqrtf(M);
        lineparm[2] = nx/length;
        lineparm[3] = ny/length;
    }

    // sum of squared errors =
    //
    // SUM_i ((p_x - ux)*nx + (p_y - uy)*ny)^2
    // SUM_i nx*nx*(p_x - ux)^2 + 2nx*ny(p_x -ux)(p_y-uy) + ny*ny*(p_y-uy)*(p_y-uy)
    //  nx*nx*SUM_i((p_x -ux)^2) + 2nx*ny*SUM_i((p_x-ux)(p_y-uy)) + ny*ny*SUM_i((p_y-uy)^2)
    //
    //  nx*nx*N*Cxx + 2nx*ny*N*Cxy + ny*ny*N*Cyy
    
    // sum of squared errors
    if (err)
        *err = N*eig_small;

    // mean squared error
    if (mse)
        *mse = eig_small;
}
268
pt_compare_angle(struct pt * a,struct pt * b)269 float pt_compare_angle(struct pt *a, struct pt *b) {
270 return a->slope - b->slope;
271 }
272
// qsort comparator: sorts doubles in DESCENDING order.
//
// Returns -1 if *_a should come first (is larger), 1 if *_b should,
// and 0 when equal. Fix: the original returned -1 for equal inputs,
// which violates the consistency requirement the C standard places on
// qsort comparators (comparing equal elements must yield 0 regardless
// of argument order).
int err_compare_descending(const void *_a, const void *_b)
{
    const double *a = (const double *)_a;
    const double *b = (const double *)_b;

    if (*a > *b)
        return -1;
    if (*a < *b)
        return 1;
    return 0;
}
280
281 /*
282
283 1. Identify A) white points near a black point and B) black points near a white point.
284
285 2. Find the connected components within each of the classes above,
286 yielding clusters of "white-near-black" and
287 "black-near-white". (These two classes are kept separate). Each
288 segment has a unique id.
289
290 3. For every pair of "white-near-black" and "black-near-white"
291 clusters, find the set of points that are in one and adjacent to the
292 other. In other words, a "boundary" layer between the two
293 clusters. (This is actually performed by iterating over the pixels,
294 rather than pairs of clusters.) Critically, this helps keep nearby
295 edges from becoming connected.
296 */
// Attempt to segment the ordered boundary points of 'cluster' into four
// line segments (a quad) by searching corner candidates at local maxima
// of the windowed line-fit error.
//
// lfps:    cumulative moments for the cluster points (see compute_lfps).
// indices: on success, receives the four corner indices into the cluster.
//
// Returns 1 if a quad with acceptable line-fit error was found, else 0.
int quad_segment_maxima(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
{
    int sz = zarray_size(cluster);

    // ksz: when fitting points, how many points on either side do we consider?
    // (actual "kernel" width is 2ksz).
    //
    // This value should be about: 0.5 * (points along shortest edge).
    //
    // If all edges were equally-sized, that would give a value of
    // sz/8. We make it somewhat smaller to account for tags at high
    // aspects.

    // XXX Tunable. Maybe make a multiple of JPEG block size to increase robustness
    // to JPEG compression artifacts?
    int ksz = imin(20, sz / 12);

    // can't fit a quad if there are too few points.
    if (ksz < 2)
        return 0;

    // error of the line fit in a 2*ksz window centered at each point
    double *errs = (double *)malloc(sizeof(double)*sz);

    for (int i = 0; i < sz; i++) {
        fit_line(lfps, sz, (i + sz - ksz) % sz, (i + ksz) % sz, NULL, &errs[i], NULL);
    }

    // apply a low-pass filter to errs
    if (1) {
        double *y = (double *)malloc(sizeof(double)*sz);

        // how much filter to apply?

        // XXX Tunable
        double sigma = 1; // was 3

        // cutoff = exp(-j*j/(2*sigma*sigma));
        // log(cutoff) = -j*j / (2*sigma*sigma)
        // log(cutoff)*2*sigma*sigma = -j*j;

        // how big a filter should we use? We make our kernel big
        // enough such that we represent any values larger than
        // 'cutoff'.

        // XXX Tunable (though not super useful to change)
        double cutoff = 0.05;
        int fsz = sqrt(-log(cutoff)*2*sigma*sigma) + 1;
        fsz = 2*fsz + 1;

        // For default values of cutoff = 0.05, sigma = 3,
        // we have fsz = 17.
        float *f = (float *)malloc(sizeof(float)*fsz);

        for (int i = 0; i < fsz; i++) {
            int j = i - fsz / 2;
            f[i] = exp(-j*j/(2*sigma*sigma));
        }

        // circular convolution of errs with the Gaussian kernel
        for (int iy = 0; iy < sz; iy++) {
            double acc = 0;

            for (int i = 0; i < fsz; i++) {
                acc += errs[(iy + i - fsz / 2 + sz) % sz] * f[i];
            }
            y[iy] = acc;
        }

        memcpy(errs, y, sizeof(double)*sz);
        free(y);
        free(f);
    }

    // corner candidates: local maxima of the smoothed error
    int *maxima = (int *)malloc(sizeof(int)*sz);
    double *maxima_errs = (double *)malloc(sizeof(double)*sz);
    int nmaxima = 0;

    for (int i = 0; i < sz; i++) {
        if (errs[i] > errs[(i+1)%sz] && errs[i] > errs[(i+sz-1)%sz]) {
            maxima[nmaxima] = i;
            maxima_errs[nmaxima] = errs[i];
            nmaxima++;
        }
    }
    free(errs);

    // if we didn't get at least 4 maxima, we can't fit a quad.
    if (nmaxima < 4) {
        // Fix: the original leaked both arrays on this early return.
        free(maxima);
        free(maxima_errs);
        return 0;
    }

    // select only the best maxima if we have too many
    int max_nmaxima = td->qtp.max_nmaxima;

    if (nmaxima > max_nmaxima) {
        double *maxima_errs_copy = (double *)malloc(sizeof(double)*nmaxima);
        memcpy(maxima_errs_copy, maxima_errs, sizeof(double)*nmaxima);

        // throw out all but the best handful of maxima. Sorts descending.
        qsort(maxima_errs_copy, nmaxima, sizeof(double), err_compare_descending);

        double maxima_thresh = maxima_errs_copy[max_nmaxima];
        int out = 0;
        for (int in = 0; in < nmaxima; in++) {
            if (maxima_errs[in] <= maxima_thresh)
                continue;
            maxima[out++] = maxima[in];
        }
        nmaxima = out;
        free(maxima_errs_copy);
    }
    free(maxima_errs);

    int best_indices[4] = { 0, 0, 0, 0 }; // initialized to silence warnings; guarded below
    double best_error = HUGE_VALF;

    double err01, err12, err23, err30;
    double mse01, mse12, mse23, mse30;
    double params01[4], params12[4], params23[4], params30[4];

    // disallow quads where the angle is less than a critical value.
    double max_dot = td->qtp.cos_critical_rad; //25*M_PI/180);

    // Exhaustive search over ordered 4-subsets of the candidate corners,
    // pruning any branch whose per-edge MSE or corner angle is unacceptable.
    for (int m0 = 0; m0 < nmaxima - 3; m0++) {
        int i0 = maxima[m0];

        for (int m1 = m0+1; m1 < nmaxima - 2; m1++) {
            int i1 = maxima[m1];

            fit_line(lfps, sz, i0, i1, params01, &err01, &mse01);

            if (mse01 > td->qtp.max_line_fit_mse)
                continue;

            for (int m2 = m1+1; m2 < nmaxima - 1; m2++) {
                int i2 = maxima[m2];

                fit_line(lfps, sz, i1, i2, params12, &err12, &mse12);
                if (mse12 > td->qtp.max_line_fit_mse)
                    continue;

                double dot = params01[2]*params12[2] + params01[3]*params12[3];
                if (fabs(dot) > max_dot)
                    continue;

                for (int m3 = m2+1; m3 < nmaxima; m3++) {
                    int i3 = maxima[m3];

                    fit_line(lfps, sz, i2, i3, params23, &err23, &mse23);
                    if (mse23 > td->qtp.max_line_fit_mse)
                        continue;

                    fit_line(lfps, sz, i3, i0, params30, &err30, &mse30);
                    if (mse30 > td->qtp.max_line_fit_mse)
                        continue;

                    double err = err01 + err12 + err23 + err30;
                    if (err < best_error) {
                        best_error = err;
                        best_indices[0] = i0;
                        best_indices[1] = i1;
                        best_indices[2] = i2;
                        best_indices[3] = i3;
                    }
                }
            }
        }
    }

    free(maxima);

    if (best_error == HUGE_VALF)
        return 0;

    for (int i = 0; i < 4; i++)
        indices[i] = best_indices[i];

    if (best_error / sz < td->qtp.max_line_fit_mse)
        return 1;
    return 0;
}
476
477 // returns 0 if the cluster looks bad.
quad_segment_agg(apriltag_detector_t * td,zarray_t * cluster,struct line_fit_pt * lfps,int indices[4])478 int quad_segment_agg(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
479 {
480 int sz = zarray_size(cluster);
481
482 zmaxheap_t *heap = zmaxheap_create(sizeof(struct remove_vertex*));
483
484 // We will initially allocate sz rvs. We then have two types of
485 // iterations: some iterations that are no-ops in terms of
486 // allocations, and those that remove a vertex and allocate two
487 // more children. This will happen at most (sz-4) times. Thus we
488 // need: sz + 2*(sz-4) entries.
489
490 int rvalloc_pos = 0;
491 int rvalloc_size = 3*sz;
492 struct remove_vertex *rvalloc = (struct remove_vertex *)calloc(rvalloc_size, sizeof(struct remove_vertex));
493
494 struct segment *segs = (struct segment *)calloc(sz, sizeof(struct segment));
495
496 // populate with initial entries
497 for (int i = 0; i < sz; i++) {
498 struct remove_vertex *rv = &rvalloc[rvalloc_pos++];
499 rv->i = i;
500 if (i == 0) {
501 rv->left = sz-1;
502 rv->right = 1;
503 } else {
504 rv->left = i-1;
505 rv->right = (i+1) % sz;
506 }
507
508 fit_line(lfps, sz, rv->left, rv->right, NULL, NULL, &rv->err);
509
510 zmaxheap_add(heap, &rv, -rv->err);
511
512 segs[i].left = rv->left;
513 segs[i].right = rv->right;
514 segs[i].is_vertex = 1;
515 }
516
517 int nvertices = sz;
518
519 while (nvertices > 4) {
520 assert(rvalloc_pos < rvalloc_size);
521
522 struct remove_vertex *rv;
523 float err;
524
525 int res = zmaxheap_remove_max(heap, &rv, &err);
526 if (!res)
527 return 0;
528 assert(res);
529
530 // is this remove_vertex valid? (Or has one of the left/right
531 // vertices changes since we last looked?)
532 if (!segs[rv->i].is_vertex ||
533 !segs[rv->left].is_vertex ||
534 !segs[rv->right].is_vertex) {
535 continue;
536 }
537
538 // we now merge.
539 assert(segs[rv->i].is_vertex);
540
541 segs[rv->i].is_vertex = 0;
542 segs[rv->left].right = rv->right;
543 segs[rv->right].left = rv->left;
544
545 // create the join to the left
546 if (1) {
547 struct remove_vertex *child = &rvalloc[rvalloc_pos++];
548 child->i = rv->left;
549 child->left = segs[rv->left].left;
550 child->right = rv->right;
551
552 fit_line(lfps, sz, child->left, child->right, NULL, NULL, &child->err);
553
554 zmaxheap_add(heap, &child, -child->err);
555 }
556
557 // create the join to the right
558 if (1) {
559 struct remove_vertex *child = &rvalloc[rvalloc_pos++];
560 child->i = rv->right;
561 child->left = rv->left;
562 child->right = segs[rv->right].right;
563
564 fit_line(lfps, sz, child->left, child->right, NULL, NULL, &child->err);
565
566 zmaxheap_add(heap, &child, -child->err);
567 }
568
569 // we now have one less vertex
570 nvertices--;
571 }
572
573 free(rvalloc);
574 zmaxheap_destroy(heap);
575
576 int idx = 0;
577 for (int i = 0; i < sz; i++) {
578 if (segs[i].is_vertex) {
579 indices[idx++] = i;
580 }
581 }
582
583 free(segs);
584
585 return 1;
586 }
587
588 /**
589 * Compute statistics that allow line fit queries to be
590 * efficiently computed for any contiguous range of indices.
591 */
compute_lfps(int sz,zarray_t * cluster,image_u8_t * im)592 struct line_fit_pt* compute_lfps(int sz, zarray_t* cluster, image_u8_t* im) {
593 struct line_fit_pt *lfps = (struct line_fit_pt *)calloc(sz, sizeof(struct line_fit_pt));
594
595 for (int i = 0; i < sz; i++) {
596 struct pt *p;
597 zarray_get_volatile(cluster, i, &p);
598
599 if (i > 0) {
600 memcpy(&lfps[i], &lfps[i-1], sizeof(struct line_fit_pt));
601 }
602
603 {
604 // we now undo our fixed-point arithmetic.
605 double delta = 0.5; // adjust for pixel center bias
606 double x = p->x * .5 + delta;
607 double y = p->y * .5 + delta;
608 int ix = x, iy = y;
609 double W = 1;
610
611 if (ix > 0 && ix+1 < im->width && iy > 0 && iy+1 < im->height) {
612 int grad_x = im->buf[iy * im->stride + ix + 1] -
613 im->buf[iy * im->stride + ix - 1];
614
615 int grad_y = im->buf[(iy+1) * im->stride + ix] -
616 im->buf[(iy-1) * im->stride + ix];
617
618 // XXX Tunable. How to shape the gradient magnitude?
619 W = sqrt(grad_x*grad_x + grad_y*grad_y) + 1;
620 }
621
622 double fx = x, fy = y;
623 lfps[i].Mx += W * fx;
624 lfps[i].My += W * fy;
625 lfps[i].Mxx += W * fx * fx;
626 lfps[i].Mxy += W * fx * fy;
627 lfps[i].Myy += W * fy * fy;
628 lfps[i].W += W;
629 }
630 }
631 return lfps;
632 }
633
// Sort 'pts' in place by ascending pseudo-angle ('slope').
// Sizes <= 5 use fixed sorting networks (few data-dependent branches);
// larger inputs use a recursive merge sort with temporary storage.
static inline void ptsort(struct pt *pts, int sz)
{
// compare-and-swap step used by the sorting networks below; requires a
// local 'struct pt tmp' in scope.
#define MAYBE_SWAP(arr,apos,bpos)                                   \
    if (pt_compare_angle(&(arr[apos]), &(arr[bpos])) > 0) {         \
        tmp = arr[apos]; arr[apos] = arr[bpos]; arr[bpos] = tmp;    \
    };

    if (sz <= 1)
        return;

    if (sz == 2) {
        struct pt tmp;
        MAYBE_SWAP(pts, 0, 1);
        return;
    }

    // NB: Using less-branch-intensive sorting networks here on the
    // hunch that it's better for performance.
    if (sz == 3) { // 3 element bubble sort is optimal
        struct pt tmp;
        MAYBE_SWAP(pts, 0, 1);
        MAYBE_SWAP(pts, 1, 2);
        MAYBE_SWAP(pts, 0, 1);
        return;
    }

    if (sz == 4) { // 4 element optimal sorting network.
        struct pt tmp;
        MAYBE_SWAP(pts, 0, 1); // sort each half, like a merge sort
        MAYBE_SWAP(pts, 2, 3);
        MAYBE_SWAP(pts, 0, 2); // minimum value is now at 0.
        MAYBE_SWAP(pts, 1, 3); // maximum value is now at end.
        MAYBE_SWAP(pts, 1, 2); // that only leaves the middle two.
        return;
    }
    if (sz == 5) {
        // this 9-step swap is optimal for a sorting network, but two
        // steps slower than a generic sort.
        struct pt tmp;
        MAYBE_SWAP(pts, 0, 1); // sort each half (3+2), like a merge sort
        MAYBE_SWAP(pts, 3, 4);
        MAYBE_SWAP(pts, 1, 2);
        MAYBE_SWAP(pts, 0, 1);
        MAYBE_SWAP(pts, 0, 3); // minimum element now at 0
        MAYBE_SWAP(pts, 2, 4); // maximum element now at end
        MAYBE_SWAP(pts, 1, 2); // now resort the three elements 1-3.
        MAYBE_SWAP(pts, 2, 3);
        MAYBE_SWAP(pts, 1, 2);
        return;
    }

#undef MAYBE_SWAP

    // a merge sort with temp storage.

    struct pt *tmp = (struct pt *)malloc(sizeof(struct pt) * sz);

    memcpy(tmp, pts, sizeof(struct pt) * sz);

    // split into halves and sort each recursively
    int asz = sz/2;
    int bsz = sz - asz;

    struct pt *as = &tmp[0];
    struct pt *bs = &tmp[asz];

    ptsort(as, asz);
    ptsort(bs, bsz);

// take the smaller head of as/bs and append it to pts
#define MERGE(apos,bpos)                                    \
    if (pt_compare_angle(&(as[apos]), &(bs[bpos])) < 0)     \
        pts[outpos++] = as[apos++];                         \
    else                                                    \
        pts[outpos++] = bs[bpos++];

    int apos = 0, bpos = 0, outpos = 0;
    // 8-way unrolled merge while both halves have plenty of elements left
    while (apos + 8 < asz && bpos + 8 < bsz) {
        MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos);
        MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos);
    }

    while (apos < asz && bpos < bsz) {
        MERGE(apos,bpos);
    }

    // copy whichever half has leftovers (at most one does)
    if (apos < asz)
        memcpy(&pts[outpos], &as[apos], (asz-apos)*sizeof(struct pt));
    if (bpos < bsz)
        memcpy(&pts[outpos], &bs[bpos], (bsz-bpos)*sizeof(struct pt));

    free(tmp);

#undef MERGE
}
727
728 // return 1 if the quad looks okay, 0 if it should be discarded
fit_quad(apriltag_detector_t * td,image_u8_t * im,zarray_t * cluster,struct quad * quad,int tag_width,bool normal_border,bool reversed_border)729 int fit_quad(
730 apriltag_detector_t *td,
731 image_u8_t *im,
732 zarray_t *cluster,
733 struct quad *quad,
734 int tag_width,
735 bool normal_border,
736 bool reversed_border) {
737 int res = 0;
738
739 int sz = zarray_size(cluster);
740 if (sz < 24) // Synchronize with later check.
741 return 0;
742
743 /////////////////////////////////////////////////////////////
744 // Step 1. Sort points so they wrap around the center of the
745 // quad. We will constrain our quad fit to simply partition this
746 // ordered set into 4 groups.
747
748 // compute a bounding box so that we can order the points
749 // according to their angle WRT the center.
750 struct pt *p1;
751 zarray_get_volatile(cluster, 0, &p1);
752 uint16_t xmax = p1->x;
753 uint16_t xmin = p1->x;
754 uint16_t ymax = p1->y;
755 uint16_t ymin = p1->y;
756 for (int pidx = 1; pidx < zarray_size(cluster); pidx++) {
757 struct pt *p;
758 zarray_get_volatile(cluster, pidx, &p);
759
760 if (p->x > xmax) {
761 xmax = p->x;
762 } else if (p->x < xmin) {
763 xmin = p->x;
764 }
765
766 if (p->y > ymax) {
767 ymax = p->y;
768 } else if (p->y < ymin) {
769 ymin = p->y;
770 }
771 }
772
773 if ((xmax - xmin)*(ymax - ymin) < tag_width) {
774 return 0;
775 }
776
777 // add some noise to (cx,cy) so that pixels get a more diverse set
778 // of theta estimates. This will help us remove more points.
779 // (Only helps a small amount. The actual noise values here don't
780 // matter much at all, but we want them [-1, 1]. (XXX with
781 // fixed-point, should range be bigger?)
782 float cx = (xmin + xmax) * 0.5 + 0.05118;
783 float cy = (ymin + ymax) * 0.5 + -0.028581;
784
785 float dot = 0;
786
787 float quadrants[2][2] = {{-1*(2 << 15), 0}, {2*(2 << 15), 2 << 15}};
788
789 for (int pidx = 0; pidx < zarray_size(cluster); pidx++) {
790 struct pt *p;
791 zarray_get_volatile(cluster, pidx, &p);
792
793 float dx = p->x - cx;
794 float dy = p->y - cy;
795
796 dot += dx*p->gx + dy*p->gy;
797
798 float quadrant = quadrants[dy > 0][dx > 0];
799 if (dy < 0) {
800 dy = -dy;
801 dx = -dx;
802 }
803
804 if (dx < 0) {
805 float tmp = dx;
806 dx = dy;
807 dy = -tmp;
808 }
809 p->slope = quadrant + dy/dx;
810 }
811
812 // Ensure that the black border is inside the white border.
813 quad->reversed_border = dot < 0;
814 if (!reversed_border && quad->reversed_border) {
815 return 0;
816 }
817 if (!normal_border && !quad->reversed_border) {
818 return 0;
819 }
820
821 // we now sort the points according to theta. This is a prepatory
822 // step for segmenting them into four lines.
823 if (1) {
824 ptsort((struct pt*) cluster->data, zarray_size(cluster));
825
826 // remove duplicate points. (A byproduct of our segmentation system.)
827 if (1) {
828 int outpos = 1;
829
830 struct pt *last;
831 zarray_get_volatile(cluster, 0, &last);
832
833 for (int i = 1; i < sz; i++) {
834
835 struct pt *p;
836 zarray_get_volatile(cluster, i, &p);
837
838 if (p->x != last->x || p->y != last->y) {
839
840 if (i != outpos) {
841 struct pt *out;
842 zarray_get_volatile(cluster, outpos, &out);
843 memcpy(out, p, sizeof(struct pt));
844 }
845
846 outpos++;
847 }
848
849 last = p;
850 }
851
852 cluster->size = outpos;
853 sz = outpos;
854 }
855
856 }
857
858 if (sz < 24)
859 return 0;
860
861
862 struct line_fit_pt *lfps = compute_lfps(sz, cluster, im);
863
864 int indices[4];
865 if (1) {
866 if (!quad_segment_maxima(td, cluster, lfps, indices))
867 goto finish;
868 } else {
869 if (!quad_segment_agg(td, cluster, lfps, indices))
870 goto finish;
871 }
872
873
874 double lines[4][4];
875
876 for (int i = 0; i < 4; i++) {
877 int i0 = indices[i];
878 int i1 = indices[(i+1)&3];
879
880 double err;
881 fit_line(lfps, sz, i0, i1, lines[i], NULL, &err);
882
883 if (err > td->qtp.max_line_fit_mse) {
884 res = 0;
885 goto finish;
886 }
887 }
888
889 for (int i = 0; i < 4; i++) {
890 // solve for the intersection of lines (i) and (i+1)&3.
891 // p0 + lambda0*u0 = p1 + lambda1*u1, where u0 and u1
892 // are the line directions.
893 //
894 // lambda0*u0 - lambda1*u1 = (p1 - p0)
895 //
896 // rearrange (solve for lambdas)
897 //
898 // [u0_x -u1_x ] [lambda0] = [ p1_x - p0_x ]
899 // [u0_y -u1_y ] [lambda1] [ p1_y - p0_y ]
900 //
901 // remember that lines[i][0,1] = p, lines[i][2,3] = NORMAL vector.
902 // We want the unit vector, so we need the perpendiculars. Thus, below
903 // we have swapped the x and y components and flipped the y components.
904
905 double A00 = lines[i][3], A01 = -lines[(i+1)&3][3];
906 double A10 = -lines[i][2], A11 = lines[(i+1)&3][2];
907 double B0 = -lines[i][0] + lines[(i+1)&3][0];
908 double B1 = -lines[i][1] + lines[(i+1)&3][1];
909
910 double det = A00 * A11 - A10 * A01;
911
912 // inverse.
913 double W00 = A11 / det, W01 = -A01 / det;
914 if (fabs(det) < 0.001) {
915 res = 0;
916 goto finish;
917 }
918
919 // solve
920 double L0 = W00*B0 + W01*B1;
921
922 // compute intersection
923 quad->p[i][0] = lines[i][0] + L0*A00;
924 quad->p[i][1] = lines[i][1] + L0*A10;
925
926 res = 1;
927 }
928
929 // reject quads that are too small
930 if (1) {
931 double area = 0;
932
933 // get area of triangle formed by points 0, 1, 2, 0
934 double length[3], p;
935 for (int i = 0; i < 3; i++) {
936 int idxa = i; // 0, 1, 2,
937 int idxb = (i+1) % 3; // 1, 2, 0
938 length[i] = sqrt(sq(quad->p[idxb][0] - quad->p[idxa][0]) +
939 sq(quad->p[idxb][1] - quad->p[idxa][1]));
940 }
941 p = (length[0] + length[1] + length[2]) / 2;
942
943 area += sqrt(p*(p-length[0])*(p-length[1])*(p-length[2]));
944
945 // get area of triangle formed by points 2, 3, 0, 2
946 for (int i = 0; i < 3; i++) {
947 int idxs[] = { 2, 3, 0, 2 };
948 int idxa = idxs[i];
949 int idxb = idxs[i+1];
950 length[i] = sqrt(sq(quad->p[idxb][0] - quad->p[idxa][0]) +
951 sq(quad->p[idxb][1] - quad->p[idxa][1]));
952 }
953 p = (length[0] + length[1] + length[2]) / 2;
954
955 area += sqrt(p*(p-length[0])*(p-length[1])*(p-length[2]));
956
957 if (area < tag_width*tag_width) {
958 res = 0;
959 goto finish;
960 }
961 }
962
963 // reject quads whose cumulative angle change isn't equal to 2PI
964 if (1) {
965 for (int i = 0; i < 4; i++) {
966 int i0 = i, i1 = (i+1)&3, i2 = (i+2)&3;
967
968 double dx1 = quad->p[i1][0] - quad->p[i0][0];
969 double dy1 = quad->p[i1][1] - quad->p[i0][1];
970 double dx2 = quad->p[i2][0] - quad->p[i1][0];
971 double dy2 = quad->p[i2][1] - quad->p[i1][1];
972 double cos_dtheta = (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2));
973
974 if ((cos_dtheta > td->qtp.cos_critical_rad || cos_dtheta < -td->qtp.cos_critical_rad) || dx1*dy2 < dy1*dx2) {
975 res = 0;
976 goto finish;
977 }
978 }
979 }
980
981 finish:
982
983 free(lfps);
984
985 return res;
986 }
987
988 #define DO_UNIONFIND2(dx, dy) if (im->buf[(y + dy)*s + x + dx] == v) unionfind_connect(uf, y*w + x, (y + dy)*w + x + dx);
989
// Union-find pass for the top row (y == 0): only left-neighbor merges are
// possible since there is no row above.
static void do_unionfind_first_line(unionfind_t *uf, image_u8_t *im, int h, int w, int s)
{
    int y = 0;
    uint8_t v;

    for (int x = 1; x < w - 1; x++) {
        v = im->buf[y*s + x];

        // 127 marks pixels that belong to no component
        if (v == 127)
            continue;

        DO_UNIONFIND2(-1, 0);
    }
}
1004
// Run one row (y >= 1) of the 8-connected union-find pass, connecting each
// pixel to equal-valued neighbors above and to the left. The v_* locals
// cache the three previous-row pixels and the left neighbor so every pixel
// is loaded from the image only once.
static void do_unionfind_line2(unionfind_t *uf, image_u8_t *im, int h, int w, int s, int y)
{
    assert(y > 0);

    uint8_t v_m1_m1;                          // pixel at (x-1, y-1)
    uint8_t v_0_m1 = im->buf[(y - 1)*s];      // pixel at (x,   y-1)
    uint8_t v_1_m1 = im->buf[(y - 1)*s + 1];  // pixel at (x+1, y-1)
    uint8_t v_m1_0;                           // pixel at (x-1, y)
    uint8_t v = im->buf[y*s];                 // pixel at (x,   y)

    for (int x = 1; x < w - 1; x++) {
        // slide the cached neighbor window one pixel to the right
        v_m1_m1 = v_0_m1;
        v_0_m1 = v_1_m1;
        v_1_m1 = im->buf[(y - 1)*s + x + 1];
        v_m1_0 = v;
        v = im->buf[y*s + x];

        // 127 marks pixels that belong to no component
        if (v == 127)
            continue;

        // (dx,dy) pairs for 8 connectivity:
        //          (-1, -1)    (0, -1)    (1, -1)
        //          (-1, 0)    (REFERENCE)
        DO_UNIONFIND2(-1, 0);

        // NOTE(review): the guards below appear to skip union operations
        // already implied by earlier pixels' connections; diagonal merges
        // are attempted only for v == 255 -- confirm against the reference
        // implementation before changing.
        if (x == 1 || !((v_m1_0 == v_m1_m1) && (v_m1_m1 == v_0_m1))) {
            DO_UNIONFIND2(0, -1);
        }

        if (v == 255) {
            if (x == 1 || !(v_m1_0 == v_m1_m1 || v_0_m1 == v_m1_m1) ) {
                DO_UNIONFIND2(-1, -1);
            }
            if (!(v_0_m1 == v_1_m1)) {
                DO_UNIONFIND2(1, -1);
            }
        }
    }
}
1044 #undef DO_UNIONFIND2
1045
do_unionfind_task2(void * p)1046 static void do_unionfind_task2(void *p)
1047 {
1048 struct unionfind_task *task = (struct unionfind_task*) p;
1049
1050 for (int y = task->y0; y < task->y1; y++) {
1051 do_unionfind_line2(task->uf, task->im, task->h, task->w, task->s, y);
1052 }
1053 }
1054
do_quad_task(void * p)1055 static void do_quad_task(void *p)
1056 {
1057 struct quad_task *task = (struct quad_task*) p;
1058
1059 zarray_t *clusters = task->clusters;
1060 zarray_t *quads = task->quads;
1061 apriltag_detector_t *td = task->td;
1062 int w = task->w, h = task->h;
1063
1064 for (int cidx = task->cidx0; cidx < task->cidx1; cidx++) {
1065
1066 zarray_t *cluster;
1067 zarray_get(clusters, cidx, &cluster);
1068
1069 if (zarray_size(cluster) < td->qtp.min_cluster_pixels)
1070 continue;
1071
1072 // a cluster should contain only boundary points around the
1073 // tag. it cannot be bigger than the whole screen. (Reject
1074 // large connected blobs that will be prohibitively slow to
1075 // fit quads to.) A typical point along an edge is added three
1076 // times (because it has 3 neighbors). The maximum perimeter
1077 // is 2w+2h.
1078 if (zarray_size(cluster) > 3*(2*w+2*h)) {
1079 continue;
1080 }
1081
1082 struct quad quad;
1083 memset(&quad, 0, sizeof(struct quad));
1084
1085 if (fit_quad(td, task->im, cluster, &quad, task->tag_width, task->normal_border, task->reversed_border)) {
1086 pthread_mutex_lock(&td->mutex);
1087 zarray_add(quads, &quad);
1088 pthread_mutex_unlock(&td->mutex);
1089 }
1090 }
1091 }
1092
threshold(apriltag_detector_t * td,image_u8_t * im)1093 image_u8_t *threshold(apriltag_detector_t *td, image_u8_t *im)
1094 {
1095 int w = im->width, h = im->height, s = im->stride;
1096 assert(w < 32768);
1097 assert(h < 32768);
1098
1099 image_u8_t *threshim = image_u8_create_alignment(w, h, s);
1100 assert(threshim->stride == s);
1101
1102 // The idea is to find the maximum and minimum values in a
1103 // window around each pixel. If it's a contrast-free region
1104 // (max-min is small), don't try to binarize. Otherwise,
1105 // threshold according to (max+min)/2.
1106 //
1107 // Mark low-contrast regions with value 127 so that we can skip
1108 // future work on these areas too.
1109
1110 // however, computing max/min around every pixel is needlessly
1111 // expensive. We compute max/min for tiles. To avoid artifacts
1112 // that arise when high-contrast features appear near a tile
1113 // edge (and thus moving from one tile to another results in a
1114 // large change in max/min value), the max/min values used for
1115 // any pixel are computed from all 3x3 surrounding tiles. Thus,
1116 // the max/min sampling area for nearby pixels overlap by at least
1117 // one tile.
1118 //
1119 // The important thing is that the windows be large enough to
1120 // capture edge transitions; the tag does not need to fit into
1121 // a tile.
1122
1123 // XXX Tunable. Generally, small tile sizes--- so long as they're
1124 // large enough to span a single tag edge--- seem to be a winner.
1125 const int tilesz = 4;
1126
1127 // the last (possibly partial) tiles along each row and column will
1128 // just use the min/max value from the last full tile.
1129 int tw = w / tilesz;
1130 int th = h / tilesz;
1131
1132 uint8_t *im_max = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1133 uint8_t *im_min = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1134
1135 // first, collect min/max statistics for each tile
1136 for (int ty = 0; ty < th; ty++) {
1137 for (int tx = 0; tx < tw; tx++) {
1138 uint8_t max = 0, min = 255;
1139
1140 for (int dy = 0; dy < tilesz; dy++) {
1141
1142 for (int dx = 0; dx < tilesz; dx++) {
1143
1144 uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx];
1145 if (v < min)
1146 min = v;
1147 if (v > max)
1148 max = v;
1149 }
1150 }
1151
1152 im_max[ty*tw+tx] = max;
1153 im_min[ty*tw+tx] = min;
1154 }
1155 }
1156
1157 // second, apply 3x3 max/min convolution to "blur" these values
1158 // over larger areas. This reduces artifacts due to abrupt changes
1159 // in the threshold value.
1160 if (1) {
1161 uint8_t *im_max_tmp = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1162 uint8_t *im_min_tmp = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
1163
1164 for (int ty = 0; ty < th; ty++) {
1165 for (int tx = 0; tx < tw; tx++) {
1166 uint8_t max = 0, min = 255;
1167
1168 for (int dy = -1; dy <= 1; dy++) {
1169 if (ty+dy < 0 || ty+dy >= th)
1170 continue;
1171 for (int dx = -1; dx <= 1; dx++) {
1172 if (tx+dx < 0 || tx+dx >= tw)
1173 continue;
1174
1175 uint8_t m = im_max[(ty+dy)*tw+tx+dx];
1176 if (m > max)
1177 max = m;
1178 m = im_min[(ty+dy)*tw+tx+dx];
1179 if (m < min)
1180 min = m;
1181 }
1182 }
1183
1184 im_max_tmp[ty*tw + tx] = max;
1185 im_min_tmp[ty*tw + tx] = min;
1186 }
1187 }
1188 free(im_max);
1189 free(im_min);
1190 im_max = im_max_tmp;
1191 im_min = im_min_tmp;
1192 }
1193
1194 for (int ty = 0; ty < th; ty++) {
1195 for (int tx = 0; tx < tw; tx++) {
1196
1197 int min = im_min[ty*tw + tx];
1198 int max = im_max[ty*tw + tx];
1199
1200 // low contrast region? (no edges)
1201 if (max - min < td->qtp.min_white_black_diff) {
1202 for (int dy = 0; dy < tilesz; dy++) {
1203 int y = ty*tilesz + dy;
1204
1205 for (int dx = 0; dx < tilesz; dx++) {
1206 int x = tx*tilesz + dx;
1207
1208 threshim->buf[y*s+x] = 127;
1209 }
1210 }
1211 continue;
1212 }
1213
1214 // otherwise, actually threshold this tile.
1215
1216 // argument for biasing towards dark; specular highlights
1217 // can be substantially brighter than white tag parts
1218 uint8_t thresh = min + (max - min) / 2;
1219
1220 for (int dy = 0; dy < tilesz; dy++) {
1221 int y = ty*tilesz + dy;
1222
1223 for (int dx = 0; dx < tilesz; dx++) {
1224 int x = tx*tilesz + dx;
1225
1226 uint8_t v = im->buf[y*s+x];
1227 if (v > thresh)
1228 threshim->buf[y*s+x] = 255;
1229 else
1230 threshim->buf[y*s+x] = 0;
1231 }
1232 }
1233 }
1234 }
1235
1236 // we skipped over the non-full-sized tiles above. Fix those now.
1237 if (1) {
1238 for (int y = 0; y < h; y++) {
1239
1240 // what is the first x coordinate we need to process in this row?
1241
1242 int x0;
1243
1244 if (y >= th*tilesz) {
1245 x0 = 0; // we're at the bottom; do the whole row.
1246 } else {
1247 x0 = tw*tilesz; // we only need to do the right most part.
1248 }
1249
1250 // compute tile coordinates and clamp.
1251 int ty = y / tilesz;
1252 if (ty >= th)
1253 ty = th - 1;
1254
1255 for (int x = x0; x < w; x++) {
1256 int tx = x / tilesz;
1257 if (tx >= tw)
1258 tx = tw - 1;
1259
1260 int max = im_max[ty*tw + tx];
1261 int min = im_min[ty*tw + tx];
1262 int thresh = min + (max - min) / 2;
1263
1264 uint8_t v = im->buf[y*s+x];
1265 if (v > thresh)
1266 threshim->buf[y*s+x] = 255;
1267 else
1268 threshim->buf[y*s+x] = 0;
1269 }
1270 }
1271 }
1272
1273 free(im_min);
1274 free(im_max);
1275
1276 // this is a dilate/erode deglitching scheme that does not improve
1277 // anything as far as I can tell.
1278 if (0 || td->qtp.deglitch) {
1279 image_u8_t *tmp = image_u8_create(w, h);
1280
1281 for (int y = 1; y + 1 < h; y++) {
1282 for (int x = 1; x + 1 < w; x++) {
1283 uint8_t max = 0;
1284 for (int dy = -1; dy <= 1; dy++) {
1285 for (int dx = -1; dx <= 1; dx++) {
1286 uint8_t v = threshim->buf[(y+dy)*s + x + dx];
1287 if (v > max)
1288 max = v;
1289 }
1290 }
1291 tmp->buf[y*s+x] = max;
1292 }
1293 }
1294
1295 for (int y = 1; y + 1 < h; y++) {
1296 for (int x = 1; x + 1 < w; x++) {
1297 uint8_t min = 255;
1298 for (int dy = -1; dy <= 1; dy++) {
1299 for (int dx = -1; dx <= 1; dx++) {
1300 uint8_t v = tmp->buf[(y+dy)*s + x + dx];
1301 if (v < min)
1302 min = v;
1303 }
1304 }
1305 threshim->buf[y*s+x] = min;
1306 }
1307 }
1308
1309 image_u8_destroy(tmp);
1310 }
1311
1312 timeprofile_stamp(td->tp, "threshold");
1313
1314 return threshim;
1315 }
1316
// basically the same as threshold(), but assumes the input image is a
// bayer image. It collects statistics separately for each 2x2 block
// of pixels. NOT WELL TESTED.
//
// Output convention differs from threshold(): pixels are written as
// (v > thresh), i.e. 0 or 1 -- not 0/255, and no 127 low-contrast
// marker is produced (the contrast check below is commented out).
// NOTE(review): downstream code that keys on 127 or 255 would not see
// those values here -- confirm intended, given the "NOT WELL TESTED" tag.
image_u8_t *threshold_bayer(apriltag_detector_t *td, image_u8_t *im)
{
    int w = im->width, h = im->height, s = im->stride;

    // output image shares the input's stride.
    image_u8_t *threshim = image_u8_create_alignment(w, h, s);
    assert(threshim->stride == s);

    int tilesz = 32;
    assert((tilesz & 1) == 0); // must be multiple of 2

    // +1: partial edge tiles get their own (partial) statistics here,
    // unlike threshold() which truncates and patches them up afterwards.
    int tw = w/tilesz + 1;
    int th = h/tilesz + 1;

    // one min/max plane per bayer channel:
    // index 2*(row parity) + (column parity), i.e. 0..3.
    uint8_t *im_max[4], *im_min[4];
    for (int i = 0; i < 4; i++) {
        im_max[i] = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
        im_min[i] = (uint8_t *)calloc(tw*th, sizeof(uint8_t));
    }

    // first pass: per-tile, per-channel min/max statistics.
    for (int ty = 0; ty < th; ty++) {
        for (int tx = 0; tx < tw; tx++) {

            uint8_t max[4] = { 0, 0, 0, 0};
            uint8_t min[4] = { 255, 255, 255, 255 };

            for (int dy = 0; dy < tilesz; dy++) {
                if (ty*tilesz+dy >= h)
                    continue;

                for (int dx = 0; dx < tilesz; dx++) {
                    if (tx*tilesz+dx >= w)
                        continue;

                    // which bayer element is this pixel?
                    // (tilesz is even, so dy/dx parity == y/x parity.)
                    int idx = (2*(dy&1) + (dx&1));

                    uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx];
                    if (v < min[idx])
                        min[idx] = v;
                    if (v > max[idx])
                        max[idx] = v;
                }
            }

            for (int i = 0; i < 4; i++) {
                im_max[i][ty*tw+tx] = max[i];
                im_min[i][ty*tw+tx] = min[i];
            }
        }
    }

    // second pass: blur min/max over the 3x3 tile neighborhood and
    // threshold every pixel of the tile against its channel's midpoint.
    for (int ty = 0; ty < th; ty++) {
        for (int tx = 0; tx < tw; tx++) {

            uint8_t max[4] = { 0, 0, 0, 0};
            uint8_t min[4] = { 255, 255, 255, 255 };

            for (int dy = -1; dy <= 1; dy++) {
                if (ty+dy < 0 || ty+dy >= th)
                    continue;
                for (int dx = -1; dx <= 1; dx++) {
                    if (tx+dx < 0 || tx+dx >= tw)
                        continue;

                    for (int i = 0; i < 4; i++) {
                        uint8_t m = im_max[i][(ty+dy)*tw+tx+dx];
                        if (m > max[i])
                            max[i] = m;
                        m = im_min[i][(ty+dy)*tw+tx+dx];
                        if (m < min[i])
                            min[i] = m;
                    }
                }
            }

            // XXX CONSTANT
//            if (max - min < 30)
//                continue;

            // argument for biasing towards dark: specular highlights
            // can be substantially brighter than white tag parts
            uint8_t thresh[4];
            for (int i = 0; i < 4; i++) {
                thresh[i] = min[i] + (max[i] - min[i]) / 2;
            }

            for (int dy = 0; dy < tilesz; dy++) {
                int y = ty*tilesz + dy;
                if (y >= h)
                    continue;

                for (int dx = 0; dx < tilesz; dx++) {
                    int x = tx*tilesz + dx;
                    if (x >= w)
                        continue;

                    // which bayer element is this pixel?
                    int idx = (2*(y&1) + (x&1));

                    uint8_t v = im->buf[y*s+x];
                    // writes 0 or 1 (see header note).
                    threshim->buf[y*s+x] = v > thresh[idx];
                }
            }
        }
    }

    for (int i = 0; i < 4; i++) {
        free(im_min[i]);
        free(im_max[i]);
    }

    timeprofile_stamp(td->tp, "threshold");

    return threshim;
}
1435
connected_components(apriltag_detector_t * td,image_u8_t * threshim,int w,int h,int ts)1436 unionfind_t* connected_components(apriltag_detector_t *td, image_u8_t* threshim, int w, int h, int ts) {
1437 unionfind_t *uf = unionfind_create(w * h);
1438
1439 if (td->nthreads <= 1) {
1440 do_unionfind_first_line(uf, threshim, h, w, ts);
1441 for (int y = 1; y < h; y++) {
1442 do_unionfind_line2(uf, threshim, h, w, ts, y);
1443 }
1444 } else {
1445 do_unionfind_first_line(uf, threshim, h, w, ts);
1446
1447 int sz = h;
1448 int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
1449 struct unionfind_task *tasks = (struct unionfind_task *)malloc(sizeof(struct unionfind_task)*(sz / chunksize + 1));
1450
1451 int ntasks = 0;
1452
1453 for (int i = 1; i < sz; i += chunksize) {
1454 // each task will process [y0, y1). Note that this attaches
1455 // each cell to the right and down, so row y1 *is* potentially modified.
1456 //
1457 // for parallelization, make sure that each task doesn't touch rows
1458 // used by another thread.
1459 tasks[ntasks].y0 = i;
1460 tasks[ntasks].y1 = imin(sz, i + chunksize - 1);
1461 tasks[ntasks].h = h;
1462 tasks[ntasks].w = w;
1463 tasks[ntasks].s = ts;
1464 tasks[ntasks].uf = uf;
1465 tasks[ntasks].im = threshim;
1466
1467 workerpool_add_task(td->wp, do_unionfind_task2, &tasks[ntasks]);
1468 ntasks++;
1469 }
1470
1471 workerpool_run(td->wp);
1472
1473 // XXX stitch together the different chunks.
1474 for (int i = 1; i < ntasks; i++) {
1475 do_unionfind_line2(uf, threshim, h, w, ts, tasks[i].y0 - 1);
1476 }
1477
1478 free(tasks);
1479 }
1480 return uf;
1481 }
1482
// Collect gradient-boundary points for rows [y0, y1) of the thresholded
// image. Whenever two adjacent pixels are exactly white (255) and black
// (0), the point halfway between them is appended to the cluster keyed by
// the pair of connected-component ids on either side. Results are appended
// to 'clusters' as struct cluster_hash* entries, sorted within each hash
// bucket by cluster id; the same 'clusters' zarray is also returned.
// Ownership: each cluster_hash and its 'data' zarray are heap-allocated
// here and must be released by the caller (see merge_clusters /
// gradient_clusters for how they are consumed).
zarray_t* do_gradient_clusters(image_u8_t* threshim, int ts, int y0, int y1, int w, int nclustermap, unionfind_t* uf, zarray_t* clusters) {
    // open-hashed map from 64-bit cluster id -> zarray of points.
    struct uint64_zarray_entry **clustermap = (struct uint64_zarray_entry **)calloc(nclustermap, sizeof(struct uint64_zarray_entry*));

    // backing store for hash entries, allocated in fixed-size chunks so
    // entry pointers stay stable as the map grows.
    // NOTE(review): mem_pools holds at most 2*nclustermap/mem_chunk_size
    // chunk pointers; it looks like a workload with more distinct clusters
    // than 2*nclustermap would overrun the array -- confirm the bound
    // against the callers' choice of nclustermap.
    int mem_chunk_size = 2048;
    struct uint64_zarray_entry** mem_pools = (struct uint64_zarray_entry**)malloc(sizeof(struct uint64_zarray_entry *)*2*nclustermap/mem_chunk_size);
    int mem_pool_idx = 0;
    int mem_pool_loc = 0;
    mem_pools[mem_pool_idx] = (struct uint64_zarray_entry *)calloc(mem_chunk_size, sizeof(struct uint64_zarray_entry));

    for (int y = y0; y < y1; y++) {
        for (int x = 1; x < w-1; x++) {

            // skip low-contrast pixels (marked 127 by threshold()).
            uint8_t v0 = threshim->buf[y*ts + x];
            if (v0 == 127)
                continue;

            // XXX don't query this until we know we need it?
            // ignore pixels belonging to tiny components (< 25 px).
            uint64_t rep0 = unionfind_get_representative(uf, y*w + x);
            if (unionfind_get_set_size(uf, rep0) < 25) {
                continue;
            }

            // whenever we find two adjacent pixels such that one is
            // white and the other black, we add the point half-way
            // between them to a cluster associated with the unique
            // ids of the white and black regions.
            //
            // We additionally compute the gradient direction (i.e., which
            // direction was the white pixel?) Note: if (v1-v0) == 255, then
            // (dx,dy) points towards the white pixel. if (v1-v0) == -255, then
            // (dx,dy) points towards the black pixel. p.gx and p.gy will thus
            // be -255, 0, or 255.
            //
            // Note that any given pixel might be added to multiple
            // different clusters. But in the common case, a given
            // pixel will be added multiple times to the same cluster,
            // which increases the size of the cluster and thus the
            // computational costs.
            //
            // A possible optimization would be to combine entries
            // within the same cluster.

#define DO_CONN(dx, dy) \
    if (1) { \
        uint8_t v1 = threshim->buf[(y + dy)*ts + x + dx]; \
        \
        if (v0 + v1 == 255) { \
            uint64_t rep1 = unionfind_get_representative(uf, (y + dy)*w + x + dx); \
            if (unionfind_get_set_size(uf, rep1) > 24) { \
                uint64_t clusterid; \
                if (rep0 < rep1) \
                    clusterid = (rep1 << 32) + rep0; \
                else \
                    clusterid = (rep0 << 32) + rep1; \
                \
                /* XXX lousy hash function */ \
                uint32_t clustermap_bucket = u64hash_2(clusterid) % nclustermap; \
                struct uint64_zarray_entry *entry = clustermap[clustermap_bucket]; \
                while (entry && entry->id != clusterid) { \
                    entry = entry->next; \
                } \
                \
                if (!entry) { \
                    if (mem_pool_loc == mem_chunk_size) { \
                        mem_pool_loc = 0; \
                        mem_pool_idx++; \
                        mem_pools[mem_pool_idx] = (struct uint64_zarray_entry *)calloc(mem_chunk_size, sizeof(struct uint64_zarray_entry)); \
                    } \
                    entry = mem_pools[mem_pool_idx] + mem_pool_loc; \
                    mem_pool_loc++; \
                    \
                    entry->id = clusterid; \
                    entry->cluster = zarray_create(sizeof(struct pt)); \
                    entry->next = clustermap[clustermap_bucket]; \
                    clustermap[clustermap_bucket] = entry; \
                } \
                \
                struct pt p = { (uint16_t)(2*x + dx), (uint16_t)(2*y + dy), (int16_t)(dx*((int) v1-v0)), (int16_t)(dy*((int) v1-v0)), 0.f}; \
                zarray_add(entry->cluster, &p); \
            } \
        } \
    }

            // do 4 connectivity. NB: Arguments must be [-1, 1] or we'll overflow .gx, .gy
            DO_CONN(1, 0);
            DO_CONN(0, 1);

            // do 8 connectivity
            DO_CONN(-1, 1);
            DO_CONN(1, 1);
        }
    }
#undef DO_CONN

    // flatten the hash map into 'clusters', wrapping each entry in a
    // cluster_hash that records (bucket, id) so the per-task results can
    // later be merged in sorted order.
    for (int i = 0; i < nclustermap; i++) {
        int start = zarray_size(clusters);
        for (struct uint64_zarray_entry *entry = clustermap[i]; entry; entry = entry->next) {
            struct cluster_hash* cluster_hash = (struct cluster_hash*)malloc(sizeof(struct cluster_hash));
            cluster_hash->hash = u64hash_2(entry->id) % nclustermap;
            cluster_hash->id = entry->id;
            cluster_hash->data = entry->cluster;
            zarray_add(clusters, &cluster_hash);
        }
        int end = zarray_size(clusters);

        // Do a quick bubblesort on the secondary key.
        // (bucket chains are short, so O(n^2) within a bucket is fine.)
        int n = end - start;
        for (int j = 0; j < n - 1; j++) {
            for (int k = 0; k < n - j - 1; k++) {
                struct cluster_hash* hash1;
                struct cluster_hash* hash2;
                zarray_get(clusters, start + k, &hash1);
                zarray_get(clusters, start + k + 1, &hash2);
                if (hash1->id > hash2->id) {
                    struct cluster_hash tmp = *hash2;
                    *hash2 = *hash1;
                    *hash1 = tmp;
                }
            }
        }
    }
    // the point zarrays were handed off to cluster_hash entries above;
    // only the pool chunks and the bucket array are freed here.
    for (int i = 0; i <= mem_pool_idx; i++) {
        free(mem_pools[i]);
    }
    free(mem_pools);
    free(clustermap);

    return clusters;
}
1612
do_cluster_task(void * p)1613 static void do_cluster_task(void *p)
1614 {
1615 struct cluster_task *task = (struct cluster_task*) p;
1616
1617 do_gradient_clusters(task->im, task->s, task->y0, task->y1, task->w, task->nclustermap, task->uf, task->clusters);
1618 }
1619
merge_clusters(zarray_t * c1,zarray_t * c2)1620 zarray_t* merge_clusters(zarray_t* c1, zarray_t* c2) {
1621 zarray_t* ret = zarray_create(sizeof(struct cluster_hash*));
1622 zarray_ensure_capacity(ret, zarray_size(c1) + zarray_size(c2));
1623
1624 int i1 = 0;
1625 int i2 = 0;
1626 int l1 = zarray_size(c1);
1627 int l2 = zarray_size(c2);
1628
1629 while (i1 < l1 && i2 < l2) {
1630 struct cluster_hash* h1;
1631 struct cluster_hash* h2;
1632 zarray_get(c1, i1, &h1);
1633 zarray_get(c2, i2, &h2);
1634
1635 if (h1->hash == h2->hash && h1->id == h2->id) {
1636 zarray_add_all(h1->data, h2->data);
1637 zarray_add(ret, &h1);
1638 i1++;
1639 i2++;
1640 zarray_destroy(h2->data);
1641 free(h2);
1642 } else if (h2->hash < h1->hash || (h2->hash == h1->hash && h2->id < h1->id)) {
1643 zarray_add(ret, &h2);
1644 i2++;
1645 } else {
1646 zarray_add(ret, &h1);
1647 i1++;
1648 }
1649 }
1650
1651 for (; i1 < l1; i1++) {
1652 struct cluster_hash* h1;
1653 zarray_get(c1, i1, &h1);
1654 zarray_add(ret, &h1);
1655 }
1656
1657 for (; i2 < l2; i2++) {
1658 struct cluster_hash* h2;
1659 zarray_get(c2, i2, &h2);
1660 zarray_add(ret, &h2);
1661 }
1662
1663 zarray_destroy(c1);
1664 zarray_destroy(c2);
1665
1666 return ret;
1667 }
1668
gradient_clusters(apriltag_detector_t * td,image_u8_t * threshim,int w,int h,int ts,unionfind_t * uf)1669 zarray_t* gradient_clusters(apriltag_detector_t *td, image_u8_t* threshim, int w, int h, int ts, unionfind_t* uf) {
1670 zarray_t* clusters;
1671 int nclustermap = 0.2*w*h;
1672
1673 int sz = h - 1;
1674 int chunksize = 1 + sz / td->nthreads;
1675 struct cluster_task *tasks = (struct cluster_task *)malloc(sizeof(struct cluster_task)*(sz / chunksize + 1));
1676
1677 int ntasks = 0;
1678
1679 for (int i = 1; i < sz; i += chunksize) {
1680 // each task will process [y0, y1). Note that this processes
1681 // each cell to the right and down.
1682 tasks[ntasks].y0 = i;
1683 tasks[ntasks].y1 = imin(sz, i + chunksize);
1684 tasks[ntasks].w = w;
1685 tasks[ntasks].s = ts;
1686 tasks[ntasks].uf = uf;
1687 tasks[ntasks].im = threshim;
1688 tasks[ntasks].nclustermap = nclustermap/(sz / chunksize + 1);
1689 tasks[ntasks].clusters = zarray_create(sizeof(struct cluster_hash*));
1690
1691 workerpool_add_task(td->wp, do_cluster_task, &tasks[ntasks]);
1692 ntasks++;
1693 }
1694
1695 workerpool_run(td->wp);
1696
1697 zarray_t** clusters_list = (zarray_t**)malloc(sizeof(zarray_t *)*ntasks);
1698 for (int i = 0; i < ntasks; i++) {
1699 clusters_list[i] = tasks[i].clusters;
1700 }
1701
1702 int length = ntasks;
1703 while (length > 1) {
1704 int write = 0;
1705 for (int i = 0; i < length - 1; i += 2) {
1706 clusters_list[write] = merge_clusters(clusters_list[i], clusters_list[i + 1]);
1707 write++;
1708 }
1709
1710 if (length % 2) {
1711 clusters_list[write] = clusters_list[length - 1];
1712 }
1713
1714 length = (length >> 1) + length % 2;
1715 }
1716
1717 clusters = zarray_create(sizeof(zarray_t*));
1718 zarray_ensure_capacity(clusters, zarray_size(clusters_list[0]));
1719 for (int i = 0; i < zarray_size(clusters_list[0]); i++) {
1720 struct cluster_hash* h;
1721 zarray_get(clusters_list[0], i, &h);
1722 zarray_add(clusters, &h->data);
1723 free(h);
1724 }
1725 zarray_destroy(clusters_list[0]);
1726 free(clusters_list);
1727 free(tasks);
1728 return clusters;
1729 }
1730
fit_quads(apriltag_detector_t * td,int w,int h,zarray_t * clusters,image_u8_t * im)1731 zarray_t* fit_quads(apriltag_detector_t *td, int w, int h, zarray_t* clusters, image_u8_t* im) {
1732 zarray_t *quads = zarray_create(sizeof(struct quad));
1733
1734 bool normal_border = false;
1735 bool reversed_border = false;
1736 int min_tag_width = 1000000;
1737 for (int i = 0; i < zarray_size(td->tag_families); i++) {
1738 apriltag_family_t* family;
1739 zarray_get(td->tag_families, i, &family);
1740 if (family->width_at_border < min_tag_width) {
1741 min_tag_width = family->width_at_border;
1742 }
1743 normal_border |= !family->reversed_border;
1744 reversed_border |= family->reversed_border;
1745 }
1746 min_tag_width /= td->quad_decimate;
1747 if (min_tag_width < 3) {
1748 min_tag_width = 3;
1749 }
1750
1751 int sz = zarray_size(clusters);
1752 int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
1753 struct quad_task *tasks = (struct quad_task *)malloc(sizeof(struct quad_task)*(sz / chunksize + 1));
1754
1755 int ntasks = 0;
1756 for (int i = 0; i < sz; i += chunksize) {
1757 tasks[ntasks].td = td;
1758 tasks[ntasks].cidx0 = i;
1759 tasks[ntasks].cidx1 = imin(sz, i + chunksize);
1760 tasks[ntasks].h = h;
1761 tasks[ntasks].w = w;
1762 tasks[ntasks].quads = quads;
1763 tasks[ntasks].clusters = clusters;
1764 tasks[ntasks].im = im;
1765 tasks[ntasks].tag_width = min_tag_width;
1766 tasks[ntasks].normal_border = normal_border;
1767 tasks[ntasks].reversed_border = reversed_border;
1768
1769 workerpool_add_task(td->wp, do_quad_task, &tasks[ntasks]);
1770 ntasks++;
1771 }
1772
1773 workerpool_run(td->wp);
1774
1775 free(tasks);
1776
1777 return quads;
1778 }
1779
// Top-level quad detection pipeline: threshold the image, label connected
// components, extract gradient clusters along black/white boundaries, and
// fit quads to each cluster. Returns a zarray of struct quad owned by the
// caller. When td->debug is set, intermediate images and a PostScript
// overlay are written to the current working directory.
zarray_t *apriltag_quad_thresh(apriltag_detector_t *td, image_u8_t *im)
{
    ////////////////////////////////////////////////////////
    // step 1. threshold the image, creating the edge image.

    int w = im->width, h = im->height;

    image_u8_t *threshim = threshold(td, im);
    int ts = threshim->stride;

    if (td->debug)
        image_u8_write_pnm(threshim, "debug_threshold.pnm");


    ////////////////////////////////////////////////////////
    // step 2. find connected components.
    unionfind_t* uf = connected_components(td, threshim, w, h, ts);

    // make segmentation image.
    if (td->debug) {
        image_u8x3_t *d = image_u8x3_create(w, h);

        // lazily-assigned random color per component representative.
        uint32_t *colors = (uint32_t*) calloc(w*h, sizeof(*colors));

        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) {
                uint32_t v = unionfind_get_representative(uf, y*w+x);

                // leave too-small components black.
                if (unionfind_get_set_size(uf, v) < td->qtp.min_cluster_pixels)
                    continue;

                uint32_t color = colors[v];
                uint8_t r = color >> 16,
                    g = color >> 8,
                    b = color;

                // first time we see this component: pick a color
                // bright enough to be visible (bias away from black).
                if (color == 0) {
                    const int bias = 50;
                    r = bias + (random() % (200-bias));
                    g = bias + (random() % (200-bias));
                    b = bias + (random() % (200-bias));
                    colors[v] = (r << 16) | (g << 8) | b;
                }

                d->buf[y*d->stride + 3*x + 0] = r;
                d->buf[y*d->stride + 3*x + 1] = g;
                d->buf[y*d->stride + 3*x + 2] = b;
            }
        }

        free(colors);

        image_u8x3_write_pnm(d, "debug_segmentation.pnm");
        image_u8x3_destroy(d);
    }


    timeprofile_stamp(td->tp, "unionfind");

    zarray_t* clusters = gradient_clusters(td, threshim, w, h, ts, uf);

    // debug: draw each cluster's points in a random color.
    if (td->debug) {
        image_u8x3_t *d = image_u8x3_create(w, h);

        for (int i = 0; i < zarray_size(clusters); i++) {
            zarray_t *cluster;
            zarray_get(clusters, i, &cluster);

            uint32_t r, g, b;

            if (1) {
                const int bias = 50;
                r = bias + (random() % (200-bias));
                g = bias + (random() % (200-bias));
                b = bias + (random() % (200-bias));
            }

            for (int j = 0; j < zarray_size(cluster); j++) {
                struct pt *p;
                zarray_get_volatile(cluster, j, &p);

                // cluster points are stored at 2x resolution (half-pixel
                // positions); divide back down for display.
                int x = p->x / 2;
                int y = p->y / 2;
                d->buf[y*d->stride + 3*x + 0] = r;
                d->buf[y*d->stride + 3*x + 1] = g;
                d->buf[y*d->stride + 3*x + 2] = b;
            }
        }

        image_u8x3_write_pnm(d, "debug_clusters.pnm");
        image_u8x3_destroy(d);
    }


    image_u8_destroy(threshim);
    timeprofile_stamp(td->tp, "make clusters");

    ////////////////////////////////////////////////////////
    // step 3. process each connected component.

    zarray_t* quads = fit_quads(td, w, h, clusters, im);

    if (td->debug) {
        // NOTE(review): fopen's result is not checked; if the working
        // directory is not writable, f is NULL and the fprintf below
        // crashes -- confirm debug mode is only used in writable dirs.
        FILE *f = fopen("debug_lines.ps", "w");
        fprintf(f, "%%!PS\n\n");

        image_u8_t *im2 = image_u8_copy(im);
        image_u8_darken(im2);
        image_u8_darken(im2);

        // assume letter, which is 612x792 points.
        double scale = (std::min)(612.0/im->width, 792.0/im2->height);
        fprintf(f, "%.15f %.15f scale\n", scale, scale);
        fprintf(f, "0 %d translate\n", im2->height);
        fprintf(f, "1 -1 scale\n");

        postscript_image(f, im2);

        image_u8_destroy(im2);

        for (int i = 0; i < zarray_size(quads); i++) {
            struct quad *q;
            zarray_get_volatile(quads, i, &q);

            float rgb[3];
            int bias = 100;

            // note: this inner 'i' shadows the quad-loop index; the
            // shadowing is benign since it's confined to this loop.
            for (int i = 0; i < 3; i++)
                rgb[i] = bias + (random() % (255-bias));

            fprintf(f, "%f %f %f setrgbcolor\n", rgb[0]/255.0f, rgb[1]/255.0f, rgb[2]/255.0f);
            fprintf(f, "%.15f %.15f moveto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto stroke\n",
                    q->p[0][0], q->p[0][1],
                    q->p[1][0], q->p[1][1],
                    q->p[2][0], q->p[2][1],
                    q->p[3][0], q->p[3][1],
                    q->p[0][0], q->p[0][1]);
        }

        fclose(f);
    }

    timeprofile_stamp(td->tp, "fit quads to clusters");

    unionfind_destroy(uf);

    // release the cluster point lists handed back by gradient_clusters.
    for (int i = 0; i < zarray_size(clusters); i++) {
        zarray_t *cluster;
        zarray_get(clusters, i, &cluster);
        zarray_destroy(cluster);
    }
    zarray_destroy(clusters);

    return quads;
}
1935