1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2012 Blender Foundation.
17  * All rights reserved.
18  */
19 
20 /** \file
21  * \ingroup bke
22  *
23  * This module exposes a rasterizer that works as a black box - implementation details
24  * are confined to this file.
25  *
26  * The basic method to access is:
27  * - create & initialize a handle from a #Mask datablock.
28  * - execute pixel lookups.
29  * - free the handle.
30  *
31  * This file is admittedly a bit confusticated,
 * in quite a few areas speed was chosen over readability,
33  * though it is commented - so shouldn't be so hard to see what's going on.
34  *
35  * Implementation:
36  *
 * To rasterize the mask it's converted into geometry that uses a ray-cast for each pixel lookup.
38  *
39  * Initially 'kdopbvh' was used but this ended up being too slow.
40  *
41  * To gain some extra speed we take advantage of a few shortcuts
42  * that can be made rasterizing masks specifically.
43  *
44  * - All triangles are known to be completely white -
45  *   so no depth check is done on triangle intersection.
46  * - All quads are known to be feather outlines -
47  *   the 1 and 0 depths are known by the vertex order in the quad,
48  * - There is no color - just a value for each mask pixel.
 * - The mask spatial structure always maps to space 0-1 on X and Y axis.
50  * - Bucketing is used to speed up lookups for geometry.
51  *
52  * Other Details:
53  * - used unsigned values all over for some extra speed on some arch's.
54  * - anti-aliasing is faked, just ensuring at least one pixel feather - avoids oversampling.
 * - initializing the spatial structure doesn't need to be as optimized as pixel lookups are.
56  * - mask lookups need not be pixel aligned so any sub-pixel values from x/y (0 - 1), can be found.
57  *   (perhaps masks can be used as a vector texture in 3D later on)
 * Currently, to build the spatial structure we have to calculate
59  * the total number of faces ahead of time.
60  *
61  * This is getting a bit complicated with the addition of unfilled splines and end capping -
62  * If large changes are needed here we would be better off using an iterable
63  * BLI_mempool for triangles and converting to a contiguous array afterwards.
64  *
65  * - Campbell
66  */
67 
68 #include "CLG_log.h"
69 
70 #include "MEM_guardedalloc.h"
71 
72 #include "DNA_mask_types.h"
73 #include "DNA_scene_types.h"
74 #include "DNA_vec_types.h"
75 
76 #include "BLI_memarena.h"
77 #include "BLI_scanfill.h"
78 #include "BLI_utildefines.h"
79 
80 #include "BLI_linklist.h"
81 #include "BLI_listbase.h"
82 #include "BLI_math.h"
83 #include "BLI_rect.h"
84 #include "BLI_task.h"
85 
86 #include "BKE_mask.h"
87 
88 #include "BLI_strict_flags.h"
89 
/* this is rather an annoying hack, use define to isolate it.
 * problem is caused by scanfill removing edges on us. */
92 #define USE_SCANFILL_EDGE_WORKAROUND
93 
94 #define SPLINE_RESOL_CAP_PER_PIXEL 2
95 #define SPLINE_RESOL_CAP_MIN 8
96 #define SPLINE_RESOL_CAP_MAX 64
97 
98 /* found this gives best performance for high detail masks, values between 2 and 8 work best */
99 #define BUCKET_PIXELS_PER_CELL 4
100 
101 #define SF_EDGE_IS_BOUNDARY 0xff
102 #define SF_KEYINDEX_TEMP_ID ((unsigned int)-1)
103 
104 #define TRI_TERMINATOR_ID ((unsigned int)-1)
105 #define TRI_VERT ((unsigned int)-1)
106 
107 /* for debugging add... */
108 #ifndef NDEBUG
109 /* printf("%u %u %u %u\n", _t[0], _t[1], _t[2], _t[3]); \ */
110 #  define FACE_ASSERT(face, vert_max) \
111     { \
112       unsigned int *_t = face; \
113       BLI_assert(_t[0] < vert_max); \
114       BLI_assert(_t[1] < vert_max); \
115       BLI_assert(_t[2] < vert_max); \
116       BLI_assert(_t[3] < vert_max || _t[3] == TRI_VERT); \
117     } \
118     (void)0
119 #else
120 /* do nothing */
121 #  define FACE_ASSERT(face, vert_max)
122 #endif
123 
124 static CLG_LogRef LOG = {"bke.mask_rasterize"};
125 
rotate_point_v2(float r_p[2],const float p[2],const float cent[2],const float angle,const float asp[2])126 static void rotate_point_v2(
127     float r_p[2], const float p[2], const float cent[2], const float angle, const float asp[2])
128 {
129   const float s = sinf(angle);
130   const float c = cosf(angle);
131   float p_new[2];
132 
133   /* translate point back to origin */
134   r_p[0] = (p[0] - cent[0]) / asp[0];
135   r_p[1] = (p[1] - cent[1]) / asp[1];
136 
137   /* rotate point */
138   p_new[0] = ((r_p[0] * c) - (r_p[1] * s)) * asp[0];
139   p_new[1] = ((r_p[0] * s) + (r_p[1] * c)) * asp[1];
140 
141   /* translate point back */
142   r_p[0] = p_new[0] + cent[0];
143   r_p[1] = p_new[1] + cent[1];
144 }
145 
clampis_uint(const unsigned int v,const unsigned int min,const unsigned int max)146 BLI_INLINE unsigned int clampis_uint(const unsigned int v,
147                                      const unsigned int min,
148                                      const unsigned int max)
149 {
150   return v < min ? min : (v > max ? max : v);
151 }
152 
153 /* --------------------------------------------------------------------- */
154 /* local structs for mask rasterizeing                                   */
155 /* --------------------------------------------------------------------- */
156 
157 /**
158  * A single #MaskRasterHandle contains multiple #MaskRasterLayer's,
159  * each #MaskRasterLayer does its own lookup which contributes to
160  * the final pixel with its own blending mode and the final pixel
161  * is blended between these.
162  */
163 
/* internal use only */
typedef struct MaskRasterLayer {
  /* geometry */
  unsigned int face_tot;         /* number of faces in 'face_array' */
  unsigned int (*face_array)[4]; /* access coords tri/quad; 4th index is #TRI_VERT for triangles */
  float (*face_coords)[3];       /* xy, z 0-1 (1.0 == filled) */

  /* 2d bounds (to quickly skip bucket lookup) */
  rctf bounds;

  /* buckets: per cell, a #TRI_TERMINATOR_ID terminated array of face indices, or NULL */
  unsigned int **buckets_face;
  /* cache divide and subtract */
  float buckets_xy_scalar[2]; /* (1.0 / (buckets_width + FLT_EPSILON)) * buckets_x */
  unsigned int buckets_x; /* bucket grid cells in X */
  unsigned int buckets_y; /* bucket grid cells in Y */

  /* copied direct from #MaskLayer.--- */
  /* blending options */
  float alpha;     /* overall layer opacity */
  char blend;      /* blend mode, copied from #MaskLayer (see DNA_mask_types.h) */
  char blend_flag; /* blend flags, copied from #MaskLayer */
  char falloff;    /* feather falloff curve type, copied from #MaskLayer */

} MaskRasterLayer;
189 
/** Vertex ranges of one open (unfilled) spline inside a layer's vertex arrays. */
typedef struct MaskRasterSplineInfo {
  /* body of the spline */
  unsigned int vertex_offset; /* index of the spline's first vertex */
  unsigned int vertex_total;  /* number of differentiated points in the spline body */

  /* capping for non-filled, non cyclic splines (initialized to 0, only set when not cyclic) */
  unsigned int vertex_total_cap_head;
  unsigned int vertex_total_cap_tail;

  bool is_cyclic; /* true when the spline wraps around, so no end caps are needed */
} MaskRasterSplineInfo;
201 
/**
 * Opaque struct for mask pixel lookup, created from a #Mask data-block.
 * Holds one #MaskRasterLayer per mask-layer (layers hidden from render are skipped).
 */
struct MaskRasterHandle {
  MaskRasterLayer *layers;  /* array of 'layers_tot' layers */
  unsigned int layers_tot;

  /* 2d bounds (to quickly skip bucket lookup) */
  rctf bounds;
};
212 
213 /* --------------------------------------------------------------------- */
214 /* alloc / free functions                                                */
215 /* --------------------------------------------------------------------- */
216 
BKE_maskrasterize_handle_new(void)217 MaskRasterHandle *BKE_maskrasterize_handle_new(void)
218 {
219   MaskRasterHandle *mr_handle;
220 
221   mr_handle = MEM_callocN(sizeof(MaskRasterHandle), "MaskRasterHandle");
222 
223   return mr_handle;
224 }
225 
/**
 * Free \a mr_handle along with all geometry and bucket data owned by its layers.
 */
void BKE_maskrasterize_handle_free(MaskRasterHandle *mr_handle)
{
  MaskRasterLayer *layer = mr_handle->layers;

  for (uint layer_index = 0; layer_index < mr_handle->layers_tot; layer_index++, layer++) {
    if (layer->face_array) {
      MEM_freeN(layer->face_array);
    }

    if (layer->face_coords) {
      MEM_freeN(layer->face_coords);
    }

    if (layer->buckets_face) {
      /* Each non-empty bucket owns its own face-index array. */
      const uint bucket_tot = layer->buckets_x * layer->buckets_y;
      for (uint bucket_index = 0; bucket_index < bucket_tot; bucket_index++) {
        if (layer->buckets_face[bucket_index]) {
          MEM_freeN(layer->buckets_face[bucket_index]);
        }
      }
      MEM_freeN(layer->buckets_face);
    }
  }

  MEM_freeN(mr_handle->layers);
  MEM_freeN(mr_handle);
}
258 
/**
 * Push feather points outward from their spline points by \a ofs,
 * perpendicular to the averaged directions of the two adjacent spline segments
 * (the point list is treated as cyclic).
 *
 * Used to fake anti-aliasing by guaranteeing at least a single-pixel feather.
 *
 * \param diff_feather_points: feather coords, modified in place
 *   (same length as \a diff_points).
 * \param diff_points: differentiated spline coords (read only).
 * \param ofs: outward offset distance (typically one pixel).
 * \param do_test: when true, only move feather points that currently lie
 *   closer than \a ofs to their spline point, leaving wider feathers untouched.
 *
 * NOTE(review): the rolling window ('k_curr'/'k_next') lags one point behind
 * the write index 'k', so the offset direction comes from the previous point's
 * neighborhood — presumably acceptable for closed outlines; confirm if reworking.
 */
static void maskrasterize_spline_differentiate_point_outset(float (*diff_feather_points)[2],
                                                            float (*diff_points)[2],
                                                            const unsigned int tot_diff_point,
                                                            const float ofs,
                                                            const bool do_test)
{
  /* Start so 'k_curr' is the last point and 'k_next' wraps to the first. */
  unsigned int k_prev = tot_diff_point - 2;
  unsigned int k_curr = tot_diff_point - 1;
  unsigned int k_next = 0;

  unsigned int k;

  float d_prev[2]; /* normalized direction: previous -> current */
  float d_next[2]; /* normalized direction: current -> next */
  float d[2];      /* sum of both; offset is applied along its perpendicular */

  const float *co_prev;
  const float *co_curr;
  const float *co_next;

  const float ofs_squared = ofs * ofs;

  co_prev = diff_points[k_prev];
  co_curr = diff_points[k_curr];
  co_next = diff_points[k_next];

  /* precalc */
  sub_v2_v2v2(d_prev, co_prev, co_curr);
  normalize_v2(d_prev);

  for (k = 0; k < tot_diff_point; k++) {

    /* co_prev = diff_points[k_prev]; */ /* precalc */
    co_curr = diff_points[k_curr];
    co_next = diff_points[k_next];

    /* sub_v2_v2v2(d_prev, co_prev, co_curr); */ /* precalc */
    sub_v2_v2v2(d_next, co_curr, co_next);

    /* normalize_v2(d_prev); */ /* precalc */
    normalize_v2(d_next);

    /* Skip points whose feather already lies 'ofs' or further away
     * (only when 'do_test' is enabled). */
    if ((do_test == false) ||
        (len_squared_v2v2(diff_feather_points[k], diff_points[k]) < ofs_squared)) {

      add_v2_v2v2(d, d_prev, d_next);

      normalize_v2(d);

      /* (d[1], -d[0]) is perpendicular to 'd': push the feather outward. */
      diff_feather_points[k][0] = diff_points[k][0] + (d[1] * ofs);
      diff_feather_points[k][1] = diff_points[k][1] + (-d[0] * ofs);
    }

    /* use next iter */
    copy_v2_v2(d_prev, d_next);

    /* k_prev = k_curr; */ /* precalc */
    k_curr = k_next;
    k_next++;
  }
}
320 
321 /* this function is not exact, sometimes it returns false positives,
322  * the main point of it is to clear out _almost_ all bucket/face non-intersections,
323  * returning true in corner cases is ok but missing an intersection is NOT.
324  *
325  * method used
326  * - check if the center of the buckets bounding box is intersecting the face
327  * - if not get the max radius to a corner of the bucket and see how close we
328  *   are to any of the triangle edges.
329  */
layer_bucket_isect_test(const MaskRasterLayer * layer,unsigned int face_index,const unsigned int bucket_x,const unsigned int bucket_y,const float bucket_size_x,const float bucket_size_y,const float bucket_max_rad_squared)330 static bool layer_bucket_isect_test(const MaskRasterLayer *layer,
331                                     unsigned int face_index,
332                                     const unsigned int bucket_x,
333                                     const unsigned int bucket_y,
334                                     const float bucket_size_x,
335                                     const float bucket_size_y,
336                                     const float bucket_max_rad_squared)
337 {
338   unsigned int *face = layer->face_array[face_index];
339   float(*cos)[3] = layer->face_coords;
340 
341   const float xmin = layer->bounds.xmin + (bucket_size_x * (float)bucket_x);
342   const float ymin = layer->bounds.ymin + (bucket_size_y * (float)bucket_y);
343   const float xmax = xmin + bucket_size_x;
344   const float ymax = ymin + bucket_size_y;
345 
346   const float cent[2] = {(xmin + xmax) * 0.5f, (ymin + ymax) * 0.5f};
347 
348   if (face[3] == TRI_VERT) {
349     const float *v1 = cos[face[0]];
350     const float *v2 = cos[face[1]];
351     const float *v3 = cos[face[2]];
352 
353     if (isect_point_tri_v2(cent, v1, v2, v3)) {
354       return true;
355     }
356 
357     if ((dist_squared_to_line_segment_v2(cent, v1, v2) < bucket_max_rad_squared) ||
358         (dist_squared_to_line_segment_v2(cent, v2, v3) < bucket_max_rad_squared) ||
359         (dist_squared_to_line_segment_v2(cent, v3, v1) < bucket_max_rad_squared)) {
360       return true;
361     }
362 
363     // printf("skip tri\n");
364     return false;
365   }
366 
367   const float *v1 = cos[face[0]];
368   const float *v2 = cos[face[1]];
369   const float *v3 = cos[face[2]];
370   const float *v4 = cos[face[3]];
371 
372   if (isect_point_tri_v2(cent, v1, v2, v3)) {
373     return true;
374   }
375   if (isect_point_tri_v2(cent, v1, v3, v4)) {
376     return true;
377   }
378 
379   if ((dist_squared_to_line_segment_v2(cent, v1, v2) < bucket_max_rad_squared) ||
380       (dist_squared_to_line_segment_v2(cent, v2, v3) < bucket_max_rad_squared) ||
381       (dist_squared_to_line_segment_v2(cent, v3, v4) < bucket_max_rad_squared) ||
382       (dist_squared_to_line_segment_v2(cent, v4, v1) < bucket_max_rad_squared)) {
383     return true;
384   }
385 
386   // printf("skip quad\n");
387   return false;
388 }
389 
/**
 * Initialize \a layer as an empty layer: no geometry, no buckets,
 * and bounds that can never pass a lookup's bounds test.
 */
static void layer_bucket_init_dummy(MaskRasterLayer *layer)
{
  layer->face_tot = 0;
  layer->face_array = NULL;
  layer->face_coords = NULL;

  layer->buckets_face = NULL;
  layer->buckets_x = 0;
  layer->buckets_y = 0;
  layer->buckets_xy_scalar[0] = 0.0f;
  layer->buckets_xy_scalar[1] = 0.0f;

  /* Degenerate negative rect: any lookup is rejected immediately. */
  BLI_rctf_init(&layer->bounds, -1.0f, -1.0f, -1.0f, -1.0f);
}
406 
/**
 * Build the spatial bucket grid for \a layer: divide the layer bounds into
 * buckets_x * buckets_y cells and store, per cell, a #TRI_TERMINATOR_ID
 * terminated array of the indices of faces that (conservatively) intersect it.
 * Empty cells store NULL.
 *
 * \param pixel_size: size of a single pixel in mask space,
 *   used to target roughly #BUCKET_PIXELS_PER_CELL pixels per bucket cell.
 */
static void layer_bucket_init(MaskRasterLayer *layer, const float pixel_size)
{
  /* Arena only backs the temporary per-bucket linked lists built below. */
  MemArena *arena = BLI_memarena_new(MEM_SIZE_OPTIMAL(1 << 16), __func__);

  const float bucket_dim_x = BLI_rctf_size_x(&layer->bounds);
  const float bucket_dim_y = BLI_rctf_size_y(&layer->bounds);

  layer->buckets_x = (unsigned int)((bucket_dim_x / pixel_size) / (float)BUCKET_PIXELS_PER_CELL);
  layer->buckets_y = (unsigned int)((bucket_dim_y / pixel_size) / (float)BUCKET_PIXELS_PER_CELL);

  //      printf("bucket size %ux%u\n", layer->buckets_x, layer->buckets_y);

  CLAMP(layer->buckets_x, 8, 512);
  CLAMP(layer->buckets_y, 8, 512);

  /* Cached scalars so a pixel lookup maps x/y to a bucket with one multiply. */
  layer->buckets_xy_scalar[0] = (1.0f / (bucket_dim_x + FLT_EPSILON)) * (float)layer->buckets_x;
  layer->buckets_xy_scalar[1] = (1.0f / (bucket_dim_y + FLT_EPSILON)) * (float)layer->buckets_y;

  {
    /* width and height of each bucket */
    const float bucket_size_x = (bucket_dim_x + FLT_EPSILON) / (float)layer->buckets_x;
    const float bucket_size_y = (bucket_dim_y + FLT_EPSILON) / (float)layer->buckets_y;
    /* max distance from a bucket center to any of its corners */
    const float bucket_max_rad = (max_ff(bucket_size_x, bucket_size_y) * (float)M_SQRT2) +
                                 FLT_EPSILON;
    const float bucket_max_rad_squared = bucket_max_rad * bucket_max_rad;

    unsigned int *face = &layer->face_array[0][0];
    float(*cos)[3] = layer->face_coords;

    const unsigned int bucket_tot = layer->buckets_x * layer->buckets_y;
    /* Temporary per-bucket linked lists (arena allocated),
     * converted into flat arrays below for faster per-pixel access. */
    LinkNode **bucketstore = MEM_callocN(bucket_tot * sizeof(LinkNode *), __func__);
    unsigned int *bucketstore_tot = MEM_callocN(bucket_tot * sizeof(unsigned int), __func__);

    unsigned int face_index;

    for (face_index = 0; face_index < layer->face_tot; face_index++, face += 4) {
      float xmin;
      float xmax;
      float ymin;
      float ymax;

      /* Bounding box of the face (triangle or feather quad). */
      if (face[3] == TRI_VERT) {
        const float *v1 = cos[face[0]];
        const float *v2 = cos[face[1]];
        const float *v3 = cos[face[2]];

        xmin = min_ff(v1[0], min_ff(v2[0], v3[0]));
        xmax = max_ff(v1[0], max_ff(v2[0], v3[0]));
        ymin = min_ff(v1[1], min_ff(v2[1], v3[1]));
        ymax = max_ff(v1[1], max_ff(v2[1], v3[1]));
      }
      else {
        const float *v1 = cos[face[0]];
        const float *v2 = cos[face[1]];
        const float *v3 = cos[face[2]];
        const float *v4 = cos[face[3]];

        xmin = min_ff(v1[0], min_ff(v2[0], min_ff(v3[0], v4[0])));
        xmax = max_ff(v1[0], max_ff(v2[0], max_ff(v3[0], v4[0])));
        ymin = min_ff(v1[1], min_ff(v2[1], min_ff(v3[1], v4[1])));
        ymax = max_ff(v1[1], max_ff(v2[1], max_ff(v3[1], v4[1])));
      }

      /* not essential but may as well skip any faces outside the view */
      if (!((xmax < 0.0f) || (ymax < 0.0f) || (xmin > 1.0f) || (ymin > 1.0f))) {

        CLAMP(xmin, 0.0f, 1.0f);
        CLAMP(ymin, 0.0f, 1.0f);
        CLAMP(xmax, 0.0f, 1.0f);
        CLAMP(ymax, 0.0f, 1.0f);

        {
          /* Bucket index range covered by the face's bounding box. */
          unsigned int xi_min = (unsigned int)((xmin - layer->bounds.xmin) *
                                               layer->buckets_xy_scalar[0]);
          unsigned int xi_max = (unsigned int)((xmax - layer->bounds.xmin) *
                                               layer->buckets_xy_scalar[0]);
          unsigned int yi_min = (unsigned int)((ymin - layer->bounds.ymin) *
                                               layer->buckets_xy_scalar[1]);
          unsigned int yi_max = (unsigned int)((ymax - layer->bounds.ymin) *
                                               layer->buckets_xy_scalar[1]);
          void *face_index_void = POINTER_FROM_UINT(face_index);

          unsigned int xi, yi;

          /* this should _almost_ never happen but since it can in extreme cases,
           * we have to clamp the values or we overrun the buffer and crash */
          if (xi_min >= layer->buckets_x) {
            xi_min = layer->buckets_x - 1;
          }
          if (xi_max >= layer->buckets_x) {
            xi_max = layer->buckets_x - 1;
          }
          if (yi_min >= layer->buckets_y) {
            yi_min = layer->buckets_y - 1;
          }
          if (yi_max >= layer->buckets_y) {
            yi_max = layer->buckets_y - 1;
          }

          for (yi = yi_min; yi <= yi_max; yi++) {
            unsigned int bucket_index = (layer->buckets_x * yi) + xi_min;
            for (xi = xi_min; xi <= xi_max; xi++, bucket_index++) {
              /* correct but do in outer loop */
              // unsigned int bucket_index = (layer->buckets_x * yi) + xi;

              BLI_assert(xi < layer->buckets_x);
              BLI_assert(yi < layer->buckets_y);
              BLI_assert(bucket_index < bucket_tot);

              /* Check if the bucket intersects with the face. */
              /* Note: there is a trade off here since checking box/tri intersections isn't as
               * optimal as it could be, but checking pixels against faces they will never
               * intersect with is likely the greater slowdown here -
               * so check if the cell intersects the face. */
              if (layer_bucket_isect_test(layer,
                                          face_index,
                                          xi,
                                          yi,
                                          bucket_size_x,
                                          bucket_size_y,
                                          bucket_max_rad_squared)) {
                BLI_linklist_prepend_arena(&bucketstore[bucket_index], face_index_void, arena);
                bucketstore_tot[bucket_index]++;
              }
            }
          }
        }
      }
    }

    if (1) {
      /* now convert linknodes into arrays for faster per pixel access */
      unsigned int **buckets_face = MEM_mallocN(bucket_tot * sizeof(*buckets_face), __func__);
      unsigned int bucket_index;

      for (bucket_index = 0; bucket_index < bucket_tot; bucket_index++) {
        if (bucketstore_tot[bucket_index]) {
          /* one extra slot for the TRI_TERMINATOR_ID sentinel */
          unsigned int *bucket = MEM_mallocN(
              (bucketstore_tot[bucket_index] + 1) * sizeof(unsigned int), __func__);
          LinkNode *bucket_node;

          buckets_face[bucket_index] = bucket;

          for (bucket_node = bucketstore[bucket_index]; bucket_node;
               bucket_node = bucket_node->next) {
            *bucket = POINTER_AS_UINT(bucket_node->link);
            bucket++;
          }
          *bucket = TRI_TERMINATOR_ID;
        }
        else {
          buckets_face[bucket_index] = NULL;
        }
      }

      layer->buckets_face = buckets_face;
    }

    MEM_freeN(bucketstore);
    MEM_freeN(bucketstore_tot);
  }

  BLI_memarena_free(arena);
}
571 
BKE_maskrasterize_handle_init(MaskRasterHandle * mr_handle,struct Mask * mask,const int width,const int height,const bool do_aspect_correct,const bool do_mask_aa,const bool do_feather)572 void BKE_maskrasterize_handle_init(MaskRasterHandle *mr_handle,
573                                    struct Mask *mask,
574                                    const int width,
575                                    const int height,
576                                    const bool do_aspect_correct,
577                                    const bool do_mask_aa,
578                                    const bool do_feather)
579 {
580   const rctf default_bounds = {0.0f, 1.0f, 0.0f, 1.0f};
581   const float pixel_size = 1.0f / (float)min_ii(width, height);
582   const float asp_xy[2] = {
583       (do_aspect_correct && width > height) ? (float)height / (float)width : 1.0f,
584       (do_aspect_correct && width < height) ? (float)width / (float)height : 1.0f};
585 
586   const float zvec[3] = {0.0f, 0.0f, -1.0f};
587   MaskLayer *masklay;
588   unsigned int masklay_index;
589   MemArena *sf_arena;
590 
591   mr_handle->layers_tot = (unsigned int)BLI_listbase_count(&mask->masklayers);
592   mr_handle->layers = MEM_mallocN(sizeof(MaskRasterLayer) * mr_handle->layers_tot,
593                                   "MaskRasterLayer");
594   BLI_rctf_init_minmax(&mr_handle->bounds);
595 
596   sf_arena = BLI_memarena_new(BLI_SCANFILL_ARENA_SIZE, __func__);
597 
598   for (masklay = mask->masklayers.first, masklay_index = 0; masklay;
599        masklay = masklay->next, masklay_index++) {
600 
601     /* we need to store vertex ranges for open splines for filling */
602     unsigned int tot_splines;
603     MaskRasterSplineInfo *open_spline_ranges;
604     unsigned int open_spline_index = 0;
605 
606     MaskSpline *spline;
607 
608     /* scanfill */
609     ScanFillContext sf_ctx;
610     ScanFillVert *sf_vert = NULL;
611     ScanFillVert *sf_vert_next = NULL;
612     ScanFillFace *sf_tri;
613 
614     unsigned int sf_vert_tot = 0;
615     unsigned int tot_feather_quads = 0;
616 
617 #ifdef USE_SCANFILL_EDGE_WORKAROUND
618     unsigned int tot_boundary_used = 0;
619     unsigned int tot_boundary_found = 0;
620 #endif
621 
622     if (masklay->restrictflag & MASK_RESTRICT_RENDER) {
623       /* skip the layer */
624       mr_handle->layers_tot--;
625       masklay_index--;
626       continue;
627     }
628 
629     tot_splines = (unsigned int)BLI_listbase_count(&masklay->splines);
630     open_spline_ranges = MEM_callocN(sizeof(*open_spline_ranges) * tot_splines, __func__);
631 
632     BLI_scanfill_begin_arena(&sf_ctx, sf_arena);
633 
634     for (spline = masklay->splines.first; spline; spline = spline->next) {
635       const bool is_cyclic = (spline->flag & MASK_SPLINE_CYCLIC) != 0;
636       const bool is_fill = (spline->flag & MASK_SPLINE_NOFILL) == 0;
637 
638       float(*diff_points)[2];
639       unsigned int tot_diff_point;
640 
641       float(*diff_feather_points)[2];
642       float(*diff_feather_points_flip)[2];
643       unsigned int tot_diff_feather_points;
644 
645       const unsigned int resol_a = BKE_mask_spline_resolution(spline, width, height) / 4;
646       const unsigned int resol_b = BKE_mask_spline_feather_resolution(spline, width, height) / 4;
647       const unsigned int resol = CLAMPIS(MAX2(resol_a, resol_b), 4, 512);
648 
649       diff_points = BKE_mask_spline_differentiate_with_resolution(spline, &tot_diff_point, resol);
650 
651       if (do_feather) {
652         diff_feather_points = BKE_mask_spline_feather_differentiated_points_with_resolution(
653             spline, &tot_diff_feather_points, resol, false);
654         BLI_assert(diff_feather_points);
655       }
656       else {
657         tot_diff_feather_points = 0;
658         diff_feather_points = NULL;
659       }
660 
661       if (tot_diff_point > 3) {
662         ScanFillVert *sf_vert_prev;
663         unsigned int j;
664 
665         float co[3];
666         co[2] = 0.0f;
667 
668         sf_ctx.poly_nr++;
669 
670         if (do_aspect_correct) {
671           if (width != height) {
672             float *fp;
673             float *ffp;
674             float asp;
675 
676             if (width < height) {
677               fp = &diff_points[0][0];
678               ffp = tot_diff_feather_points ? &diff_feather_points[0][0] : NULL;
679               asp = (float)width / (float)height;
680             }
681             else {
682               fp = &diff_points[0][1];
683               ffp = tot_diff_feather_points ? &diff_feather_points[0][1] : NULL;
684               asp = (float)height / (float)width;
685             }
686 
687             for (uint i = 0; i < tot_diff_point; i++, fp += 2) {
688               (*fp) = (((*fp) - 0.5f) / asp) + 0.5f;
689             }
690 
691             if (tot_diff_feather_points) {
692               for (uint i = 0; i < tot_diff_feather_points; i++, ffp += 2) {
693                 (*ffp) = (((*ffp) - 0.5f) / asp) + 0.5f;
694               }
695             }
696           }
697         }
698 
699         /* fake aa, using small feather */
700         if (do_mask_aa == true) {
701           if (do_feather == false) {
702             tot_diff_feather_points = tot_diff_point;
703             diff_feather_points = MEM_mallocN(
704                 sizeof(*diff_feather_points) * (size_t)tot_diff_feather_points, __func__);
705             /* add single pixel feather */
706             maskrasterize_spline_differentiate_point_outset(
707                 diff_feather_points, diff_points, tot_diff_point, pixel_size, false);
708           }
709           else {
710             /* ensure single pixel feather, on any zero feather areas */
711             maskrasterize_spline_differentiate_point_outset(
712                 diff_feather_points, diff_points, tot_diff_point, pixel_size, true);
713           }
714         }
715 
716         if (is_fill) {
717           /* Apply intersections depending on fill settings. */
718           if (spline->flag & MASK_SPLINE_NOINTERSECT) {
719             BKE_mask_spline_feather_collapse_inner_loops(
720                 spline, diff_feather_points, tot_diff_feather_points);
721           }
722 
723           copy_v2_v2(co, diff_points[0]);
724           sf_vert_prev = BLI_scanfill_vert_add(&sf_ctx, co);
725           sf_vert_prev->tmp.u = sf_vert_tot;
726 
727           /* Absolute index of feather vert. */
728           sf_vert_prev->keyindex = sf_vert_tot + tot_diff_point;
729 
730           sf_vert_tot++;
731 
732           /* TODO, an alternate functions so we can avoid double vector copy! */
733           for (j = 1; j < tot_diff_point; j++) {
734             copy_v2_v2(co, diff_points[j]);
735             sf_vert = BLI_scanfill_vert_add(&sf_ctx, co);
736             sf_vert->tmp.u = sf_vert_tot;
737             sf_vert->keyindex = sf_vert_tot + tot_diff_point; /* absolute index of feather vert */
738             sf_vert_tot++;
739           }
740 
741           sf_vert = sf_vert_prev;
742           sf_vert_prev = sf_ctx.fillvertbase.last;
743 
744           for (j = 0; j < tot_diff_point; j++) {
745             ScanFillEdge *sf_edge = BLI_scanfill_edge_add(&sf_ctx, sf_vert_prev, sf_vert);
746 
747 #ifdef USE_SCANFILL_EDGE_WORKAROUND
748             if (diff_feather_points) {
749               sf_edge->tmp.c = SF_EDGE_IS_BOUNDARY;
750               tot_boundary_used++;
751             }
752 #else
753             (void)sf_edge;
754 #endif
755             sf_vert_prev = sf_vert;
756             sf_vert = sf_vert->next;
757           }
758 
759           if (diff_feather_points) {
760             float co_feather[3];
761             co_feather[2] = 1.0f;
762 
763             BLI_assert(tot_diff_feather_points == tot_diff_point);
764 
765             /* Note: only added for convenience, we don't in fact use these to scan-fill,
766              * only to create feather faces after scan-fill. */
767             for (j = 0; j < tot_diff_feather_points; j++) {
768               copy_v2_v2(co_feather, diff_feather_points[j]);
769               sf_vert = BLI_scanfill_vert_add(&sf_ctx, co_feather);
770               sf_vert->keyindex = SF_KEYINDEX_TEMP_ID;
771               sf_vert_tot++;
772             }
773 
774             tot_feather_quads += tot_diff_point;
775           }
776         }
777         else {
778           /* unfilled spline */
779           if (diff_feather_points) {
780 
781             float co_diff[2];
782 
783             float co_feather[3];
784             co_feather[2] = 1.0f;
785 
786             if (spline->flag & MASK_SPLINE_NOINTERSECT) {
787               diff_feather_points_flip = MEM_mallocN(sizeof(float[2]) * tot_diff_feather_points,
788                                                      "diff_feather_points_flip");
789 
790               for (j = 0; j < tot_diff_point; j++) {
791                 sub_v2_v2v2(co_diff, diff_points[j], diff_feather_points[j]);
792                 add_v2_v2v2(diff_feather_points_flip[j], diff_points[j], co_diff);
793               }
794 
795               BKE_mask_spline_feather_collapse_inner_loops(
796                   spline, diff_feather_points, tot_diff_feather_points);
797               BKE_mask_spline_feather_collapse_inner_loops(
798                   spline, diff_feather_points_flip, tot_diff_feather_points);
799             }
800             else {
801               diff_feather_points_flip = NULL;
802             }
803 
804             open_spline_ranges[open_spline_index].vertex_offset = sf_vert_tot;
805             open_spline_ranges[open_spline_index].vertex_total = tot_diff_point;
806 
807             /* TODO, an alternate functions so we can avoid double vector copy! */
808             for (j = 0; j < tot_diff_point; j++) {
809 
810               /* center vert */
811               copy_v2_v2(co, diff_points[j]);
812               sf_vert = BLI_scanfill_vert_add(&sf_ctx, co);
813               sf_vert->tmp.u = sf_vert_tot;
814               sf_vert->keyindex = SF_KEYINDEX_TEMP_ID;
815               sf_vert_tot++;
816 
817               /* feather vert A */
818               copy_v2_v2(co_feather, diff_feather_points[j]);
819               sf_vert = BLI_scanfill_vert_add(&sf_ctx, co_feather);
820               sf_vert->tmp.u = sf_vert_tot;
821               sf_vert->keyindex = SF_KEYINDEX_TEMP_ID;
822               sf_vert_tot++;
823 
824               /* feather vert B */
825               if (diff_feather_points_flip) {
826                 copy_v2_v2(co_feather, diff_feather_points_flip[j]);
827               }
828               else {
829                 sub_v2_v2v2(co_diff, co, co_feather);
830                 add_v2_v2v2(co_feather, co, co_diff);
831               }
832 
833               sf_vert = BLI_scanfill_vert_add(&sf_ctx, co_feather);
834               sf_vert->tmp.u = sf_vert_tot;
835               sf_vert->keyindex = SF_KEYINDEX_TEMP_ID;
836               sf_vert_tot++;
837 
838               tot_feather_quads += 2;
839             }
840 
841             if (!is_cyclic) {
842               tot_feather_quads -= 2;
843             }
844 
845             if (diff_feather_points_flip) {
846               MEM_freeN(diff_feather_points_flip);
847               diff_feather_points_flip = NULL;
848             }
849 
850             /* cap ends */
851 
852             /* dummy init value */
853             open_spline_ranges[open_spline_index].vertex_total_cap_head = 0;
854             open_spline_ranges[open_spline_index].vertex_total_cap_tail = 0;
855 
856             if (!is_cyclic) {
857               const float *fp_cent;
858               const float *fp_turn;
859 
860               unsigned int k;
861 
862               fp_cent = diff_points[0];
863               fp_turn = diff_feather_points[0];
864 
865 #define CALC_CAP_RESOL \
866   clampis_uint( \
867       (unsigned int)(len_v2v2(fp_cent, fp_turn) / (pixel_size * SPLINE_RESOL_CAP_PER_PIXEL)), \
868       SPLINE_RESOL_CAP_MIN, \
869       SPLINE_RESOL_CAP_MAX)
870 
871               {
872                 const unsigned int vertex_total_cap = CALC_CAP_RESOL;
873 
874                 for (k = 1; k < vertex_total_cap; k++) {
875                   const float angle = (float)k * (1.0f / (float)vertex_total_cap) * (float)M_PI;
876                   rotate_point_v2(co_feather, fp_turn, fp_cent, angle, asp_xy);
877 
878                   sf_vert = BLI_scanfill_vert_add(&sf_ctx, co_feather);
879                   sf_vert->tmp.u = sf_vert_tot;
880                   sf_vert->keyindex = SF_KEYINDEX_TEMP_ID;
881                   sf_vert_tot++;
882                 }
883                 tot_feather_quads += vertex_total_cap;
884 
885                 open_spline_ranges[open_spline_index].vertex_total_cap_head = vertex_total_cap;
886               }
887 
888               fp_cent = diff_points[tot_diff_point - 1];
889               fp_turn = diff_feather_points[tot_diff_point - 1];
890 
891               {
892                 const unsigned int vertex_total_cap = CALC_CAP_RESOL;
893 
894                 for (k = 1; k < vertex_total_cap; k++) {
895                   const float angle = (float)k * (1.0f / (float)vertex_total_cap) * (float)M_PI;
896                   rotate_point_v2(co_feather, fp_turn, fp_cent, -angle, asp_xy);
897 
898                   sf_vert = BLI_scanfill_vert_add(&sf_ctx, co_feather);
899                   sf_vert->tmp.u = sf_vert_tot;
900                   sf_vert->keyindex = SF_KEYINDEX_TEMP_ID;
901                   sf_vert_tot++;
902                 }
903                 tot_feather_quads += vertex_total_cap;
904 
905                 open_spline_ranges[open_spline_index].vertex_total_cap_tail = vertex_total_cap;
906               }
907             }
908 
909             open_spline_ranges[open_spline_index].is_cyclic = is_cyclic;
910             open_spline_index++;
911 
912 #undef CALC_CAP_RESOL
913             /* end capping */
914           }
915         }
916       }
917 
918       if (diff_points) {
919         MEM_freeN(diff_points);
920       }
921 
922       if (diff_feather_points) {
923         MEM_freeN(diff_feather_points);
924       }
925     }
926 
927     {
928       unsigned int(*face_array)[4], *face; /* access coords */
929       float(*face_coords)[3], *cos;        /* xy, z 0-1 (1.0 == filled) */
930       unsigned int sf_tri_tot;
931       rctf bounds;
932       unsigned int face_index;
933       int scanfill_flag = 0;
934 
935       bool is_isect = false;
936       ListBase isect_remvertbase = {NULL, NULL};
937       ListBase isect_remedgebase = {NULL, NULL};
938 
939       /* now we have all the splines */
940       face_coords = MEM_mallocN((sizeof(float[3])) * sf_vert_tot, "maskrast_face_coords");
941 
942       /* init bounds */
943       BLI_rctf_init_minmax(&bounds);
944 
945       /* coords */
946       cos = (float *)face_coords;
947       for (sf_vert = sf_ctx.fillvertbase.first; sf_vert; sf_vert = sf_vert_next) {
948         sf_vert_next = sf_vert->next;
949         copy_v3_v3(cos, sf_vert->co);
950 
951         /* remove so as not to interfere with fill (called after) */
952         if (sf_vert->keyindex == SF_KEYINDEX_TEMP_ID) {
953           BLI_remlink(&sf_ctx.fillvertbase, sf_vert);
954         }
955 
956         /* bounds */
957         BLI_rctf_do_minmax_v(&bounds, cos);
958 
959         cos += 3;
960       }
961 
962       /* --- inefficient self-intersect case --- */
      /* If self-intersections are found, it's too tricky to attempt to map vertices,
       * so just realloc and add entirely new vertices - the result of the self-intersect check.
       */
966       if ((masklay->flag & MASK_LAYERFLAG_FILL_OVERLAP) &&
967           (is_isect = BLI_scanfill_calc_self_isect(
968                &sf_ctx, &isect_remvertbase, &isect_remedgebase))) {
969         unsigned int sf_vert_tot_isect = (unsigned int)BLI_listbase_count(&sf_ctx.fillvertbase);
970         unsigned int i = sf_vert_tot;
971 
972         face_coords = MEM_reallocN(face_coords,
973                                    sizeof(float[3]) * (sf_vert_tot + sf_vert_tot_isect));
974 
975         cos = (float *)&face_coords[sf_vert_tot][0];
976 
977         for (sf_vert = sf_ctx.fillvertbase.first; sf_vert; sf_vert = sf_vert->next) {
978           copy_v3_v3(cos, sf_vert->co);
979           sf_vert->tmp.u = i++;
980           cos += 3;
981         }
982 
983         sf_vert_tot += sf_vert_tot_isect;
984 
985         /* we need to calc polys after self intersect */
986         scanfill_flag |= BLI_SCANFILL_CALC_POLYS;
987       }
988       /* --- end inefficient code --- */
989 
990       /* main scan-fill */
991       if ((masklay->flag & MASK_LAYERFLAG_FILL_DISCRETE) == 0) {
992         scanfill_flag |= BLI_SCANFILL_CALC_HOLES;
993       }
994 
995       sf_tri_tot = (unsigned int)BLI_scanfill_calc_ex(&sf_ctx, scanfill_flag, zvec);
996 
997       if (is_isect) {
998         /* add removed data back, we only need edges for feather,
999          * but add verts back so they get freed along with others */
1000         BLI_movelisttolist(&sf_ctx.fillvertbase, &isect_remvertbase);
1001         BLI_movelisttolist(&sf_ctx.filledgebase, &isect_remedgebase);
1002       }
1003 
1004       face_array = MEM_mallocN(sizeof(*face_array) *
1005                                    ((size_t)sf_tri_tot + (size_t)tot_feather_quads),
1006                                "maskrast_face_index");
1007       face_index = 0;
1008 
1009       /* faces */
1010       face = (unsigned int *)face_array;
1011       for (sf_tri = sf_ctx.fillfacebase.first; sf_tri; sf_tri = sf_tri->next) {
1012         *(face++) = sf_tri->v3->tmp.u;
1013         *(face++) = sf_tri->v2->tmp.u;
1014         *(face++) = sf_tri->v1->tmp.u;
1015         *(face++) = TRI_VERT;
1016         face_index++;
1017         FACE_ASSERT(face - 4, sf_vert_tot);
1018       }
1019 
1020       /* start of feather faces... if we have this set,
1021        * 'face_index' is kept from loop above */
1022 
1023       BLI_assert(face_index == sf_tri_tot);
1024 
1025       if (tot_feather_quads) {
1026         ScanFillEdge *sf_edge;
1027 
1028         for (sf_edge = sf_ctx.filledgebase.first; sf_edge; sf_edge = sf_edge->next) {
1029           if (sf_edge->tmp.c == SF_EDGE_IS_BOUNDARY) {
1030             *(face++) = sf_edge->v1->tmp.u;
1031             *(face++) = sf_edge->v2->tmp.u;
1032             *(face++) = sf_edge->v2->keyindex;
1033             *(face++) = sf_edge->v1->keyindex;
1034             face_index++;
1035             FACE_ASSERT(face - 4, sf_vert_tot);
1036 
1037 #ifdef USE_SCANFILL_EDGE_WORKAROUND
1038             tot_boundary_found++;
1039 #endif
1040           }
1041         }
1042       }
1043 
1044 #ifdef USE_SCANFILL_EDGE_WORKAROUND
1045       if (tot_boundary_found != tot_boundary_used) {
1046         BLI_assert(tot_boundary_found < tot_boundary_used);
1047       }
1048 #endif
1049 
1050       /* feather only splines */
1051       while (open_spline_index > 0) {
1052         const unsigned int vertex_offset = open_spline_ranges[--open_spline_index].vertex_offset;
1053         unsigned int vertex_total = open_spline_ranges[open_spline_index].vertex_total;
1054         unsigned int vertex_total_cap_head =
1055             open_spline_ranges[open_spline_index].vertex_total_cap_head;
1056         unsigned int vertex_total_cap_tail =
1057             open_spline_ranges[open_spline_index].vertex_total_cap_tail;
1058         unsigned int k, j;
1059 
1060         j = vertex_offset;
1061 
1062         /* subtract one since we reference next vertex triple */
1063         for (k = 0; k < vertex_total - 1; k++, j += 3) {
1064 
1065           BLI_assert(j == vertex_offset + (k * 3));
1066 
1067           *(face++) = j + 3; /* next span */ /* z 1 */
1068           *(face++) = j + 0;                 /* z 1 */
1069           *(face++) = j + 1;                 /* z 0 */
1070           *(face++) = j + 4; /* next span */ /* z 0 */
1071           face_index++;
1072           FACE_ASSERT(face - 4, sf_vert_tot);
1073 
1074           *(face++) = j + 0;                 /* z 1 */
1075           *(face++) = j + 3; /* next span */ /* z 1 */
1076           *(face++) = j + 5; /* next span */ /* z 0 */
1077           *(face++) = j + 2;                 /* z 0 */
1078           face_index++;
1079           FACE_ASSERT(face - 4, sf_vert_tot);
1080         }
1081 
1082         if (open_spline_ranges[open_spline_index].is_cyclic) {
1083           *(face++) = vertex_offset + 0; /* next span */ /* z 1 */
1084           *(face++) = j + 0;                             /* z 1 */
1085           *(face++) = j + 1;                             /* z 0 */
1086           *(face++) = vertex_offset + 1; /* next span */ /* z 0 */
1087           face_index++;
1088           FACE_ASSERT(face - 4, sf_vert_tot);
1089 
1090           *(face++) = j + 0;                             /* z 1 */
1091           *(face++) = vertex_offset + 0; /* next span */ /* z 1 */
1092           *(face++) = vertex_offset + 2; /* next span */ /* z 0 */
1093           *(face++) = j + 2;                             /* z 0 */
1094           face_index++;
1095           FACE_ASSERT(face - 4, sf_vert_tot);
1096         }
1097         else {
1098           unsigned int midvidx = vertex_offset;
1099 
1100           /***************
1101            * cap end 'a' */
1102           j = midvidx + (vertex_total * 3);
1103 
1104           for (k = 0; k < vertex_total_cap_head - 2; k++, j++) {
1105             *(face++) = midvidx + 0; /* z 1 */
1106             *(face++) = midvidx + 0; /* z 1 */
1107             *(face++) = j + 0;       /* z 0 */
1108             *(face++) = j + 1;       /* z 0 */
1109             face_index++;
1110             FACE_ASSERT(face - 4, sf_vert_tot);
1111           }
1112 
1113           j = vertex_offset + (vertex_total * 3);
1114 
1115           /* 2 tris that join the original */
1116           *(face++) = midvidx + 0; /* z 1 */
1117           *(face++) = midvidx + 0; /* z 1 */
1118           *(face++) = midvidx + 1; /* z 0 */
1119           *(face++) = j + 0;       /* z 0 */
1120           face_index++;
1121           FACE_ASSERT(face - 4, sf_vert_tot);
1122 
1123           *(face++) = midvidx + 0;                   /* z 1 */
1124           *(face++) = midvidx + 0;                   /* z 1 */
1125           *(face++) = j + vertex_total_cap_head - 2; /* z 0 */
1126           *(face++) = midvidx + 2;                   /* z 0 */
1127           face_index++;
1128           FACE_ASSERT(face - 4, sf_vert_tot);
1129 
1130           /***************
1131            * cap end 'b' */
1132           /* ... same as previous but v 2-3 flipped, and different initial offsets */
1133 
1134           j = vertex_offset + (vertex_total * 3) + (vertex_total_cap_head - 1);
1135 
1136           midvidx = vertex_offset + (vertex_total * 3) - 3;
1137 
1138           for (k = 0; k < vertex_total_cap_tail - 2; k++, j++) {
1139             *(face++) = midvidx; /* z 1 */
1140             *(face++) = midvidx; /* z 1 */
1141             *(face++) = j + 1;   /* z 0 */
1142             *(face++) = j + 0;   /* z 0 */
1143             face_index++;
1144             FACE_ASSERT(face - 4, sf_vert_tot);
1145           }
1146 
1147           j = vertex_offset + (vertex_total * 3) + (vertex_total_cap_head - 1);
1148 
1149           /* 2 tris that join the original */
1150           *(face++) = midvidx + 0; /* z 1 */
1151           *(face++) = midvidx + 0; /* z 1 */
1152           *(face++) = j + 0;       /* z 0 */
1153           *(face++) = midvidx + 1; /* z 0 */
1154           face_index++;
1155           FACE_ASSERT(face - 4, sf_vert_tot);
1156 
1157           *(face++) = midvidx + 0;                   /* z 1 */
1158           *(face++) = midvidx + 0;                   /* z 1 */
1159           *(face++) = midvidx + 2;                   /* z 0 */
1160           *(face++) = j + vertex_total_cap_tail - 2; /* z 0 */
1161           face_index++;
1162           FACE_ASSERT(face - 4, sf_vert_tot);
1163         }
1164       }
1165 
1166       MEM_freeN(open_spline_ranges);
1167 
1168 #if 0
1169       fprintf(stderr,
1170               "%u %u (%u %u), %u\n",
1171               face_index,
1172               sf_tri_tot + tot_feather_quads,
1173               sf_tri_tot,
1174               tot_feather_quads,
1175               tot_boundary_used - tot_boundary_found);
1176 #endif
1177 
1178 #ifdef USE_SCANFILL_EDGE_WORKAROUND
1179       BLI_assert(face_index + (tot_boundary_used - tot_boundary_found) ==
1180                  sf_tri_tot + tot_feather_quads);
1181 #else
1182       BLI_assert(face_index == sf_tri_tot + tot_feather_quads);
1183 #endif
1184       {
1185         MaskRasterLayer *layer = &mr_handle->layers[masklay_index];
1186 
1187         if (BLI_rctf_isect(&default_bounds, &bounds, &bounds)) {
1188 #ifdef USE_SCANFILL_EDGE_WORKAROUND
1189           layer->face_tot = (sf_tri_tot + tot_feather_quads) -
1190                             (tot_boundary_used - tot_boundary_found);
1191 #else
1192           layer->face_tot = (sf_tri_tot + tot_feather_quads);
1193 #endif
1194           layer->face_coords = face_coords;
1195           layer->face_array = face_array;
1196           layer->bounds = bounds;
1197 
1198           layer_bucket_init(layer, pixel_size);
1199 
1200           BLI_rctf_union(&mr_handle->bounds, &bounds);
1201         }
1202         else {
1203           MEM_freeN(face_coords);
1204           MEM_freeN(face_array);
1205 
1206           layer_bucket_init_dummy(layer);
1207         }
1208 
1209         /* copy as-is */
1210         layer->alpha = masklay->alpha;
1211         layer->blend = masklay->blend;
1212         layer->blend_flag = masklay->blend_flag;
1213         layer->falloff = masklay->falloff;
1214       }
1215 
1216       /* printf("tris %d, feather tris %d\n", sf_tri_tot, tot_feather_quads); */
1217     }
1218 
    /* add triangles */
1220     BLI_scanfill_end_arena(&sf_ctx, sf_arena);
1221   }
1222 
1223   BLI_memarena_free(sf_arena);
1224 }
1225 
1226 /* --------------------------------------------------------------------- */
1227 /* functions that run inside the sampling thread (keep fast!)            */
1228 /* --------------------------------------------------------------------- */
1229 
1230 /* 2D ray test */
1231 #if 0
1232 static float maskrasterize_layer_z_depth_tri(const float pt[2],
1233                                              const float v1[3],
1234                                              const float v2[3],
1235                                              const float v3[3])
1236 {
1237   float w[3];
1238   barycentric_weights_v2(v1, v2, v3, pt, w);
1239   return (v1[2] * w[0]) + (v2[2] * w[1]) + (v3[2] * w[2]);
1240 }
1241 #endif
1242 
/**
 * Interpolated depth of point \a pt inside the quad (v1..v4).
 *
 * Feather quads are built with their first two vertices at z == 0.0 and the
 * last two at z == 1.0 (see the `#if 0` note in #maskrasterize_layer_isect),
 * so the sum of the last two barycentric weights equals the full
 * z-weighted sum - a small speedup.
 */
static float maskrasterize_layer_z_depth_quad(
    const float pt[2], const float v1[3], const float v2[3], const float v3[3], const float v4[3])
{
  float weights[4];
  barycentric_weights_v2_quad(v1, v2, v3, v4, pt, weights);
  /* Equivalent to: (v1[2] * w[0]) + (v2[2] * w[1]) + (v3[2] * w[2]) + (v4[2] * w[3]). */
  return weights[2] + weights[3];
}
1251 
maskrasterize_layer_isect(const unsigned int * face,float (* cos)[3],const float dist_orig,const float xy[2])1252 static float maskrasterize_layer_isect(const unsigned int *face,
1253                                        float (*cos)[3],
1254                                        const float dist_orig,
1255                                        const float xy[2])
1256 {
1257   /* we always cast from same place only need xy */
1258   if (face[3] == TRI_VERT) {
1259     /* --- tri --- */
1260 
1261 #if 0
1262     /* not essential but avoids unneeded extra lookups */
1263     if ((cos[0][2] < dist_orig) || (cos[1][2] < dist_orig) || (cos[2][2] < dist_orig)) {
1264       if (isect_point_tri_v2_cw(xy, cos[face[0]], cos[face[1]], cos[face[2]])) {
1265         /* we know all tris are close for now */
1266         return maskrasterize_layer_z_depth_tri(xy, cos[face[0]], cos[face[1]], cos[face[2]]);
1267       }
1268     }
1269 #else
1270     /* we know all tris are close for now */
1271     if (isect_point_tri_v2_cw(xy, cos[face[0]], cos[face[1]], cos[face[2]])) {
1272       return 0.0f;
1273     }
1274 #endif
1275   }
1276   else {
1277     /* --- quad --- */
1278 
1279     /* not essential but avoids unneeded extra lookups */
1280     if ((cos[0][2] < dist_orig) || (cos[1][2] < dist_orig) || (cos[2][2] < dist_orig) ||
1281         (cos[3][2] < dist_orig)) {
1282 
1283       /* needs work */
1284 #if 1
1285       /* quad check fails for bow-tie, so keep using 2 tri checks */
1286       // if (isect_point_quad_v2(xy, cos[face[0]], cos[face[1]], cos[face[2]], cos[face[3]]))
1287       if (isect_point_tri_v2(xy, cos[face[0]], cos[face[1]], cos[face[2]]) ||
1288           isect_point_tri_v2(xy, cos[face[0]], cos[face[2]], cos[face[3]])) {
1289         return maskrasterize_layer_z_depth_quad(
1290             xy, cos[face[0]], cos[face[1]], cos[face[2]], cos[face[3]]);
1291       }
1292 #elif 1
1293       /* don't use isect_point_tri_v2_cw because we could have bow-tie quads */
1294 
1295       if (isect_point_tri_v2(xy, cos[face[0]], cos[face[1]], cos[face[2]])) {
1296         return maskrasterize_layer_z_depth_tri(xy, cos[face[0]], cos[face[1]], cos[face[2]]);
1297       }
1298       else if (isect_point_tri_v2(xy, cos[face[0]], cos[face[2]], cos[face[3]])) {
1299         return maskrasterize_layer_z_depth_tri(xy, cos[face[0]], cos[face[2]], cos[face[3]]);
1300       }
1301 #else
1302       /* cheat - we know first 2 verts are z0.0f and second 2 are z 1.0f */
1303       /* ... worth looking into */
1304 #endif
1305     }
1306   }
1307 
1308   return 1.0f;
1309 }
1310 
layer_bucket_index_from_xy(MaskRasterLayer * layer,const float xy[2])1311 BLI_INLINE unsigned int layer_bucket_index_from_xy(MaskRasterLayer *layer, const float xy[2])
1312 {
1313   BLI_assert(BLI_rctf_isect_pt_v(&layer->bounds, xy));
1314 
1315   return ((unsigned int)((xy[0] - layer->bounds.xmin) * layer->buckets_xy_scalar[0])) +
1316          (((unsigned int)((xy[1] - layer->bounds.ymin) * layer->buckets_xy_scalar[1])) *
1317           layer->buckets_x);
1318 }
1319 
/**
 * Smallest face depth at \a xy within its bucket, 1.0f when nothing is hit
 * (or when the bucket is empty).
 */
static float layer_bucket_depth_from_xy(MaskRasterLayer *layer, const float xy[2])
{
  const unsigned int bucket = layer_bucket_index_from_xy(layer, xy);
  unsigned int *face_index = layer->buckets_face[bucket];
  unsigned int(*face_array)[4];
  float(*cos)[3];
  float best_dist = 1.0f;

  if (face_index == NULL) {
    /* bucket has no faces at all */
    return 1.0f;
  }

  face_array = layer->face_array;
  cos = layer->face_coords;

  for (; *face_index != TRI_TERMINATOR_ID; face_index++) {
    const float test_dist = maskrasterize_layer_isect(face_array[*face_index], cos, best_dist, xy);
    if (test_dist < best_dist) {
      best_dist = test_dist;
      /* comparing with 0.0f is OK here because triangles are always zero depth */
      if (best_dist == 0.0f) {
        /* bail early, we're as close as possible */
        return 0.0f;
      }
    }
  }

  return best_dist;
}
1347 
/**
 * Sample the mask value at position \a xy.
 *
 * For every layer: ray-cast the layer's geometry for a coverage value,
 * apply the layer's falloff curve and alpha, then blend into the running
 * result using the layer's blend mode, clamping to [0, 1] after each layer.
 *
 * \param mr_handle: an initialized rasterization handle.
 * \param xy: position to sample (same coordinate space as the layer bounds).
 * \return the accumulated mask value in [0.0, 1.0].
 */
float BKE_maskrasterize_handle_sample(MaskRasterHandle *mr_handle, const float xy[2])
{
  /* can't do this because some layers may invert */
  /* if (BLI_rctf_isect_pt_v(&mr_handle->bounds, xy)) */

  const unsigned int layers_tot = mr_handle->layers_tot;
  MaskRasterLayer *layer = mr_handle->layers;

  /* return value */
  float value = 0.0f;

  for (uint i = 0; i < layers_tot; i++, layer++) {
    float value_layer;

    /* also used as signal for unused layer (when render is disabled) */
    if (layer->alpha != 0.0f && BLI_rctf_isect_pt_v(&layer->bounds, xy)) {
      /* depth 0.0 == fully inside the mask, so invert to get coverage */
      value_layer = 1.0f - layer_bucket_depth_from_xy(layer, xy);

      /* remap linear coverage through the layer's falloff curve */
      switch (layer->falloff) {
        case PROP_SMOOTH:
          /* ease - gives less hard lines for dilate/erode feather */
          value_layer = (3.0f * value_layer * value_layer -
                         2.0f * value_layer * value_layer * value_layer);
          break;
        case PROP_SPHERE:
          value_layer = sqrtf(2.0f * value_layer - value_layer * value_layer);
          break;
        case PROP_ROOT:
          value_layer = sqrtf(value_layer);
          break;
        case PROP_SHARP:
          value_layer = value_layer * value_layer;
          break;
        case PROP_INVSQUARE:
          value_layer = value_layer * (2.0f - value_layer);
          break;
        case PROP_LIN:
        default:
          /* nothing */
          break;
      }

      /* MASK_BLEND_REPLACE applies alpha itself in the blend step below */
      if (layer->blend != MASK_BLEND_REPLACE) {
        value_layer *= layer->alpha;
      }
    }
    else {
      value_layer = 0.0f;
    }

    /* note: inversion applies even when the point is outside the layer bounds */
    if (layer->blend_flag & MASK_BLENDFLAG_INVERT) {
      value_layer = 1.0f - value_layer;
    }

    /* blend this layer's value into the accumulated result */
    switch (layer->blend) {
      case MASK_BLEND_MERGE_ADD:
        value += value_layer * (1.0f - value);
        break;
      case MASK_BLEND_MERGE_SUBTRACT:
        value -= value_layer * value;
        break;
      case MASK_BLEND_ADD:
        value += value_layer;
        break;
      case MASK_BLEND_SUBTRACT:
        value -= value_layer;
        break;
      case MASK_BLEND_LIGHTEN:
        value = max_ff(value, value_layer);
        break;
      case MASK_BLEND_DARKEN:
        value = min_ff(value, value_layer);
        break;
      case MASK_BLEND_MUL:
        value *= value_layer;
        break;
      case MASK_BLEND_REPLACE:
        value = (value * (1.0f - layer->alpha)) + (value_layer * layer->alpha);
        break;
      case MASK_BLEND_DIFFERENCE:
        value = fabsf(value - value_layer);
        break;
      default: /* same as add */
        CLOG_ERROR(&LOG, "unhandled blend type: %d", layer->blend);
        BLI_assert(0);
        value += value_layer;
        break;
    }

    /* clamp after applying each layer so we don't get
     * issues subtracting after accumulating over 1.0f */
    CLAMP(value, 0.0f, 1.0f);
  }

  return value;
}
1444 
/** Shared, read-only task data for #maskrasterize_buffer_cb (one row per task). */
typedef struct MaskRasterizeBufferData {
  MaskRasterHandle *mr_handle;
  /* 1.0 / width and 1.0 / height: pixel index to 0-1 coordinate scale. */
  float x_inv, y_inv;
  /* Half a pixel (x_inv / 2, y_inv / 2), so samples land on pixel centers. */
  float x_px_ofs, y_px_ofs;
  uint width;

  /* Output buffer, one float per pixel. */
  float *buffer;
} MaskRasterizeBufferData;
1453 
maskrasterize_buffer_cb(void * __restrict userdata,const int y,const TaskParallelTLS * __restrict UNUSED (tls))1454 static void maskrasterize_buffer_cb(void *__restrict userdata,
1455                                     const int y,
1456                                     const TaskParallelTLS *__restrict UNUSED(tls))
1457 {
1458   MaskRasterizeBufferData *data = userdata;
1459 
1460   MaskRasterHandle *mr_handle = data->mr_handle;
1461   float *buffer = data->buffer;
1462 
1463   const uint width = data->width;
1464   const float x_inv = data->x_inv;
1465   const float x_px_ofs = data->x_px_ofs;
1466 
1467   uint i = (uint)y * width;
1468   float xy[2];
1469   xy[1] = ((float)y * data->y_inv) + data->y_px_ofs;
1470   for (uint x = 0; x < width; x++, i++) {
1471     xy[0] = ((float)x * x_inv) + x_px_ofs;
1472 
1473     buffer[i] = BKE_maskrasterize_handle_sample(mr_handle, xy);
1474   }
1475 }
1476 
1477 /**
1478  * \brief Rasterize a buffer from a single mask (threaded execution).
1479  */
BKE_maskrasterize_buffer(MaskRasterHandle * mr_handle,const unsigned int width,const unsigned int height,float * buffer)1480 void BKE_maskrasterize_buffer(MaskRasterHandle *mr_handle,
1481                               const unsigned int width,
1482                               const unsigned int height,
1483                               /* Cannot be const, because it is assigned to non-const variable.
1484                                * NOLINTNEXTLINE: readability-non-const-parameter. */
1485                               float *buffer)
1486 {
1487   const float x_inv = 1.0f / (float)width;
1488   const float y_inv = 1.0f / (float)height;
1489 
1490   MaskRasterizeBufferData data = {
1491       .mr_handle = mr_handle,
1492       .x_inv = x_inv,
1493       .y_inv = y_inv,
1494       .x_px_ofs = x_inv * 0.5f,
1495       .y_px_ofs = y_inv * 0.5f,
1496       .width = width,
1497       .buffer = buffer,
1498   };
1499   TaskParallelSettings settings;
1500   BLI_parallel_range_settings_defaults(&settings);
1501   settings.use_threading = ((size_t)height * width > 10000);
1502   BLI_task_parallel_range(0, (int)height, &data, maskrasterize_buffer_cb, &settings);
1503 }
1504