1 /*
2  *  This file is part of RawTherapee.
3  *
4  *  Copyright (c) 2017-2018 Ingo Weyrich <heckflosse67@gmx.de>
5  *
6  *  RawTherapee is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  RawTherapee is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with RawTherapee.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <algorithm>
21 #include <cassert>
22 #include <cmath>
23 #include <cstddef>
24 #include <cstdint>
25 #include <vector>
26 #include <algorithm>
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30 
31 #include "gauss.h"
32 #include "opthelper.h"
33 #include "rt_algo.h"
34 #include "rt_math.h"
35 #include "sleef.h"
36 
37 namespace {
calcBlendFactor(float val,float threshold)38 float calcBlendFactor(float val, float threshold) {
39     // sigmoid function
40     // result is in ]0;1] range
41     // inflexion point is at (x, y) (threshold, 0.5)
42     return 1.f / (1.f + xexpf(16.f - 16.f * val / threshold));
43 }
44 
45 #ifdef __SSE2__
calcBlendFactor(vfloat valv,vfloat thresholdv)46 vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) {
47     // sigmoid function
48     // result is in ]0;1] range
49     // inflexion point is at (x, y) (threshold, 0.5)
50     const vfloat onev = F2V(1.f);
51     const vfloat c16v = F2V(16.f);
52     return onev / (onev + xexpf(c16v - c16v * valv / thresholdv));
53 }
54 #endif
55 
tileAverage(float ** data,size_t tileY,size_t tileX,size_t tilesize)56 float tileAverage(float **data, size_t tileY, size_t tileX, size_t tilesize) {
57 
58     float avg = 0.f;
59 #ifdef __SSE2__
60     vfloat avgv = ZEROV;
61 #endif
62     for (std::size_t y = tileY; y < tileY + tilesize; ++y) {
63         std::size_t x = tileX;
64 #ifdef __SSE2__
65         for (; x < tileX + tilesize - 3; x += 4) {
66             avgv += LVFU(data[y][x]);
67         }
68 #endif
69         for (; x < tileX + tilesize; ++x) {
70             avg += data[y][x];
71         }
72     }
73 #ifdef __SSE2__
74     avg += vhadd(avgv);
75 #endif
76     return avg / rtengine::SQR(tilesize);
77 }
78 
tileVariance(float ** data,size_t tileY,size_t tileX,size_t tilesize,float avg)79 float tileVariance(float **data, size_t tileY, size_t tileX, size_t tilesize, float avg) {
80 
81     float var = 0.f;
82 #ifdef __SSE2__
83     vfloat varv = ZEROV;
84     const vfloat avgv = F2V(avg);
85 #endif
86     for (std::size_t y = tileY; y < tileY + tilesize; ++y) {
87         std::size_t x = tileX;
88 #ifdef __SSE2__
89         for (; x < tileX + tilesize - 3; x += 4) {
90             varv += SQRV(LVFU(data[y][x]) - avgv);
91         }
92 #endif
93         for (; x < tileX + tilesize; ++x) {
94             var += rtengine::SQR(data[y][x] - avg);
95         }
96     }
97 #ifdef __SSE2__
98     var += vhadd(varv);
99 #endif
100     return var / (rtengine::SQR(tilesize) * avg);
101 }
102 
calcContrastThreshold(float ** luminance,int tileY,int tileX,int tilesize,float factor)103 float calcContrastThreshold(float** luminance, int tileY, int tileX, int tilesize, float factor) {
104 
105     const float scale = 0.0625f / 327.68f * factor;
106     std::vector<std::vector<float>> blend(tilesize - 4, std::vector<float>(tilesize - 4));
107 
108 #ifdef __SSE2__
109     const vfloat scalev = F2V(scale);
110 #endif
111 
112     for(int j = tileY + 2; j < tileY + tilesize - 2; ++j) {
113         int i = tileX + 2;
114 #ifdef __SSE2__
115         for(; i < tileX + tilesize - 5; i += 4) {
116             vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) +
117                                       SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev;
118             STVFU(blend[j - tileY - 2][i - tileX - 2], contrastv);
119         }
120 #endif
121         for(; i < tileX + tilesize - 2; ++i) {
122 
123             float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) +
124                                    rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale;
125 
126             blend[j - tileY - 2][i - tileX - 2] = contrast;
127         }
128     }
129 
130     const float limit = rtengine::SQR(tilesize - 4) / 100.f;
131 
132     int c;
133     for (c = 1; c < 100; ++c) {
134         const float contrastThreshold = c / 100.f;
135         float sum = 0.f;
136 #ifdef __SSE2__
137         const vfloat contrastThresholdv = F2V(contrastThreshold);
138         vfloat sumv = ZEROV;
139 #endif
140 
141         for(int j = 0; j < tilesize - 4; ++j) {
142             int i = 0;
143 #ifdef __SSE2__
144             for(; i < tilesize - 7; i += 4) {
145                 sumv += calcBlendFactor(LVFU(blend[j][i]), contrastThresholdv);
146             }
147 #endif
148             for(; i < tilesize - 4; ++i) {
149                 sum += calcBlendFactor(blend[j][i], contrastThreshold);
150             }
151         }
152 #ifdef __SSE2__
153         sum += vhadd(sumv);
154 #endif
155         if (sum <= limit) {
156             break;
157         }
158     }
159 
160     return c / 100.f;
161 }
162 }
163 
164 namespace rtengine {
165 
findMinMaxPercentile(const float * data,size_t size,float minPrct,float & minOut,float maxPrct,float & maxOut,bool multithread)166 void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multithread)
167 {
168     // Copyright (c) 2017 Ingo Weyrich <heckflosse67@gmx.de>
169     // We need to find the (minPrct*size) smallest value and the (maxPrct*size) smallest value in data.
170     // We use a histogram based search for speed and to reduce memory usage.
171     // Memory usage of this method is histoSize * sizeof(uint32_t) * (t + 1) byte,
172     // where t is the number of threads and histoSize is in [1;65536].
173     // Processing time is O(n) where n is size of the input array.
174     // It scales well with multiple threads if the size of the input array is large.
175     // The current implementation is not guaranteed to work correctly if size > 2^32 (4294967296).
176 
177     assert(minPrct <= maxPrct);
178 
179     if (size == 0) {
180         return;
181     }
182 
183     size_t numThreads = 1;
184 #ifdef _OPENMP
185     // Because we have an overhead in the critical region of the main loop for each thread
186     // we make a rough calculation to reduce the number of threads for small data size.
187     // This also works fine for the minmax loop.
188     if (multithread) {
189         const size_t maxThreads = omp_get_max_threads();
190         while (size > numThreads * numThreads * 16384 && numThreads < maxThreads) {
191             ++numThreads;
192         }
193     }
194 #endif
195 
196     // We need min and max value of data to calculate the scale factor for the histogram
197     float minVal = data[0];
198     float maxVal = data[0];
199 #ifdef _OPENMP
200     #pragma omp parallel for reduction(min:minVal) reduction(max:maxVal) num_threads(numThreads)
201 #endif
202     for (size_t i = 1; i < size; ++i) {
203         minVal = std::min(minVal, data[i]);
204         maxVal = std::max(maxVal, data[i]);
205     }
206 
207     if (std::fabs(maxVal - minVal) == 0.f) { // fast exit, also avoids division by zero in calculation of scale factor
208         minOut = maxOut = minVal;
209         return;
210     }
211 
212     // Caution: Currently this works correctly only for histoSize in range[1;65536].
213     // For small data size (i.e. thumbnails) we reduce the size of the histogram to the size of data.
214     const unsigned int histoSize = std::min<size_t>(65536, size);
215 
216     // calculate scale factor to use full range of histogram
217     const float scale = (histoSize - 1) / (maxVal - minVal);
218 
219     // We need one main histogram
220     std::vector<uint32_t> histo(histoSize, 0);
221 
222     if (numThreads == 1) {
223         // just one thread => use main histogram
224         for (size_t i = 0; i < size; ++i) {
225             // we have to subtract minVal and multiply with scale to get the data in [0;histosize] range
226             histo[static_cast<uint16_t>(scale * (data[i] - minVal))]++;
227         }
228     } else {
229 #ifdef _OPENMP
230     #pragma omp parallel num_threads(numThreads)
231 #endif
232         {
233             // We need one histogram per thread
234             std::vector<uint32_t> histothr(histoSize, 0);
235 
236 #ifdef _OPENMP
237             #pragma omp for nowait
238 #endif
239             for (size_t i = 0; i < size; ++i) {
240                 // we have to subtract minVal and multiply with scale to get the data in [0;histosize] range
241                 histothr[static_cast<uint16_t>(scale * (data[i] - minVal))]++;
242             }
243 
244 #ifdef _OPENMP
245             #pragma omp critical
246 #endif
247             {
248                 // add per thread histogram to main histogram
249 #ifdef _OPENMP
250                 #pragma omp simd
251 #endif
252 
253                 for (size_t i = 0; i < histoSize; ++i) {
254                     histo[i] += histothr[i];
255                 }
256             }
257         }
258     }
259 
260     size_t k = 0;
261     size_t count = 0;
262 
263     // find (minPrct*size) smallest value
264     const float threshmin = minPrct * size;
265     while (count < threshmin) {
266         count += histo[k++];
267     }
268 
269     if (k > 0) { // interpolate
270         const size_t count_ = count - histo[k - 1];
271         const float c0 = count - threshmin;
272         const float c1 = threshmin - count_;
273         minOut = (c1 * k + c0 * (k - 1)) / (c0 + c1);
274     } else {
275         minOut = k;
276     }
277     // go back to original range
278     minOut /= scale;
279     minOut += minVal;
280     minOut = rtengine::LIM(minOut, minVal, maxVal);
281 
282     // find (maxPrct*size) smallest value
283     const float threshmax = maxPrct * size;
284     while (count < threshmax) {
285         count += histo[k++];
286     }
287 
288     if (k > 0) { // interpolate
289         const size_t count_ = count - histo[k - 1];
290         const float c0 = count - threshmax;
291         const float c1 = threshmax - count_;
292         maxOut = (c1 * k + c0 * (k - 1)) / (c0 + c1);
293     } else {
294         maxOut = k;
295     }
296     // go back to original range
297     maxOut /= scale;
298     maxOut += minVal;
299     maxOut = rtengine::LIM(maxOut, minVal, maxVal);
300 }
301 
buildBlendMask(float ** luminance,float ** blend,int W,int H,float & contrastThreshold,float amount,bool autoContrast,float blur_radius,float luminance_factor)302 void buildBlendMask(float** luminance, float **blend, int W, int H, float &contrastThreshold, float amount, bool autoContrast, float blur_radius, float luminance_factor)
303 {
304     if (autoContrast) {
305         const float minLuminance = 2000.f / luminance_factor;
306         const float maxLuminance = 20000.f / luminance_factor;
307         constexpr float minTileVariance = 0.5f;
308         for (int pass = 0; pass < 2; ++pass) {
309             const int tilesize = 80 / (pass + 1);
310             const int skip = pass == 0 ? tilesize : tilesize / 4;
311             const int numTilesW = W / skip - 3 * pass;
312             const int numTilesH = H / skip - 3 * pass;
313             std::vector<std::vector<float>> variances(numTilesH, std::vector<float>(numTilesW));
314 
315 #ifdef _OPENMP
316             #pragma omp parallel for schedule(dynamic)
317 #endif
318             for (int i = 0; i < numTilesH; ++i) {
319                 const int tileY = i * skip;
320                 for (int j = 0; j < numTilesW; ++j) {
321                     const int tileX = j * skip;
322                     const float avg = tileAverage(luminance, tileY, tileX, tilesize);
323                     if (avg < minLuminance || avg > maxLuminance) {
324                         // too dark or too bright => skip the tile
325                         variances[i][j] = RT_INFINITY_F;
326                         continue;
327                     } else {
328                         variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg);
329                         // exclude tiles with a variance less than minTileVariance
330                         variances[i][j] = variances[i][j] < minTileVariance ? RT_INFINITY_F : variances[i][j];
331                     }
332                 }
333             }
334 
335             float minvar = RT_INFINITY_F;
336             int minI = 0, minJ = 0;
337             for (int i = 0; i < numTilesH; ++i) {
338                 for (int j = 0; j < numTilesW; ++j) {
339                     if (variances[i][j] < minvar) {
340                         minvar = variances[i][j];
341                         minI = i;
342                         minJ = j;
343                     }
344                 }
345             }
346 
347             if (minvar <= 1.f || pass == 1) {
348                 const int minY = skip * minI;
349                 const int minX = skip * minJ;
350                 if (pass == 0) {
351                     // a variance <= 1 means we already found a flat region and can skip second pass
352                     contrastThreshold = calcContrastThreshold(luminance, minY, minX, tilesize, luminance_factor);
353                     break;
354                 } else {
355                     // in second pass we allow a variance of 4
356                     // we additionally scan the tiles +-skip pixels around the best tile from pass 2
357                     // Means we scan (2 * skip + 1)^2 tiles in this step to get a better hit rate
358                     // fortunately the scan is quite fast, so we use only one core and don't parallelize
359                     const int topLeftYStart = std::max(minY - skip, 0);
360                     const int topLeftXStart = std::max(minX - skip, 0);
361                     const int topLeftYEnd = std::min(minY + skip, H - tilesize);
362                     const int topLeftXEnd = std::min(minX + skip, W - tilesize);
363                     const int numTilesH = topLeftYEnd - topLeftYStart + 1;
364                     const int numTilesW = topLeftXEnd - topLeftXStart + 1;
365 
366                     std::vector<std::vector<float>> variances(numTilesH, std::vector<float>(numTilesW));
367                     for (int i = 0; i < numTilesH; ++i) {
368                         const int tileY = topLeftYStart + i;
369                         for (int j = 0; j < numTilesW; ++j) {
370                             const int tileX = topLeftXStart + j;
371                             const float avg = tileAverage(luminance, tileY, tileX, tilesize);
372 
373                             if (avg < minLuminance || avg > maxLuminance) {
374                                 // too dark or too bright => skip the tile
375                                 variances[i][j] = RT_INFINITY_F;
376                                 continue;
377                             } else {
378                                 variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg);
379                             // exclude tiles with a variance less than minTileVariance
380                             variances[i][j] = variances[i][j] < minTileVariance ? RT_INFINITY_F : variances[i][j];
381                             }
382                         }
383                     }
384 
385                     float minvar = RT_INFINITY_F;
386                     int minI = 0, minJ = 0;
387                     for (int i = 0; i < numTilesH; ++i) {
388                         for (int j = 0; j < numTilesW; ++j) {
389                             if (variances[i][j] < minvar) {
390                                 minvar = variances[i][j];
391                                 minI = i;
392                                 minJ = j;
393                             }
394                         }
395                     }
396 
397                     contrastThreshold = minvar <= 8.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize, luminance_factor) : 0.f;
398                 }
399             }
400         }
401     }
402 
403     if(contrastThreshold == 0.f) {
404         for(int j = 0; j < H; ++j) {
405             for(int i = 0; i < W; ++i) {
406                 blend[j][i] = amount;
407             }
408         }
409     } else {
410         const float scale = 0.0625f / 327.68f * luminance_factor;
411 #ifdef _OPENMP
412         #pragma omp parallel
413 #endif
414         {
415 #ifdef __SSE2__
416             const vfloat contrastThresholdv = F2V(contrastThreshold);
417             const vfloat scalev = F2V(scale);
418             const vfloat amountv = F2V(amount);
419 #endif
420 #ifdef _OPENMP
421             #pragma omp for schedule(dynamic,16)
422 #endif
423 
424             for(int j = 2; j < H - 2; ++j) {
425                 int i = 2;
426 #ifdef __SSE2__
427                 for(; i < W - 5; i += 4) {
428                     vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) +
429                                               SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev;
430 
431                     STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv));
432                 }
433 #endif
434                 for(; i < W - 2; ++i) {
435 
436                     float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) +
437                                            rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale;
438 
439                     blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold);
440                 }
441             }
442 
443 #ifdef _OPENMP
444             #pragma omp single
445 #endif
446             {
447                 // upper border
448                 for(int j = 0; j < 2; ++j) {
449                     for(int i = 2; i < W - 2; ++i) {
450                         blend[j][i] = blend[2][i];
451                     }
452                 }
453                 // lower border
454                 for(int j = H - 2; j < H; ++j) {
455                     for(int i = 2; i < W - 2; ++i) {
456                         blend[j][i] = blend[H-3][i];
457                     }
458                 }
459                 for(int j = 0; j < H; ++j) {
460                     // left border
461                     blend[j][0] = blend[j][1] = blend[j][2];
462                     // right border
463                     blend[j][W - 2] = blend[j][W - 1] = blend[j][W - 3];
464                 }
465             }
466 
467 #ifdef __SSE2__
468             // flush denormals to zero for gaussian blur to avoid performance penalty if there are a lot of zero values in the mask
469             const auto oldMode = _MM_GET_FLUSH_ZERO_MODE();
470             _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
471 #endif
472 
473             // blur blend mask to smooth transitions
474             gaussianBlur(blend, blend, W, H, blur_radius); //2.0);
475 
476 #ifdef __SSE2__
477             _MM_SET_FLUSH_ZERO_MODE(oldMode);
478 #endif
479         }
480     }
481 }
482 
483 
markImpulse(int width,int height,float ** const src,char ** impulse,float thresh)484 void markImpulse(int width, int height, float **const src, char **impulse, float thresh)
485 {
486     // buffer for the lowpass image
487     float * lpf[height] ALIGNED16;
488     lpf[0] = new float [width * height];
489 
490     for (int i = 1; i < height; i++) {
491         lpf[i] = lpf[i - 1] + width;
492     }
493 
494 #ifdef _OPENMP
495     #pragma omp parallel
496 #endif
497     {
498         gaussianBlur(const_cast<float **>(src), lpf, width, height, max(2.f, thresh - 1.f));
499     }
500 
501     //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
502 
503     float impthr = max(1.f, 5.5f - thresh);
504     float impthrDiv24 = impthr / 24.0f;         //Issue 1671: moved the Division outside the loop, impthr can be optimized out too, but I let in the code at the moment
505 
506 
507 #ifdef _OPENMP
508     #pragma omp parallel
509 #endif
510     {
511         int i1, j1, j;
512         float hpfabs, hfnbrave;
513 #ifdef __SSE2__
514         vfloat hfnbravev, hpfabsv;
515         vfloat impthrDiv24v = F2V( impthrDiv24 );
516 #endif
517 #ifdef _OPENMP
518         #pragma omp for
519 #endif
520 
521         for (int i = 0; i < height; i++) {
522             for (j = 0; j < 2; j++) {
523                 hpfabs = fabs(src[i][j] - lpf[i][j]);
524 
525                 //block average of high pass data
526                 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
527                     for (j1 = 0; j1 <= j + 2; j1++) {
528                         hfnbrave += fabs(src[i1][j1] - lpf[i1][j1]);
529                     }
530 
531                 impulse[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
532             }
533 
534 #ifdef __SSE2__
535 
536             for (; j < width - 5; j += 4) {
537                 hfnbravev = ZEROV;
538                 hpfabsv = vabsf(LVFU(src[i][j]) - LVFU(lpf[i][j]));
539 
540                 //block average of high pass data
541                 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) {
542                     for (j1 = j - 2; j1 <= j + 2; j1++) {
543                         hfnbravev += vabsf(LVFU(src[i1][j1]) - LVFU(lpf[i1][j1]));
544                     }
545                 }
546 
547                 int mask = _mm_movemask_ps((hfnbravev - hpfabsv) * impthrDiv24v - hpfabsv);
548                 impulse[i][j] = (mask & 1);
549                 impulse[i][j + 1] = ((mask & 2) >> 1);
550                 impulse[i][j + 2] = ((mask & 4) >> 2);
551                 impulse[i][j + 3] = ((mask & 8) >> 3);
552             }
553 
554 #endif
555 
556             for (; j < width - 2; j++) {
557                 hpfabs = fabs(src[i][j] - lpf[i][j]);
558 
559                 //block average of high pass data
560                 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
561                     for (j1 = j - 2; j1 <= j + 2; j1++) {
562                         hfnbrave += fabs(src[i1][j1] - lpf[i1][j1]);
563                     }
564 
565                 impulse[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
566             }
567 
568             for (; j < width; j++) {
569                 hpfabs = fabs(src[i][j] - lpf[i][j]);
570 
571                 //block average of high pass data
572                 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
573                     for (j1 = j - 2; j1 < width; j1++) {
574                         hfnbrave += fabs(src[i1][j1] - lpf[i1][j1]);
575                     }
576 
577                 impulse[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
578             }
579         }
580     }
581 
582     delete [] lpf[0];
583 }
584 
585 // Code adapted from Blender's project
586 // https://developer.blender.org/diffusion/B/browse/master/source/blender/blenlib/intern/math_geom.c;3b4a8f1cfa7339f3db9ddd4a7974b8cc30d7ff0b$2411
polyFill(float ** buffer,int width,int height,const std::vector<CoordD> & poly,const float color)587 float polyFill(float **buffer, int width, int height, const std::vector<CoordD> &poly, const float color)
588 {
589 
590     // First point of the polygon in image space
591     int xStart = int(poly[0].x + 0.5);
592     int yStart = int(poly[0].y + 0.5);
593     int xEnd = xStart;
594     int yEnd = yStart;
595 
596     // Find boundaries
597     for (auto point : poly) {
598 
599         // X bounds
600         if (int(point.x) < xStart) {
601             xStart = int(point.x);
602         } else if (int(point.x) > xEnd) {
603             xEnd = int(point.x);
604         }
605 
606         // Y bounds
607         if (int(point.y) < yStart) {
608             yStart = int(point.y);
609         } else if (int(point.y) > yEnd) {
610             yEnd = int(point.y);
611         }
612     }
613 
614     float ret = rtengine::min<int>(xEnd - xStart, yEnd - yStart);
615 
616     xStart = rtengine::LIM<int>(xStart, 0., width - 1);
617     xEnd = rtengine::LIM<int>(xEnd, xStart, width - 1);
618     yStart = rtengine::LIM<int>(yStart, 0., height - 1);
619     yEnd = rtengine::LIM<int>(yEnd, yStart, height - 1);
620 
621     std::vector<int> nodeX;
622 
623     //  Loop through the rows of the image.
624     for (int y = yStart; y <= yEnd; ++y) {
625         nodeX.clear();
626 
627         //  Build a list of nodes.
628         size_t j = poly.size() - 1;
629         for (size_t i = 0; i < poly.size(); ++i) {
630             if ((poly[i].y < double(y) && poly[j].y >= double(y))
631             ||  (poly[j].y < double(y) && poly[i].y >= double(y)))
632             {
633                 //TODO: Check rounding here ?
634                 // Possibility to add antialiasing here by calculating the distance of the value from the middle of the pixel (0.5)
635                 nodeX.push_back(int(poly[i].x + (double(y) - poly[i].y) / (poly[j].y - poly[i].y) * (poly[j].x - poly[i].x)));
636             }
637             j = i;
638         }
639 
640         //  Sort the nodes
641         std::sort(nodeX.begin(), nodeX.end());
642 
643         //  Fill the pixels between node pairs.
644         for (size_t i = 0; i < nodeX.size(); i += 2) {
645             if (nodeX.at(i) > xEnd) break;
646             if (nodeX.at(i + 1) > xStart ) {
647                 if (nodeX.at(i) < xStart ) {
648                     nodeX.at(i) = xStart;
649                 }
650                 if (nodeX.at(i + 1) > xEnd) {
651                     nodeX.at(i + 1) = xEnd;
652                 }
653                 for (int x = nodeX.at(i); x <= nodeX.at(i + 1); ++x) {
654                     buffer[y][x] = color;
655                 }
656             }
657         }
658     }
659 
660     return ret;//float(rtengine::min<int>(xEnd - xStart, yEnd - yStart));
661 }
662 
663 } // namespace rtengine
664