1 /*
2 * This file is part of RawTherapee.
3 *
4 * RawTherapee is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * RawTherapee is distributed in the hope that it will be useful,
10 * but widthITheightOUT ANY widthARRANTY; without even the implied warranty of
11 * MERCheightANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with RawTherapee. If not, see <https://www.gnu.org/licenses/>.
16 *
17 * 2010 Emil Martinec <ejmartin@uchicago.edu>
18 *
19 */
20 #include <cstddef>
21 #include "rt_math.h"
22 #include "labimage.h"
23 #include "improcfun.h"
24 #include "cieimage.h"
25 #include "sleef.h"
26 #include "opthelper.h"
27 #include "gauss.h"
28
29 using namespace std;
30
31 namespace rtengine
32 {
33
impulse_nr(LabImage * lab,double thresh)34 void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
35 {
36 // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37 // impulse noise removal
38 // local variables
39
40 int width = lab->W;
41 int height = lab->H;
42
43 // buffer for the lowpass image
44 float * lpf[height] ALIGNED16;
45 lpf[0] = new float [width * height];
46 // buffer for the highpass image
47 char * impish[height] ALIGNED16;
48 impish[0] = new char [width * height];
49
50 for (int i = 1; i < height; i++) {
51 lpf[i] = lpf[i - 1] + width;
52 impish[i] = impish[i - 1] + width;
53 }
54
55
56 //The cleaning algorithm starts here
57
58 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
59 // modified bilateral filter for lowpass image, omitting input pixel; or Gaussian blur
60
61 const float eps = 1.0;
62
63 #ifdef _OPENMP
64 #pragma omp parallel
65 #endif
66 {
67 gaussianBlur (lab->L, lpf, width, height, max(2.0, thresh - 1.0));
68 }
69
70 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
71
72 float impthr = max(1.0, 5.5 - thresh);
73 float impthrDiv24 = impthr / 24.0f; //Issue 1671: moved the Division outside the loop, impthr can be optimized out too, but I let in the code at the moment
74
75
76 #ifdef _OPENMP
77 #pragma omp parallel
78 #endif
79 {
80 int i1, j1, j;
81 float hpfabs, hfnbrave;
82 #ifdef __SSE2__
83 vfloat hfnbravev, hpfabsv;
84 vfloat impthrDiv24v = F2V( impthrDiv24 );
85 #endif
86 #ifdef _OPENMP
87 #pragma omp for
88 #endif
89
90 for (int i = 0; i < height; i++) {
91 for (j = 0; j < 2; j++) {
92 hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
93
94 //block average of high pass data
95 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
96 for (j1 = 0; j1 <= j + 2; j1++) {
97 hfnbrave += fabs(lab->L[i1][j1] - lpf[i1][j1]);
98 }
99
100 impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
101 }
102
103 #ifdef __SSE2__
104
105 for (; j < width - 5; j += 4) {
106 hfnbravev = ZEROV;
107 hpfabsv = vabsf(LVFU(lab->L[i][j]) - LVFU(lpf[i][j]));
108
109 //block average of high pass data
110 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) {
111 for (j1 = j - 2; j1 <= j + 2; j1++) {
112 hfnbravev += vabsf(LVFU(lab->L[i1][j1]) - LVFU(lpf[i1][j1]));
113 }
114 }
115
116 int mask = _mm_movemask_ps((hfnbravev - hpfabsv) * impthrDiv24v - hpfabsv);
117 impish[i][j] = (mask & 1);
118 impish[i][j + 1] = ((mask & 2) >> 1);
119 impish[i][j + 2] = ((mask & 4) >> 2);
120 impish[i][j + 3] = ((mask & 8) >> 3);
121 }
122
123 #endif
124
125 for (; j < width - 2; j++) {
126 hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
127
128 //block average of high pass data
129 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
130 for (j1 = j - 2; j1 <= j + 2; j1++) {
131 hfnbrave += fabs(lab->L[i1][j1] - lpf[i1][j1]);
132 }
133
134 impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
135 }
136
137 for (; j < width; j++) {
138 hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
139
140 //block average of high pass data
141 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
142 for (j1 = j - 2; j1 < width; j1++) {
143 hfnbrave += fabs(lab->L[i1][j1] - lpf[i1][j1]);
144 }
145
146 impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
147 }
148 }
149 }
150
151 //now impulsive values have been identified
152
153 // Issue 1671:
154 // often, noise isn't evenly distributed, e.g. only a few noisy pixels in the bright sky, but many in the dark foreground,
155 // so it's better to schedule dynamic and let every thread only process 16 rows, to avoid running big threads out of work
156 // Measured it and in fact gives better performance than without schedule(dynamic,16). Of course, there could be a better
157 // choice for the chunk_size than 16
158 // race conditions are avoided by the array impish
159 #ifdef _OPENMP
160 #pragma omp parallel
161 #endif
162 {
163 int i1, j1, j;
164 float wtdsum[3], dirwt, norm;
165 #ifdef _OPENMP
166 #pragma omp for schedule(dynamic,16)
167 #endif
168
169 for (int i = 0; i < height; i++) {
170 for (j = 0; j < 2; j++) {
171 if (!impish[i][j]) {
172 continue;
173 }
174
175 norm = 0.0;
176 wtdsum[0] = wtdsum[1] = wtdsum[2] = 0.0;
177
178 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
179 for (j1 = 0; j1 <= j + 2; j1++ ) {
180 if (impish[i1][j1]) {
181 continue;
182 }
183
184 dirwt = 1 / (SQR(lab->L[i1][j1] - lab->L[i][j]) + eps); //use more sophisticated rangefn???
185 wtdsum[0] += dirwt * lab->L[i1][j1];
186 wtdsum[1] += dirwt * lab->a[i1][j1];
187 wtdsum[2] += dirwt * lab->b[i1][j1];
188 norm += dirwt;
189 }
190
191 if (norm) {
192 lab->L[i][j] = wtdsum[0] / norm; //low pass filter
193 lab->a[i][j] = wtdsum[1] / norm; //low pass filter
194 lab->b[i][j] = wtdsum[2] / norm; //low pass filter
195 }
196 }
197
198 for (; j < width - 2; j++) {
199 if (!impish[i][j]) {
200 continue;
201 }
202
203 norm = 0.0;
204 wtdsum[0] = wtdsum[1] = wtdsum[2] = 0.0;
205
206 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
207 for (j1 = j - 2; j1 <= j + 2; j1++ ) {
208 if (impish[i1][j1]) {
209 continue;
210 }
211
212 dirwt = 1 / (SQR(lab->L[i1][j1] - lab->L[i][j]) + eps); //use more sophisticated rangefn???
213 wtdsum[0] += dirwt * lab->L[i1][j1];
214 wtdsum[1] += dirwt * lab->a[i1][j1];
215 wtdsum[2] += dirwt * lab->b[i1][j1];
216 norm += dirwt;
217 }
218
219 if (norm) {
220 lab->L[i][j] = wtdsum[0] / norm; //low pass filter
221 lab->a[i][j] = wtdsum[1] / norm; //low pass filter
222 lab->b[i][j] = wtdsum[2] / norm; //low pass filter
223 }
224 }
225
226 for (; j < width; j++) {
227 if (!impish[i][j]) {
228 continue;
229 }
230
231 norm = 0.0;
232 wtdsum[0] = wtdsum[1] = wtdsum[2] = 0.0;
233
234 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
235 for (j1 = j - 2; j1 < width; j1++ ) {
236 if (impish[i1][j1]) {
237 continue;
238 }
239
240 dirwt = 1 / (SQR(lab->L[i1][j1] - lab->L[i][j]) + eps); //use more sophisticated rangefn???
241 wtdsum[0] += dirwt * lab->L[i1][j1];
242 wtdsum[1] += dirwt * lab->a[i1][j1];
243 wtdsum[2] += dirwt * lab->b[i1][j1];
244 norm += dirwt;
245 }
246
247 if (norm) {
248 lab->L[i][j] = wtdsum[0] / norm; //low pass filter
249 lab->a[i][j] = wtdsum[1] / norm; //low pass filter
250 lab->b[i][j] = wtdsum[2] / norm; //low pass filter
251 }
252 }
253 }
254 }
255 //now impulsive values have been corrected
256
257 delete [] lpf[0];
258 delete [] impish[0];
259
260 }
261
262
impulse_nrcam(CieImage * ncie,double thresh,float ** buffers[3])263 void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh, float **buffers[3])
264 {
265 // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
266 // impulse noise removal
267 // local variables
268
269 int width = ncie->W;
270 int height = ncie->H;
271
272
273 float piid = 3.14159265f / 180.f;
274
275 // buffer for the lowpass image
276 float ** lpf = buffers[0];
277 // buffer for the highpass image
278 float ** impish = buffers[1];
279
280 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
281 // modified bilateral filter for lowpass image, omitting input pixel; or Gaussian blur
282
283
284
285 //The cleaning algorithm starts here
286
287 //rangeblur<unsigned short, unsigned int> (lab->L, lpf, impish /*used as buffer here*/, width, height, thresh, false);
288 #ifdef _OPENMP
289 #pragma omp parallel
290 #endif
291 {
292 gaussianBlur (ncie->sh_p, lpf, width, height, max(2.0, thresh - 1.0));
293 }
294
295 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
296
297 float impthr = max(1.0f, 5.0f - (float)thresh);
298 float impthrDiv24 = impthr / 24.0f; //Issue 1671: moved the Division outside the loop, impthr can be optimized out too, but I let in the code at the moment
299
300 #ifdef _OPENMP
301 #pragma omp parallel
302 #endif
303 {
304 int i1, j1, j;
305 float hpfabs, hfnbrave;
306 #ifdef __SSE2__
307 vfloat hfnbravev, hpfabsv;
308 vfloat impthrDiv24v = F2V( impthrDiv24 );
309 vfloat onev = F2V( 1.0f );
310 #endif
311 #ifdef _OPENMP
312 #pragma omp for
313 #endif
314
315 for (int i = 0; i < height; i++) {
316 for (j = 0; j < 2; j++) {
317 hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
318
319 //block average of high pass data
320 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
321 for (j1 = 0; j1 <= j + 2; j1++) {
322 hfnbrave += fabs(ncie->sh_p[i1][j1] - lpf[i1][j1]);
323 }
324
325 impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
326 }
327
328 #ifdef __SSE2__
329
330 for (; j < width - 5; j += 4) {
331 hpfabsv = vabsf(LVFU(ncie->sh_p[i][j]) - LVFU(lpf[i][j]));
332 hfnbravev = ZEROV;
333
334 //block average of high pass data
335 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) {
336 for (j1 = j - 2; j1 <= j + 2; j1++ ) {
337 hfnbravev += vabsf(LVFU(ncie->sh_p[i1][j1]) - LVFU(lpf[i1][j1]));
338 }
339
340 }
341
342 STVFU(impish[i][j], vselfzero(vmaskf_gt(hpfabsv, (hfnbravev - hpfabsv)*impthrDiv24v), onev));
343 }
344
345 #endif
346
347 for (; j < width - 2; j++) {
348 hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
349
350 //block average of high pass data
351 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
352 for (j1 = j - 2; j1 <= j + 2; j1++ ) {
353 hfnbrave += fabs(ncie->sh_p[i1][j1] - lpf[i1][j1]);
354 }
355
356 impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
357 }
358
359 for (; j < width; j++) {
360 hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
361
362 //block average of high pass data
363 for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
364 for (j1 = j - 2; j1 < width; j1++ ) {
365 hfnbrave += fabs(ncie->sh_p[i1][j1] - lpf[i1][j1]);
366 }
367
368 impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
369 }
370 }
371 }
372
373 //now impulsive values have been identified
374
375 const float eps = 1.0f;
376
377 float** sraa = buffers[0]; // we can reuse buffers[0] because lpf is not needed anymore at this point
378 float** srbb = buffers[2];
379
380 #ifdef _OPENMP
381 #pragma omp parallel
382 #endif
383 {
384
385 #ifdef __SSE2__
386 vfloat2 sincosvalv;
387 vfloat piidv = F2V( piid );
388 vfloat tempv;
389 #endif
390 #ifdef _OPENMP
391 #pragma omp for
392 #endif
393
394 for (int i = 0; i < height; i++) {
395 int j = 0;
396 #ifdef __SSE2__
397
398 for (; j < width - 3; j += 4) {
399 sincosvalv = xsincosf(piidv * LVFU(ncie->h_p[i][j]));
400 tempv = LVFU(ncie->C_p[i][j]);
401 STVFU(sraa[i][j], tempv * sincosvalv.y);
402 STVFU(srbb[i][j], tempv * sincosvalv.x);
403 }
404
405 #endif
406
407 for (; j < width; j++) {
408 float2 sincosval = xsincosf(piid * ncie->h_p[i][j]);
409 sraa[i][j] = ncie->C_p[i][j] * sincosval.y;
410 srbb[i][j] = ncie->C_p[i][j] * sincosval.x;
411 }
412 }
413 }
414
415 // Issue 1671:
416 // often, noise isn't evenly distributed, e.g. only a few noisy pixels in the bright sky, but many in the dark foreground,
417 // so it's better to schedule dynamic and let every thread only process 16 rows, to avoid running big threads out of work
418 // Measured it and in fact gives better performance than without schedule(dynamic,16). Of course, there could be a better
419 // choice for the chunk_size than 16
420 // race conditions are avoided by the array impish
421 #ifdef _OPENMP
422 #pragma omp parallel
423 #endif
424 {
425 int i1, j1, j;
426 float wtdsum[3], dirwt, norm;
427 #ifdef _OPENMP
428 #pragma omp for schedule(dynamic,16)
429 #endif
430
431 for (int i = 0; i < height; i++) {
432 for (j = 0; j < 2; j++) {
433 if (!impish[i][j]) {
434 continue;
435 }
436
437 norm = 0.0f;
438 wtdsum[0] = wtdsum[1] = wtdsum[2] = 0.0f;
439
440 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
441 for (j1 = 0; j1 <= j + 2; j1++ ) {
442 if (impish[i1][j1]) {
443 continue;
444 }
445
446 dirwt = 1.f / (SQR(ncie->sh_p[i1][j1] - ncie->sh_p[i][j]) + eps); //use more sophisticated rangefn???
447 wtdsum[0] += dirwt * ncie->sh_p[i1][j1];
448 wtdsum[1] += dirwt * sraa[i1][j1];
449 wtdsum[2] += dirwt * srbb[i1][j1];
450 norm += dirwt;
451 }
452
453 if (norm) {
454 ncie->sh_p[i][j] = wtdsum[0] / norm; //low pass filter
455 sraa[i][j] = wtdsum[1] / norm; //low pass filter
456 srbb[i][j] = wtdsum[2] / norm; //low pass filter
457 }
458 }
459
460 for (; j < width - 2; j++) {
461 if (!impish[i][j]) {
462 continue;
463 }
464
465 norm = 0.0f;
466 wtdsum[0] = wtdsum[1] = wtdsum[2] = 0.0f;
467
468 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
469 for (j1 = j - 2; j1 <= j + 2; j1++ ) {
470 if (impish[i1][j1]) {
471 continue;
472 }
473
474 dirwt = 1.f / (SQR(ncie->sh_p[i1][j1] - ncie->sh_p[i][j]) + eps); //use more sophisticated rangefn???
475 wtdsum[0] += dirwt * ncie->sh_p[i1][j1];
476 wtdsum[1] += dirwt * sraa[i1][j1];
477 wtdsum[2] += dirwt * srbb[i1][j1];
478 norm += dirwt;
479 }
480
481 if (norm) {
482 ncie->sh_p[i][j] = wtdsum[0] / norm; //low pass filter
483 sraa[i][j] = wtdsum[1] / norm; //low pass filter
484 srbb[i][j] = wtdsum[2] / norm; //low pass filter
485 }
486 }
487
488 for (; j < width; j++) {
489 if (!impish[i][j]) {
490 continue;
491 }
492
493 norm = 0.0f;
494 wtdsum[0] = wtdsum[1] = wtdsum[2] = 0.0f;
495
496 for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
497 for (j1 = j - 2; j1 < width; j1++ ) {
498 if (impish[i1][j1]) {
499 continue;
500 }
501
502 dirwt = 1.f / (SQR(ncie->sh_p[i1][j1] - ncie->sh_p[i][j]) + eps); //use more sophisticated rangefn???
503 wtdsum[0] += dirwt * ncie->sh_p[i1][j1];
504 wtdsum[1] += dirwt * sraa[i1][j1];
505 wtdsum[2] += dirwt * srbb[i1][j1];
506 norm += dirwt;
507 }
508
509 if (norm) {
510 ncie->sh_p[i][j] = wtdsum[0] / norm; //low pass filter
511 sraa[i][j] = wtdsum[1] / norm; //low pass filter
512 srbb[i][j] = wtdsum[2] / norm; //low pass filter
513 }
514 }
515 }
516 }
517
518 //now impulsive values have been corrected
519
520 #ifdef _OPENMP
521 #pragma omp parallel
522 #endif
523 {
524 #ifdef __SSE2__
525 vfloat interav, interbv;
526 vfloat piidv = F2V(piid);
527 #endif // __SSE2__
528 #ifdef _OPENMP
529 #pragma omp for
530 #endif
531
532 for(int i = 0; i < height; i++ ) {
533 int j = 0;
534 #ifdef __SSE2__
535
536 for(; j < width - 3; j += 4) {
537 interav = LVFU(sraa[i][j]);
538 interbv = LVFU(srbb[i][j]);
539 STVFU(ncie->h_p[i][j], (xatan2f(interbv, interav)) / piidv);
540 STVFU(ncie->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav)));
541 }
542
543 #endif
544
545 for(; j < width; j++) {
546 float intera = sraa[i][j];
547 float interb = srbb[i][j];
548 ncie->h_p[i][j] = (xatan2f(interb, intera)) / piid;
549 ncie->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
550 }
551 }
552 }
553
554 }
555
556
557 }
558