1 ////////////////////////////////////////////////////////////////
2 //
3 //      Highlight reconstruction
4 //
5 //      copyright (c) 2008-2011  Emil Martinec <ejmartin@uchicago.edu>
6 //
7 //
8 // code dated: June 16, 2011
9 //
10 //  hilite_recon.cc is free software: you can redistribute it and/or modify
11 //  it under the terms of the GNU General Public License as published by
12 //  the Free Software Foundation, either version 3 of the License, or
13 //  (at your option) any later version.
14 //
15 //  This program is distributed in the hope that it will be useful,
16 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
17 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 //  GNU General Public License for more details.
19 //
20 //  You should have received a copy of the GNU General Public License
21 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
22 //
23 ////////////////////////////////////////////////////////////////
24 
25 #include <cstddef>
26 #include <cmath>
27 #include "array2D.h"
28 #include "librtprocess.h"
29 #include "rt_math.h"
30 #include "opthelper.h"
31 
32 //#define VERBOSE
33 
34 using librtprocess::SQR;
35 using librtprocess::max;
36 using librtprocess::min;
37 
38 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
boxblur2(float ** src,float ** dst,float ** temp,int startY,int startX,int H,int W,int box)40 void boxblur2(float** src, float** dst, float** temp, int startY, int startX, int H, int W, int box )
41 {
42     //box blur image channel; box size = 2*box+1
43     //horizontal blur
44 #ifdef _OPENMP
45     #pragma omp parallel for
46 #endif
47 
48     for (int row = 0; row < H; row++) {
49         int len = box + 1;
50 	temp[row][0] = src[row + startY][startX] / len;
51 
52         for (int j = 1; j <= box; j++) {
53 	    temp[row][0] += src[row + startY][j + startX] / len;
54         }
55 
56         for (int col = 1; col <= box; col++) {
57             temp[row][col] = (temp[row][col - 1] * len + src[row + startY][col + box + startX]) / (len + 1);
58             len ++;
59         }
60 
61         for (int col = box + 1; col < W - box; col++) {
62             temp[row][col] = temp[row][col - 1] + (src[row + startY][col + box + startX] - src[row + startY][col - box - 1 + startX]) / len;
63         }
64 
65         for (int col = W - box; col < W; col++) {
66             temp[row][col] = (temp[row][col - 1] * len - src[row + startY][col - box - 1 + startX]) / (len - 1);
67             len --;
68         }
69     }
70 
71 #ifdef __SSE2__
72     //vertical blur
73 #ifdef _OPENMP
74     #pragma omp parallel
75 #endif
76     {
77         float len = box + 1;
78         vfloat lenv = F2V( len );
79         vfloat lenp1v = F2V( len + 1.0f );
80         vfloat onev = F2V( 1.0f );
81         vfloat tempv, temp2v;
82 #ifdef _OPENMP
83         #pragma omp for nowait
84 #endif
85 
86         for (int col = 0; col < W - 7; col += 8) {
87             tempv = LVFU(temp[0][col]) / lenv;
88             temp2v = LVFU(temp[0][col + 4]) / lenv;
89 
90             for (int i = 1; i <= box; i++) {
91                 tempv = tempv + LVFU(temp[i][col]) / lenv;
92                 temp2v = temp2v + LVFU(temp[i][col + 4]) / lenv;
93             }
94 
95             _mm_storeu_ps( &dst[0][col], tempv);
96             _mm_storeu_ps( &dst[0][col + 4], temp2v);
97 
98             for (int row = 1; row <= box; row++) {
99                 tempv = (tempv * lenv + LVFU(temp[(row + box)][col])) / lenp1v;
100                 temp2v = (temp2v * lenv + LVFU(temp[(row + box)][col + 4])) / lenp1v;
101                 _mm_storeu_ps( &dst[row][col], tempv);
102                 _mm_storeu_ps( &dst[row][col + 4], temp2v);
103                 lenv = lenp1v;
104                 lenp1v = lenp1v + onev;
105             }
106 
107             for (int row = box + 1; row < H - box; row++) {
108                 tempv = tempv + (LVFU(temp[(row + box)][col]) - LVFU(temp[(row - box - 1)][col])) / lenv;
109                 temp2v = temp2v + (LVFU(temp[(row + box)][col + 4]) - LVFU(temp[(row - box - 1)][col + 4])) / lenv;
110                 _mm_storeu_ps( &dst[row][col], tempv);
111                 _mm_storeu_ps( &dst[row][col + 4], temp2v);
112             }
113 
114             for (int row = H - box; row < H; row++) {
115                 lenp1v = lenv;
116                 lenv = lenv - onev;
117                 tempv = (tempv * lenp1v - LVFU(temp[(row - box - 1)][col])) / lenv;
118                 temp2v = (temp2v * lenp1v - LVFU(temp[(row - box - 1)][col + 4])) / lenv;
119                 _mm_storeu_ps( &dst[row][col], tempv );
120                 _mm_storeu_ps( &dst[row][col + 4], temp2v );
121             }
122         }
123 
124 #ifdef _OPENMP
125         #pragma omp single
126 #endif
127         {
128             for (int col = W - (W % 8); col < W - 3; col += 4) {
129                 tempv = LVFU(temp[0][col]) / lenv;
130 
131                 for (int i = 1; i <= box; i++) {
132                     tempv = tempv + LVFU(temp[i][col]) / lenv;
133                 }
134 
135                 _mm_storeu_ps( &dst[0][col], tempv);
136 
137                 for (int row = 1; row <= box; row++) {
138                     tempv = (tempv * lenv + LVFU(temp[(row + box)][col])) / lenp1v;
139                     _mm_storeu_ps( &dst[row][col], tempv);
140                     lenv = lenp1v;
141                     lenp1v = lenp1v + onev;
142                 }
143 
144                 for (int row = box + 1; row < H - box; row++) {
145                     tempv = tempv + (LVFU(temp[(row + box)][col]) - LVFU(temp[(row - box - 1)][col])) / lenv;
146                     _mm_storeu_ps( &dst[row][col], tempv);
147                 }
148 
149                 for (int row = H - box; row < H; row++) {
150                     lenp1v = lenv;
151                     lenv = lenv - onev;
152                     tempv = (tempv * lenp1v - LVFU(temp[(row - box - 1)][col])) / lenv;
153                     _mm_storeu_ps( &dst[row][col], tempv );
154                 }
155             }
156 
157             for (int col = W - (W % 4); col < W; col++) {
158                 int llen = box + 1;
159                 dst[0][col] = temp[0][col] / llen;
160 
161                 for (int i = 1; i <= box; i++) {
162                     dst[0][col] += temp[i][col] / llen;
163                 }
164 
165                 for (int row = 1; row <= box; row++) {
166                     dst[row][col] = (dst[(row - 1)][col] * llen + temp[(row + box)][col]) / (llen + 1);
167                     llen ++;
168                 }
169 
170                 for (int row = box + 1; row < H - box; row++) {
171                     dst[row][col] = dst[(row - 1)][col] + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / llen;
172                 }
173 
174                 for (int row = H - box; row < H; row++) {
175                     dst[row][col] = (dst[(row - 1)][col] * llen - temp[(row - box - 1)][col]) / (llen - 1);
176                     llen --;
177                 }
178             }
179         }
180     }
181 
182 #else
183     //vertical blur
184 #ifdef _OPENMP
185     #pragma omp parallel for
186 #endif
187 
188     for (int col = 0; col < W; col++) {
189         int len = box + 1;
190         dst[0][col] = temp[0][col] / len;
191 
192         for (int i = 1; i <= box; i++) {
193             dst[0][col] += temp[i][col] / len;
194         }
195 
196         for (int row = 1; row <= box; row++) {
197             dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + box)][col]) / (len + 1);
198             len ++;
199         }
200 
201         for (int row = box + 1; row < H - box; row++) {
202             dst[row][col] = dst[(row - 1)][col] + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len;
203         }
204 
205         for (int row = H - box; row < H; row++) {
206             dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - box - 1)][col]) / (len - 1);
207             len --;
208         }
209     }
210 
211 #endif
212 
213 }
214 
boxblur_resamp(float ** src,float ** dst,float ** temp,int H,int W,int box,int samp)215 void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int box, int samp )
216 {
217 
218 #ifdef _OPENMP
219     #pragma omp parallel
220 #endif
221     {
222 #ifdef _OPENMP
223         #pragma omp for
224 #endif
225 
226         //box blur image channel; box size = 2*box+1
227         //horizontal blur
228         for (int row = 0; row < H; row++)
229         {
230             int len = box + 1;
231             float tempval = src[row][0] / len;
232 
233             for (int j = 1; j <= box; j++) {
234                 tempval += src[row][j] / len;
235             }
236 
237             temp[row][0] = tempval;
238 
239             for (int col = 1; col <= box; col++) {
240                 tempval = (tempval * len + src[row][col + box]) / (len + 1);
241 
242                 if(col % samp == 0) {
243                     temp[row][col / samp] = tempval;
244                 }
245 
246                 len ++;
247             }
248 
249             float oneByLen = 1.f / (float)len;
250 
251             for (int col = box + 1; col < W - box; col++) {
252                 tempval = tempval + (src[row][col + box] - src[row][col - box - 1]) * oneByLen;
253 
254                 if(col % samp == 0) {
255                     temp[row][col / samp] = tempval;
256                 }
257             }
258 
259             for (int col = W - box; col < W; col++) {
260                 tempval = (tempval * len - src[row][col - box - 1]) / (len - 1);
261 
262                 if(col % samp == 0) {
263                     temp[row][col / samp] = tempval;
264                 }
265 
266                 len --;
267             }
268         }
269     }
270 
271     static const int numCols = 8;   // process numCols columns at once for better L1 CPU cache usage
272 #ifdef _OPENMP
273     #pragma omp parallel
274 #endif
275     {
276         float tempvalN[numCols] ALIGNED16;
277 #ifdef _OPENMP
278         #pragma omp for nowait
279 #endif
280 
281         //vertical blur
282         for (int col = 0; col < (W / samp) - (numCols - 1); col += numCols) {
283             int len = box + 1;
284 
285             for(int n = 0; n < numCols; n++) {
286                 tempvalN[n] = temp[0][col + n] / len;
287             }
288 
289             for (int i = 1; i <= box; i++) {
290                 for(int n = 0; n < numCols; n++) {
291                     tempvalN[n] += temp[i][col + n] / len;
292                 }
293             }
294 
295             for(int n = 0; n < numCols; n++) {
296                 dst[0][col + n] = tempvalN[n];
297             }
298 
299             for (int row = 1; row <= box; row++) {
300                 for(int n = 0; n < numCols; n++) {
301                     tempvalN[n] = (tempvalN[n] * len + temp[(row + box)][col + n]) / (len + 1);
302                 }
303 
304                 if(row % samp == 0) {
305                     for(int n = 0; n < numCols; n++) {
306                         dst[row / samp][col + n] = tempvalN[n];
307                     }
308                 }
309 
310                 len ++;
311             }
312 
313             for (int row = box + 1; row < H - box; row++) {
314                 for(int n = 0; n < numCols; n++) {
315                     tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) / len;
316                 }
317 
318                 if(row % samp == 0) {
319                     for(int n = 0; n < numCols; n++) {
320                         dst[row / samp][col + n] = tempvalN[n];
321                     }
322                 }
323             }
324 
325             for (int row = H - box; row < H; row++) {
326                 for(int n = 0; n < numCols; n++) {
327                     tempvalN[n] = (tempvalN[n] * len - temp[(row - box - 1)][col + n]) / (len - 1);
328                 }
329 
330                 if(row % samp == 0) {
331                     for(int n = 0; n < numCols; n++) {
332                         dst[row / samp][col + n] = tempvalN[n];
333                     }
334                 }
335 
336                 len --;
337             }
338         }
339 
340         // process remaining columns
341 #ifdef _OPENMP
342         #pragma omp single
343 #endif
344         {
345 
346             //vertical blur
347             for (int col = (W / samp) - ((W / samp) % numCols); col < W / samp; col++) {
348                 int len = box + 1;
349                 float tempval = temp[0][col] / len;
350 
351                 for (int i = 1; i <= box; i++) {
352                     tempval += temp[i][col] / len;
353                 }
354 
355                 dst[0][col] = tempval;
356 
357                 for (int row = 1; row <= box; row++) {
358                     tempval = (tempval * len + temp[(row + box)][col]) / (len + 1);
359 
360                     if(row % samp == 0) {
361                         dst[row / samp][col] = tempval;
362                     }
363 
364                     len ++;
365                 }
366 
367                 for (int row = box + 1; row < H - box; row++) {
368                     tempval = tempval + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len;
369 
370                     if(row % samp == 0) {
371                         dst[row / samp][col] = tempval;
372                     }
373                 }
374 
375                 for (int row = H - box; row < H; row++) {
376                     tempval = (tempval * len - temp[(row - box - 1)][col]) / (len - 1);
377 
378                     if(row % samp == 0) {
379                         dst[row / samp][col] = tempval;
380                     }
381 
382                     len --;
383                 }
384             }
385         }
386     }
387 
388 
389 }
390 
HLRecovery_inpaint(const int width,const int height,float ** red,float ** green,float ** blue,const float chmax[3],const float clmax[3],const std::function<bool (double)> & setProgCancel)391 rpError HLRecovery_inpaint (const int width, const int height, float** red, float** green, float** blue, const float chmax[3], const float clmax[3], const std::function<bool(double)> &setProgCancel)
392 {
393     double progress = 0.0;
394 
395     setProgCancel(progress);
396 
397 
398     constexpr int range = 2;
399     constexpr int pitch = 4;
400 
401     constexpr float threshpct = 0.25f;
402     constexpr float maxpct = 0.95f;
403     constexpr float epsilon = 0.00001f;
404     //%%%%%%%%%%%%%%%%%%%%
405     //for blend algorithm:
406     constexpr float blendthresh = 1.0;
407     constexpr int ColorCount = 3;
408     // Transform matrixes rgb>lab and back
409     constexpr float trans[ColorCount][ColorCount] =
410     { { 1.f, 1.f, 1.f }, { 1.7320508f, -1.7320508f, 0.f }, { -1.f, -1.f, 2.f } };
411     constexpr float itrans[ColorCount][ColorCount] =
412     { { 1.f, 0.8660254f, -0.5f }, { 1.f, -0.8660254f, -0.5f }, { 1.f, 0.f, 1.f } };
413 
414 #ifdef VERBOSE
415         for(int c = 0; c < 3; c++) {
416             printf("chmax[%d] : %f\tclmax[%d] : %f\tratio[%d] : %f\n", c, chmax[c], c, clmax[c], c, chmax[c] / clmax[c]);
417         }
418 #endif
419     /*What are chmax and clmax?
420     clmax is the raw clip level.
421     In RawTherapee it's calculated as
422     clmax[i] * (c_white[i] - cblacksom[i]) * scale_mul[i]
423     cblacksom = max(c_black[i] + black_lev[i], 0.0f)
424     c_white is the white level
425     scale_mul is the white balance multipliers that scale so that the image ends up 0-65535
426     For monochrome it's just 65535/(c_white-c_black)
427     For color, it's scale_mul[c] = (pre_mul[c]/maxpremul)*65535/(c_white-c_black)
428 
429     chmax is the actual highest value in the channel for the image after multiplying by scale_mul
430     */
431 
432 
433     float factor[3];
434 
435     for(int c = 0; c < ColorCount; c++) {
436         factor[c] = chmax[c] / clmax[c];
437     }
438 
439     float minFactor = min(factor[0], factor[1], factor[2]);
440 
441     if(minFactor > 1.f) { // all 3 channels clipped
442         // calculate clip factor per channel
443         for (int c = 0; c < ColorCount; c++) {
444             factor[c] /= minFactor;
445         }
446 
447         // get max clip factor
448         int maxpos = 0;
449         float maxValNew = 0.f;
450 
451         for (int c = 0; c < ColorCount; c++) {
452             if(chmax[c] / factor[c] > maxValNew) {
453                 maxValNew = chmax[c] / factor[c];
454                 maxpos = c;
455             }
456         }
457 
458         float clipFactor = clmax[maxpos] / maxValNew;
459 
460         if(clipFactor < maxpct)
461 
462             // if max clipFactor < maxpct (0.95) adjust per channel factors
463             for (int c = 0; c < ColorCount; c++) {
464                 factor[c] *= (maxpct / clipFactor);
465             }
466     } else {
467         factor[0] = factor[1] = factor[2] = 1.f;
468     }
469 
470 #ifdef VERBOSE
471         for (int c = 0; c < ColorCount; c++) {
472             printf("correction factor[%d] : %f\n", c, factor[c]);
473         }
474 #endif
475     float max_f[3], thresh[3];
476 
477     for (int c = 0; c < ColorCount; c++) {
478         thresh[c] = chmax[c] * threshpct / factor[c];
479         max_f[c] = chmax[c] * maxpct / factor[c];
480     }
481 
482     float whitept = max(max_f[0], max_f[1], max_f[2]);
483     float clippt  = min(max_f[0], max_f[1], max_f[2]);
484     float medpt   = max_f[0] + max_f[1] + max_f[2] - whitept - clippt;
485     float blendpt = blendthresh * clippt;
486     float medFactor[3];
487 
488     for (int c = 0; c < ColorCount; c++) {
489         medFactor[c] = max(1.0f, max_f[c] / medpt) / (-blendpt);
490     }
491 
492     int minx = width - 1;
493     int maxx = 0;
494     int miny = height - 1;
495     int maxy = 0;
496 
497     #pragma omp parallel for reduction(min:minx,miny) reduction(max:maxx,maxy) schedule(dynamic, 16)
498     for (int i = 0; i < height; ++i) {
499         for (int j = 0; j< width; ++j) {
500             if(red[i][j] >= max_f[0] || green[i][j] >= max_f[1] || blue[i][j] >= max_f[2]) {
501                 minx = std::min(minx, j);
502                 maxx = std::max(maxx, j);
503                 miny = std::min(miny, i);
504                 maxy = std::max(maxy, i);
505             }
506         }
507     }
508 
509 
510     constexpr int blurBorder = 256;
511     minx = std::max(0, minx - blurBorder);
512     miny = std::max(0, miny - blurBorder);
513     maxx = std::min(width - 1, maxx + blurBorder);
514     maxy = std::min(height - 1, maxy + blurBorder);
515     const int blurWidth = maxx - minx + 1;
516     const int blurHeight = maxy - miny + 1;
517 
518     multi_array2D<float, 3> channelblur(blurWidth, blurHeight, 0, 48);
519     array2D<float> temp(blurWidth, blurHeight); // allocate temporary buffer
520 
521     // blur RGB channels
522 
523     boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, blurWidth, 4);
524 
525     progress += 0.05;
526     setProgCancel(progress);
527 
528     boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, blurWidth, 4);
529 
530     progress += 0.05;
531     setProgCancel(progress);
532 
533     boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, blurWidth, 4);
534 
535     progress += 0.05;
536     setProgCancel(progress);
537 
538     // reduce channel blur to one array
539 #ifdef _OPENMP
540     #pragma omp parallel for
541 #endif
542 
543     for(int i = 0; i < blurHeight; i++)
544         for(int j = 0; j < blurWidth; j++) {
545             channelblur[0][i][j] = fabsf(channelblur[0][i][j] - red[i + miny][j + minx]) + fabsf(channelblur[1][i][j] - green[i + miny][j + minx]) + fabsf(channelblur[2][i][j] - blue[i + miny][j + minx]);
546         }
547 
548     for (int c = 1; c < 3; c++) {
549         channelblur[c].free();    //free up some memory
550     }
551 
552     progress += 0.05;
553     setProgCancel(progress);
554 
555     multi_array2D<float, 4> hilite_full(blurWidth, blurHeight, ARRAY2D_CLEAR_DATA, 32);
556 
557     progress += 0.10;
558     setProgCancel(progress);
559 
560     double hipass_sum = 0.f;
561     int hipass_norm = 0;
562 
563     // set up which pixels are clipped or near clipping
564 #ifdef _OPENMP
565     #pragma omp parallel for reduction(+:hipass_sum,hipass_norm) schedule(dynamic,16)
566 #endif
567 
568     for (int i = 0; i < blurHeight; i++) {
569         for (int j = 0; j < blurWidth; j++) {
570             //if one or more channels is highlight but none are blown, add to highlight accumulator
571             if ((red[i + miny][j + minx] > thresh[0] || green[i + miny][j + minx] > thresh[1] || blue[i + miny][j + minx] > thresh[2]) &&
572                     (red[i + miny][j + minx] < max_f[0] && green[i + miny][j + minx] < max_f[1] && blue[i + miny][j + minx] < max_f[2])) {
573 
574                 hipass_sum += channelblur[0][i][j];
575                 hipass_norm ++;
576 
577                 hilite_full[0][i][j] = red[i + miny][j + minx];
578                 hilite_full[1][i][j] = green[i + miny][j + minx];
579                 hilite_full[2][i][j] = blue[i + miny][j + minx];
580                 hilite_full[3][i][j] = 1.f;
581 
582             }
583         }
584     }//end of filling highlight array
585 
586     float hipass_ave = 2.f * hipass_sum / (hipass_norm + epsilon);
587 
588     progress += 0.05;
589     setProgCancel(progress);
590 
591     array2D<float> hilite_full4(blurWidth, blurHeight);
592     //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
593     //blur highlight data
594     boxblur2(hilite_full[3], hilite_full4, temp, 0, 0, blurHeight, blurWidth, 1);
595 
596     temp.free(); // free temporary buffer
597 
598     progress += 0.05;
599     setProgCancel(progress);
600 
601 #ifdef _OPENMP
602     #pragma omp parallel for schedule(dynamic,16)
603 #endif
604 
605     for (int i = 0; i < blurHeight; i++) {
606         for (int j = 0; j < blurWidth; j++) {
607             if (channelblur[0][i][j] > hipass_ave) {
608                 //too much variation
609                 hilite_full[0][i][j] = hilite_full[1][i][j] = hilite_full[2][i][j] = hilite_full[3][i][j] = 0.f;
610                 continue;
611             }
612 
613             if (hilite_full4[i][j] > epsilon && hilite_full4[i][j] < 0.95f) {
614                 //too near an edge, could risk using CA affected pixels, therefore omit
615                 hilite_full[0][i][j] = hilite_full[1][i][j] = hilite_full[2][i][j] = hilite_full[3][i][j] = 0.f;
616             }
617         }
618     }
619 
620     channelblur[0].free();    //free up some memory
621     hilite_full4.free();    //free up some memory
622 
623     int hfh = (blurHeight - (blurHeight % pitch)) / pitch;
624     int hfw = (blurWidth - (blurWidth % pitch)) / pitch;
625 
626     multi_array2D<float, 4> hilite(hfw + 1, hfh + 1, ARRAY2D_CLEAR_DATA, 48);
627 
628     //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
629     // blur and resample highlight data; range=size of blur, pitch=sample spacing
630 
631     array2D<float> temp2((blurWidth / pitch) + ((blurWidth % pitch) == 0 ? 0 : 1), blurHeight);
632 
633     for (int m = 0; m < 4; m++) {
634         boxblur_resamp(hilite_full[m], hilite[m], temp2, blurHeight, blurWidth, range, pitch);
635 
636         progress += 0.05;
637         setProgCancel(progress);
638     }
639 
640     temp2.free();
641 
642     for (int c = 0; c < 4; c++) {
643         hilite_full[c].free();    //free up some memory
644     }
645 
646     multi_array2D<float, 8> hilite_dir(hfw, hfh, ARRAY2D_CLEAR_DATA, 64);
647     // for faster processing we create two buffers using (height,width) instead of (width,height)
648     multi_array2D<float, 4> hilite_dir0(hfh, hfw, ARRAY2D_CLEAR_DATA, 64);
649     multi_array2D<float, 4> hilite_dir4(hfh, hfw, ARRAY2D_CLEAR_DATA, 64);
650 
651     progress += 0.05;
652     setProgCancel(progress);
653 
654     //fill gaps in highlight map by directional extension
655     //raster scan from four corners
656     for (int j = 1; j < hfw - 1; j++) {
657         for (int i = 2; i < hfh - 2; i++) {
658             //from left
659             if (hilite[3][i][j] > epsilon) {
660                 hilite_dir0[3][j][i] = 1.f;
661             } else {
662                 hilite_dir0[3][j][i] = (hilite_dir0[0 + 3][j - 1][i - 2] + hilite_dir0[0 + 3][j - 1][i - 1] + hilite_dir0[0 + 3][j - 1][i] + hilite_dir0[0 + 3][j - 1][i + 1] + hilite_dir0[0 + 3][j - 1][i + 2]) == 0.f ? 0.f : 0.1f;
663             }
664         }
665 
666         if(hilite[3][2][j] <= epsilon) {
667             hilite_dir[0 + 3][0][j]  = hilite_dir0[3][j][2];
668         }
669 
670         if(hilite[3][3][j] <= epsilon) {
671             hilite_dir[0 + 3][1][j]  = hilite_dir0[3][j][3];
672         }
673 
674         if(hilite[3][hfh - 3][j] <= epsilon) {
675             hilite_dir[4 + 3][hfh - 1][j] = hilite_dir0[3][j][hfh - 3];
676         }
677 
678         if(hilite[3][hfh - 4][j] <= epsilon) {
679             hilite_dir[4 + 3][hfh - 2][j] = hilite_dir0[3][j][hfh - 4];
680         }
681     }
682 
683     for (int i = 2; i < hfh - 2; i++) {
684         if(hilite[3][i][hfw - 2] <= epsilon) {
685             hilite_dir4[3][hfw - 1][i] = hilite_dir0[3][hfw - 2][i];
686         }
687     }
688 
689 
690 #ifdef _OPENMP
691     #pragma omp parallel for
692 #endif
693 
694     for (int c = 0; c < 3; c++) {
695         for (int j = 1; j < hfw - 1; j++) {
696             for (int i = 2; i < hfh - 2; i++) {
697                 //from left
698                 if (hilite[3][i][j] > epsilon) {
699                     hilite_dir0[c][j][i] = hilite[c][i][j] / hilite[3][i][j];
700                 } else {
701                     hilite_dir0[c][j][i] = 0.1f * ((hilite_dir0[0 + c][j - 1][i - 2] + hilite_dir0[0 + c][j - 1][i - 1] + hilite_dir0[0 + c][j - 1][i] + hilite_dir0[0 + c][j - 1][i + 1] + hilite_dir0[0 + c][j - 1][i + 2]) /
702                                                    (hilite_dir0[0 + 3][j - 1][i - 2] + hilite_dir0[0 + 3][j - 1][i - 1] + hilite_dir0[0 + 3][j - 1][i] + hilite_dir0[0 + 3][j - 1][i + 1] + hilite_dir0[0 + 3][j - 1][i + 2] + epsilon));
703                 }
704             }
705 
706             if(hilite[3][2][j] <= epsilon) {
707                 hilite_dir[0 + c][0][j]  = hilite_dir0[c][j][2];
708             }
709 
710             if(hilite[3][3][j] <= epsilon) {
711                 hilite_dir[0 + c][1][j]  = hilite_dir0[c][j][3];
712             }
713 
714             if(hilite[3][hfh - 3][j] <= epsilon) {
715                 hilite_dir[4 + c][hfh - 1][j] = hilite_dir0[c][j][hfh - 3];
716             }
717 
718             if(hilite[3][hfh - 4][j] <= epsilon) {
719                 hilite_dir[4 + c][hfh - 2][j] = hilite_dir0[c][j][hfh - 4];
720             }
721         }
722 
723         for (int i = 2; i < hfh - 2; i++) {
724             if(hilite[3][i][hfw - 2] <= epsilon) {
725                 hilite_dir4[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i];
726             }
727         }
728     }
729 
730     progress += 0.05;
731     setProgCancel(progress);
732 
733     for (int j = hfw - 2; j > 0; j--) {
734         for (int i = 2; i < hfh - 2; i++) {
735             //from right
736             if (hilite[3][i][j] > epsilon) {
737                 hilite_dir4[3][j][i] = 1.f;
738             } else {
739                 hilite_dir4[3][j][i] = (hilite_dir4[3][(j + 1)][(i - 2)] + hilite_dir4[3][(j + 1)][(i - 1)] + hilite_dir4[3][(j + 1)][(i)] + hilite_dir4[3][(j + 1)][(i + 1)] + hilite_dir4[3][(j + 1)][(i + 2)]) == 0.f ? 0.f : 0.1f;
740             }
741         }
742 
743         if(hilite[3][2][j] <= epsilon) {
744             hilite_dir[0 + 3][0][j] += hilite_dir4[3][j][2];
745         }
746 
747         if(hilite[3][hfh - 3][j] <= epsilon) {
748             hilite_dir[4 + 3][hfh - 1][j] += hilite_dir4[3][j][hfh - 3];
749         }
750     }
751 
752     for (int i = 2; i < hfh - 2; i++) {
753         if(hilite[3][i][0] <= epsilon) {
754             hilite_dir[0 + 3][i - 2][0] += hilite_dir4[3][0][i];
755             hilite_dir[4 + 3][i + 2][0] += hilite_dir4[3][0][i];
756         }
757 
758         if(hilite[3][i][1] <= epsilon) {
759             hilite_dir[0 + 3][i - 2][1] += hilite_dir4[3][1][i];
760             hilite_dir[4 + 3][i + 2][1] += hilite_dir4[3][1][i];
761         }
762 
763         if(hilite[3][i][hfw - 2] <= epsilon) {
764             hilite_dir[0 + 3][i - 2][hfw - 2] += hilite_dir4[3][hfw - 2][i];
765             hilite_dir[4 + 3][i + 2][hfw - 2] += hilite_dir4[3][hfw - 2][i];
766         }
767     }
768 
769 #ifdef _OPENMP
770     #pragma omp parallel for
771 #endif
772 
773     for (int c = 0; c < 3; c++) {
774         for (int j = hfw - 2; j > 0; j--) {
775             for (int i = 2; i < hfh - 2; i++) {
776                 //from right
777                 if (hilite[3][i][j] > epsilon) {
778                     hilite_dir4[c][j][i] = hilite[c][i][j] / hilite[3][i][j];
779                 } else {
780                     hilite_dir4[c][j][i] = 0.1 * ((hilite_dir4[c][(j + 1)][(i - 2)] + hilite_dir4[c][(j + 1)][(i - 1)] + hilite_dir4[c][(j + 1)][(i)] + hilite_dir4[c][(j + 1)][(i + 1)] + hilite_dir4[c][(j + 1)][(i + 2)]) /
781                                                   (hilite_dir4[3][(j + 1)][(i - 2)] + hilite_dir4[3][(j + 1)][(i - 1)] + hilite_dir4[3][(j + 1)][(i)] + hilite_dir4[3][(j + 1)][(i + 1)] + hilite_dir4[3][(j + 1)][(i + 2)] + epsilon));
782                 }
783             }
784 
785             if(hilite[3][2][j] <= epsilon) {
786                 hilite_dir[0 + c][0][j] += hilite_dir4[c][j][2];
787             }
788 
789             if(hilite[3][hfh - 3][j] <= epsilon) {
790                 hilite_dir[4 + c][hfh - 1][j] += hilite_dir4[c][j][hfh - 3];
791             }
792         }
793 
794         for (int i = 2; i < hfh - 2; i++) {
795             if(hilite[3][i][0] <= epsilon) {
796                 hilite_dir[0 + c][i - 2][0] += hilite_dir4[c][0][i];
797                 hilite_dir[4 + c][i + 2][0] += hilite_dir4[c][0][i];
798             }
799 
800             if(hilite[3][i][1] <= epsilon) {
801                 hilite_dir[0 + c][i - 2][1] += hilite_dir4[c][1][i];
802                 hilite_dir[4 + c][i + 2][1] += hilite_dir4[c][1][i];
803             }
804 
805             if(hilite[3][i][hfw - 2] <= epsilon) {
806                 hilite_dir[0 + c][i - 2][hfw - 2] += hilite_dir4[c][hfw - 2][i];
807                 hilite_dir[4 + c][i + 2][hfw - 2] += hilite_dir4[c][hfw - 2][i];
808             }
809         }
810     }
811 
812     progress += 0.05;
813     setProgCancel(progress);
814 
815     for (int i = 1; i < hfh - 1; i++)
816         for (int j = 2; j < hfw - 2; j++) {
817             //from top
818             if (hilite[3][i][j] > epsilon) {
819                 hilite_dir[0 + 3][i][j] = 1.f;
820             } else {
821                 hilite_dir[0 + 3][i][j] = (hilite_dir[0 + 3][i - 1][j - 2] + hilite_dir[0 + 3][i - 1][j - 1] + hilite_dir[0 + 3][i - 1][j] + hilite_dir[0 + 3][i - 1][j + 1] + hilite_dir[0 + 3][i - 1][j + 2]) == 0.f ? 0.f : 0.1f;
822             }
823         }
824 
825     for (int j = 2; j < hfw - 2; j++) {
826         if(hilite[3][hfh - 2][j] <= epsilon) {
827             hilite_dir[4 + 3][hfh - 1][j] += hilite_dir[0 + 3][hfh - 2][j];
828         }
829     }
830 
831 #ifdef _OPENMP
832     #pragma omp parallel for
833 #endif
834 
835     for (int c = 0; c < 3; c++) {
836         for (int i = 1; i < hfh - 1; i++) {
837             for (int j = 2; j < hfw - 2; j++) {
838                 //from top
839                 if (hilite[3][i][j] > epsilon) {
840                     hilite_dir[0 + c][i][j] = hilite[c][i][j] / hilite[3][i][j];
841                 } else {
842                     hilite_dir[0 + c][i][j] = 0.1 * ((hilite_dir[0 + c][i - 1][j - 2] + hilite_dir[0 + c][i - 1][j - 1] + hilite_dir[0 + c][i - 1][j] + hilite_dir[0 + c][i - 1][j + 1] + hilite_dir[0 + c][i - 1][j + 2]) /
843                                                      (hilite_dir[0 + 3][i - 1][j - 2] + hilite_dir[0 + 3][i - 1][j - 1] + hilite_dir[0 + 3][i - 1][j] + hilite_dir[0 + 3][i - 1][j + 1] + hilite_dir[0 + 3][i - 1][j + 2] + epsilon));
844                 }
845             }
846         }
847 
848         for (int j = 2; j < hfw - 2; j++) {
849             if(hilite[3][hfh - 2][j] <= epsilon) {
850                 hilite_dir[4 + c][hfh - 1][j] += hilite_dir[0 + c][hfh - 2][j];
851             }
852         }
853     }
854 
855     progress += 0.05;
856     setProgCancel(progress);
857 
858     for (int i = hfh - 2; i > 0; i--)
859         for (int j = 2; j < hfw - 2; j++) {
860             //from bottom
861             if (hilite[3][i][j] > epsilon) {
862                 hilite_dir[4 + 3][i][j] = 1.f;
863             } else {
864                 hilite_dir[4 + 3][i][j] = (hilite_dir[4 + 3][(i + 1)][(j - 2)] + hilite_dir[4 + 3][(i + 1)][(j - 1)] + hilite_dir[4 + 3][(i + 1)][(j)] + hilite_dir[4 + 3][(i + 1)][(j + 1)] + hilite_dir[4 + 3][(i + 1)][(j + 2)]) == 0.f ? 0.f : 0.1f;
865             }
866         }
867 
868 #ifdef _OPENMP
869     #pragma omp parallel for
870 #endif
871 
872     for (int c = 0; c < 4; c++) {
873         for (int i = hfh - 2; i > 0; i--) {
874             for (int j = 2; j < hfw - 2; j++) {
875                 //from bottom
876                 if (hilite[3][i][j] > epsilon) {
877                     hilite_dir[4 + c][i][j] = hilite[c][i][j] / hilite[3][i][j];
878                 } else {
879                     hilite_dir[4 + c][i][j] = 0.1 * ((hilite_dir[4 + c][(i + 1)][(j - 2)] + hilite_dir[4 + c][(i + 1)][(j - 1)] + hilite_dir[4 + c][(i + 1)][(j)] + hilite_dir[4 + c][(i + 1)][(j + 1)] + hilite_dir[4 + c][(i + 1)][(j + 2)]) /
880                                                      (hilite_dir[4 + 3][(i + 1)][(j - 2)] + hilite_dir[4 + 3][(i + 1)][(j - 1)] + hilite_dir[4 + 3][(i + 1)][(j)] + hilite_dir[4 + 3][(i + 1)][(j + 1)] + hilite_dir[4 + 3][(i + 1)][(j + 2)] + epsilon));
881                 }
882             }
883         }
884     }
885 
886     progress += 0.05;
887     setProgCancel(progress);
888 
889     //fill in edges
890     for (int dir = 0; dir < 2; dir++) {
891         for (int i = 1; i < hfh - 1; i++)
892             for (int c = 0; c < 4; c++) {
893                 hilite_dir[dir * 4 + c][i][0] = hilite_dir[dir * 4 + c][i][1];
894                 hilite_dir[dir * 4 + c][i][hfw - 1] = hilite_dir[dir * 4 + c][i][hfw - 2];
895             }
896 
897         for (int j = 1; j < hfw - 1; j++)
898             for (int c = 0; c < 4; c++) {
899                 hilite_dir[dir * 4 + c][0][j] = hilite_dir[dir * 4 + c][1][j];
900                 hilite_dir[dir * 4 + c][hfh - 1][j] = hilite_dir[dir * 4 + c][hfh - 2][j];
901             }
902 
903         for (int c = 0; c < 4; c++) {
904             hilite_dir[dir * 4 + c][0][0] = hilite_dir[dir * 4 + c][1][0] = hilite_dir[dir * 4 + c][0][1] = hilite_dir[dir * 4 + c][1][1] = hilite_dir[dir * 4 + c][2][2];
905             hilite_dir[dir * 4 + c][0][hfw - 1] = hilite_dir[dir * 4 + c][1][hfw - 1] = hilite_dir[dir * 4 + c][0][hfw - 2] = hilite_dir[dir * 4 + c][1][hfw - 2] = hilite_dir[dir * 4 + c][2][hfw - 3];
906             hilite_dir[dir * 4 + c][hfh - 1][0] = hilite_dir[dir * 4 + c][hfh - 2][0] = hilite_dir[dir * 4 + c][hfh - 1][1] = hilite_dir[dir * 4 + c][hfh - 2][1] = hilite_dir[dir * 4 + c][hfh - 3][2];
907             hilite_dir[dir * 4 + c][hfh - 1][hfw - 1] = hilite_dir[dir * 4 + c][hfh - 2][hfw - 1] = hilite_dir[dir * 4 + c][hfh - 1][hfw - 2] = hilite_dir[dir * 4 + c][hfh - 2][hfw - 2] = hilite_dir[dir * 4 + c][hfh - 3][hfw - 3];
908         }
909     }
910 
911     for (int i = 1; i < hfh - 1; i++)
912         for (int c = 0; c < 4; c++) {
913             hilite_dir0[c][0][i] = hilite_dir0[c][1][i];
914             hilite_dir0[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i];
915         }
916 
917     for (int j = 1; j < hfw - 1; j++)
918         for (int c = 0; c < 4; c++) {
919             hilite_dir0[c][j][0] = hilite_dir0[c][j][1];
920             hilite_dir0[c][j][hfh - 1] = hilite_dir0[c][j][hfh - 2];
921         }
922 
923     for (int c = 0; c < 4; c++) {
924         hilite_dir0[c][0][0] = hilite_dir0[c][0][1] = hilite_dir0[c][1][0] = hilite_dir0[c][1][1] = hilite_dir0[c][2][2];
925         hilite_dir0[c][hfw - 1][0] = hilite_dir0[c][hfw - 1][1] = hilite_dir0[c][hfw - 2][0] = hilite_dir0[c][hfw - 2][1] = hilite_dir0[c][hfw - 3][2];
926         hilite_dir0[c][0][hfh - 1] = hilite_dir0[c][0][hfh - 2] = hilite_dir0[c][1][hfh - 1] = hilite_dir0[c][1][hfh - 2] = hilite_dir0[c][2][hfh - 3];
927         hilite_dir0[c][hfw - 1][hfh - 1] = hilite_dir0[c][hfw - 1][hfh - 2] = hilite_dir0[c][hfw - 2][hfh - 1] = hilite_dir0[c][hfw - 2][hfh - 2] = hilite_dir0[c][hfw - 3][hfh - 3];
928     }
929 
930     for (int i = 1; i < hfh - 1; i++)
931         for (int c = 0; c < 4; c++) {
932             hilite_dir4[c][0][i] = hilite_dir4[c][1][i];
933             hilite_dir4[c][hfw - 1][i] = hilite_dir4[c][hfw - 2][i];
934         }
935 
936     for (int j = 1; j < hfw - 1; j++)
937         for (int c = 0; c < 4; c++) {
938             hilite_dir4[c][j][0] = hilite_dir4[c][j][1];
939             hilite_dir4[c][j][hfh - 1] = hilite_dir4[c][j][hfh - 2];
940         }
941 
942     for (int c = 0; c < 4; c++) {
943         hilite_dir4[c][0][0] = hilite_dir4[c][0][1] = hilite_dir4[c][1][0] = hilite_dir4[c][1][1] = hilite_dir4[c][2][2];
944         hilite_dir4[c][hfw - 1][0] = hilite_dir4[c][hfw - 1][1] = hilite_dir4[c][hfw - 2][0] = hilite_dir4[c][hfw - 2][1] = hilite_dir4[c][hfw - 3][2];
945         hilite_dir4[c][0][hfh - 1] = hilite_dir4[c][0][hfh - 2] = hilite_dir4[c][1][hfh - 1] = hilite_dir4[c][1][hfh - 2] = hilite_dir4[c][2][hfh - 3];
946         hilite_dir4[c][hfw - 1][hfh - 1] = hilite_dir4[c][hfw - 1][hfh - 2] = hilite_dir4[c][hfw - 2][hfh - 1] = hilite_dir4[c][hfw - 2][hfh - 2] = hilite_dir4[c][hfw - 3][hfh - 3];
947     }
948 
949     progress += 0.05;
950     setProgCancel(progress);
951 
952     //free up some memory
953     for(int c = 0; c < 4; c++) {
954         hilite[c].free();
955     }
956 
957     //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
958     // now reconstruct clipped channels using color ratios
959 
960 #ifdef _OPENMP
961     #pragma omp parallel for schedule(dynamic,16)
962 #endif
963 
964     for (int i = 0; i < blurHeight; i++) {
965         int i1 = min((i - (i % pitch)) / pitch, hfh - 1);
966 
967         for (int j = 0; j < blurWidth; j++) {
968 
969             float pixel[3] = {red[i + miny][j + minx], green[i + miny][j + minx], blue[i + miny][j + minx]};
970 
971             if (pixel[0] < max_f[0] && pixel[1] < max_f[1] && pixel[2] < max_f[2]) {
972                 continue;    //pixel not clipped
973             }
974 
975             int j1 = min((j - (j % pitch)) / pitch, hfw - 1);
976 
977             //estimate recovered values using modified HLRecovery_blend algorithm
978             float rgb[ColorCount], rgb_blend[ColorCount] = {}, cam[2][ColorCount], lab[2][ColorCount], sum[2], chratio;
979 
980             // Copy input pixel to rgb so it's easier to access in loops
981             rgb[0] = pixel[0];
982             rgb[1] = pixel[1];
983             rgb[2] = pixel[2];
984 
985             // Initialize cam with raw input [0] and potentially clipped input [1]
986             for (int c = 0; c < ColorCount; c++) {
987                 cam[0][c] = rgb[c];
988                 cam[1][c] = min(cam[0][c], clippt);
989             }
990 
991             // Calculate the lightness correction ratio (chratio)
992             for (int i2 = 0; i2 < 2; i2++) {
993                 for (int c = 0; c < ColorCount; c++) {
994                     lab[i2][c] = 0;
995 
996                     for (int c1 = 0; c1 < ColorCount; c1++) {
997                         lab[i2][c] += trans[c][c1] * cam[i2][c1];
998                     }
999                 }
1000 
1001                 sum[i2] = 0.f;
1002 
1003                 for (int c = 1; c < ColorCount; c++) {
1004                     sum[i2] += SQR(lab[i2][c]);
1005                 }
1006             }
1007 
1008             if(sum[0] == 0.f) {     // avoid division by zero
1009                 sum[0] = epsilon;
1010             }
1011 
1012             chratio = sqrtf(sum[1] / sum[0]);
1013 
1014 
1015             // Apply ratio to lightness in lab space
1016             for (int c = 1; c < ColorCount; c++) {
1017                 lab[0][c] *= chratio;
1018             }
1019 
1020             // Transform back from lab to RGB
1021             for (int c = 0; c < ColorCount; c++) {
1022                 cam[0][c] = 0;
1023 
1024                 for (int c1 = 0; c1 < ColorCount; c1++) {
1025                     cam[0][c] += itrans[c][c1] * lab[0][c1];
1026                 }
1027             }
1028 
1029             for (int c = 0; c < ColorCount; c++) {
1030                 rgb[c] = cam[0][c] / ColorCount;
1031             }
1032 
1033             // Copy converted pixel back
1034             float rfrac = max(0.f, min(1.f, medFactor[0] * (pixel[0] - blendpt)));
1035             float gfrac = max(0.f, min(1.f, medFactor[1] * (pixel[1] - blendpt)));
1036             float bfrac = max(0.f, min(1.f, medFactor[2] * (pixel[2] - blendpt)));
1037 
1038             if (pixel[0] > blendpt) {
1039                 rgb_blend[0] = rfrac * rgb[0] + (1.f - rfrac) * pixel[0];
1040             }
1041 
1042             if (pixel[1] > blendpt) {
1043                 rgb_blend[1] = gfrac * rgb[1] + (1.f - gfrac) * pixel[1];
1044             }
1045 
1046             if (pixel[2] > blendpt) {
1047                 rgb_blend[2] = bfrac * rgb[2] + (1.f - bfrac) * pixel[2];
1048             }
1049 
1050             //end of HLRecovery_blend estimation
1051             //%%%%%%%%%%%%%%%%%%%%%%%
1052 
1053             //there are clipped highlights
1054             //first, determine weighted average of unclipped extensions (weighting is by 'hue' proximity)
1055             float totwt = 0.f;
1056             float clipfix[3] = {0.f, 0.f, 0.f};
1057 
1058             float Y = epsilon + rgb_blend[0] + rgb_blend[1] + rgb_blend[2];
1059 
1060             for (int c = 0; c < ColorCount; c++) {
1061                 rgb_blend[c] /= Y;
1062             }
1063 
1064             float Yhi = 1.f / (hilite_dir0[0][j1][i1] + hilite_dir0[1][j1][i1] + hilite_dir0[2][j1][i1]);
1065 
1066             if (Yhi < 2.f) {
1067                 float dirwt = 1.f / (1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir0[0][j1][i1] * Yhi) +
1068                                                       SQR(rgb_blend[1] - hilite_dir0[1][j1][i1] * Yhi) +
1069                                                       SQR(rgb_blend[2] - hilite_dir0[2][j1][i1] * Yhi)));
1070                 totwt = dirwt;
1071                 dirwt /= (hilite_dir0[3][j1][i1] + epsilon);
1072                 clipfix[0] = dirwt * hilite_dir0[0][j1][i1];
1073                 clipfix[1] = dirwt * hilite_dir0[1][j1][i1];
1074                 clipfix[2] = dirwt * hilite_dir0[2][j1][i1];
1075             }
1076 
1077             for (int dir = 0; dir < 2; dir++) {
1078                 float Yhil = 1.f / ( hilite_dir[dir * 4 + 0][i1][j1] + hilite_dir[dir * 4 + 1][i1][j1] + hilite_dir[dir * 4 + 2][i1][j1]);
1079 
1080                 if (Yhil < 2.f) {
1081                     float dirwt = 1.f / (1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir[dir * 4 + 0][i1][j1] * Yhil) +
1082                                                           SQR(rgb_blend[1] - hilite_dir[dir * 4 + 1][i1][j1] * Yhil) +
1083                                                           SQR(rgb_blend[2] - hilite_dir[dir * 4 + 2][i1][j1] * Yhil)));
1084                     totwt += dirwt;
1085                     dirwt /= (hilite_dir[dir * 4 + 3][i1][j1] + epsilon);
1086                     clipfix[0] += dirwt * hilite_dir[dir * 4 + 0][i1][j1];
1087                     clipfix[1] += dirwt * hilite_dir[dir * 4 + 1][i1][j1];
1088                     clipfix[2] += dirwt * hilite_dir[dir * 4 + 2][i1][j1];
1089                 }
1090             }
1091 
1092 
1093             Yhi = 1.f / (hilite_dir4[0][j1][i1] + hilite_dir4[1][j1][i1] + hilite_dir4[2][j1][i1]);
1094 
1095             if (Yhi < 2.f) {
1096                 float dirwt = 1.f / (1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir4[0][j1][i1] * Yhi) +
1097                                                       SQR(rgb_blend[1] - hilite_dir4[1][j1][i1] * Yhi) +
1098                                                       SQR(rgb_blend[2] - hilite_dir4[2][j1][i1] * Yhi)));
1099                 totwt += dirwt;
1100                 dirwt /= (hilite_dir4[3][j1][i1] + epsilon);
1101                 clipfix[0] += dirwt * hilite_dir4[0][j1][i1];
1102                 clipfix[1] += dirwt * hilite_dir4[1][j1][i1];
1103                 clipfix[2] += dirwt * hilite_dir4[2][j1][i1];
1104             }
1105 
1106             if(totwt == 0.f) {
1107                 continue;
1108             }
1109 
1110             clipfix[0] /= totwt;
1111             clipfix[1] /= totwt;
1112             clipfix[2] /= totwt;
1113 
1114             //now correct clipped channels
1115             if (pixel[0] > max_f[0] && pixel[1] > max_f[1] && pixel[2] > max_f[2]) {
1116                 //all channels clipped
1117                 float Yl = (0.299 * clipfix[0] + 0.587 * clipfix[1] + 0.114 * clipfix[2]);
1118 
1119                 float mult = whitept / Yl;
1120                 red[i + miny][j + minx]   = clipfix[0] * mult; //factor;
1121                 green[i + miny][j + minx] = clipfix[1] * mult; //factor;
1122                 blue[i + miny][j + minx]  = clipfix[2] * mult; //factor;
1123             } else {//some channels clipped
1124                 float notclipped[3] = {pixel[0] <= max_f[0] ? 1.f : 0.f, pixel[1] <= max_f[1] ? 1.f : 0.f, pixel[2] <= max_f[2] ? 1.f : 0.f};
1125 
1126                 if (notclipped[0] == 0.f) { //red clipped
1127                     red[i + miny][j + minx]  = max(red[i + miny][j + minx], (clipfix[0] * ((notclipped[1] * pixel[1] + notclipped[2] * pixel[2]) /
1128                                                  (notclipped[1] * clipfix[1] + notclipped[2] * clipfix[2] + epsilon))));
1129                 }
1130 
1131                 if (notclipped[1] == 0.f) { //green clipped
1132                     green[i + miny][j + minx] = max(green[i + miny][j + minx], (clipfix[1] * ((notclipped[2] * pixel[2] + notclipped[0] * pixel[0]) /
1133                                                     (notclipped[2] * clipfix[2] + notclipped[0] * clipfix[0] + epsilon))));
1134                 }
1135 
1136                 if (notclipped[2] == 0.f) { //blue clipped
1137                     blue[i + miny][j + minx]  = max(blue[i + miny][j + minx], (clipfix[2] * ((notclipped[0] * pixel[0] + notclipped[1] * pixel[1]) /
1138                                                    (notclipped[0] * clipfix[0] + notclipped[1] * clipfix[1] + epsilon))));
1139                 }
1140             }
1141 
1142             Y = (0.299 * red[i + miny][j + minx] + 0.587 * green[i + miny][j + minx] + 0.114 * blue[i + miny][j + minx]);
1143 
1144             if (Y > whitept) {
1145                 float mult = whitept / Y;
1146 
1147                 red[i + miny][j + minx]   *= mult;
1148                 green[i + miny][j + minx] *= mult;
1149                 blue[i + miny][j + minx]  *= mult;
1150             }
1151         }
1152     }
1153 
1154     setProgCancel(1.00);
1155 
1156     return RP_NO_ERROR;
1157 
1158 }// end of HLReconstruction
1159