1 ////////////////////////////////////////////////////////////////
2 //
3 //      Fast demosaicing algorithm
4 //
5 //      copyright (c) 2008-2010  Emil Martinec <ejmartin@uchicago.edu>
6 //
7 //
8 // code dated: August 26, 2010
9 //
10 //  fast_demo.cc is free software: you can redistribute it and/or modify
11 //  it under the terms of the GNU General Public License as published by
12 //  the Free Software Foundation, either version 3 of the License, or
13 //  (at your option) any later version.
14 //
15 //  This program is distributed in the hope that it will be useful,
16 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
17 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 //  GNU General Public License for more details.
19 //
20 //  You should have received a copy of the GNU General Public License
21 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
22 //
23 ////////////////////////////////////////////////////////////////
24 
25 #include <cmath>
26 #include "bayerhelper.h"
27 #include "librtprocess.h"
28 #include "opthelper.h"
29 #include "rt_math.h"
30 #include "StopWatch.h"
31 
32 using namespace librtprocess;
33 
34 #define TS 224
35 
36 #define INVGRAD(i) (16.0f/SQR(4.0f+i))
37 #ifdef __SSE2__
38 #define INVGRADV(i) (c16v*_mm_rcp_ps(SQRV(fourv+i)))
39 #endif
40 
41 
bayerfast_demosaic(int width,int height,const float * const * rawData,float ** red,float ** green,float ** blue,const unsigned cfarray[2][2],const std::function<bool (double)> & setProgCancel,double initGain)42 rpError bayerfast_demosaic(int width, int height, const float * const *rawData, float **red, float **green, float **blue, const unsigned cfarray[2][2], const std::function<bool(double)> &setProgCancel, double initGain)
43 {
44 
45     BENCHFUN
46     if (!validateBayerCfa(3, cfarray)) {
47         return RP_WRONG_CFA;
48     }
49 
50     rpError rc = RP_NO_ERROR;
51 
52     double progress = 0.0;
53     setProgCancel(progress);
54 
55     const int bord = 5;
56     const int H = height;
57     const int W = width;
58     const float clip_pt = 4 * 65535 * initGain;
59 
60     rc = bayerborder_demosaic(width, height, bord, rawData, red, green, blue, cfarray);
61 
62     progress += 0.1;
63     setProgCancel(progress);
64 
65 #ifdef _OPENMP
66     #pragma omp parallel
67 #endif
68     {
69 
70 #define CLF 1
71         // assign working space
72         char *buffer = (char *) calloc(3 * sizeof(float) * TS * TS + 3 * CLF * 64 + 63, 1);
73 #ifdef _OPENMP
74     #pragma omp critical
75 #endif
76         {
77             if (!buffer) {
78                 rc = RP_MEMORY_ERROR;
79             }
80         }
81 #ifdef _OPENMP
82         #pragma omp barrier
83 #endif
84         if (!rc) {
85             char *data = (char*)((uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
86 
87             float * const greentile = (float (*)) data; //pointers to array
88             float * const redtile   = (float (*)) ((char*)greentile + sizeof(float) * TS * TS + CLF * 64);
89             float * const bluetile  = (float (*)) ((char*)redtile + sizeof(float) * TS * TS + CLF * 64);
90 
91             int progressCounter = 0;
92             const double progressInc = 16.0 * (1.0 - progress) / ((H * W) / ((TS - 4) * (TS - 4)));
93 
94 #ifdef _OPENMP
95             #pragma omp for nowait
96 #endif
97 
98             for (int top = bord - 2; top < H - bord + 2; top += TS - 4)
99                 for (int left = bord - 2; left < W - bord + 2; left += TS - 4) {
100                     const int bottom = min(top + TS, H - bord + 2);
101                     const int right  = min(left + TS, W - bord + 2);
102 
103 #ifdef __SSE2__
104                     const vfloat c16v = F2V(16.0f);
105                     const vfloat fourv = F2V(4.0f);
106                     vmask selmask;
107                     const vmask andmask = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
108 
109                     if(fc(cfarray, top, left) == 1) {
110                         selmask = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff);
111                     } else {
112                         selmask = _mm_set_epi32(0xffffffff, 0, 0xffffffff, 0);
113                     }
114 
115 #endif
116 
117                     // interpolate G using gradient weights
118                     for (int i = top, rr = 0; i < bottom; i++, rr++) {
119                         int j = left;
120                         int cc = 0;
121 #ifdef __SSE2__
122                         selmask = (vmask)_mm_andnot_ps((vfloat)selmask, (vfloat)andmask);
123 
124                         for (; j < right - 3; j += 4, cc += 4) {
125                             const vfloat tempv = LVFU(rawData[i][j]);
126                             const vfloat absv = vabsf(LVFU(rawData[i - 1][j]) - LVFU(rawData[i + 1][j]));
127                             const vfloat wtuv = INVGRADV(absv + vabsf(tempv - LVFU(rawData[i - 2][j])) + vabsf(LVFU(rawData[i - 1][j]) - LVFU(rawData[i - 3][j])));
128                             const vfloat wtdv = INVGRADV(absv + vabsf(tempv - LVFU(rawData[i + 2][j])) + vabsf(LVFU(rawData[i + 1][j]) - LVFU(rawData[i + 3][j])));
129                             const vfloat abs2v = vabsf(LVFU(rawData[i][j - 1]) - LVFU(rawData[i][j + 1]));
130                             const vfloat wtlv = INVGRADV(abs2v + vabsf(tempv - LVFU(rawData[i][j - 2])) + vabsf(LVFU(rawData[i][j - 1]) - LVFU(rawData[i][j - 3])));
131                             const vfloat wtrv = INVGRADV(abs2v + vabsf(tempv - LVFU(rawData[i][j + 2])) + vabsf(LVFU(rawData[i][j + 1]) - LVFU(rawData[i][j + 3])));
132                             const vfloat greenv = (wtuv * LVFU(rawData[i - 1][j]) + wtdv * LVFU(rawData[i + 1][j]) + wtlv * LVFU(rawData[i][j - 1]) + wtrv * LVFU(rawData[i][j + 1])) / (wtuv + wtdv + wtlv + wtrv);
133                             STVF(greentile[rr * TS + cc], vself(selmask, greenv, tempv));
134                             STVF(redtile[rr * TS + cc], tempv);
135                             STVF(bluetile[rr * TS + cc], tempv);
136                         }
137 #endif
138                         for (; j < right; j++, cc++) {
139 
140                             if (fc(cfarray, i, j) == 1) {
141                                 greentile[rr * TS + cc] = rawData[i][j];
142 
143                             } else {
144                                 //compute directional weights using image gradients
145                                 const float wtu = INVGRAD((abs(rawData[i + 1][j] - rawData[i - 1][j]) + abs(rawData[i][j] - rawData[i - 2][j]) + abs(rawData[i - 1][j] - rawData[i - 3][j])));
146                                 const float wtd = INVGRAD((abs(rawData[i - 1][j] - rawData[i + 1][j]) + abs(rawData[i][j] - rawData[i + 2][j]) + abs(rawData[i + 1][j] - rawData[i + 3][j])));
147                                 const float wtl = INVGRAD((abs(rawData[i][j + 1] - rawData[i][j - 1]) + abs(rawData[i][j] - rawData[i][j - 2]) + abs(rawData[i][j - 1] - rawData[i][j - 3])));
148                                 const float wtr = INVGRAD((abs(rawData[i][j - 1] - rawData[i][j + 1]) + abs(rawData[i][j] - rawData[i][j + 2]) + abs(rawData[i][j + 1] - rawData[i][j + 3])));
149 
150                                 //store in rgb array the interpolated G value at R/B grid points using directional weighted average
151                                 greentile[rr * TS + cc] = (wtu * rawData[i - 1][j] + wtd * rawData[i + 1][j] + wtl * rawData[i][j - 1] + wtr * rawData[i][j + 1]) / (wtu + wtd + wtl + wtr);
152                             }
153 
154                             redtile[rr * TS + cc] = rawData[i][j];
155                             bluetile[rr * TS + cc] = rawData[i][j];
156                         }
157                     }
158 
159 #ifdef __SSE2__
160                     const vfloat zd25v = F2V(0.25f);
161                     const vfloat clip_ptv = F2V(clip_pt);
162 #endif
163 
164                     for (int i = top + 1, rr = 1; i < bottom - 1; i++, rr++) {
165                         if (fc(cfarray, i, left + (fc(cfarray, i, 2) & 1) + 1) == 0)
166 #ifdef __SSE2__
167                             for (int j = left + 1, cc = 1; j < right - 1; j += 4, cc += 4) {
168                                 //interpolate B/R colors at R/B sites
169                                 STVFU(bluetile[rr * TS + cc], LVFU(greentile[rr * TS + cc]) - zd25v * ((LVFU(greentile[(rr - 1)*TS + (cc - 1)]) + LVFU(greentile[(rr - 1)*TS + (cc + 1)]) + LVFU(greentile[(rr + 1)*TS + cc + 1]) + LVFU(greentile[(rr + 1)*TS + cc - 1])) -
170                                               vminf(LVFU(rawData[i - 1][j - 1]) + LVFU(rawData[i - 1][j + 1]) + LVFU(rawData[i + 1][j + 1]) + LVFU(rawData[i + 1][j - 1]), clip_ptv)));
171                             }
172 
173 #else
174 
175                             for (int cc = (fc(cfarray, i, 2) & 1) + 1, j = left + cc; j < right - 1; j += 2, cc += 2) {
176                                 //interpolate B/R colors at R/B sites
177                                 bluetile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + (cc - 1)] + greentile[(rr - 1) * TS + (cc + 1)] + greentile[(rr + 1) * TS + cc + 1] + greentile[(rr + 1) * TS + cc - 1]) -
178                                                          min(clip_pt, rawData[i - 1][j - 1] + rawData[i - 1][j + 1] + rawData[i + 1][j + 1] + rawData[i + 1][j - 1]));
179                             }
180 
181 #endif
182                         else
183 #ifdef __SSE2__
184                             for (int j = left + 1, cc = 1; j < right - 1; j += 4, cc += 4) {
185                                 //interpolate B/R colors at R/B sites
186                                 STVFU(redtile[rr * TS + cc], LVFU(greentile[rr * TS + cc]) - zd25v * ((LVFU(greentile[(rr - 1)*TS + cc - 1]) + LVFU(greentile[(rr - 1)*TS + cc + 1]) + LVFU(greentile[(rr + 1)*TS + cc + 1]) + LVFU(greentile[(rr + 1)*TS + cc - 1])) -
187                                               vminf(LVFU(rawData[i - 1][j - 1]) + LVFU(rawData[i - 1][j + 1]) + LVFU(rawData[i + 1][j + 1]) + LVFU(rawData[i + 1][j - 1]), clip_ptv)));
188                             }
189 
190 #else
191 
192                             for (int cc = (fc(cfarray, i, 2) & 1) + 1, j = left + cc; j < right - 1; j += 2, cc += 2) {
193                                 //interpolate B/R colors at R/B sites
194                                 redtile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + cc - 1] + greentile[(rr - 1) * TS + cc + 1] + greentile[(rr + 1) * TS + cc + 1] + greentile[(rr + 1) * TS + cc - 1]) -
195                                                         min(clip_pt, rawData[i - 1][j - 1] + rawData[i - 1][j + 1] + rawData[i + 1][j + 1] + rawData[i + 1][j - 1]));
196                             }
197 
198 #endif
199                     }
200 
201 
202 #ifdef __SSE2__
203                     selmask = _mm_set_epi32(0xffffffff, 0, 0xffffffff, 0);
204 #endif
205 
206                     // interpolate R/B using color differences
207                     for (int i = top + 2, rr = 2; i < bottom - 2; i++, rr++) {
208 #ifdef __SSE2__
209 
210                         for (int cc = 2 + (fc(cfarray, i, 2) & 1), j = left + cc; j < right - 2; j += 4, cc += 4) {
211                             // no need to take care about the borders of the tile. There's enough free space.
212                             //interpolate R and B colors at G sites
213                             const vfloat greenv = LVFU(greentile[rr * TS + cc]);
214                             const vfloat greensumv = LVFU(greentile[(rr - 1) * TS + cc]) + LVFU(greentile[(rr + 1) * TS + cc]) + LVFU(greentile[rr * TS + cc - 1]) + LVFU(greentile[rr * TS + cc + 1]);
215 
216                             vfloat temp1v = LVFU(redtile[rr * TS + cc]);
217                             vfloat temp2v = greenv - zd25v * (greensumv - LVFU(redtile[(rr - 1) * TS + cc]) - LVFU(redtile[(rr + 1) * TS + cc]) - LVFU(redtile[rr * TS + cc - 1]) - LVFU(redtile[rr * TS + cc + 1]));
218 
219                             STVFU(redtile[rr * TS + cc], vself(selmask, temp1v, temp2v));
220 
221                             temp1v = LVFU(bluetile[rr * TS + cc]);
222 
223                             temp2v = greenv - zd25v * (greensumv - LVFU(bluetile[(rr - 1) * TS + cc]) - LVFU(bluetile[(rr + 1) * TS + cc]) - LVFU(bluetile[rr * TS + cc - 1]) - LVFU(bluetile[rr * TS + cc + 1]));
224 
225                             STVFU(bluetile[rr * TS + cc], vself(selmask, temp1v, temp2v));
226                         }
227 
228 #else
229 
230                         for (int cc = 2 + (fc(cfarray, i, 2) & 1), j = left + cc; j < right - 2; j += 2, cc += 2) {
231                             //interpolate R and B colors at G sites
232                             redtile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + cc] - redtile[(rr - 1) * TS + cc]) + (greentile[(rr + 1) * TS + cc] - redtile[(rr + 1) * TS + cc]) +
233                                                     (greentile[rr * TS + cc - 1] - redtile[rr * TS + cc - 1]) + (greentile[rr * TS + cc + 1] - redtile[rr * TS + cc + 1]));
234                             bluetile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + cc] - bluetile[(rr - 1) * TS + cc]) + (greentile[(rr + 1) * TS + cc] - bluetile[(rr + 1) * TS + cc]) +
235                                                      (greentile[rr * TS + cc - 1] - bluetile[rr * TS + cc - 1]) + (greentile[rr * TS + cc + 1] - bluetile[rr * TS + cc + 1]));
236                         }
237 
238 #endif
239                     }
240 
241 
242                     for (int i = top + 2, rr = 2; i < bottom - 2; i++, rr++) {
243                         int j = left + 2;
244                         int cc = 2;
245 #ifdef __SSE2__
246                         for (; j < right - 5; j += 4, cc += 4) {
247                             STVFU(red[i][j], LVFU(redtile[rr * TS + cc]));
248                             STVFU(green[i][j], LVFU(greentile[rr * TS + cc]));
249                             STVFU(blue[i][j], LVFU(bluetile[rr * TS + cc]));
250                         }
251 #endif
252                         for (; j < right - 2; j++, cc++) {
253                             red[i][j] = redtile[rr * TS + cc];
254                             green[i][j] = greentile[rr * TS + cc];
255                             blue[i][j] = bluetile[rr * TS + cc];
256                         }
257                     }
258 
259                     if((++progressCounter) % 16 == 0) {
260 #ifdef _OPENMP
261                         #pragma omp critical (updateprogress)
262 #endif
263                         {
264                             progress += progressInc;
265                             progress = min(1.0, progress);
266                             setProgCancel(progress);
267                         }
268                     }
269                 }
270             }
271         free(buffer);
272     } // End of parallelization
273 
274     setProgCancel(1.0);
275     return rc;
276 
277 }
278 #undef TS
279 #undef CLF
280