1 ////////////////////////////////////////////////////////////////
2 //
3 // Fast demosaicing algorithm
4 //
5 // copyright (c) 2008-2010 Emil Martinec <ejmartin@uchicago.edu>
6 //
7 //
8 // code dated: August 26, 2010
9 //
10 // fast_demo.cc is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // This program is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with this program. If not, see <http://www.gnu.org/licenses/>.
22 //
23 ////////////////////////////////////////////////////////////////
24
25 #include <cmath>
26 #include "bayerhelper.h"
27 #include "librtprocess.h"
28 #include "opthelper.h"
29 #include "rt_math.h"
30 #include "StopWatch.h"
31
32 using namespace librtprocess;
33
34 #define TS 224
35
36 #define INVGRAD(i) (16.0f/SQR(4.0f+i))
37 #ifdef __SSE2__
38 #define INVGRADV(i) (c16v*_mm_rcp_ps(SQRV(fourv+i)))
39 #endif
40
41
bayerfast_demosaic(int width,int height,const float * const * rawData,float ** red,float ** green,float ** blue,const unsigned cfarray[2][2],const std::function<bool (double)> & setProgCancel,double initGain)42 rpError bayerfast_demosaic(int width, int height, const float * const *rawData, float **red, float **green, float **blue, const unsigned cfarray[2][2], const std::function<bool(double)> &setProgCancel, double initGain)
43 {
44
45 BENCHFUN
46 if (!validateBayerCfa(3, cfarray)) {
47 return RP_WRONG_CFA;
48 }
49
50 rpError rc = RP_NO_ERROR;
51
52 double progress = 0.0;
53 setProgCancel(progress);
54
55 const int bord = 5;
56 const int H = height;
57 const int W = width;
58 const float clip_pt = 4 * 65535 * initGain;
59
60 rc = bayerborder_demosaic(width, height, bord, rawData, red, green, blue, cfarray);
61
62 progress += 0.1;
63 setProgCancel(progress);
64
65 #ifdef _OPENMP
66 #pragma omp parallel
67 #endif
68 {
69
70 #define CLF 1
71 // assign working space
72 char *buffer = (char *) calloc(3 * sizeof(float) * TS * TS + 3 * CLF * 64 + 63, 1);
73 #ifdef _OPENMP
74 #pragma omp critical
75 #endif
76 {
77 if (!buffer) {
78 rc = RP_MEMORY_ERROR;
79 }
80 }
81 #ifdef _OPENMP
82 #pragma omp barrier
83 #endif
84 if (!rc) {
85 char *data = (char*)((uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
86
87 float * const greentile = (float (*)) data; //pointers to array
88 float * const redtile = (float (*)) ((char*)greentile + sizeof(float) * TS * TS + CLF * 64);
89 float * const bluetile = (float (*)) ((char*)redtile + sizeof(float) * TS * TS + CLF * 64);
90
91 int progressCounter = 0;
92 const double progressInc = 16.0 * (1.0 - progress) / ((H * W) / ((TS - 4) * (TS - 4)));
93
94 #ifdef _OPENMP
95 #pragma omp for nowait
96 #endif
97
98 for (int top = bord - 2; top < H - bord + 2; top += TS - 4)
99 for (int left = bord - 2; left < W - bord + 2; left += TS - 4) {
100 const int bottom = min(top + TS, H - bord + 2);
101 const int right = min(left + TS, W - bord + 2);
102
103 #ifdef __SSE2__
104 const vfloat c16v = F2V(16.0f);
105 const vfloat fourv = F2V(4.0f);
106 vmask selmask;
107 const vmask andmask = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
108
109 if(fc(cfarray, top, left) == 1) {
110 selmask = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff);
111 } else {
112 selmask = _mm_set_epi32(0xffffffff, 0, 0xffffffff, 0);
113 }
114
115 #endif
116
117 // interpolate G using gradient weights
118 for (int i = top, rr = 0; i < bottom; i++, rr++) {
119 int j = left;
120 int cc = 0;
121 #ifdef __SSE2__
122 selmask = (vmask)_mm_andnot_ps((vfloat)selmask, (vfloat)andmask);
123
124 for (; j < right - 3; j += 4, cc += 4) {
125 const vfloat tempv = LVFU(rawData[i][j]);
126 const vfloat absv = vabsf(LVFU(rawData[i - 1][j]) - LVFU(rawData[i + 1][j]));
127 const vfloat wtuv = INVGRADV(absv + vabsf(tempv - LVFU(rawData[i - 2][j])) + vabsf(LVFU(rawData[i - 1][j]) - LVFU(rawData[i - 3][j])));
128 const vfloat wtdv = INVGRADV(absv + vabsf(tempv - LVFU(rawData[i + 2][j])) + vabsf(LVFU(rawData[i + 1][j]) - LVFU(rawData[i + 3][j])));
129 const vfloat abs2v = vabsf(LVFU(rawData[i][j - 1]) - LVFU(rawData[i][j + 1]));
130 const vfloat wtlv = INVGRADV(abs2v + vabsf(tempv - LVFU(rawData[i][j - 2])) + vabsf(LVFU(rawData[i][j - 1]) - LVFU(rawData[i][j - 3])));
131 const vfloat wtrv = INVGRADV(abs2v + vabsf(tempv - LVFU(rawData[i][j + 2])) + vabsf(LVFU(rawData[i][j + 1]) - LVFU(rawData[i][j + 3])));
132 const vfloat greenv = (wtuv * LVFU(rawData[i - 1][j]) + wtdv * LVFU(rawData[i + 1][j]) + wtlv * LVFU(rawData[i][j - 1]) + wtrv * LVFU(rawData[i][j + 1])) / (wtuv + wtdv + wtlv + wtrv);
133 STVF(greentile[rr * TS + cc], vself(selmask, greenv, tempv));
134 STVF(redtile[rr * TS + cc], tempv);
135 STVF(bluetile[rr * TS + cc], tempv);
136 }
137 #endif
138 for (; j < right; j++, cc++) {
139
140 if (fc(cfarray, i, j) == 1) {
141 greentile[rr * TS + cc] = rawData[i][j];
142
143 } else {
144 //compute directional weights using image gradients
145 const float wtu = INVGRAD((abs(rawData[i + 1][j] - rawData[i - 1][j]) + abs(rawData[i][j] - rawData[i - 2][j]) + abs(rawData[i - 1][j] - rawData[i - 3][j])));
146 const float wtd = INVGRAD((abs(rawData[i - 1][j] - rawData[i + 1][j]) + abs(rawData[i][j] - rawData[i + 2][j]) + abs(rawData[i + 1][j] - rawData[i + 3][j])));
147 const float wtl = INVGRAD((abs(rawData[i][j + 1] - rawData[i][j - 1]) + abs(rawData[i][j] - rawData[i][j - 2]) + abs(rawData[i][j - 1] - rawData[i][j - 3])));
148 const float wtr = INVGRAD((abs(rawData[i][j - 1] - rawData[i][j + 1]) + abs(rawData[i][j] - rawData[i][j + 2]) + abs(rawData[i][j + 1] - rawData[i][j + 3])));
149
150 //store in rgb array the interpolated G value at R/B grid points using directional weighted average
151 greentile[rr * TS + cc] = (wtu * rawData[i - 1][j] + wtd * rawData[i + 1][j] + wtl * rawData[i][j - 1] + wtr * rawData[i][j + 1]) / (wtu + wtd + wtl + wtr);
152 }
153
154 redtile[rr * TS + cc] = rawData[i][j];
155 bluetile[rr * TS + cc] = rawData[i][j];
156 }
157 }
158
159 #ifdef __SSE2__
160 const vfloat zd25v = F2V(0.25f);
161 const vfloat clip_ptv = F2V(clip_pt);
162 #endif
163
164 for (int i = top + 1, rr = 1; i < bottom - 1; i++, rr++) {
165 if (fc(cfarray, i, left + (fc(cfarray, i, 2) & 1) + 1) == 0)
166 #ifdef __SSE2__
167 for (int j = left + 1, cc = 1; j < right - 1; j += 4, cc += 4) {
168 //interpolate B/R colors at R/B sites
169 STVFU(bluetile[rr * TS + cc], LVFU(greentile[rr * TS + cc]) - zd25v * ((LVFU(greentile[(rr - 1)*TS + (cc - 1)]) + LVFU(greentile[(rr - 1)*TS + (cc + 1)]) + LVFU(greentile[(rr + 1)*TS + cc + 1]) + LVFU(greentile[(rr + 1)*TS + cc - 1])) -
170 vminf(LVFU(rawData[i - 1][j - 1]) + LVFU(rawData[i - 1][j + 1]) + LVFU(rawData[i + 1][j + 1]) + LVFU(rawData[i + 1][j - 1]), clip_ptv)));
171 }
172
173 #else
174
175 for (int cc = (fc(cfarray, i, 2) & 1) + 1, j = left + cc; j < right - 1; j += 2, cc += 2) {
176 //interpolate B/R colors at R/B sites
177 bluetile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + (cc - 1)] + greentile[(rr - 1) * TS + (cc + 1)] + greentile[(rr + 1) * TS + cc + 1] + greentile[(rr + 1) * TS + cc - 1]) -
178 min(clip_pt, rawData[i - 1][j - 1] + rawData[i - 1][j + 1] + rawData[i + 1][j + 1] + rawData[i + 1][j - 1]));
179 }
180
181 #endif
182 else
183 #ifdef __SSE2__
184 for (int j = left + 1, cc = 1; j < right - 1; j += 4, cc += 4) {
185 //interpolate B/R colors at R/B sites
186 STVFU(redtile[rr * TS + cc], LVFU(greentile[rr * TS + cc]) - zd25v * ((LVFU(greentile[(rr - 1)*TS + cc - 1]) + LVFU(greentile[(rr - 1)*TS + cc + 1]) + LVFU(greentile[(rr + 1)*TS + cc + 1]) + LVFU(greentile[(rr + 1)*TS + cc - 1])) -
187 vminf(LVFU(rawData[i - 1][j - 1]) + LVFU(rawData[i - 1][j + 1]) + LVFU(rawData[i + 1][j + 1]) + LVFU(rawData[i + 1][j - 1]), clip_ptv)));
188 }
189
190 #else
191
192 for (int cc = (fc(cfarray, i, 2) & 1) + 1, j = left + cc; j < right - 1; j += 2, cc += 2) {
193 //interpolate B/R colors at R/B sites
194 redtile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + cc - 1] + greentile[(rr - 1) * TS + cc + 1] + greentile[(rr + 1) * TS + cc + 1] + greentile[(rr + 1) * TS + cc - 1]) -
195 min(clip_pt, rawData[i - 1][j - 1] + rawData[i - 1][j + 1] + rawData[i + 1][j + 1] + rawData[i + 1][j - 1]));
196 }
197
198 #endif
199 }
200
201
202 #ifdef __SSE2__
203 selmask = _mm_set_epi32(0xffffffff, 0, 0xffffffff, 0);
204 #endif
205
206 // interpolate R/B using color differences
207 for (int i = top + 2, rr = 2; i < bottom - 2; i++, rr++) {
208 #ifdef __SSE2__
209
210 for (int cc = 2 + (fc(cfarray, i, 2) & 1), j = left + cc; j < right - 2; j += 4, cc += 4) {
211 // no need to take care about the borders of the tile. There's enough free space.
212 //interpolate R and B colors at G sites
213 const vfloat greenv = LVFU(greentile[rr * TS + cc]);
214 const vfloat greensumv = LVFU(greentile[(rr - 1) * TS + cc]) + LVFU(greentile[(rr + 1) * TS + cc]) + LVFU(greentile[rr * TS + cc - 1]) + LVFU(greentile[rr * TS + cc + 1]);
215
216 vfloat temp1v = LVFU(redtile[rr * TS + cc]);
217 vfloat temp2v = greenv - zd25v * (greensumv - LVFU(redtile[(rr - 1) * TS + cc]) - LVFU(redtile[(rr + 1) * TS + cc]) - LVFU(redtile[rr * TS + cc - 1]) - LVFU(redtile[rr * TS + cc + 1]));
218
219 STVFU(redtile[rr * TS + cc], vself(selmask, temp1v, temp2v));
220
221 temp1v = LVFU(bluetile[rr * TS + cc]);
222
223 temp2v = greenv - zd25v * (greensumv - LVFU(bluetile[(rr - 1) * TS + cc]) - LVFU(bluetile[(rr + 1) * TS + cc]) - LVFU(bluetile[rr * TS + cc - 1]) - LVFU(bluetile[rr * TS + cc + 1]));
224
225 STVFU(bluetile[rr * TS + cc], vself(selmask, temp1v, temp2v));
226 }
227
228 #else
229
230 for (int cc = 2 + (fc(cfarray, i, 2) & 1), j = left + cc; j < right - 2; j += 2, cc += 2) {
231 //interpolate R and B colors at G sites
232 redtile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + cc] - redtile[(rr - 1) * TS + cc]) + (greentile[(rr + 1) * TS + cc] - redtile[(rr + 1) * TS + cc]) +
233 (greentile[rr * TS + cc - 1] - redtile[rr * TS + cc - 1]) + (greentile[rr * TS + cc + 1] - redtile[rr * TS + cc + 1]));
234 bluetile[rr * TS + cc] = greentile[rr * TS + cc] - 0.25f * ((greentile[(rr - 1) * TS + cc] - bluetile[(rr - 1) * TS + cc]) + (greentile[(rr + 1) * TS + cc] - bluetile[(rr + 1) * TS + cc]) +
235 (greentile[rr * TS + cc - 1] - bluetile[rr * TS + cc - 1]) + (greentile[rr * TS + cc + 1] - bluetile[rr * TS + cc + 1]));
236 }
237
238 #endif
239 }
240
241
242 for (int i = top + 2, rr = 2; i < bottom - 2; i++, rr++) {
243 int j = left + 2;
244 int cc = 2;
245 #ifdef __SSE2__
246 for (; j < right - 5; j += 4, cc += 4) {
247 STVFU(red[i][j], LVFU(redtile[rr * TS + cc]));
248 STVFU(green[i][j], LVFU(greentile[rr * TS + cc]));
249 STVFU(blue[i][j], LVFU(bluetile[rr * TS + cc]));
250 }
251 #endif
252 for (; j < right - 2; j++, cc++) {
253 red[i][j] = redtile[rr * TS + cc];
254 green[i][j] = greentile[rr * TS + cc];
255 blue[i][j] = bluetile[rr * TS + cc];
256 }
257 }
258
259 if((++progressCounter) % 16 == 0) {
260 #ifdef _OPENMP
261 #pragma omp critical (updateprogress)
262 #endif
263 {
264 progress += progressInc;
265 progress = min(1.0, progress);
266 setProgCancel(progress);
267 }
268 }
269 }
270 }
271 free(buffer);
272 } // End of parallelization
273
274 setProgCancel(1.0);
275 return rc;
276
277 }
278 #undef TS
279 #undef CLF
280