1 /*
2     This file is part of darktable,
3     Copyright (C) 2016-2021 darktable developers.
4 
5     darktable is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     darktable is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with darktable.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include "develop/imageop_math.h"
20 #include <assert.h> // for assert
21 #include <glib.h> // for MIN, MAX, CLAMP, inline
22 #include <math.h> // for round, floorf, fmaxf
23 #include "common/darktable.h"        // for darktable, darktable_t, dt_code...
24 #include "common/imageio.h"          // for FILTERS_ARE_4BAYER
25 #include "common/interpolation.h"    // for dt_interpolation_new, dt_interp...
26 #include "develop/imageop.h"         // for dt_iop_roi_t
27 
dt_iop_flip_and_zoom_8(const uint8_t * in,int32_t iw,int32_t ih,uint8_t * out,int32_t ow,int32_t oh,const dt_image_orientation_t orientation,uint32_t * width,uint32_t * height)28 void dt_iop_flip_and_zoom_8(const uint8_t *in, int32_t iw, int32_t ih, uint8_t *out, int32_t ow, int32_t oh,
29                             const dt_image_orientation_t orientation, uint32_t *width, uint32_t *height)
30 {
31   // init strides:
32   const uint32_t iwd = (orientation & ORIENTATION_SWAP_XY) ? ih : iw;
33   const uint32_t iht = (orientation & ORIENTATION_SWAP_XY) ? iw : ih;
34   // DO NOT UPSCALE !!!
35   const float scale = fmaxf(1.0, fmaxf(iwd / (float)ow, iht / (float)oh));
36   const uint32_t wd = *width = MIN(ow, iwd / scale);
37   const uint32_t ht = *height = MIN(oh, iht / scale);
38   const int bpp = 4; // bytes per pixel
39   int32_t ii = 0, jj = 0;
40   int32_t si = 1, sj = iw;
41   if(orientation & ORIENTATION_FLIP_Y)
42   {
43     jj = ih - jj - 1;
44     sj = -sj;
45   }
46   if(orientation & ORIENTATION_FLIP_X)
47   {
48     ii = iw - ii - 1;
49     si = -si;
50   }
51   if(orientation & ORIENTATION_SWAP_XY)
52   {
53     int t = sj;
54     sj = si;
55     si = t;
56   }
57   const int32_t half_pixel = .5f * scale;
58   const int32_t offm = half_pixel * bpp * MIN(MIN(0, si), MIN(sj, si + sj));
59   const int32_t offM = half_pixel * bpp * MAX(MAX(0, si), MAX(sj, si + sj));
60 #ifdef _OPENMP
61 #pragma omp parallel for default(none) \
62   dt_omp_firstprivate(bpp, half_pixel, ht, offM, offm, scale, wd) \
63   shared(in, out, jj, ii, sj, si, iw, ih) \
64   schedule(static)
65 #endif
66   for(uint32_t j = 0; j < ht; j++)
67   {
68     uint8_t *out2 = out + bpp * wd * j;
69     const uint8_t *in2 = in + bpp * (iw * jj + ii + sj * (int32_t)(scale * j));
70     float stepi = 0.0f;
71     for(uint32_t i = 0; i < wd; i++)
72     {
73       const uint8_t *in3 = in2 + ((int32_t)stepi) * si * bpp;
74       // this should always be within the bounds of in[], due to the way
75       // wd/ht are constructed by always just rounding down. half_pixel should never
76       // add up to one pixel difference.
77       // we have this check with the hope the branch predictor will get rid of it:
78       if(in3 + offm >= in && in3 + offM < in + bpp * iw * ih)
79       {
80         for(int k = 0; k < 3; k++)
81           out2[k] = // in3[k];
82               CLAMP(((int32_t)in3[bpp * half_pixel * sj + k] + (int32_t)in3[bpp * half_pixel * (si + sj) + k]
83                      + (int32_t)in3[bpp * half_pixel * si + k] + (int32_t)in3[k])
84                         / 4,
85                     0, 255);
86       }
87       out2 += bpp;
88       stepi += scale;
89     }
90   }
91 }
92 
dt_iop_clip_and_zoom_8(const uint8_t * i,int32_t ix,int32_t iy,int32_t iw,int32_t ih,int32_t ibw,int32_t ibh,uint8_t * o,int32_t ox,int32_t oy,int32_t ow,int32_t oh,int32_t obw,int32_t obh)93 void dt_iop_clip_and_zoom_8(const uint8_t *i, int32_t ix, int32_t iy, int32_t iw, int32_t ih, int32_t ibw,
94                             int32_t ibh, uint8_t *o, int32_t ox, int32_t oy, int32_t ow, int32_t oh,
95                             int32_t obw, int32_t obh)
96 {
97   const float scalex = iw / (float)ow;
98   const float scaley = ih / (float)oh;
99   const int32_t ix2 = MAX(ix, 0);
100   const int32_t iy2 = MAX(iy, 0);
101   const int32_t ox2 = MAX(ox, 0);
102   const int32_t oy2 = MAX(oy, 0);
103   const int32_t oh2 = MIN(MIN(oh, (ibh - iy2) / scaley), obh - oy2);
104   const int32_t ow2 = MIN(MIN(ow, (ibw - ix2) / scalex), obw - ox2);
105   assert((int)(ix2 + ow2 * scalex) <= ibw);
106   assert((int)(iy2 + oh2 * scaley) <= ibh);
107   assert(ox2 + ow2 <= obw);
108   assert(oy2 + oh2 <= obh);
109   assert(ix2 >= 0 && iy2 >= 0 && ox2 >= 0 && oy2 >= 0);
110   float x = ix2, y = iy2;
111   for(int s = 0; s < oh2; s++)
112   {
113     int idx = ox2 + obw * (oy2 + s);
114     for(int t = 0; t < ow2; t++)
115     {
116       for(int k = 0; k < 3; k++)
117         o[4 * idx + k] = // i[3*(ibw* (int)y +             (int)x             ) + k)];
118             CLAMP(((int32_t)i[(4 * (ibw * (int32_t)y + (int32_t)(x + .5f * scalex)) + k)]
119                    + (int32_t)i[(4 * (ibw * (int32_t)(y + .5f * scaley) + (int32_t)(x + .5f * scalex)) + k)]
120                    + (int32_t)i[(4 * (ibw * (int32_t)(y + .5f * scaley) + (int32_t)(x)) + k)]
121                    + (int32_t)i[(4 * (ibw * (int32_t)y + (int32_t)(x)) + k)])
122                       / 4,
123                   0, 255);
124       x += scalex;
125       idx++;
126     }
127     y += scaley;
128     x = ix2;
129   }
130 }
131 
132 // apply clip and zoom on parts of a supplied full image.
133 // roi_in and roi_out define which part to work on.
dt_iop_clip_and_zoom(float * out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride)134 void dt_iop_clip_and_zoom(float *out, const float *const in, const dt_iop_roi_t *const roi_out,
135                           const dt_iop_roi_t *const roi_in, const int32_t out_stride, const int32_t in_stride)
136 {
137   const struct dt_interpolation *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF);
138   dt_interpolation_resample(itor, out, roi_out, out_stride * 4 * sizeof(float), in, roi_in,
139                             in_stride * 4 * sizeof(float));
140 }
141 
142 // apply clip and zoom on the image region supplied in the input buffer.
143 // roi_in and roi_out describe which part of the full image this relates to.
dt_iop_clip_and_zoom_roi(float * out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride)144 void dt_iop_clip_and_zoom_roi(float *out, const float *const in, const dt_iop_roi_t *const roi_out,
145                               const dt_iop_roi_t *const roi_in, const int32_t out_stride,
146                               const int32_t in_stride)
147 {
148   const struct dt_interpolation *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF);
149   dt_interpolation_resample_roi(itor, out, roi_out, out_stride * 4 * sizeof(float), in, roi_in,
150                                 in_stride * 4 * sizeof(float));
151 }
152 
153 #ifdef HAVE_OPENCL
154 // apply clip and zoom on parts of a supplied full image.
155 // roi_in and roi_out define which part to work on.
dt_iop_clip_and_zoom_cl(int devid,cl_mem dev_out,cl_mem dev_in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in)156 int dt_iop_clip_and_zoom_cl(int devid, cl_mem dev_out, cl_mem dev_in, const dt_iop_roi_t *const roi_out,
157                             const dt_iop_roi_t *const roi_in)
158 {
159   const struct dt_interpolation *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF);
160   return dt_interpolation_resample_cl(itor, devid, dev_out, roi_out, dev_in, roi_in);
161 }
162 
163 // apply clip and zoom on the image region supplied in the input buffer.
164 // roi_in and roi_out describe which part of the full image this relates to.
dt_iop_clip_and_zoom_roi_cl(int devid,cl_mem dev_out,cl_mem dev_in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in)165 int dt_iop_clip_and_zoom_roi_cl(int devid, cl_mem dev_out, cl_mem dev_in, const dt_iop_roi_t *const roi_out,
166                                 const dt_iop_roi_t *const roi_in)
167 {
168   const struct dt_interpolation *itor = dt_interpolation_new(DT_INTERPOLATION_USERPREF);
169   return dt_interpolation_resample_roi_cl(itor, devid, dev_out, roi_out, dev_in, roi_in);
170 }
171 
172 #endif
173 
dt_iop_clip_and_zoom_mosaic_half_size(uint16_t * const out,const uint16_t * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride,const uint32_t filters)174 void dt_iop_clip_and_zoom_mosaic_half_size(uint16_t *const out, const uint16_t *const in,
175                                                  const dt_iop_roi_t *const roi_out,
176                                                  const dt_iop_roi_t *const roi_in, const int32_t out_stride,
177                                                  const int32_t in_stride, const uint32_t filters)
178 {
179   // adjust to pixel region and don't sample more than scale/2 nbs!
180   // pixel footprint on input buffer, radius:
181   const float px_footprint = 1.f / roi_out->scale;
182 
183   // move to origin point 01 of a 2x2 CFA block
184   // (RGGB=0112 or CYGM=0132)
185   int trggbx = 0, trggby = 0;
186   if(FC(trggby, trggbx + 1, filters) != 1) trggbx++;
187   if(FC(trggby, trggbx, filters) != 0)
188   {
189     trggbx = (trggbx + 1) & 1;
190     trggby++;
191   }
192   const int rggbx = trggbx, rggby = trggby;
193 
194   // Create a reverse lookup of FC(): for each CFA color, a list of
195   // offsets from start of a 2x2 block at which to find that
196   // color. First index is color, second is to the list of offsets,
197   // preceded by the number of offsets.
198   int clut[4][3] = {{0}};
199   for(int y = 0; y < 2; ++y)
200     for(int x = 0; x < 2; ++x)
201     {
202       const int c = FC(y + rggby, x + rggbx, filters);
203       assert(clut[c][0] < 2);
204       clut[c][++clut[c][0]] = x + y * in_stride;
205     }
206 
207 #ifdef _OPENMP
208 #pragma omp parallel for default(none) \
209   dt_omp_firstprivate(filters, in, in_stride, out, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out) \
210   shared(clut) schedule(static)
211 #endif
212   for(int y = 0; y < roi_out->height; y++)
213   {
214     uint16_t *outc = out + out_stride * y;
215 
216     const float fy = (y + roi_out->y) * px_footprint;
217     const int miny = (CLAMPS((int)floorf(fy - px_footprint), 0, roi_in->height-3) & ~1u) + rggby;
218     const int maxy = MIN(roi_in->height-1, (int)ceilf(fy + px_footprint));
219 
220     float fx = roi_out->x * px_footprint;
221     for(int x = 0; x < roi_out->width; x++, fx += px_footprint, outc++)
222     {
223       const int minx = (CLAMPS((int)floorf(fx - px_footprint), 0, roi_in->width-3) & ~1u) + rggbx;
224       const int maxx = MIN(roi_in->width-1, (int)ceilf(fx + px_footprint));
225 
226       const int c = FC(y, x, filters);
227       int num = 0;
228       uint32_t col = 0;
229 
230       for(int yy = miny; yy < maxy; yy += 2)
231         for(int xx = minx; xx < maxx; xx += 2)
232         {
233           col += in[clut[c][1] + xx + in_stride * yy];
234           num++;
235           if (clut[c][0] == 2)
236           { // G in RGGB CFA
237             col += in[clut[c][2] + xx + in_stride * yy];
238             num++;
239           }
240         }
241       if(num) *outc = col / num;
242     }
243   }
244 }
245 
dt_iop_clip_and_zoom_mosaic_half_size_f(float * const out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride,const uint32_t filters)246 void dt_iop_clip_and_zoom_mosaic_half_size_f(float *const out, const float *const in,
247                                                    const dt_iop_roi_t *const roi_out,
248                                                    const dt_iop_roi_t *const roi_in, const int32_t out_stride,
249                                                    const int32_t in_stride, const uint32_t filters)
250 {
251   // adjust to pixel region and don't sample more than scale/2 nbs!
252   // pixel footprint on input buffer, radius:
253   const float px_footprint = 1.f / roi_out->scale;
254   // how many 2x2 blocks can be sampled inside that area
255   const int samples = round(px_footprint / 2);
256 
257   // move p to point to an rggb block:
258   int trggbx = 0, trggby = 0;
259   if(FC(trggby, trggbx + 1, filters) != 1) trggbx++;
260   if(FC(trggby, trggbx, filters) != 0)
261   {
262     trggbx = (trggbx + 1) & 1;
263     trggby++;
264   }
265   const int rggbx = trggbx, rggby = trggby;
266 
267 #ifdef _OPENMP
268 #pragma omp parallel for default(none) \
269   dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out, samples) \
270   schedule(static)
271 #endif
272   for(int y = 0; y < roi_out->height; y++)
273   {
274     float *outc = out + out_stride * y;
275 
276     const float fy = (y + roi_out->y) * px_footprint;
277     int py = (int)fy & ~1;
278     const float dy = (fy - py) / 2;
279     py = MIN(((roi_in->height - 6) & ~1u), py) + rggby;
280 
281     int maxj = MIN(((roi_in->height - 5) & ~1u) + rggby, py + 2 * samples);
282 
283     for(int x = 0; x < roi_out->width; x++)
284     {
285       dt_aligned_pixel_t col = { 0, 0, 0, 0 };
286 
287       const float fx = (x + roi_out->x) * px_footprint;
288       int px = (int)fx & ~1;
289       const float dx = (fx - px) / 2;
290       px = MIN(((roi_in->width - 6) & ~1u), px) + rggbx;
291 
292       const int maxi = MIN(((roi_in->width - 5) & ~1u) + rggbx, px + 2 * samples);
293 
294       dt_aligned_pixel_t p;
295       float num = 0;
296 
297       // upper left 2x2 block of sampling region
298       p[0] = in[px + in_stride * py];
299       p[1] = in[px + 1 + in_stride * py];
300       p[2] = in[px + in_stride * (py + 1)];
301       p[3] = in[px + 1 + in_stride * (py + 1)];
302       for(int c = 0; c < 4; c++) col[c] += ((1 - dx) * (1 - dy)) * p[c];
303 
304       // left 2x2 block border of sampling region
305       for(int j = py + 2; j <= maxj; j += 2)
306       {
307         p[0] = in[px + in_stride * j];
308         p[1] = in[px + 1 + in_stride * j];
309         p[2] = in[px + in_stride * (j + 1)];
310         p[3] = in[px + 1 + in_stride * (j + 1)];
311         for(int c = 0; c < 4; c++) col[c] += (1 - dx) * p[c];
312       }
313 
314       // upper 2x2 block border of sampling region
315       for(int i = px + 2; i <= maxi; i += 2)
316       {
317         p[0] = in[i + in_stride * py];
318         p[1] = in[i + 1 + in_stride * py];
319         p[2] = in[i + in_stride * (py + 1)];
320         p[3] = in[i + 1 + in_stride * (py + 1)];
321         for(int c = 0; c < 4; c++) col[c] += (1 - dy) * p[c];
322       }
323 
324       // 2x2 blocks in the middle of sampling region
325       for(int j = py + 2; j <= maxj; j += 2)
326         for(int i = px + 2; i <= maxi; i += 2)
327         {
328           p[0] = in[i + in_stride * j];
329           p[1] = in[i + 1 + in_stride * j];
330           p[2] = in[i + in_stride * (j + 1)];
331           p[3] = in[i + 1 + in_stride * (j + 1)];
332           for(int c = 0; c < 4; c++) col[c] += p[c];
333         }
334 
335       if(maxi == px + 2 * samples && maxj == py + 2 * samples)
336       {
337         // right border
338         for(int j = py + 2; j <= maxj; j += 2)
339         {
340           p[0] = in[maxi + 2 + in_stride * j];
341           p[1] = in[maxi + 3 + in_stride * j];
342           p[2] = in[maxi + 2 + in_stride * (j + 1)];
343           p[3] = in[maxi + 3 + in_stride * (j + 1)];
344           for(int c = 0; c < 4; c++) col[c] += dx * p[c];
345         }
346 
347         // upper right
348         p[0] = in[maxi + 2 + in_stride * py];
349         p[1] = in[maxi + 3 + in_stride * py];
350         p[2] = in[maxi + 2 + in_stride * (py + 1)];
351         p[3] = in[maxi + 3 + in_stride * (py + 1)];
352         for(int c = 0; c < 4; c++) col[c] += (dx * (1 - dy)) * p[c];
353 
354         // lower border
355         for(int i = px + 2; i <= maxi; i += 2)
356         {
357           p[0] = in[i + in_stride * (maxj + 2)];
358           p[1] = in[i + 1 + in_stride * (maxj + 2)];
359           p[2] = in[i + in_stride * (maxj + 3)];
360           p[3] = in[i + 1 + in_stride * (maxj + 3)];
361           for(int c = 0; c < 4; c++) col[c] += dy * p[c];
362         }
363 
364         // lower left 2x2 block
365         p[0] = in[px + in_stride * (maxj + 2)];
366         p[1] = in[px + 1 + in_stride * (maxj + 2)];
367         p[2] = in[px + in_stride * (maxj + 3)];
368         p[3] = in[px + 1 + in_stride * (maxj + 3)];
369         for(int c = 0; c < 4; c++) col[c] += ((1 - dx) * dy) * p[c];
370 
371         // lower right 2x2 block
372         p[0] = in[maxi + 2 + in_stride * (maxj + 2)];
373         p[1] = in[maxi + 3 + in_stride * (maxj + 2)];
374         p[2] = in[maxi + 2 + in_stride * (maxj + 3)];
375         p[3] = in[maxi + 3 + in_stride * (maxj + 3)];
376         for(int c = 0; c < 4; c++) col[c] += (dx * dy) * p[c];
377 
378         num = (samples + 1) * (samples + 1);
379       }
380       else if(maxi == px + 2 * samples)
381       {
382         // right border
383         for(int j = py + 2; j <= maxj; j += 2)
384         {
385           p[0] = in[maxi + 2 + in_stride * j];
386           p[1] = in[maxi + 3 + in_stride * j];
387           p[2] = in[maxi + 2 + in_stride * (j + 1)];
388           p[3] = in[maxi + 3 + in_stride * (j + 1)];
389           for(int c = 0; c < 4; c++) col[c] += dx * p[c];
390         }
391 
392         // upper right
393         p[0] = in[maxi + 2 + in_stride * py];
394         p[1] = in[maxi + 3 + in_stride * py];
395         p[2] = in[maxi + 2 + in_stride * (py + 1)];
396         p[3] = in[maxi + 3 + in_stride * (py + 1)];
397         for(int c = 0; c < 4; c++) col[c] += (dx * (1 - dy)) * p[c];
398 
399         num = ((maxj - py) / 2 + 1 - dy) * (samples + 1);
400       }
401       else if(maxj == py + 2 * samples)
402       {
403         // lower border
404         for(int i = px + 2; i <= maxi; i += 2)
405         {
406           p[0] = in[i + in_stride * (maxj + 2)];
407           p[1] = in[i + 1 + in_stride * (maxj + 2)];
408           p[2] = in[i + in_stride * (maxj + 3)];
409           p[3] = in[i + 1 + in_stride * (maxj + 3)];
410           for(int c = 0; c < 4; c++) col[c] += dy * p[c];
411         }
412 
413         // lower left 2x2 block
414         p[0] = in[px + in_stride * (maxj + 2)];
415         p[1] = in[px + 1 + in_stride * (maxj + 2)];
416         p[2] = in[px + in_stride * (maxj + 3)];
417         p[3] = in[px + 1 + in_stride * (maxj + 3)];
418         for(int c = 0; c < 4; c++) col[c] += ((1 - dx) * dy) * p[c];
419 
420         num = ((maxi - px) / 2 + 1 - dx) * (samples + 1);
421       }
422       else
423       {
424         num = ((maxi - px) / 2 + 1 - dx) * ((maxj - py) / 2 + 1 - dy);
425       }
426 
427       const int c = (2 * ((y + rggby) % 2) + ((x + rggbx) % 2));
428       if(num) *outc = col[c] / num;
429       outc++;
430     }
431   }
432 }
433 
434 /**
435  * downscales and clips a Fujifilm X-Trans mosaiced buffer (in) to the given region of interest (r_*)
436  * and writes it to out.
437  */
dt_iop_clip_and_zoom_mosaic_third_size_xtrans(uint16_t * const out,const uint16_t * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride,const uint8_t (* const xtrans)[6])438 void dt_iop_clip_and_zoom_mosaic_third_size_xtrans(uint16_t *const out, const uint16_t *const in,
439                                                    const dt_iop_roi_t *const roi_out,
440                                                    const dt_iop_roi_t *const roi_in, const int32_t out_stride,
441                                                    const int32_t in_stride, const uint8_t (*const xtrans)[6])
442 {
443   const float px_footprint = 1.f / roi_out->scale;
444   // Use box filter of width px_footprint*2+1 centered on the current
445   // sample (rounded to nearest input pixel) to anti-alias. Higher MP
446   // images need larger filters to avoid artifacts.
447 #ifdef _OPENMP
448 #pragma omp parallel for default(none) \
449   dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, roi_in, roi_out, xtrans) \
450   schedule(static)
451 #endif
452   for(int y = 0; y < roi_out->height; y++)
453   {
454     uint16_t *outc = out + out_stride * y;
455 
456     const float fy = (y + roi_out->y) * px_footprint;
457     const int miny = MAX(0, (int)roundf(fy - px_footprint));
458     const int maxy = MIN(roi_in->height-1, (int)roundf(fy + px_footprint));
459 
460     float fx = roi_out->x * px_footprint;
461     for(int x = 0; x < roi_out->width; x++, fx += px_footprint, outc++)
462     {
463       const int minx = MAX(0, (int)roundf(fx - px_footprint));
464       const int maxx = MIN(roi_in->width-1, (int)roundf(fx + px_footprint));
465 
466       const int c = FCxtrans(y, x, roi_out, xtrans);
467       int num = 0;
468       uint32_t col = 0;
469 
470       for(int yy = miny; yy <= maxy; ++yy)
471         for(int xx = minx; xx <= maxx; ++xx)
472           if(FCxtrans(yy, xx, roi_in, xtrans) == c)
473           {
474             col += in[xx + in_stride * yy];
475             num++;
476           }
477       *outc = col / num;
478     }
479   }
480 }
481 
dt_iop_clip_and_zoom_mosaic_third_size_xtrans_f(float * const out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride,const uint8_t (* const xtrans)[6])482 void dt_iop_clip_and_zoom_mosaic_third_size_xtrans_f(float *const out, const float *const in,
483                                                      const dt_iop_roi_t *const roi_out,
484                                                      const dt_iop_roi_t *const roi_in, const int32_t out_stride,
485                                                      const int32_t in_stride, const uint8_t (*const xtrans)[6])
486 {
487   const float px_footprint = 1.f / roi_out->scale;
488 #ifdef _OPENMP
489 #pragma omp parallel for default(none) \
490   dt_omp_firstprivate(in, in_stride, out, out_stride, px_footprint, roi_in, roi_out, xtrans) \
491   schedule(static)
492 #endif
493   for(int y = 0; y < roi_out->height; y++)
494   {
495     float *outc = out + out_stride * y;
496 
497     const float fy = (y + roi_out->y) * px_footprint;
498     const int miny = MAX(0, (int)roundf(fy - px_footprint));
499     const int maxy = MIN(roi_in->height-1, (int)roundf(fy + px_footprint));
500 
501     float fx = roi_out->x * px_footprint;
502     for(int x = 0; x < roi_out->width; x++, fx += px_footprint, outc++)
503     {
504       const int minx = MAX(0, (int)roundf(fx - px_footprint));
505       const int maxx = MIN(roi_in->width-1, (int)roundf(fx + px_footprint));
506 
507       const int c = FCxtrans(y, x, roi_out, xtrans);
508       int num = 0;
509       float col = 0.f;
510 
511       for(int yy = miny; yy <= maxy; ++yy)
512         for(int xx = minx; xx <= maxx; ++xx)
513           if(FCxtrans(yy, xx, roi_in, xtrans) == c)
514           {
515             col += in[xx + in_stride * yy];
516             num++;
517           }
518       *outc = col / (float)num;
519     }
520   }
521 }
522 
dt_iop_clip_and_zoom_demosaic_passthrough_monochrome_f(float * out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride)523 void dt_iop_clip_and_zoom_demosaic_passthrough_monochrome_f(float *out, const float *const in,
524                                                                   const dt_iop_roi_t *const roi_out,
525                                                                   const dt_iop_roi_t *const roi_in,
526                                                                   const int32_t out_stride,
527                                                                   const int32_t in_stride)
528 {
529   // adjust to pixel region and don't sample more than scale/2 nbs!
530   // pixel footprint on input buffer, radius:
531   const float px_footprint = 1.f / roi_out->scale;
532   // how many pixels can be sampled inside that area
533   const int samples = round(px_footprint);
534 
535 #ifdef _OPENMP
536 #pragma omp parallel for default(none) \
537   dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, roi_in, roi_out, samples) \
538   shared(out) \
539   schedule(static)
540 #endif
541   for(int y = 0; y < roi_out->height; y++)
542   {
543     float *outc = out + 4 * (out_stride * y);
544 
545     const float fy = (y + roi_out->y) * px_footprint;
546     int py = (int)fy;
547     const float dy = fy - py;
548     py = MIN(((roi_in->height - 3)), py);
549 
550     const int maxj = MIN(((roi_in->height - 2)), py + samples);
551 
552     for(int x = 0; x < roi_out->width; x++)
553     {
554       float col = 0.0f;
555 
556       const float fx = (x + roi_out->x) * px_footprint;
557       int px = (int)fx;
558       const float dx = fx - px;
559       px = MIN(((roi_in->width - 3)), px);
560 
561       const int maxi = MIN(((roi_in->width - 2)), px + samples);
562 
563       float p;
564       float num = 0;
565 
566       // upper left pixel of sampling region
567       p = in[px + in_stride * py];
568       col += ((1 - dx) * (1 - dy)) * p;
569 
570       // left pixel border of sampling region
571       for(int j = py + 1; j <= maxj; j++)
572       {
573         p = in[px + in_stride * j];
574         col += (1 - dx) * p;
575       }
576 
577       // upper pixel border of sampling region
578       for(int i = px + 1; i <= maxi; i++)
579       {
580         p = in[i + in_stride * py];
581         col += (1 - dy) * p;
582       }
583 
584       // pixels in the middle of sampling region
585       for(int j = py + 1; j <= maxj; j++)
586         for(int i = px + 1; i <= maxi; i++)
587         {
588           p = in[i + in_stride * j];
589           col += p;
590         }
591 
592       if(maxi == px + samples && maxj == py + samples)
593       {
594         // right border
595         for(int j = py + 1; j <= maxj; j++)
596         {
597           p = in[maxi + 1 + in_stride * j];
598           col += dx * p;
599         }
600 
601         // upper right
602         p = in[maxi + 1 + in_stride * py];
603         col += (dx * (1 - dy)) * p;
604 
605         // lower border
606         for(int i = px + 1; i <= maxi; i++)
607         {
608           p = in[i + in_stride * (maxj + 1)];
609           col += dy * p;
610         }
611 
612         // lower left pixel
613         p = in[px + in_stride * (maxj + 1)];
614         col += ((1 - dx) * dy) * p;
615 
616         // lower right pixel
617         p = in[maxi + 1 + in_stride * (maxj + 1)];
618         col += (dx * dy) * p;
619 
620         num = (samples + 1) * (samples + 1);
621       }
622       else if(maxi == px + samples)
623       {
624         // right border
625         for(int j = py + 1; j <= maxj; j++)
626         {
627           p = in[maxi + 1 + in_stride * j];
628           col += dx * p;
629         }
630 
631         // upper right
632         p = in[maxi + 1 + in_stride * py];
633         col += (dx * (1 - dy)) * p;
634 
635         num = ((maxj - py) / 2 + 1 - dy) * (samples + 1);
636       }
637       else if(maxj == py + samples)
638       {
639         // lower border
640         for(int i = px + 1; i <= maxi; i++)
641         {
642           p = in[i + in_stride * (maxj + 1)];
643           col += dy * p;
644         }
645 
646         // lower left pixel
647         p = in[px + in_stride * (maxj + 1)];
648         col += ((1 - dx) * dy) * p;
649 
650         num = ((maxi - px) / 2 + 1 - dx) * (samples + 1);
651       }
652       else
653       {
654         num = ((maxi - px) / 2 + 1 - dx) * ((maxj - py) / 2 + 1 - dy);
655       }
656 
657       const float pix = (num) ? col / num : 0.0f;
658       outc[0] = pix;
659       outc[1] = pix;
660       outc[2] = pix;
661       outc[3] = 0.0f;
662       outc += 4;
663     }
664   }
665 }
666 
dt_iop_clip_and_zoom_demosaic_half_size_f(float * out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride,const uint32_t filters)667 void dt_iop_clip_and_zoom_demosaic_half_size_f(float *out, const float *const in,
668                                                      const dt_iop_roi_t *const roi_out,
669                                                      const dt_iop_roi_t *const roi_in, const int32_t out_stride,
670                                                      const int32_t in_stride, const uint32_t filters)
671 {
672   // adjust to pixel region and don't sample more than scale/2 nbs!
673   // pixel footprint on input buffer, radius:
674   const float px_footprint = 1.f / roi_out->scale;
675   // how many 2x2 blocks can be sampled inside that area
676   const int samples = round(px_footprint / 2);
677 
678   // move p to point to an rggb block:
679   int trggbx = 0, trggby = 0;
680   if(FC(trggby, trggbx + 1, filters) != 1) trggbx++;
681   if(FC(trggby, trggbx, filters) != 0)
682   {
683     trggbx = (trggbx + 1) & 1;
684     trggby++;
685   }
686   const int rggbx = trggbx, rggby = trggby;
687 
688 #ifdef _OPENMP
689 #pragma omp parallel for default(none) \
690   dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, rggbx, rggby, roi_in, roi_out, samples) \
691   shared(out) \
692   schedule(static)
693 #endif
694   for(int y = 0; y < roi_out->height; y++)
695   {
696     float *outc = out + 4 * (out_stride * y);
697 
698     const float fy = (y + roi_out->y) * px_footprint;
699     int py = (int)fy & ~1;
700     const float dy = (fy - py) / 2;
701     py = MIN(((roi_in->height - 6) & ~1u), py) + rggby;
702 
703     const int maxj = MIN(((roi_in->height - 5) & ~1u) + rggby, py + 2 * samples);
704 
705     for(int x = 0; x < roi_out->width; x++)
706     {
707       dt_aligned_pixel_t col = { 0, 0, 0, 0 };
708 
709       const float fx = (x + roi_out->x) * px_footprint;
710       int px = (int)fx & ~1;
711       const float dx = (fx - px) / 2;
712       px = MIN(((roi_in->width - 6) & ~1u), px) + rggbx;
713 
714       const int maxi = MIN(((roi_in->width - 5) & ~1u) + rggbx, px + 2 * samples);
715 
716       dt_aligned_pixel_t p;
717       float num = 0;
718 
719       // upper left 2x2 block of sampling region
720       p[0] = in[px + in_stride * py];
721       p[1] = in[px + 1 + in_stride * py] + in[px + in_stride * (py + 1)];
722       p[2] = in[px + 1 + in_stride * (py + 1)];
723       for(int c = 0; c < 3; c++) col[c] += ((1 - dx) * (1 - dy)) * p[c];
724 
725       // left 2x2 block border of sampling region
726       for(int j = py + 2; j <= maxj; j += 2)
727       {
728         p[0] = in[px + in_stride * j];
729         p[1] = in[px + 1 + in_stride * j] + in[px + in_stride * (j + 1)];
730         p[2] = in[px + 1 + in_stride * (j + 1)];
731         for(int c = 0; c < 3; c++) col[c] += (1 - dx) * p[c];
732       }
733 
734       // upper 2x2 block border of sampling region
735       for(int i = px + 2; i <= maxi; i += 2)
736       {
737         p[0] = in[i + in_stride * py];
738         p[1] = in[i + 1 + in_stride * py] + in[i + in_stride * (py + 1)];
739         p[2] = in[i + 1 + in_stride * (py + 1)];
740         for(int c = 0; c < 3; c++) col[c] += (1 - dy) * p[c];
741       }
742 
743       // 2x2 blocks in the middle of sampling region
744       for(int j = py + 2; j <= maxj; j += 2)
745         for(int i = px + 2; i <= maxi; i += 2)
746         {
747           p[0] = in[i + in_stride * j];
748           p[1] = in[i + 1 + in_stride * j] + in[i + in_stride * (j + 1)];
749           p[2] = in[i + 1 + in_stride * (j + 1)];
750           for(int c = 0; c < 3; c++) col[c] += p[c];
751         }
752 
753       if(maxi == px + 2 * samples && maxj == py + 2 * samples)
754       {
755         // right border
756         for(int j = py + 2; j <= maxj; j += 2)
757         {
758           p[0] = in[maxi + 2 + in_stride * j];
759           p[1] = in[maxi + 3 + in_stride * j] + in[maxi + 2 + in_stride * (j + 1)];
760           p[2] = in[maxi + 3 + in_stride * (j + 1)];
761           for(int c = 0; c < 3; c++) col[c] += dx * p[c];
762         }
763 
764         // upper right
765         p[0] = in[maxi + 2 + in_stride * py];
766         p[1] = in[maxi + 3 + in_stride * py] + in[maxi + 2 + in_stride * (py + 1)];
767         p[2] = in[maxi + 3 + in_stride * (py + 1)];
768         for(int c = 0; c < 3; c++) col[c] += (dx * (1 - dy)) * p[c];
769 
770         // lower border
771         for(int i = px + 2; i <= maxi; i += 2)
772         {
773           p[0] = in[i + in_stride * (maxj + 2)];
774           p[1] = in[i + 1 + in_stride * (maxj + 2)] + in[i + in_stride * (maxj + 3)];
775           p[2] = in[i + 1 + in_stride * (maxj + 3)];
776           for(int c = 0; c < 3; c++) col[c] += dy * p[c];
777         }
778 
779         // lower left 2x2 block
780         p[0] = in[px + in_stride * (maxj + 2)];
781         p[1] = in[px + 1 + in_stride * (maxj + 2)] + in[px + in_stride * (maxj + 3)];
782         p[2] = in[px + 1 + in_stride * (maxj + 3)];
783         for(int c = 0; c < 3; c++) col[c] += ((1 - dx) * dy) * p[c];
784 
785         // lower right 2x2 block
786         p[0] = in[maxi + 2 + in_stride * (maxj + 2)];
787         p[1] = in[maxi + 3 + in_stride * (maxj + 2)] + in[maxi + 2 + in_stride * (maxj + 3)];
788         p[2] = in[maxi + 3 + in_stride * (maxj + 3)];
789         for(int c = 0; c < 3; c++) col[c] += (dx * dy) * p[c];
790 
791         num = (samples + 1) * (samples + 1);
792       }
793       else if(maxi == px + 2 * samples)
794       {
795         // right border
796         for(int j = py + 2; j <= maxj; j += 2)
797         {
798           p[0] = in[maxi + 2 + in_stride * j];
799           p[1] = in[maxi + 3 + in_stride * j] + in[maxi + 2 + in_stride * (j + 1)];
800           p[2] = in[maxi + 3 + in_stride * (j + 1)];
801           for(int c = 0; c < 3; c++) col[c] += dx * p[c];
802         }
803 
804         // upper right
805         p[0] = in[maxi + 2 + in_stride * py];
806         p[1] = in[maxi + 3 + in_stride * py] + in[maxi + 2 + in_stride * (py + 1)];
807         p[2] = in[maxi + 3 + in_stride * (py + 1)];
808         for(int c = 0; c < 3; c++) col[c] += (dx * (1 - dy)) * p[c];
809 
810         num = ((maxj - py) / 2 + 1 - dy) * (samples + 1);
811       }
812       else if(maxj == py + 2 * samples)
813       {
814         // lower border
815         for(int i = px + 2; i <= maxi; i += 2)
816         {
817           p[0] = in[i + in_stride * (maxj + 2)];
818           p[1] = in[i + 1 + in_stride * (maxj + 2)] + in[i + in_stride * (maxj + 3)];
819           p[2] = in[i + 1 + in_stride * (maxj + 3)];
820           for(int c = 0; c < 3; c++) col[c] += dy * p[c];
821         }
822 
823         // lower left 2x2 block
824         p[0] = in[px + in_stride * (maxj + 2)];
825         p[1] = in[px + 1 + in_stride * (maxj + 2)] + in[px + in_stride * (maxj + 3)];
826         p[2] = in[px + 1 + in_stride * (maxj + 3)];
827         for(int c = 0; c < 3; c++) col[c] += ((1 - dx) * dy) * p[c];
828 
829         num = ((maxi - px) / 2 + 1 - dx) * (samples + 1);
830       }
831       else
832       {
833         num = ((maxi - px) / 2 + 1 - dx) * ((maxj - py) / 2 + 1 - dy);
834       }
835 
836       outc[0] = col[0] / num;
837       outc[1] = (col[1] / num) / 2.0f;
838       outc[2] = col[2] / num;
839       outc[3] = 0.0f;
840       outc += 4;
841     }
842   }
843 }
844 
845 
dt_iop_clip_and_zoom_demosaic_third_size_xtrans_f(float * out,const float * const in,const dt_iop_roi_t * const roi_out,const dt_iop_roi_t * const roi_in,const int32_t out_stride,const int32_t in_stride,const uint8_t (* const xtrans)[6])846 void dt_iop_clip_and_zoom_demosaic_third_size_xtrans_f(float *out, const float *const in,
847                                                        const dt_iop_roi_t *const roi_out,
848                                                        const dt_iop_roi_t *const roi_in,
849                                                        const int32_t out_stride, const int32_t in_stride,
850                                                        const uint8_t (*const xtrans)[6])
851 {
852   const float px_footprint = 1.f / roi_out->scale;
853   const int samples = MAX(1, (int)floorf(px_footprint / 3));
854 
855   // A slightly different algorithm than
856   // dt_iop_clip_and_zoom_demosaic_half_size_f() which aligns to 2x2
857   // Bayer grid and hence most pull additional data from all edges
858   // which don't align with CFA. Instead align to a 3x3 pattern (which
859   // is semi-regular in X-Trans CFA). This code doesn't worry about
860   // fractional pixel offset of top/left of pattern nor oversampling
861   // by non-integer number of samples.
862 
863 #ifdef _OPENMP
864 #pragma omp parallel for default(none) \
865   dt_omp_firstprivate(in, in_stride, out_stride, px_footprint, roi_in, roi_out, samples, xtrans) \
866   shared(out) \
867   schedule(static)
868 #endif
869   for(int y = 0; y < roi_out->height; y++)
870   {
871     float *outc = out + 4 * (out_stride * y);
872     const int py = CLAMPS((int)round((y + roi_out->y - 0.5f) * px_footprint), 0, roi_in->height - 3);
873     const int ymax = MIN(roi_in->height - 3, py + 3 * samples);
874 
875     for(int x = 0; x < roi_out->width; x++, outc += 4)
876     {
877       dt_aligned_pixel_t col = { 0.0f };
878       int num = 0;
879       const int px = CLAMPS((int)round((x + roi_out->x - 0.5f) * px_footprint), 0, roi_in->width - 3);
880       const int xmax = MIN(roi_in->width - 3, px + 3 * samples);
881       for(int yy = py; yy <= ymax; yy += 3)
882         for(int xx = px; xx <= xmax; xx += 3)
883         {
884           for(int j = 0; j < 3; ++j)
885             for(int i = 0; i < 3; ++i)
886               col[FCxtrans(yy + j, xx + i, roi_in, xtrans)] += in[xx + i + in_stride * (yy + j)];
887           num++;
888         }
889 
890       // X-Trans RGB weighting averages to 2:5:2 for each 3x3 cell
891       outc[0] = col[0] / (num * 2);
892       outc[1] = col[1] / (num * 5);
893       outc[2] = col[2] / (num * 2);
894     }
895   }
896 }
897 
dt_iop_RGB_to_YCbCr(const dt_aligned_pixel_t rgb,dt_aligned_pixel_t yuv)898 void dt_iop_RGB_to_YCbCr(const dt_aligned_pixel_t rgb, dt_aligned_pixel_t yuv)
899 {
900   yuv[0] = 0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2];
901   yuv[1] = -0.147 * rgb[0] - 0.289 * rgb[1] + 0.437 * rgb[2];
902   yuv[2] = 0.615 * rgb[0] - 0.515 * rgb[1] - 0.100 * rgb[2];
903 }
904 
dt_iop_YCbCr_to_RGB(const dt_aligned_pixel_t yuv,dt_aligned_pixel_t rgb)905 void dt_iop_YCbCr_to_RGB(const dt_aligned_pixel_t yuv, dt_aligned_pixel_t rgb)
906 {
907   rgb[0] = yuv[0] + 1.140 * yuv[2];
908   rgb[1] = yuv[0] - 0.394 * yuv[1] - 0.581 * yuv[2];
909   rgb[2] = yuv[0] + 2.028 * yuv[1];
910 }
911 
mat4inv(const float X[][4],float R[][4])912 static inline void mat4inv(const float X[][4], float R[][4])
913 {
914   const float det = X[0][3] * X[1][2] * X[2][1] * X[3][0] - X[0][2] * X[1][3] * X[2][1] * X[3][0]
915                     - X[0][3] * X[1][1] * X[2][2] * X[3][0] + X[0][1] * X[1][3] * X[2][2] * X[3][0]
916                     + X[0][2] * X[1][1] * X[2][3] * X[3][0] - X[0][1] * X[1][2] * X[2][3] * X[3][0]
917                     - X[0][3] * X[1][2] * X[2][0] * X[3][1] + X[0][2] * X[1][3] * X[2][0] * X[3][1]
918                     + X[0][3] * X[1][0] * X[2][2] * X[3][1] - X[0][0] * X[1][3] * X[2][2] * X[3][1]
919                     - X[0][2] * X[1][0] * X[2][3] * X[3][1] + X[0][0] * X[1][2] * X[2][3] * X[3][1]
920                     + X[0][3] * X[1][1] * X[2][0] * X[3][2] - X[0][1] * X[1][3] * X[2][0] * X[3][2]
921                     - X[0][3] * X[1][0] * X[2][1] * X[3][2] + X[0][0] * X[1][3] * X[2][1] * X[3][2]
922                     + X[0][1] * X[1][0] * X[2][3] * X[3][2] - X[0][0] * X[1][1] * X[2][3] * X[3][2]
923                     - X[0][2] * X[1][1] * X[2][0] * X[3][3] + X[0][1] * X[1][2] * X[2][0] * X[3][3]
924                     + X[0][2] * X[1][0] * X[2][1] * X[3][3] - X[0][0] * X[1][2] * X[2][1] * X[3][3]
925                     - X[0][1] * X[1][0] * X[2][2] * X[3][3] + X[0][0] * X[1][1] * X[2][2] * X[3][3];
926   R[0][0] = (X[1][2] * X[2][3] * X[3][1] - X[1][3] * X[2][2] * X[3][1] + X[1][3] * X[2][1] * X[3][2]
927              - X[1][1] * X[2][3] * X[3][2] - X[1][2] * X[2][1] * X[3][3] + X[1][1] * X[2][2] * X[3][3])
928             / det;
929   R[1][0] = (X[1][3] * X[2][2] * X[3][0] - X[1][2] * X[2][3] * X[3][0] - X[1][3] * X[2][0] * X[3][2]
930              + X[1][0] * X[2][3] * X[3][2] + X[1][2] * X[2][0] * X[3][3] - X[1][0] * X[2][2] * X[3][3])
931             / det;
932   R[2][0] = (X[1][1] * X[2][3] * X[3][0] - X[1][3] * X[2][1] * X[3][0] + X[1][3] * X[2][0] * X[3][1]
933              - X[1][0] * X[2][3] * X[3][1] - X[1][1] * X[2][0] * X[3][3] + X[1][0] * X[2][1] * X[3][3])
934             / det;
935   R[3][0] = (X[1][2] * X[2][1] * X[3][0] - X[1][1] * X[2][2] * X[3][0] - X[1][2] * X[2][0] * X[3][1]
936              + X[1][0] * X[2][2] * X[3][1] + X[1][1] * X[2][0] * X[3][2] - X[1][0] * X[2][1] * X[3][2])
937             / det;
938 
939   R[0][1] = (X[0][3] * X[2][2] * X[3][1] - X[0][2] * X[2][3] * X[3][1] - X[0][3] * X[2][1] * X[3][2]
940              + X[0][1] * X[2][3] * X[3][2] + X[0][2] * X[2][1] * X[3][3] - X[0][1] * X[2][2] * X[3][3])
941             / det;
942   R[1][1] = (X[0][2] * X[2][3] * X[3][0] - X[0][3] * X[2][2] * X[3][0] + X[0][3] * X[2][0] * X[3][2]
943              - X[0][0] * X[2][3] * X[3][2] - X[0][2] * X[2][0] * X[3][3] + X[0][0] * X[2][2] * X[3][3])
944             / det;
945   R[2][1] = (X[0][3] * X[2][1] * X[3][0] - X[0][1] * X[2][3] * X[3][0] - X[0][3] * X[2][0] * X[3][1]
946              + X[0][0] * X[2][3] * X[3][1] + X[0][1] * X[2][0] * X[3][3] - X[0][0] * X[2][1] * X[3][3])
947             / det;
948   R[3][1] = (X[0][1] * X[2][2] * X[3][0] - X[0][2] * X[2][1] * X[3][0] + X[0][2] * X[2][0] * X[3][1]
949              - X[0][0] * X[2][2] * X[3][1] - X[0][1] * X[2][0] * X[3][2] + X[0][0] * X[2][1] * X[3][2])
950             / det;
951 
952   R[0][2] = (X[0][2] * X[1][3] * X[3][1] - X[0][3] * X[1][2] * X[3][1] + X[0][3] * X[1][1] * X[3][2]
953              - X[0][1] * X[1][3] * X[3][2] - X[0][2] * X[1][1] * X[3][3] + X[0][1] * X[1][2] * X[3][3])
954             / det;
955   R[1][2] = (X[0][3] * X[1][2] * X[3][0] - X[0][2] * X[1][3] * X[3][0] - X[0][3] * X[1][0] * X[3][2]
956              + X[0][0] * X[1][3] * X[3][2] + X[0][2] * X[1][0] * X[3][3] - X[0][0] * X[1][2] * X[3][3])
957             / det;
958   R[2][2] = (X[0][1] * X[1][3] * X[3][0] - X[0][3] * X[1][1] * X[3][0] + X[0][3] * X[1][0] * X[3][1]
959              - X[0][0] * X[1][3] * X[3][1] - X[0][1] * X[1][0] * X[3][3] + X[0][0] * X[1][1] * X[3][3])
960             / det;
961   R[3][2] = (X[0][2] * X[1][1] * X[3][0] - X[0][1] * X[1][2] * X[3][0] - X[0][2] * X[1][0] * X[3][1]
962              + X[0][0] * X[1][2] * X[3][1] + X[0][1] * X[1][0] * X[3][2] - X[0][0] * X[1][1] * X[3][2])
963             / det;
964 
965   R[0][3] = (X[0][3] * X[1][2] * X[2][1] - X[0][2] * X[1][3] * X[2][1] - X[0][3] * X[1][1] * X[2][2]
966              + X[0][1] * X[1][3] * X[2][2] + X[0][2] * X[1][1] * X[2][3] - X[0][1] * X[1][2] * X[2][3])
967             / det;
968   R[1][3] = (X[0][2] * X[1][3] * X[2][0] - X[0][3] * X[1][2] * X[2][0] + X[0][3] * X[1][0] * X[2][2]
969              - X[0][0] * X[1][3] * X[2][2] - X[0][2] * X[1][0] * X[2][3] + X[0][0] * X[1][2] * X[2][3])
970             / det;
971   R[2][3] = (X[0][3] * X[1][1] * X[2][0] - X[0][1] * X[1][3] * X[2][0] - X[0][3] * X[1][0] * X[2][1]
972              + X[0][0] * X[1][3] * X[2][1] + X[0][1] * X[1][0] * X[2][3] - X[0][0] * X[1][1] * X[2][3])
973             / det;
974   R[3][3] = (X[0][1] * X[1][2] * X[2][0] - X[0][2] * X[1][1] * X[2][0] + X[0][2] * X[1][0] * X[2][1]
975              - X[0][0] * X[1][2] * X[2][1] - X[0][1] * X[1][0] * X[2][2] + X[0][0] * X[1][1] * X[2][2])
976             / det;
977 }
978 
mat4mulv(float dst[4],const float mat[4][4],const float v[4])979 static void mat4mulv(float dst[4], const float mat[4][4], const float v[4])
980 {
981   for(int k = 0; k < 4; k++)
982   {
983     float x = 0.0f;
984     for(int i = 0; i < 4; i++) x += mat[k][i] * v[i];
985     dst[k] = x;
986   }
987 }
988 
dt_iop_estimate_cubic(const float x[4],const float y[4],float a[4])989 void dt_iop_estimate_cubic(const float x[4], const float y[4], float a[4])
990 {
991   // we want to fit a spline
992   // [y]   [x^3 x^2 x^1 1] [a^3]
993   // |y| = |x^3 x^2 x^1 1| |a^2|
994   // |y|   |x^3 x^2 x^1 1| |a^1|
995   // [y]   [x^3 x^2 x^1 1] [ 1 ]
996   // and do that by inverting the matrix X:
997 
998   const float X[4][4] = { { x[0] * x[0] * x[0], x[0] * x[0], x[0], 1.0f },
999                           { x[1] * x[1] * x[1], x[1] * x[1], x[1], 1.0f },
1000                           { x[2] * x[2] * x[2], x[2] * x[2], x[2], 1.0f },
1001                           { x[3] * x[3] * x[3], x[3] * x[3], x[3], 1.0f } };
1002   float X_inv[4][4];
1003   mat4inv(X, X_inv);
1004   mat4mulv(a, X_inv, y);
1005 }
1006 
1007 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
1008 // vim: shiftwidth=2 expandtab tabstop=2 cindent
1009 // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
1010