1 /*
2     This file is part of darktable,
3     Copyright (C) 2017-2020 darktable developers.
4 
5     darktable is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     darktable is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with darktable.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #ifndef DT_DEVELOP_DWT_H
20 #define DT_DEVELOP_DWT_H
21 
/* structure returned by dt_dwt_init() to be used when calling dwt_decompose()
 * field meanings follow the dt_dwt_init() parameter documentation in this header
 */
typedef struct dwt_params_t
{
  float *image;         // image to be decomposed and output image
  int ch;               // image dimensions: ch (presumably channels per pixel -- confirm),
  int width;            //   width
  int height;           //   and height
  int scales;           // number of scales to decompose
  int return_layer;     // 0: recomposed image, 1..scales: a detail scale, scales+1: residual image
  int merge_from_scale; // detail scales will be merged together before calling layer_func
  void *user_data;      // user-supplied data to be passed to layer_func on each call
  float preview_scale;  // image scale (zoom factor)
  int use_sse;          // use SSE instructions
} dwt_params_t;
36 
37 /* function prototype for the layer_func on dwt_decompose() call */
38 typedef void(_dwt_layer_func)(float *layer, dwt_params_t *const p, const int scale);
39 
40 /* returns a structure used when calling dwt_decompose(), free it with dt_dwt_free()
41  * image: image to be decomposed and output image
42  * width, height, ch: dimensions of the image
43  * scales: number of scales to decompose, if > dwt_get_max_scale() the this last will be used
44  * return_layer: 0 returns the recomposed image, 1..scales returns a detail scale, scales+1 returns the residual
45  * image
46  * merge_from_scale: detail scales will be merged together before calling layer_func
47  * user_data: user-supplied data to be passed to layer_func on each call
48  * preview_scale: image scale (zoom factor)
49  * use_sse: use SSE instructions
50  */
51 dwt_params_t *dt_dwt_init(float *image, const int width, const int height, const int ch, const int scales,
52                           const int return_layer, const int merge_from_scale, void *user_data,
53                           const float preview_scale, const int use_sse);
54 
55 /* free resources used by dwt_decompose() */
56 void dt_dwt_free(dwt_params_t *p);
57 
58 /* returns the maximum number of scales that dwt_decompose() will accept for the current image size */
59 int dwt_get_max_scale(dwt_params_t *p);
60 
61 /* returns the first visible detail scale at the current zoom level */
62 int dt_dwt_first_scale_visible(dwt_params_t *p);
63 
64 /* decomposes an image into several wavelet scales
65  * p: returned by dt_dwt_init()
66  * layer_func: this function is called for the original image and then once for each scale, including the residual
67  * image
68  */
69 void dwt_decompose(dwt_params_t *p, _dwt_layer_func layer_func);
70 
/* decomposes an image into 'bands' wavelet scales, then recomposes a denoised image from just that portion
 * of each scale whose absolute magnitude exceeds the threshold in noise[band]
 * img: input image, overwritten with the denoised image
 * width, height: image dimensions
 * bands: number of wavelet scales to generate
 * noise: array of thresholds, one per band
 */
void dwt_denoise(float *const img, const int width, const int height, const int bands, const float *const noise);
79 
// to make the DWT algorithm (and others which operate on a column of spaced-out pixels for each pixel of a
// row) as cache-friendly as possible, we want to interleave the actual processing of rows such that the next
// iteration processes the row 'stride' pixels below the current one, which will already be in L2 cache (if
// not L1) from having been accessed on this iteration. So if stride is 16, we want to process rows 0, 16, 32,
// ..., then 1, 17, 33, ..., 2, 18, 34, ..., etc.
/*
 * given a row identifier (0 .. height-1), an image height, and a stride,
 * return the physical row number of the image on which to operate
 *
 * rowid:  sequential iteration index, 0 .. height-1
 * height: number of rows in the image
 * stride: distance in rows between two consecutively processed rows
 *
 * when height <= stride, or when stride is not positive, no interleaving is done and rowid is
 * returned unchanged
 */
static inline int dwt_interleave_rows(const int rowid, const int height, const int stride)
{
  // a single pass already covers the image; also rejects stride <= 0 so that none of the
  // divisions below can divide by zero
  if(stride <= 0 || height <= stride)
    return rowid;
  // number of rows visited by a full-length pass (rows k, k+stride, k+2*stride, ...)
  const int per_pass = ((height + stride - 1) / stride);
  // number of passes that have the full per_pass rows; the remaining passes have one row fewer
  const int long_passes = height % stride;
  // adjust for the fact that we have some passes with one fewer iteration when height is not a multiple of stride
  if(long_passes == 0 || rowid < long_passes * per_pass)
    return (rowid / per_pass) + stride * (rowid % per_pass);
  // rowid falls into one of the shorter passes: renumber relative to the first short pass
  // (per_pass >= 2 here because height > stride, so per_pass-1 is never zero)
  const int rowid2 = rowid - long_passes * per_pass;
  return long_passes + (rowid2 / (per_pass-1)) + stride * (rowid2 % (per_pass-1));
}
101 
102 
103 
104 #ifdef HAVE_OPENCL
/* per-kernel integer handles for the OpenCL implementation
 * NOTE(review): presumably filled in by dt_dwt_init_cl_global() with one id per OpenCL kernel of the same
 * name -- confirm against the implementation
 */
typedef struct dt_dwt_cl_global_t
{
  int kernel_dwt_add_img_to_layer;
  int kernel_dwt_subtract_layer;
  int kernel_dwt_hat_transform_col;
  int kernel_dwt_hat_transform_row;
  int kernel_dwt_init_buffer;
} dt_dwt_cl_global_t;
113 
114 typedef struct dwt_params_cl_t
115 {
116   dt_dwt_cl_global_t *global;
117   int devid;
118   cl_mem image;
119   int width;
120   int height;
121   int ch;
122   int scales;
123   int return_layer;
124   int merge_from_scale;
125   void *user_data;
126   float preview_scale;
127 } dwt_params_cl_t;
128 
129 typedef cl_int(_dwt_layer_func_cl)(cl_mem layer, dwt_params_cl_t *const p, const int scale);
130 
131 dt_dwt_cl_global_t *dt_dwt_init_cl_global(void);
132 void dt_dwt_free_cl_global(dt_dwt_cl_global_t *g);
133 
134 dwt_params_cl_t *dt_dwt_init_cl(const int devid, cl_mem image, const int width, const int height, const int scales,
135                                 const int return_layer, const int merge_from_scale, void *user_data,
136                                 const float preview_scale);
137 void dt_dwt_free_cl(dwt_params_cl_t *p);
138 
139 int dwt_get_max_scale_cl(dwt_params_cl_t *p);
140 
141 int dt_dwt_first_scale_visible_cl(dwt_params_cl_t *p);
142 
143 cl_int dwt_decompose_cl(dwt_params_cl_t *p, _dwt_layer_func_cl layer_func);
144 
145 #endif
146 
147 #endif
148