1 /*
2 This file is part of darktable,
3 Copyright (C) 2016-2020 darktable developers.
4
5 darktable is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 darktable is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with darktable. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #ifdef HAVE_OPENCL
20
21 #include "common/bilateral.h"
22 #include "common/bilateralcl.h"
23 #include "common/darktable.h" // for CLAMPS, dt_print, darktable, darktable_t
24 #include "common/opencl.h" // for dt_opencl_set_kernel_arg, dt_opencl_cr...
25 #include <glib.h> // for MAX
26 #include <math.h> // for roundf
27 #include <stdlib.h> // for free, malloc
28
dt_bilateral_init_cl_global()29 dt_bilateral_cl_global_t *dt_bilateral_init_cl_global()
30 {
31 dt_bilateral_cl_global_t *b = (dt_bilateral_cl_global_t *)malloc(sizeof(dt_bilateral_cl_global_t));
32
33 const int program = 10; // bilateral.cl, from programs.conf
34 b->kernel_zero = dt_opencl_create_kernel(program, "zero");
35 b->kernel_splat = dt_opencl_create_kernel(program, "splat");
36 b->kernel_blur_line = dt_opencl_create_kernel(program, "blur_line");
37 b->kernel_blur_line_z = dt_opencl_create_kernel(program, "blur_line_z");
38 b->kernel_slice = dt_opencl_create_kernel(program, "slice");
39 b->kernel_slice2 = dt_opencl_create_kernel(program, "slice_to_output");
40 return b;
41 }
42
// Tear down a bilateral grid created by dt_bilateral_init_cl().
// Accepts NULL (no-op). Also used on the error paths of dt_bilateral_init_cl(), where one or
// both device buffers may still be NULL when they are handed to dt_opencl_release_mem_object().
void dt_bilateral_free_cl(dt_bilateral_cl_t *b)
{
  if(b != NULL)
  {
    // make sure nothing queued on the device is still using the buffers
    dt_opencl_finish(b->devid);

    // device side: grid and its scratch copy
    dt_opencl_release_mem_object(b->dev_grid);
    dt_opencl_release_mem_object(b->dev_grid_tmp);

    // host side: the handle itself
    free(b);
  }
}
53
54
// Memory estimate for modules that go through dt_bilateral_slice_to_output_cl():
// whatever dt_bilateral_memory_use() reports, plus the extra temporary buffer that the
// OpenCL code path needs (4 floats per pixel of the width x height image).
size_t dt_bilateral_memory_use2(const int width,
                                const int height,
                                const float sigma_s,
                                const float sigma_r)
{
  const size_t base_bytes = dt_bilateral_memory_use(width, height, sigma_s, sigma_r);
  const size_t tmp_bytes = sizeof(float) * 4 * width * height;
  return base_bytes + tmp_bytes;
}
64
// modules that want to use dt_bilateral_slice_to_output_cl() ought to take this one;
// the largest single buffer is either what dt_bilateral_singlebuffer_size() reports or the
// additional temp buffer needed in the OpenCL code path (4 floats per pixel), whichever is bigger
size_t dt_bilateral_singlebuffer_size2(const int width,
                                       const int height,
                                       const float sigma_s,
                                       const float sigma_r)
{
  // evaluate the call once: MAX() is a macro and would otherwise expand it twice
  const size_t grid_bytes = dt_bilateral_singlebuffer_size(width, height, sigma_s, sigma_r);
  const size_t tmp_bytes = sizeof(float) * 4 * width * height;
  return MAX(grid_bytes, tmp_bytes);
}
74
75
dt_bilateral_init_cl(const int devid,const int width,const int height,const float sigma_s,const float sigma_r)76 dt_bilateral_cl_t *dt_bilateral_init_cl(const int devid,
77 const int width, // width of input image
78 const int height, // height of input image
79 const float sigma_s, // spatial sigma (blur pixel coords)
80 const float sigma_r) // range sigma (blur luma values)
81 {
82 dt_opencl_local_buffer_t locopt
83 = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
84 .cellsize = 8 * sizeof(float) + sizeof(int), .overhead = 0,
85 .sizex = 1 << 6, .sizey = 1 << 6 };
86
87 if(!dt_opencl_local_buffer_opt(devid, darktable.opencl->bilateral->kernel_splat, &locopt))
88 {
89 dt_print(DT_DEBUG_OPENCL,
90 "[opencl_bilateral] can not identify resource limits for device %d in bilateral grid\n", devid);
91 return NULL;
92 }
93
94 if(locopt.sizex * locopt.sizey < 16 * 16)
95 {
96 dt_print(DT_DEBUG_OPENCL,
97 "[opencl_bilateral] device %d does not offer sufficient resources to run bilateral grid\n",
98 devid);
99 return NULL;
100 }
101
102 dt_bilateral_cl_t *b = (dt_bilateral_cl_t *)malloc(sizeof(dt_bilateral_cl_t));
103 if(!b) return NULL;
104
105 b->global = darktable.opencl->bilateral;
106 b->width = width;
107 b->height = height;
108 b->blocksizex = locopt.sizex;
109 b->blocksizey = locopt.sizey;
110 b->devid = devid;
111 b->dev_grid = NULL;
112 b->dev_grid_tmp = NULL;
113 dt_bilateral_t b2;
114 dt_bilateral_grid_size(&b2,width,height,100.0f,sigma_s,sigma_r);
115 b->size_x = b2.size_x;
116 b->size_y = b2.size_y;
117 b->size_z = b2.size_z;
118 b->sigma_s = b2.sigma_s;
119 b->sigma_r = b2.sigma_r;
120
121 // alloc grid buffer:
122 b->dev_grid
123 = dt_opencl_alloc_device_buffer(b->devid, sizeof(float) * b->size_x * b->size_y * b->size_z);
124 if(!b->dev_grid)
125 {
126 dt_bilateral_free_cl(b);
127 return NULL;
128 }
129
130 // alloc temporary grid buffer
131 b->dev_grid_tmp
132 = dt_opencl_alloc_device_buffer(b->devid, sizeof(float) * b->size_x * b->size_y * b->size_z);
133 if(!b->dev_grid_tmp)
134 {
135 dt_bilateral_free_cl(b);
136 return NULL;
137 }
138
139 // zero out grid
140 int wd = b->size_x, ht = b->size_y * b->size_z;
141 size_t sizes[] = { ROUNDUPWD(wd), ROUNDUPHT(ht), 1 };
142 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 0, sizeof(cl_mem), (void *)&b->dev_grid);
143 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 1, sizeof(int), (void *)&wd);
144 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 2, sizeof(int), (void *)&ht);
145 cl_int err = -666;
146 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_zero, sizes);
147 if(err != CL_SUCCESS)
148 {
149 dt_bilateral_free_cl(b);
150 return NULL;
151 }
152
153 #if 0
154 fprintf(stderr, "[bilateral] created grid [%d %d %d]"
155 " with sigma (%f %f) (%f %f)\n", b->size_x, b->size_y, b->size_z,
156 b->sigma_s, sigma_s, b->sigma_r, sigma_r);
157 #endif
158 return b;
159 }
160
dt_bilateral_splat_cl(dt_bilateral_cl_t * b,cl_mem in)161 cl_int dt_bilateral_splat_cl(dt_bilateral_cl_t *b, cl_mem in)
162 {
163 cl_int err = -666;
164 size_t sizes[] = { ROUNDUP(b->width, b->blocksizex), ROUNDUP(b->height, b->blocksizey), 1 };
165 size_t local[] = { b->blocksizex, b->blocksizey, 1 };
166 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 0, sizeof(cl_mem), (void *)&in);
167 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 1, sizeof(cl_mem), (void *)&b->dev_grid);
168 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 2, sizeof(int), (void *)&b->width);
169 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 3, sizeof(int), (void *)&b->height);
170 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 4, sizeof(int), (void *)&b->size_x);
171 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 5, sizeof(int), (void *)&b->size_y);
172 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 6, sizeof(int), (void *)&b->size_z);
173 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 7, sizeof(float), (void *)&b->sigma_s);
174 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 8, sizeof(float), (void *)&b->sigma_r);
175 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 9, b->blocksizex * b->blocksizey * sizeof(int),
176 NULL);
177 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 10,
178 b->blocksizex * b->blocksizey * 8 * sizeof(float), NULL);
179 err = dt_opencl_enqueue_kernel_2d_with_local(b->devid, b->global->kernel_splat, sizes, local);
180 return err;
181 }
182
dt_bilateral_blur_cl(dt_bilateral_cl_t * b)183 cl_int dt_bilateral_blur_cl(dt_bilateral_cl_t *b)
184 {
185 cl_int err = -666;
186 size_t sizes[3] = { 0, 0, 1 };
187
188 err = dt_opencl_enqueue_copy_buffer_to_buffer(b->devid, b->dev_grid, b->dev_grid_tmp, 0, 0,
189 b->size_x * b->size_y * b->size_z * sizeof(float));
190 if(err != CL_SUCCESS) return err;
191
192 sizes[0] = ROUNDUPWD(b->size_z);
193 sizes[1] = ROUNDUPHT(b->size_y);
194 int stride1, stride2, stride3;
195 stride1 = b->size_x * b->size_y;
196 stride2 = b->size_x;
197 stride3 = 1;
198 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
199 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid);
200 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
201 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
202 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
203 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
204 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_y);
205 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_x);
206 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
207 if(err != CL_SUCCESS) return err;
208
209 stride1 = b->size_x * b->size_y;
210 stride2 = 1;
211 stride3 = b->size_x;
212 sizes[0] = ROUNDUPWD(b->size_z);
213 sizes[1] = ROUNDUPHT(b->size_x);
214 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid);
215 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
216 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
217 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
218 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
219 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
220 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_x);
221 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_y);
222 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
223 if(err != CL_SUCCESS) return err;
224
225 stride1 = 1;
226 stride2 = b->size_x;
227 stride3 = b->size_x * b->size_y;
228 sizes[0] = ROUNDUPWD(b->size_x);
229 sizes[1] = ROUNDUPHT(b->size_y);
230 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 0, sizeof(cl_mem),
231 (void *)&b->dev_grid_tmp);
232 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 1, sizeof(cl_mem), (void *)&b->dev_grid);
233 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 2, sizeof(int), (void *)&stride1);
234 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 3, sizeof(int), (void *)&stride2);
235 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 4, sizeof(int), (void *)&stride3);
236 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 5, sizeof(int), (void *)&b->size_x);
237 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 6, sizeof(int), (void *)&b->size_y);
238 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 7, sizeof(int), (void *)&b->size_z);
239 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line_z, sizes);
240 return err;
241 }
242
dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t * b,cl_mem in,cl_mem out,const float detail)243 cl_int dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
244 {
245 cl_int err = -666;
246 cl_mem tmp = NULL;
247
248 tmp = dt_opencl_alloc_device(b->devid, b->width, b->height, sizeof(float) * 4);
249 if(tmp == NULL) goto error;
250
251 size_t origin[] = { 0, 0, 0 };
252 size_t region[] = { b->width, b->height, 1 };
253 err = dt_opencl_enqueue_copy_image(b->devid, out, tmp, origin, origin, region);
254 if(err != CL_SUCCESS) goto error;
255
256 size_t sizes[] = { ROUNDUPWD(b->width), ROUNDUPHT(b->height), 1 };
257 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 0, sizeof(cl_mem), (void *)&in);
258 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 1, sizeof(cl_mem), (void *)&tmp);
259 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 2, sizeof(cl_mem), (void *)&out);
260 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 3, sizeof(cl_mem), (void *)&b->dev_grid);
261 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 4, sizeof(int), (void *)&b->width);
262 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 5, sizeof(int), (void *)&b->height);
263 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 6, sizeof(int), (void *)&b->size_x);
264 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 7, sizeof(int), (void *)&b->size_y);
265 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 8, sizeof(int), (void *)&b->size_z);
266 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 9, sizeof(float), (void *)&b->sigma_s);
267 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 10, sizeof(float), (void *)&b->sigma_r);
268 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 11, sizeof(float), (void *)&detail);
269 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice2, sizes);
270
271 dt_opencl_release_mem_object(tmp);
272 return err;
273
274 error:
275 dt_opencl_release_mem_object(tmp);
276 return err;
277 }
278
dt_bilateral_slice_cl(dt_bilateral_cl_t * b,cl_mem in,cl_mem out,const float detail)279 cl_int dt_bilateral_slice_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
280 {
281 cl_int err = -666;
282 size_t sizes[] = { ROUNDUPWD(b->width), ROUNDUPHT(b->height), 1 };
283 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 0, sizeof(cl_mem), (void *)&in);
284 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 1, sizeof(cl_mem), (void *)&out);
285 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 2, sizeof(cl_mem), (void *)&b->dev_grid);
286 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 3, sizeof(int), (void *)&b->width);
287 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 4, sizeof(int), (void *)&b->height);
288 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 5, sizeof(int), (void *)&b->size_x);
289 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 6, sizeof(int), (void *)&b->size_y);
290 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 7, sizeof(int), (void *)&b->size_z);
291 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 8, sizeof(float), (void *)&b->sigma_s);
292 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 9, sizeof(float), (void *)&b->sigma_r);
293 dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 10, sizeof(float), (void *)&detail);
294 err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice, sizes);
295 return err;
296 }
297
// Counterpart of dt_bilateral_init_cl_global(): frees all six kernels, then the table itself.
// Accepts NULL (no-op).
void dt_bilateral_free_cl_global(dt_bilateral_cl_global_t *b)
{
  if(b == NULL) return;

  // destroy kernels, in the order they were created
  const int kernels[] = { b->kernel_zero,        b->kernel_splat, b->kernel_blur_line,
                          b->kernel_blur_line_z, b->kernel_slice, b->kernel_slice2 };
  const int num_kernels = (int)(sizeof(kernels) / sizeof(kernels[0]));
  for(int i = 0; i < num_kernels; i++) dt_opencl_free_kernel(kernels[i]);

  free(b);
}
310
311 #endif
312
313 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
314 // vim: shiftwidth=2 expandtab tabstop=2 cindent
315 // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
316