1 /*
2     This file is part of darktable,
3     Copyright (C) 2016-2020 darktable developers.
4 
5     darktable is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     darktable is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with darktable.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #ifdef HAVE_OPENCL
20 
21 #include "common/bilateral.h"
22 #include "common/bilateralcl.h"
23 #include "common/darktable.h" // for CLAMPS, dt_print, darktable, darktable_t
24 #include "common/opencl.h"    // for dt_opencl_set_kernel_arg, dt_opencl_cr...
25 #include <glib.h>             // for MAX
26 #include <math.h>             // for roundf
27 #include <stdlib.h>           // for free, malloc
28 
dt_bilateral_init_cl_global()29 dt_bilateral_cl_global_t *dt_bilateral_init_cl_global()
30 {
31   dt_bilateral_cl_global_t *b = (dt_bilateral_cl_global_t *)malloc(sizeof(dt_bilateral_cl_global_t));
32 
33   const int program = 10; // bilateral.cl, from programs.conf
34   b->kernel_zero = dt_opencl_create_kernel(program, "zero");
35   b->kernel_splat = dt_opencl_create_kernel(program, "splat");
36   b->kernel_blur_line = dt_opencl_create_kernel(program, "blur_line");
37   b->kernel_blur_line_z = dt_opencl_create_kernel(program, "blur_line_z");
38   b->kernel_slice = dt_opencl_create_kernel(program, "slice");
39   b->kernel_slice2 = dt_opencl_create_kernel(program, "slice_to_output");
40   return b;
41 }
42 
/**
 * Destroy a bilateral grid created by dt_bilateral_init_cl().
 *
 * Releases both device-side grid buffers and the host-side struct itself.
 * A NULL pointer is accepted and ignored.
 */
void dt_bilateral_free_cl(dt_bilateral_cl_t *b)
{
  if(b == NULL) return;

  // make sure the device is done with the memory before giving it back
  dt_opencl_finish(b->devid);

  // device buffers first, then the host struct that refers to them
  dt_opencl_release_mem_object(b->dev_grid);
  dt_opencl_release_mem_object(b->dev_grid_tmp);

  free(b);
}
53 
54 
// Memory estimate for modules that use dt_bilateral_slice_to_output_cl():
// the amount reported by dt_bilateral_memory_use() plus the additional
// temporary buffer (four floats per pixel) needed in the OpenCL code path.
size_t dt_bilateral_memory_use2(const int width,
                                const int height,
                                const float sigma_s,
                                const float sigma_r)
{
  const size_t grid_use = dt_bilateral_memory_use(width, height, sigma_s, sigma_r);
  const size_t tmp_buffer = sizeof(float) * 4 * width * height;
  return grid_use + tmp_buffer;
}
64 
// Largest single buffer for modules that use dt_bilateral_slice_to_output_cl():
// whichever is bigger, the size reported by dt_bilateral_singlebuffer_size()
// or the additional temporary buffer (four floats per pixel) needed in the
// OpenCL code path.
size_t dt_bilateral_singlebuffer_size2(const int width,
                                       const int height,
                                       const float sigma_s,
                                       const float sigma_r)
{
  // MAX() is a macro that may evaluate its arguments twice, so compute both
  // candidates once up front instead of passing a function call to it
  const size_t grid_buffer = dt_bilateral_singlebuffer_size(width, height, sigma_s, sigma_r);
  const size_t tmp_buffer = sizeof(float) * 4 * width * height;
  return MAX(grid_buffer, tmp_buffer);
}
74 
75 
dt_bilateral_init_cl(const int devid,const int width,const int height,const float sigma_s,const float sigma_r)76 dt_bilateral_cl_t *dt_bilateral_init_cl(const int devid,
77                                         const int width,     // width of input image
78                                         const int height,    // height of input image
79                                         const float sigma_s, // spatial sigma (blur pixel coords)
80                                         const float sigma_r) // range sigma (blur luma values)
81 {
82   dt_opencl_local_buffer_t locopt
83     = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
84                                   .cellsize = 8 * sizeof(float) + sizeof(int), .overhead = 0,
85                                   .sizex = 1 << 6, .sizey = 1 << 6 };
86 
87   if(!dt_opencl_local_buffer_opt(devid, darktable.opencl->bilateral->kernel_splat, &locopt))
88   {
89     dt_print(DT_DEBUG_OPENCL,
90              "[opencl_bilateral] can not identify resource limits for device %d in bilateral grid\n", devid);
91     return NULL;
92   }
93 
94   if(locopt.sizex * locopt.sizey < 16 * 16)
95   {
96     dt_print(DT_DEBUG_OPENCL,
97              "[opencl_bilateral] device %d does not offer sufficient resources to run bilateral grid\n",
98              devid);
99     return NULL;
100   }
101 
102   dt_bilateral_cl_t *b = (dt_bilateral_cl_t *)malloc(sizeof(dt_bilateral_cl_t));
103   if(!b) return NULL;
104 
105   b->global = darktable.opencl->bilateral;
106   b->width = width;
107   b->height = height;
108   b->blocksizex = locopt.sizex;
109   b->blocksizey = locopt.sizey;
110   b->devid = devid;
111   b->dev_grid = NULL;
112   b->dev_grid_tmp = NULL;
113   dt_bilateral_t b2;
114   dt_bilateral_grid_size(&b2,width,height,100.0f,sigma_s,sigma_r);
115   b->size_x = b2.size_x;
116   b->size_y = b2.size_y;
117   b->size_z = b2.size_z;
118   b->sigma_s = b2.sigma_s;
119   b->sigma_r = b2.sigma_r;
120 
121   // alloc grid buffer:
122   b->dev_grid
123       = dt_opencl_alloc_device_buffer(b->devid, sizeof(float) * b->size_x * b->size_y * b->size_z);
124   if(!b->dev_grid)
125   {
126     dt_bilateral_free_cl(b);
127     return NULL;
128   }
129 
130   // alloc temporary grid buffer
131   b->dev_grid_tmp
132       = dt_opencl_alloc_device_buffer(b->devid, sizeof(float) * b->size_x * b->size_y * b->size_z);
133   if(!b->dev_grid_tmp)
134   {
135     dt_bilateral_free_cl(b);
136     return NULL;
137   }
138 
139   // zero out grid
140   int wd = b->size_x, ht = b->size_y * b->size_z;
141   size_t sizes[] = { ROUNDUPWD(wd), ROUNDUPHT(ht), 1 };
142   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 0, sizeof(cl_mem), (void *)&b->dev_grid);
143   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 1, sizeof(int), (void *)&wd);
144   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 2, sizeof(int), (void *)&ht);
145   cl_int err = -666;
146   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_zero, sizes);
147   if(err != CL_SUCCESS)
148   {
149     dt_bilateral_free_cl(b);
150     return NULL;
151   }
152 
153 #if 0
154   fprintf(stderr, "[bilateral] created grid [%d %d %d]"
155           " with sigma (%f %f) (%f %f)\n", b->size_x, b->size_y, b->size_z,
156           b->sigma_s, sigma_s, b->sigma_r, sigma_r);
157 #endif
158   return b;
159 }
160 
dt_bilateral_splat_cl(dt_bilateral_cl_t * b,cl_mem in)161 cl_int dt_bilateral_splat_cl(dt_bilateral_cl_t *b, cl_mem in)
162 {
163   cl_int err = -666;
164   size_t sizes[] = { ROUNDUP(b->width, b->blocksizex), ROUNDUP(b->height, b->blocksizey), 1 };
165   size_t local[] = { b->blocksizex, b->blocksizey, 1 };
166   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 0, sizeof(cl_mem), (void *)&in);
167   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 1, sizeof(cl_mem), (void *)&b->dev_grid);
168   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 2, sizeof(int), (void *)&b->width);
169   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 3, sizeof(int), (void *)&b->height);
170   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 4, sizeof(int), (void *)&b->size_x);
171   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 5, sizeof(int), (void *)&b->size_y);
172   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 6, sizeof(int), (void *)&b->size_z);
173   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 7, sizeof(float), (void *)&b->sigma_s);
174   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 8, sizeof(float), (void *)&b->sigma_r);
175   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 9, b->blocksizex * b->blocksizey * sizeof(int),
176                            NULL);
177   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 10,
178                            b->blocksizex * b->blocksizey * 8 * sizeof(float), NULL);
179   err = dt_opencl_enqueue_kernel_2d_with_local(b->devid, b->global->kernel_splat, sizes, local);
180   return err;
181 }
182 
dt_bilateral_blur_cl(dt_bilateral_cl_t * b)183 cl_int dt_bilateral_blur_cl(dt_bilateral_cl_t *b)
184 {
185   cl_int err = -666;
186   size_t sizes[3] = { 0, 0, 1 };
187 
188   err = dt_opencl_enqueue_copy_buffer_to_buffer(b->devid, b->dev_grid, b->dev_grid_tmp, 0, 0,
189                                                 b->size_x * b->size_y * b->size_z * sizeof(float));
190   if(err != CL_SUCCESS) return err;
191 
192   sizes[0] = ROUNDUPWD(b->size_z);
193   sizes[1] = ROUNDUPHT(b->size_y);
194   int stride1, stride2, stride3;
195   stride1 = b->size_x * b->size_y;
196   stride2 = b->size_x;
197   stride3 = 1;
198   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
199   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid);
200   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
201   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
202   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
203   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
204   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_y);
205   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_x);
206   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
207   if(err != CL_SUCCESS) return err;
208 
209   stride1 = b->size_x * b->size_y;
210   stride2 = 1;
211   stride3 = b->size_x;
212   sizes[0] = ROUNDUPWD(b->size_z);
213   sizes[1] = ROUNDUPHT(b->size_x);
214   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid);
215   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
216   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
217   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
218   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
219   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
220   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_x);
221   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_y);
222   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
223   if(err != CL_SUCCESS) return err;
224 
225   stride1 = 1;
226   stride2 = b->size_x;
227   stride3 = b->size_x * b->size_y;
228   sizes[0] = ROUNDUPWD(b->size_x);
229   sizes[1] = ROUNDUPHT(b->size_y);
230   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 0, sizeof(cl_mem),
231                            (void *)&b->dev_grid_tmp);
232   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 1, sizeof(cl_mem), (void *)&b->dev_grid);
233   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 2, sizeof(int), (void *)&stride1);
234   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 3, sizeof(int), (void *)&stride2);
235   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 4, sizeof(int), (void *)&stride3);
236   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 5, sizeof(int), (void *)&b->size_x);
237   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 6, sizeof(int), (void *)&b->size_y);
238   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 7, sizeof(int), (void *)&b->size_z);
239   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line_z, sizes);
240   return err;
241 }
242 
dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t * b,cl_mem in,cl_mem out,const float detail)243 cl_int dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
244 {
245   cl_int err = -666;
246   cl_mem tmp = NULL;
247 
248   tmp = dt_opencl_alloc_device(b->devid, b->width, b->height, sizeof(float) * 4);
249   if(tmp == NULL) goto error;
250 
251   size_t origin[] = { 0, 0, 0 };
252   size_t region[] = { b->width, b->height, 1 };
253   err = dt_opencl_enqueue_copy_image(b->devid, out, tmp, origin, origin, region);
254   if(err != CL_SUCCESS) goto error;
255 
256   size_t sizes[] = { ROUNDUPWD(b->width), ROUNDUPHT(b->height), 1 };
257   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 0, sizeof(cl_mem), (void *)&in);
258   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 1, sizeof(cl_mem), (void *)&tmp);
259   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 2, sizeof(cl_mem), (void *)&out);
260   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 3, sizeof(cl_mem), (void *)&b->dev_grid);
261   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 4, sizeof(int), (void *)&b->width);
262   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 5, sizeof(int), (void *)&b->height);
263   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 6, sizeof(int), (void *)&b->size_x);
264   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 7, sizeof(int), (void *)&b->size_y);
265   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 8, sizeof(int), (void *)&b->size_z);
266   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 9, sizeof(float), (void *)&b->sigma_s);
267   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 10, sizeof(float), (void *)&b->sigma_r);
268   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 11, sizeof(float), (void *)&detail);
269   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice2, sizes);
270 
271   dt_opencl_release_mem_object(tmp);
272   return err;
273 
274 error:
275   dt_opencl_release_mem_object(tmp);
276   return err;
277 }
278 
dt_bilateral_slice_cl(dt_bilateral_cl_t * b,cl_mem in,cl_mem out,const float detail)279 cl_int dt_bilateral_slice_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
280 {
281   cl_int err = -666;
282   size_t sizes[] = { ROUNDUPWD(b->width), ROUNDUPHT(b->height), 1 };
283   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 0, sizeof(cl_mem), (void *)&in);
284   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 1, sizeof(cl_mem), (void *)&out);
285   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 2, sizeof(cl_mem), (void *)&b->dev_grid);
286   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 3, sizeof(int), (void *)&b->width);
287   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 4, sizeof(int), (void *)&b->height);
288   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 5, sizeof(int), (void *)&b->size_x);
289   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 6, sizeof(int), (void *)&b->size_y);
290   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 7, sizeof(int), (void *)&b->size_z);
291   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 8, sizeof(float), (void *)&b->sigma_s);
292   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 9, sizeof(float), (void *)&b->sigma_r);
293   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 10, sizeof(float), (void *)&detail);
294   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice, sizes);
295   return err;
296 }
297 
/**
 * Free the global bilateral-grid OpenCL state: all six kernels created by
 * dt_bilateral_init_cl_global() and the struct itself.
 *
 * A NULL pointer is accepted and ignored.
 */
void dt_bilateral_free_cl_global(dt_bilateral_cl_global_t *b)
{
  if(b == NULL) return;

  // destroy kernels, in the order they were created
  const int kernels[] = { b->kernel_zero,      b->kernel_splat,
                          b->kernel_blur_line, b->kernel_blur_line_z,
                          b->kernel_slice,     b->kernel_slice2 };
  for(size_t k = 0; k < sizeof(kernels) / sizeof(kernels[0]); k++)
  {
    dt_opencl_free_kernel(kernels[k]);
  }

  free(b);
}
310 
311 #endif
312 
313 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
314 // vim: shiftwidth=2 expandtab tabstop=2 cindent
315 // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
316