1/*
2 * Copyright 2011, Blender Foundation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * Contributor:
19 *      Jeroen Bakker
20 *      Monique Dewanchand
21 */
22
23/// This file contains all opencl kernels for node-operation implementations
24
25// Global SAMPLERS
26const sampler_t SAMPLER_NEAREST       = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
27const sampler_t SAMPLER_NEAREST_CLAMP = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
28
29__constant const int2 zero = {0,0};
30
31// KERNEL --- BOKEH BLUR ---
32__kernel void bokehBlurKernel(__read_only image2d_t boundingBox, __read_only image2d_t inputImage,
33                              __read_only image2d_t bokehImage, __write_only image2d_t output,
34                              int2 offsetInput, int2 offsetOutput, int radius, int step, int2 dimension, int2 offset)
35{
36	int2 coords = {get_global_id(0), get_global_id(1)};
37	coords += offset;
38	float tempBoundingBox;
39	float4 color = {0.0f,0.0f,0.0f,0.0f};
40	float4 multiplyer = {0.0f,0.0f,0.0f,0.0f};
41	float4 bokeh;
42	const float radius2 = radius*2.0f;
43	const int2 realCoordinate = coords + offsetOutput;
44	int2 imageCoordinates = realCoordinate - offsetInput;
45
46	tempBoundingBox = read_imagef(boundingBox, SAMPLER_NEAREST, coords).s0;
47
48	if (tempBoundingBox > 0.0f && radius > 0 ) {
49		const int2 bokehImageDim = get_image_dim(bokehImage);
50		const int2 bokehImageCenter = bokehImageDim/2;
51		const int2 minXY = max(realCoordinate - radius, zero);
52		const int2 maxXY = min(realCoordinate + radius, dimension);
53		int nx, ny;
54
55		float2 uv;
56		int2 inputXy;
57
58		if (radius < 2) {
59			color = read_imagef(inputImage, SAMPLER_NEAREST, imageCoordinates);
60			multiplyer = (float4)(1.0f, 1.0f, 1.0f, 1.0f);
61		}
62
63		for (ny = minXY.y, inputXy.y = ny - offsetInput.y ; ny < maxXY.y ; ny += step, inputXy.y += step) {
64			uv.y = ((realCoordinate.y-ny)/radius2)*bokehImageDim.y+bokehImageCenter.y;
65
66			for (nx = minXY.x, inputXy.x = nx - offsetInput.x; nx < maxXY.x ; nx += step, inputXy.x += step) {
67				uv.x = ((realCoordinate.x-nx)/radius2)*bokehImageDim.x+bokehImageCenter.x;
68				bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);
69				color += bokeh * read_imagef(inputImage, SAMPLER_NEAREST, inputXy);
70				multiplyer += bokeh;
71			}
72		}
73		color /= multiplyer;
74	}
75	else {
76		color = read_imagef(inputImage, SAMPLER_NEAREST, imageCoordinates);
77	}
78
79	write_imagef(output, coords, color);
80}
81
82//KERNEL --- DEFOCUS /VARIABLESIZEBOKEHBLUR ---
83__kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2d_t bokehImage,
84                            __read_only image2d_t inputSize,
85                            __write_only image2d_t output, int2 offsetInput, int2 offsetOutput,
86                            int step, int maxBlurScalar, float threshold, float scalar, int2 dimension, int2 offset)
87{
88	float4 color = {1.0f, 0.0f, 0.0f, 1.0f};
89	int2 coords = {get_global_id(0), get_global_id(1)};
90	coords += offset;
91	const int2 realCoordinate = coords + offsetOutput;
92
93	float4 readColor;
94	float4 tempColor;
95	float4 bokeh;
96	float size;
97	float4 multiplier_accum = {1.0f, 1.0f, 1.0f, 1.0f};
98	float4 color_accum;
99
100	int minx = max(realCoordinate.s0 - maxBlurScalar, 0);
101	int miny = max(realCoordinate.s1 - maxBlurScalar, 0);
102	int maxx = min(realCoordinate.s0 + maxBlurScalar, dimension.s0);
103	int maxy = min(realCoordinate.s1 + maxBlurScalar, dimension.s1);
104
105	{
106		int2 inputCoordinate = realCoordinate - offsetInput;
107		float size_center = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0 * scalar;
108		color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);
109		readColor = color_accum;
110
111		if (size_center > threshold) {
112			for (int ny = miny; ny < maxy; ny += step) {
113				inputCoordinate.s1 = ny - offsetInput.s1;
114				float dy = ny - realCoordinate.s1;
115				for (int nx = minx; nx < maxx; nx += step) {
116					float dx = nx - realCoordinate.s0;
117					if (dx != 0 || dy != 0) {
118						inputCoordinate.s0 = nx - offsetInput.s0;
119						size = min(read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0 * scalar, size_center);
120						if (size > threshold) {
121							if (size >= fabs(dx) && size >= fabs(dy)) {
122								float2 uv = {256.0f + dx * 255.0f / size,
123								             256.0f + dy * 255.0f / size};
124								bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);
125								tempColor = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);
126								color_accum += bokeh * tempColor;
127								multiplier_accum += bokeh;
128							}
129						}
130					}
131				}
132			}
133		}
134
135		color = color_accum * (1.0f / multiplier_accum);
136
137		/* blend in out values over the threshold, otherwise we get sharp, ugly transitions */
138		if ((size_center > threshold) &&
139		    (size_center < threshold * 2.0f))
140		{
141			/* factor from 0-1 */
142			float fac = (size_center - threshold) / threshold;
143			color = (readColor * (1.0f - fac)) +  (color * fac);
144		}
145
146		write_imagef(output, coords, color);
147	}
148}
149
150
151// KERNEL --- DILATE ---
152__kernel void dilateKernel(__read_only image2d_t inputImage,  __write_only image2d_t output,
153                           int2 offsetInput, int2 offsetOutput, int scope, int distanceSquared, int2 dimension,
154                           int2 offset)
155{
156	int2 coords = {get_global_id(0), get_global_id(1)};
157	coords += offset;
158	const int2 realCoordinate = coords + offsetOutput;
159
160	const int2 minXY = max(realCoordinate - scope, zero);
161	const int2 maxXY = min(realCoordinate + scope, dimension);
162
163	float value = 0.0f;
164	int nx, ny;
165	int2 inputXy;
166
167	for (ny = minXY.y, inputXy.y = ny - offsetInput.y ; ny < maxXY.y ; ny ++, inputXy.y++) {
168		const float deltaY = (realCoordinate.y - ny);
169		for (nx = minXY.x, inputXy.x = nx - offsetInput.x; nx < maxXY.x ; nx ++, inputXy.x++) {
170			const float deltaX = (realCoordinate.x - nx);
171			const float measuredDistance = deltaX * deltaX + deltaY * deltaY;
172			if (measuredDistance <= distanceSquared) {
173				value = max(value, read_imagef(inputImage, SAMPLER_NEAREST, inputXy).s0);
174			}
175		}
176	}
177
178	float4 color = {value,0.0f,0.0f,0.0f};
179	write_imagef(output, coords, color);
180}
181
182// KERNEL --- DILATE ---
183__kernel void erodeKernel(__read_only image2d_t inputImage,  __write_only image2d_t output,
184                           int2 offsetInput, int2 offsetOutput, int scope, int distanceSquared, int2 dimension,
185                           int2 offset)
186{
187	int2 coords = {get_global_id(0), get_global_id(1)};
188	coords += offset;
189	const int2 realCoordinate = coords + offsetOutput;
190
191	const int2 minXY = max(realCoordinate - scope, zero);
192	const int2 maxXY = min(realCoordinate + scope, dimension);
193
194	float value = 1.0f;
195	int nx, ny;
196	int2 inputXy;
197
198	for (ny = minXY.y, inputXy.y = ny - offsetInput.y ; ny < maxXY.y ; ny ++, inputXy.y++) {
199		for (nx = minXY.x, inputXy.x = nx - offsetInput.x; nx < maxXY.x ; nx ++, inputXy.x++) {
200			const float deltaX = (realCoordinate.x - nx);
201			const float deltaY = (realCoordinate.y - ny);
202			const float measuredDistance = deltaX * deltaX+deltaY * deltaY;
203			if (measuredDistance <= distanceSquared) {
204				value = min(value, read_imagef(inputImage, SAMPLER_NEAREST, inputXy).s0);
205			}
206		}
207	}
208
209	float4 color = {value,0.0f,0.0f,0.0f};
210	write_imagef(output, coords, color);
211}
212
213// KERNEL --- DIRECTIONAL BLUR ---
214__kernel void directionalBlurKernel(__read_only image2d_t inputImage,  __write_only image2d_t output,
215                                    int2 offsetOutput, int iterations, float scale, float rotation, float2 translate,
216                                     float2 center, int2 offset)
217{
218	int2 coords = {get_global_id(0), get_global_id(1)};
219	coords += offset;
220	const int2 realCoordinate = coords + offsetOutput;
221
222	float4 col;
223	float2 ltxy = translate;
224	float lsc = scale;
225	float lrot = rotation;
226
227	col = read_imagef(inputImage, SAMPLER_NEAREST, realCoordinate);
228
229	/* blur the image */
230	for (int i = 0; i < iterations; ++i) {
231		const float cs = cos(lrot), ss = sin(lrot);
232		const float isc = 1.0f / (1.0f + lsc);
233
234		const float v = isc * (realCoordinate.s1 - center.s1) + ltxy.s1;
235		const float u = isc * (realCoordinate.s0 - center.s0) + ltxy.s0;
236		float2 uv = {
237			cs * u + ss * v + center.s0,
238			cs * v - ss * u + center.s1
239		};
240
241		col += read_imagef(inputImage, SAMPLER_NEAREST_CLAMP, uv);
242
243		/* double transformations */
244		ltxy += translate;
245		lrot += rotation;
246		lsc += scale;
247	}
248
249	col *= (1.0f/(iterations+1));
250
251	write_imagef(output, coords, col);
252}
253
254// KERNEL --- GAUSSIAN BLUR ---
255__kernel void gaussianXBlurOperationKernel(__read_only image2d_t inputImage,
256                                           int2 offsetInput,
257                                           __write_only image2d_t output,
258                                           int2 offsetOutput,
259                                           int filter_size,
260                                           int2 dimension,
261                                           __global float *gausstab,
262                                           int2 offset)
263{
264	float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
265	int2 coords = {get_global_id(0), get_global_id(1)};
266	coords += offset;
267	const int2 realCoordinate = coords + offsetOutput;
268	int2 inputCoordinate = realCoordinate - offsetInput;
269	float weight = 0.0f;
270
271	int xmin = max(realCoordinate.x - filter_size,     0) - offsetInput.x;
272	int xmax = min(realCoordinate.x + filter_size + 1, dimension.x) - offsetInput.x;
273
274	for (int nx = xmin, i = max(filter_size - realCoordinate.x, 0); nx < xmax; ++nx, ++i) {
275		float w = gausstab[i];
276		inputCoordinate.x = nx;
277		color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;
278		weight += w;
279	}
280
281	color *= (1.0f / weight);
282
283	write_imagef(output, coords, color);
284}
285
286__kernel void gaussianYBlurOperationKernel(__read_only image2d_t inputImage,
287                                           int2 offsetInput,
288                                           __write_only image2d_t output,
289                                           int2 offsetOutput,
290                                           int filter_size,
291                                           int2 dimension,
292                                           __global float *gausstab,
293                                           int2 offset)
294{
295	float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
296	int2 coords = {get_global_id(0), get_global_id(1)};
297	coords += offset;
298	const int2 realCoordinate = coords + offsetOutput;
299	int2 inputCoordinate = realCoordinate - offsetInput;
300	float weight = 0.0f;
301
302	int ymin = max(realCoordinate.y - filter_size,     0) - offsetInput.y;
303	int ymax = min(realCoordinate.y + filter_size + 1, dimension.y) - offsetInput.y;
304
305	for (int ny = ymin, i = max(filter_size - realCoordinate.y, 0); ny < ymax; ++ny, ++i) {
306		float w = gausstab[i];
307		inputCoordinate.y = ny;
308		color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;
309		weight += w;
310	}
311
312	color *= (1.0f / weight);
313
314	write_imagef(output, coords, color);
315}
316