1 /*
2 cladsynth.cpp
3
4 (c) Victor Lazzarini, 2019
5
6 This file is part of Csound.
7
8 The Csound Library is free software; you can redistribute it
9 and/or modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 Csound is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with Csound; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
21 02110-1301 USA
22 */
23
24 #include <plugin.h>
25 #include <pstream.h>
26 #include <iostream>
27 #include <sstream>
28
29 #ifdef __MACH__
30 #include <OpenCL/opencl.h>
31 #else
32 #include <CL/opencl.h>
33 #endif
34
35 const char *code = R"(
36 #define FMAXLEN ((float)0x40000000)
37 #define PHMASK 0x3fffffff
38 #define PI 3.1415926f
39
40 inline void AtomicAdd(volatile __global float *source, const float operand) {
41 union {
42 unsigned int intVal;
43 float floatVal;
44 } newVal;
45 union {
46 unsigned int intVal;
47 float floatVal;
48 } prevVal;
49 do {
50 prevVal.floatVal = *source;
51 newVal.floatVal = prevVal.floatVal + operand;
52 } while (atomic_cmpxchg((volatile __global unsigned int *)source,
53 prevVal.intVal, newVal.intVal) != prevVal.intVal);
54 }
55
56 kernel void sample(global float *out, global float *frame,
57 global long *ph,
58 global float *amps, float pitch, int bins,
59 int vsize, float sr) {
60 int t = get_global_id(0);
61 int n = t%vsize; /* sample index */
62 int h = t/vsize; /* bin index */
63 int k = h<<1;
64 long lph;
65 float a = amps[h], ascl = ((float)n)/vsize;
66 float fscal = pitch*FMAXLEN/sr;
67 lph = (ph[h] + (long)(n*round(frame[k+1]*fscal))) & PHMASK;
68 a += ascl*(frame[k] - a);
69 AtomicAdd(&out[n], a*sin((2*PI*lph)/FMAXLEN));
70 }
71
72 kernel void update(global float *out, global float *frame,
73 global long *ph, global float *amps, float pitch, int bins,
74 int vsize, float sr){
75 int h = get_global_id(0);
76 int k = h << 1,i,j;
77 /* update phases and amps */
78 ph[h] = (ph[h] + (long)(vsize*round(pitch*frame[k+1]*FMAXLEN/sr))) & PHMASK;
79 amps[h] = frame[k];
80 if(h >= vsize) return;
81 out[h] = 0.f;
82 }
83
84 )";
85
86
cl_error_string(int err)87 const char * cl_error_string(int err) {
88 switch (err) {
89 case CL_SUCCESS: return "Success!";
90 case CL_DEVICE_NOT_FOUND: return "Device not found.";
91 case CL_DEVICE_NOT_AVAILABLE: return "Device not available";
92 case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available";
93 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
94 return "Memory object allocation failure";
95 case CL_OUT_OF_RESOURCES: return "Out of resources";
96 case CL_OUT_OF_HOST_MEMORY: return "Out of host memory";
97 case CL_PROFILING_INFO_NOT_AVAILABLE:
98 return "Profiling information not available";
99 case CL_MEM_COPY_OVERLAP: return "Memory copy overlap";
100 case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch";
101 case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported";
102 case CL_BUILD_PROGRAM_FAILURE: return "Program build failure";
103 case CL_MAP_FAILURE: return "Map failure";
104 case CL_INVALID_VALUE: return "Invalid value";
105 case CL_INVALID_DEVICE_TYPE: return "Invalid device type";
106 case CL_INVALID_PLATFORM: return "Invalid platform";
107 case CL_INVALID_DEVICE: return "Invalid device";
108 case CL_INVALID_CONTEXT: return "Invalid context";
109 case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties";
110 case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue";
111 case CL_INVALID_HOST_PTR: return "Invalid host pointer";
112 case CL_INVALID_MEM_OBJECT: return "Invalid memory object";
113 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
114 return "Invalid image format descriptor";
115 case CL_INVALID_IMAGE_SIZE: return "Invalid image size";
116 case CL_INVALID_SAMPLER: return "Invalid sampler";
117 case CL_INVALID_BINARY: return "Invalid binary";
118 case CL_INVALID_BUILD_OPTIONS: return "Invalid build options";
119 case CL_INVALID_PROGRAM: return "Invalid program";
120 case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable";
121 case CL_INVALID_KERNEL_NAME: return "Invalid kernel name";
122 case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition";
123 case CL_INVALID_KERNEL: return "Invalid kernel";
124 case CL_INVALID_ARG_INDEX: return "Invalid argument index";
125 case CL_INVALID_ARG_VALUE: return "Invalid argument value";
126 case CL_INVALID_ARG_SIZE: return "Invalid argument size";
127 case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments";
128 case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension";
129 case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size";
130 case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size";
131 case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset";
132 case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list";
133 case CL_INVALID_EVENT: return "Invalid event";
134 case CL_INVALID_OPERATION: return "Invalid operation";
135 case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object";
136 case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size";
137 case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level";
138 default: return "Unknown error";
139 }
140 }
141
142 struct Cladsyn : csnd::Plugin<1, 5> {
143
144 cl_mem out;
145 cl_mem frame;
146 cl_mem ph;
147 cl_mem amps;
148 int bins;
149 size_t threads;
150 int count;
151 int vsamps;
152 size_t mthreads;
153 int framecount;
154 cl_context context;
155 cl_command_queue commands;
156 cl_program program;
157 cl_kernel kernel1, kernel2;
158 size_t wgs1, wgs2;
159 csnd::AuxMem<float> mix;
160 float cs_sr;
161
162
initCladsyn163 int init() {
164 int asize, ipsize, fpsize, err;
165 cl_device_id device_ids[32], device_id;
166 cl_uint num = 0, nump = 0;
167 cl_platform_id platforms[16];
168 uint32_t i;
169 csnd::pv_frame &fsig = inargs.fsig_data(0);
170 int inum = (int) inargs[3];
171 int idev = (int) inargs[4];
172
173 if(fsig.hop_size() > 1024)
174 return csound->init_error("hopsize is too large\n");
175
176 err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 32, device_ids, &num);
177 if (err != CL_SUCCESS){
178 int devs = 0;
179 clGetPlatformIDs(16, platforms, &nump);
180 for(i=0; i < nump && devs < 32; i++){
181 char name[128];
182 std::stringstream msg;
183 clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 128, name, NULL);
184 msg << "available platform[" << i << "]: " << name << std::endl;
185 csound->message(msg.str());
186 err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL,
187 32-devs, &device_ids[devs], &num);
188 if (err != CL_SUCCESS) {
189 std::stringstream emsg;
190 emsg << "failed to find an OpenCL device!" <<
191 cl_error_string(err) << std::endl;
192 csound->init_error(emsg.str());
193 }
194 }
195 devs += num;
196 }
197
198 for(i=0; i < num; i++){
199 char name[128];
200 cl_device_type type;
201 clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 128, name, NULL);
202 clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type),
203 &type, NULL);
204 if(type & CL_DEVICE_TYPE_CPU) {
205 std::stringstream msg;
206 msg << "available CPU[device " << i << "] " << name << std::endl;
207 csound->message(msg.str());
208 }
209 else if(type & CL_DEVICE_TYPE_GPU) {
210 std::stringstream msg;
211 msg << "available GPU[device " << i << "] " << name << std::endl;
212 csound->message(msg.str());
213 }
214 else if(type & CL_DEVICE_TYPE_ACCELERATOR) {
215 std::stringstream msg;
216 msg << "available ACCELLERATOR[device " << i << "] " << name
217 << std::endl;
218 csound->message(msg.str());
219 }
220 else {
221 std::stringstream msg;
222 msg << "available GENERIC[device " << i << "] " << name << std::endl;
223 csound->message(msg.str());
224 }
225 }
226
227 // SELECT THE DEVICE HERE
228 if(idev < num)
229 device_id = device_ids[idev];
230 else
231 device_id = device_ids[num-1];
232
233 context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
234 if (!context) {
235 std::stringstream msg;
236 msg << "Failed to create a compute context! " << cl_error_string(err)
237 << std::endl;
238 return csound->init_error(msg.str());
239 }
240
241 // Create commands
242 commands = clCreateCommandQueue(context, device_id, 0, &err);
243 if (!commands) {
244 std::stringstream msg;
245 msg << "Failed to create commands! " << cl_error_string(err)
246 << std::endl;
247 return csound->init_error(msg.str());
248 }
249 // Create the compute program from the source buffer
250 program = clCreateProgramWithSource(context, 1, (const char **) &code,
251 NULL, &err);
252 if (!program){
253 std::stringstream msg;
254 msg << "Failed to create program! " << cl_error_string(err)
255 << std::endl;
256 return csound->init_error(msg.str());
257 }
258
259 err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
260 if (err != CL_SUCCESS)
261 {
262 size_t len;
263 char buffer[2048];
264 std::stringstream msg;
265 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
266 sizeof(buffer), buffer, &len);
267 msg << "Failed to build program executable! " << cl_error_string(err)
268 << std::endl << buffer << std::endl;
269 return csound->init_error(msg.str());
270 }
271
272 kernel1 = clCreateKernel(program, "sample", &err);
273 if (!kernel1 || err != CL_SUCCESS) {
274 std::stringstream msg;
275 msg << "Failed to create sample compute kernel! " << cl_error_string(err)
276 << std::endl;
277 return csound->init_error(msg.str());
278 }
279
280 kernel2 = clCreateKernel(program, "update", &err);
281 if (!kernel2 || err != CL_SUCCESS) {
282 std::stringstream msg;
283 msg << "Failed to create update compute kernel! " << cl_error_string(err)
284 << std::endl;
285 return csound->init_error(msg.str());
286 }
287
288 {
289 char name[128];
290 std::stringstream msg;
291 clGetDeviceInfo(device_id, CL_DEVICE_NAME, 128, name, NULL);
292 msg << "using device: " << name << std::endl;
293 csound->message(msg.str());
294 }
295
296 bins = fsig.nbins() - 1;
297 if(inum > 0 && inum < bins) bins = inum;
298 vsamps = fsig.hop_size();
299 threads = bins*vsamps;
300 mthreads = bins > vsamps ? bins : vsamps;
301 asize = vsamps*sizeof(cl_float);
302 ipsize = mthreads*sizeof(cl_long);
303 fpsize = fsig.dft_size()*sizeof(cl_float);
304
305 out = clCreateBuffer(context,0, asize, NULL, NULL);
306 frame = clCreateBuffer(context, CL_MEM_READ_ONLY, fpsize, NULL, NULL);
307 ph = clCreateBuffer(context,0, ipsize, NULL, NULL);
308 amps = clCreateBuffer(context,0, mthreads*sizeof(cl_float), NULL, NULL);
309
310 // memset needed?
311 asize = vsamps*sizeof(float);
312 mix.allocate(csound, asize);
313 csound->plugin_deinit(this);
314 count = 0;
315 cs_sr = csound->sr();
316
317 clGetKernelWorkGroupInfo(kernel1,
318 device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(wgs1), &wgs1, NULL);
319 clGetKernelWorkGroupInfo(kernel2,
320 device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(wgs2), &wgs2, NULL);
321
322 clSetKernelArg(kernel1, 0, sizeof(cl_mem), &out);
323 clSetKernelArg(kernel1, 1, sizeof(cl_mem), &frame);
324 clSetKernelArg(kernel1, 2, sizeof(cl_mem), &ph);
325 clSetKernelArg(kernel1, 3, sizeof(cl_mem), &s);
326 clSetKernelArg(kernel1, 5, sizeof(cl_int), &bins);
327 clSetKernelArg(kernel1, 6, sizeof(cl_int), &vsamps);
328 clSetKernelArg(kernel1, 7, sizeof(cl_float), &cs_sr);
329
330 clSetKernelArg(kernel2, 0, sizeof(cl_mem), &out);
331 clSetKernelArg(kernel2, 1, sizeof(cl_mem), &frame);
332 clSetKernelArg(kernel2, 2, sizeof(cl_mem), &ph);
333 clSetKernelArg(kernel2, 3, sizeof(cl_mem), &s);
334 clSetKernelArg(kernel2, 5, sizeof(cl_int), &bins);
335 clSetKernelArg(kernel2, 6, sizeof(cl_int), &vsamps);
336 clSetKernelArg(kernel2, 7, sizeof(cl_float), &cs_sr);
337
338 return OK;
339 }
340
deinitCladsyn341 int deinit() {
342 clReleaseMemObject(out);
343 clReleaseMemObject(ph);
344 clReleaseMemObject(frame);
345 clReleaseMemObject(amps);
346 clReleaseProgram(program);
347 clReleaseKernel(kernel1);
348 clReleaseKernel(kernel2);
349 clReleaseCommandQueue(commands);
350 clReleaseContext(context);
351 return OK;
352 }
353
aperfCladsyn354 int aperf() {
355
356 uint32_t n;
357 csnd::AudioSig asig(this, outargs(0));
358 float *fp = inargs.fsig_data(0).data();
359
360 for (auto &s : asig) {
361 if(count == 0) {
362 int err;
363 float freq = inargs[2];
364 clSetKernelArg(kernel1, 4, sizeof(cl_float), &freq);
365 clSetKernelArg(kernel2, 4, sizeof(cl_float), &freq);
366
367 clEnqueueWriteBuffer(commands,frame, CL_TRUE, 0, sizeof(cl_float)*bins*2,
368 fp, 0, NULL, NULL);
369 err = clEnqueueNDRangeKernel(commands, kernel1, 1, NULL, &threads, &wgs1,
370 0, NULL, NULL);
371 if(err) {
372 std::stringstream msg;
373 msg << "Error: Failed to compute sample kernel!" << cl_error_string(err)
374 << std::endl;
375 csound->message(msg.str());
376 }
377 clFinish(commands);
378 clEnqueueReadBuffer(commands, out,
379 CL_TRUE, 0,vsamps*sizeof(cl_float), mix.data(), 0, NULL, NULL);
380 err = clEnqueueNDRangeKernel(commands,kernel2, 1, NULL, &mthreads,
381 &wgs2, 0, NULL, NULL);
382 if(err) {
383 std::stringstream msg;
384 msg << "Error: Failed to compute update kernel!" << cl_error_string(err)
385 << std::endl;
386 csound->message(msg.str());
387 }
388 count = vsamps;
389 }
390
391 s = mix[vsamps - count]*inargs[1];
392 count--;
393 }
394
395 return OK;
396 }
397 };
398
399 #include <modload.h>
on_load(Csound * csound)400 void csnd::on_load(Csound *csound) {
401 csnd::plugin<Cladsyn>(csound, "cladsyn", "a", "fkkii", csnd::thread::ia);
402 }
403
404
405