1 /*
2   cladsynth.cpp
3 
4   (c) Victor Lazzarini, 2019
5 
6   This file is part of Csound.
7 
8   The Csound Library is free software; you can redistribute it
9   and/or modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 2.1 of the License, or (at your option) any later version.
12 
13   Csound is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU Lesser General Public
19   License along with Csound; if not, write to the Free Software
20   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
21   02110-1301 USA
22 */
23 
24 #include <plugin.h>
25 #include <pstream.h>
26 #include <iostream>
27 #include <sstream>
28 
29 #ifdef __MACH__
30 #include <OpenCL/opencl.h>
31 #else
32 #include <CL/opencl.h>
33 #endif
34 
35 const char *code =  R"(
36 #define FMAXLEN    ((float)0x40000000)
37 #define PHMASK     0x3fffffff
38 #define PI         3.1415926f
39 
40 inline void AtomicAdd(volatile __global float *source, const float operand) {
41      union {
42          unsigned int intVal;
43          float floatVal;
44      } newVal;
45      union {
46          unsigned int intVal;
47          float floatVal;
48      } prevVal;
49      do {
50          prevVal.floatVal = *source;
51          newVal.floatVal = prevVal.floatVal + operand;
52      } while (atomic_cmpxchg((volatile __global unsigned int *)source,
53        prevVal.intVal, newVal.intVal) != prevVal.intVal);
54 }
55 
56 kernel void sample(global float *out, global float *frame,
57                     global long *ph,
58                     global float *amps, float pitch, int bins,
59                     int vsize,  float sr) {
60    int t = get_global_id(0);
61    int n =  t%vsize;  /* sample index */
62    int h = t/vsize;  /* bin index */
63    int k = h<<1;
64    long lph;
65    float a = amps[h], ascl = ((float)n)/vsize;
66    float fscal = pitch*FMAXLEN/sr;
67    lph = (ph[h] + (long)(n*round(frame[k+1]*fscal))) & PHMASK;
68    a += ascl*(frame[k] - a);
69    AtomicAdd(&out[n], a*sin((2*PI*lph)/FMAXLEN));
70 }
71 
72 kernel void update(global float *out, global float *frame,
73        global long *ph, global float *amps, float pitch, int  bins,
74        int vsize, float sr){
75   int h =  get_global_id(0);
76   int k = h << 1,i,j;
77   /* update phases and amps */
78   ph[h]  = (ph[h] + (long)(vsize*round(pitch*frame[k+1]*FMAXLEN/sr))) & PHMASK;
79   amps[h] = frame[k];
80   if(h >= vsize) return;
81   out[h] = 0.f;
82 }
83 
84 )";
85 
86 
cl_error_string(int err)87 const char * cl_error_string(int err) {
88     switch (err) {
89     case CL_SUCCESS:                            return "Success!";
90     case CL_DEVICE_NOT_FOUND:                   return "Device not found.";
91     case CL_DEVICE_NOT_AVAILABLE:               return "Device not available";
92     case CL_COMPILER_NOT_AVAILABLE:             return "Compiler not available";
93     case CL_MEM_OBJECT_ALLOCATION_FAILURE:
94                                    return "Memory object allocation failure";
95     case CL_OUT_OF_RESOURCES:                   return "Out of resources";
96     case CL_OUT_OF_HOST_MEMORY:                 return "Out of host memory";
97     case CL_PROFILING_INFO_NOT_AVAILABLE:
98                                    return "Profiling information not available";
99     case CL_MEM_COPY_OVERLAP:                   return "Memory copy overlap";
100     case CL_IMAGE_FORMAT_MISMATCH:              return "Image format mismatch";
101     case CL_IMAGE_FORMAT_NOT_SUPPORTED:         return "Image format not supported";
102     case CL_BUILD_PROGRAM_FAILURE:              return "Program build failure";
103     case CL_MAP_FAILURE:                        return "Map failure";
104     case CL_INVALID_VALUE:                      return "Invalid value";
105     case CL_INVALID_DEVICE_TYPE:                return "Invalid device type";
106     case CL_INVALID_PLATFORM:                   return "Invalid platform";
107     case CL_INVALID_DEVICE:                     return "Invalid device";
108     case CL_INVALID_CONTEXT:                    return "Invalid context";
109     case CL_INVALID_QUEUE_PROPERTIES:           return "Invalid queue properties";
110     case CL_INVALID_COMMAND_QUEUE:              return "Invalid command queue";
111     case CL_INVALID_HOST_PTR:                   return "Invalid host pointer";
112     case CL_INVALID_MEM_OBJECT:                 return "Invalid memory object";
113     case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
114                                    return "Invalid image format descriptor";
115     case CL_INVALID_IMAGE_SIZE:                 return "Invalid image size";
116     case CL_INVALID_SAMPLER:                    return "Invalid sampler";
117     case CL_INVALID_BINARY:                     return "Invalid binary";
118     case CL_INVALID_BUILD_OPTIONS:              return "Invalid build options";
119     case CL_INVALID_PROGRAM:                    return "Invalid program";
120     case CL_INVALID_PROGRAM_EXECUTABLE:         return "Invalid program executable";
121     case CL_INVALID_KERNEL_NAME:                return "Invalid kernel name";
122     case CL_INVALID_KERNEL_DEFINITION:          return "Invalid kernel definition";
123     case CL_INVALID_KERNEL:                     return "Invalid kernel";
124     case CL_INVALID_ARG_INDEX:                  return "Invalid argument index";
125     case CL_INVALID_ARG_VALUE:                  return "Invalid argument value";
126     case CL_INVALID_ARG_SIZE:                   return "Invalid argument size";
127     case CL_INVALID_KERNEL_ARGS:                return "Invalid kernel arguments";
128     case CL_INVALID_WORK_DIMENSION:             return "Invalid work dimension";
129     case CL_INVALID_WORK_GROUP_SIZE:            return "Invalid work group size";
130     case CL_INVALID_WORK_ITEM_SIZE:             return "Invalid work item size";
131     case CL_INVALID_GLOBAL_OFFSET:              return "Invalid global offset";
132     case CL_INVALID_EVENT_WAIT_LIST:            return "Invalid event wait list";
133     case CL_INVALID_EVENT:                      return "Invalid event";
134     case CL_INVALID_OPERATION:                  return "Invalid operation";
135     case CL_INVALID_GL_OBJECT:                  return "Invalid OpenGL object";
136     case CL_INVALID_BUFFER_SIZE:                return "Invalid buffer size";
137     case CL_INVALID_MIP_LEVEL:                  return "Invalid mip-map level";
138     default: return "Unknown error";
139     }
140 }
141 
142 struct Cladsyn : csnd::Plugin<1, 5> {
143 
144   cl_mem out;
145   cl_mem frame;
146   cl_mem ph;
147   cl_mem amps;
148   int bins;
149   size_t threads;
150   int count;
151   int vsamps;
152   size_t mthreads;
153   int framecount;
154   cl_context context;
155   cl_command_queue commands;
156   cl_program program;
157   cl_kernel kernel1, kernel2;
158   size_t wgs1, wgs2;
159   csnd::AuxMem<float> mix;
160   float cs_sr;
161 
162 
initCladsyn163   int init() {
164     int asize, ipsize, fpsize, err;
165     cl_device_id device_ids[32], device_id;
166     cl_uint num = 0, nump =  0;
167     cl_platform_id platforms[16];
168     uint32_t i;
169     csnd::pv_frame &fsig = inargs.fsig_data(0);
170     int inum = (int) inargs[3];
171     int idev = (int) inargs[4];
172 
173     if(fsig.hop_size() > 1024)
174      return csound->init_error("hopsize is too large\n");
175 
176     err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 32, device_ids, &num);
177     if (err != CL_SUCCESS){
178       int devs = 0;
179       clGetPlatformIDs(16, platforms, &nump);
180       for(i=0; i < nump && devs < 32; i++){
181         char name[128];
182         std::stringstream msg;
183         clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 128, name, NULL);
184         msg << "available platform[" << i << "]: " << name << std::endl;
185         csound->message(msg.str());
186         err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL,
187                           32-devs, &device_ids[devs], &num);
188         if (err != CL_SUCCESS) {
189          std::stringstream emsg;
190          emsg << "failed to find an OpenCL device!" <<
191            cl_error_string(err) << std::endl;
192          csound->init_error(emsg.str());
193         }
194       }
195       devs += num;
196     }
197 
198    for(i=0; i < num; i++){
199      char name[128];
200      cl_device_type type;
201      clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 128, name, NULL);
202      clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type),
203                   &type, NULL);
204      if(type & CL_DEVICE_TYPE_CPU) {
205        std::stringstream msg;
206        msg <<  "available CPU[device " << i << "] " << name << std::endl;
207        csound->message(msg.str());
208      }
209      else  if(type & CL_DEVICE_TYPE_GPU) {
210        std::stringstream msg;
211        msg <<  "available GPU[device " << i << "] " << name << std::endl;
212        csound->message(msg.str());
213      }
214      else  if(type & CL_DEVICE_TYPE_ACCELERATOR) {
215        std::stringstream msg;
216        msg <<  "available ACCELLERATOR[device " << i << "] " << name
217            << std::endl;
218        csound->message(msg.str());
219      }
220      else {
221        std::stringstream msg;
222        msg <<  "available GENERIC[device " << i << "] " << name << std::endl;
223        csound->message(msg.str());
224      }
225   }
226 
227   // SELECT THE DEVICE HERE
228   if(idev < num)
229    device_id = device_ids[idev];
230   else
231    device_id = device_ids[num-1];
232 
233    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
234    if (!context) {
235      std::stringstream msg;
236      msg << "Failed to create a compute context! " << cl_error_string(err)
237          << std::endl;
238      return csound->init_error(msg.str());
239    }
240 
241     // Create commands
242     commands = clCreateCommandQueue(context, device_id, 0, &err);
243     if (!commands) {
244      std::stringstream msg;
245      msg << "Failed to create commands! " << cl_error_string(err)
246          << std::endl;
247      return csound->init_error(msg.str());
248     }
249     // Create the compute program from the source buffer
250     program = clCreateProgramWithSource(context, 1, (const char **) &code,
251                                         NULL, &err);
252     if (!program){
253      std::stringstream msg;
254      msg << "Failed to create program! " << cl_error_string(err)
255          << std::endl;
256      return csound->init_error(msg.str());
257     }
258 
259     err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
260     if (err != CL_SUCCESS)
261     {
262      size_t len;
263      char buffer[2048];
264      std::stringstream msg;
265      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
266                               sizeof(buffer), buffer, &len);
267      msg << "Failed to build program executable! " << cl_error_string(err)
268          << std::endl << buffer << std::endl;
269      return csound->init_error(msg.str());
270     }
271 
272     kernel1 = clCreateKernel(program, "sample", &err);
273     if (!kernel1 || err != CL_SUCCESS) {
274      std::stringstream msg;
275      msg << "Failed to create sample compute kernel! " << cl_error_string(err)
276          << std::endl;
277      return csound->init_error(msg.str());
278     }
279 
280     kernel2 = clCreateKernel(program, "update", &err);
281     if (!kernel2 || err != CL_SUCCESS) {
282      std::stringstream msg;
283      msg << "Failed to create update compute kernel! " << cl_error_string(err)
284          << std::endl;
285      return csound->init_error(msg.str());
286     }
287 
288     {
289     char name[128];
290     std::stringstream msg;
291     clGetDeviceInfo(device_id, CL_DEVICE_NAME, 128, name, NULL);
292     msg << "using device: " << name << std::endl;
293     csound->message(msg.str());
294     }
295 
296     bins = fsig.nbins() - 1;
297     if(inum > 0 && inum < bins) bins = inum;
298     vsamps = fsig.hop_size();
299     threads = bins*vsamps;
300     mthreads = bins > vsamps ? bins : vsamps;
301     asize =  vsamps*sizeof(cl_float);
302     ipsize = mthreads*sizeof(cl_long);
303     fpsize = fsig.dft_size()*sizeof(cl_float);
304 
305     out = clCreateBuffer(context,0, asize, NULL, NULL);
306     frame = clCreateBuffer(context, CL_MEM_READ_ONLY, fpsize, NULL, NULL);
307     ph = clCreateBuffer(context,0, ipsize, NULL, NULL);
308     amps = clCreateBuffer(context,0, mthreads*sizeof(cl_float), NULL, NULL);
309 
310    // memset needed?
311    asize = vsamps*sizeof(float);
312    mix.allocate(csound, asize);
313    csound->plugin_deinit(this);
314    count = 0;
315    cs_sr = csound->sr();
316 
317    clGetKernelWorkGroupInfo(kernel1,
318        device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(wgs1), &wgs1, NULL);
319    clGetKernelWorkGroupInfo(kernel2,
320        device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(wgs2), &wgs2, NULL);
321 
322    clSetKernelArg(kernel1, 0, sizeof(cl_mem), &out);
323    clSetKernelArg(kernel1, 1, sizeof(cl_mem), &frame);
324    clSetKernelArg(kernel1, 2, sizeof(cl_mem), &ph);
325    clSetKernelArg(kernel1, 3, sizeof(cl_mem), &amps);
326    clSetKernelArg(kernel1, 5, sizeof(cl_int), &bins);
327    clSetKernelArg(kernel1, 6, sizeof(cl_int), &vsamps);
328    clSetKernelArg(kernel1, 7, sizeof(cl_float), &cs_sr);
329 
330    clSetKernelArg(kernel2, 0, sizeof(cl_mem), &out);
331    clSetKernelArg(kernel2, 1, sizeof(cl_mem), &frame);
332    clSetKernelArg(kernel2, 2, sizeof(cl_mem), &ph);
333    clSetKernelArg(kernel2, 3, sizeof(cl_mem), &amps);
334    clSetKernelArg(kernel2, 5, sizeof(cl_int), &bins);
335    clSetKernelArg(kernel2, 6, sizeof(cl_int), &vsamps);
336    clSetKernelArg(kernel2, 7, sizeof(cl_float), &cs_sr);
337 
338    return OK;
339   }
340 
deinitCladsyn341   int deinit() {
342    clReleaseMemObject(out);
343    clReleaseMemObject(ph);
344    clReleaseMemObject(frame);
345    clReleaseMemObject(amps);
346    clReleaseProgram(program);
347    clReleaseKernel(kernel1);
348    clReleaseKernel(kernel2);
349    clReleaseCommandQueue(commands);
350    clReleaseContext(context);
351    return OK;
352   }
353 
aperfCladsyn354   int aperf() {
355 
356    uint32_t n;
357    csnd::AudioSig asig(this, outargs(0));
358    float *fp = inargs.fsig_data(0).data();
359 
360    for (auto &s : asig) {
361     if(count == 0) {
362      int err;
363      float freq = inargs[2];
364      clSetKernelArg(kernel1, 4, sizeof(cl_float), &freq);
365      clSetKernelArg(kernel2, 4, sizeof(cl_float), &freq);
366 
367      clEnqueueWriteBuffer(commands,frame, CL_TRUE, 0, sizeof(cl_float)*bins*2,
368                           fp, 0, NULL, NULL);
369      err = clEnqueueNDRangeKernel(commands, kernel1, 1, NULL, &threads, &wgs1,
370                                   0, NULL, NULL);
371      if(err)  {
372       std::stringstream msg;
373       msg << "Error: Failed to compute sample kernel!" << cl_error_string(err)
374          << std::endl;
375       csound->message(msg.str());
376      }
377      clFinish(commands);
378      clEnqueueReadBuffer(commands, out,
379                          CL_TRUE, 0,vsamps*sizeof(cl_float), mix.data(), 0, NULL, NULL);
380      err = clEnqueueNDRangeKernel(commands,kernel2, 1, NULL, &mthreads,
381           &wgs2, 0, NULL, NULL);
382      if(err) {
383       std::stringstream msg;
384       msg << "Error: Failed to compute update kernel!" << cl_error_string(err)
385          << std::endl;
386       csound->message(msg.str());
387      }
388      count = vsamps;
389     }
390 
391     s = mix[vsamps - count]*inargs[1];
392     count--;
393    }
394 
395    return OK;
396   }
397 };
398 
399 #include <modload.h>
on_load(Csound * csound)400 void csnd::on_load(Csound *csound) {
401   csnd::plugin<Cladsyn>(csound, "cladsyn", "a", "fkkii", csnd::thread::ia);
402 }
403 
404 
405