1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11 %                                                                             %
12 %                                                                             %
13 %                       MagickCore Acceleration Methods                       %
14 %                                                                             %
15 %                              Software Design                                %
16 %                                  Cristy                                     %
17 %                               SiuChi Chan                                   %
18 %                              Guansong Zhang                                 %
19 %                               January 2010                                  %
20 %                               Dirk Lemstra                                  %
21 %                                 May 2016                                    %
22 %                                                                             %
23 %                                                                             %
24 %  Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization      %
25 %  dedicated to making software imaging solutions freely available.           %
26 %                                                                             %
27 %  You may not use this file except in compliance with the License.  You may  %
28 %  obtain a copy of the License at                                            %
29 %                                                                             %
30 %    https://imagemagick.org/script/license.php                               %
31 %                                                                             %
32 %  Unless required by applicable law or agreed to in writing, software        %
33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35 %  See the License for the specific language governing permissions and        %
36 %  limitations under the License.                                             %
37 %                                                                             %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40 
41 /*
42 Include declarations.
43 */
44 #include "magick/studio.h"
45 #include "magick/accelerate-private.h"
46 #include "magick/accelerate-kernels-private.h"
47 #include "magick/artifact.h"
48 #include "magick/cache.h"
49 #include "magick/cache-private.h"
50 #include "magick/cache-view.h"
51 #include "magick/color-private.h"
52 #include "magick/delegate-private.h"
53 #include "magick/enhance.h"
54 #include "magick/exception.h"
55 #include "magick/exception-private.h"
56 #include "magick/gem.h"
57 #include "magick/hashmap.h"
58 #include "magick/image.h"
59 #include "magick/image-private.h"
60 #include "magick/list.h"
61 #include "magick/memory_.h"
62 #include "magick/monitor-private.h"
63 #include "magick/opencl.h"
64 #include "magick/opencl-private.h"
65 #include "magick/option.h"
66 #include "magick/pixel-private.h"
67 #include "magick/prepress.h"
68 #include "magick/quantize.h"
69 #include "magick/random_.h"
70 #include "magick/random-private.h"
71 #include "magick/registry.h"
72 #include "magick/resize.h"
73 #include "magick/resize-private.h"
74 #include "magick/semaphore.h"
75 #include "magick/splay-tree.h"
76 #include "magick/statistic.h"
77 #include "magick/string_.h"
78 #include "magick/string-private.h"
79 #include "magick/token.h"
80 
81 #ifdef MAGICKCORE_CLPERFMARKER
82 #include "CLPerfMarker.h"
83 #endif
84 
85 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
86 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
87 
88 #if defined(MAGICKCORE_OPENCL_SUPPORT)
89 
90 /*
91   Define declarations.
92 */
93 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
94 
95 /*
96   Static declarations.
97 */
98 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
99 {
100   BoxWeightingFunction,
101   TriangleWeightingFunction,
102   HanningWeightingFunction,
103   HammingWeightingFunction,
104   BlackmanWeightingFunction,
105   CubicBCWeightingFunction,
106   SincWeightingFunction,
107   SincFastWeightingFunction,
108   LastWeightingFunction
109 };
110 
111 /*
112   Forward declarations.
113 */
114 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
115   const double radius,const double sigma,const double gain,
116   const double threshold,int blurOnly, ExceptionInfo *exception);
117 
118 /*
119   Helper functions.
120 */
121 
checkAccelerateCondition(const Image * image,const ChannelType channel)122 static MagickBooleanType checkAccelerateCondition(const Image* image,
123   const ChannelType channel)
124 {
125   /* only direct class images are supported */
126   if (image->storage_class != DirectClass)
127     return(MagickFalse);
128 
129   /* check if the image's colorspace is supported */
130   if (image->colorspace != RGBColorspace &&
131       image->colorspace != sRGBColorspace &&
132       image->colorspace != LinearGRAYColorspace &&
133       image->colorspace != GRAYColorspace)
134     return(MagickFalse);
135 
136   /* check if the channel is supported */
137   if (((channel & RedChannel) == 0) ||
138       ((channel & GreenChannel) == 0) ||
139       ((channel & BlueChannel) == 0))
140     return(MagickFalse);
141 
142   /* check if the virtual pixel method is compatible with the OpenCL implementation */
143   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
144       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
145     return(MagickFalse);
146 
147   /* check if the image has clip_mask / mask */
148   if ((image->clip_mask != (Image *) NULL) || (image->mask != (Image *) NULL))
149     return(MagickFalse);
150 
151   return(MagickTrue);
152 }
153 
checkHistogramCondition(Image * image,const ChannelType channel)154 static MagickBooleanType checkHistogramCondition(Image *image,
155   const ChannelType channel)
156 {
157   /* ensure this is the only pass get in for now. */
158   if ((channel & SyncChannels) == 0)
159     return MagickFalse;
160 
161   if (image->intensity == Rec601LuminancePixelIntensityMethod ||
162       image->intensity == Rec709LuminancePixelIntensityMethod)
163     return MagickFalse;
164 
165   if (image->colorspace != sRGBColorspace)
166     return MagickFalse;
167 
168   return MagickTrue;
169 }
170 
checkOpenCLEnvironment(ExceptionInfo * exception)171 static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
172 {
173   MagickBooleanType
174     flag;
175 
176   MagickCLEnv
177     clEnv;
178 
179   clEnv=GetDefaultOpenCLEnv();
180 
181   GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED,
182     sizeof(MagickBooleanType),&flag,exception);
183   if (flag != MagickFalse)
184     return(MagickFalse);
185 
186   GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED,
187     sizeof(MagickBooleanType),&flag,exception);
188   if (flag == MagickFalse)
189     {
190       if (InitOpenCLEnv(clEnv,exception) == MagickFalse)
191         return(MagickFalse);
192 
193       GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED,
194         sizeof(MagickBooleanType),&flag,exception);
195       if (flag != MagickFalse)
196         return(MagickFalse);
197     }
198 
199   return(MagickTrue);
200 }
201 
202 /* pad the global workgroup size to the next multiple of
203    the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)204 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
205   const unsigned int orgGlobalSize,const unsigned int localGroupSize)
206 {
207   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
208 }
209 
paramMatchesValue(MagickCLEnv clEnv,MagickOpenCLEnvParam param,const char * value,ExceptionInfo * exception)210 static MagickBooleanType paramMatchesValue(MagickCLEnv clEnv,
211   MagickOpenCLEnvParam param,const char *value,ExceptionInfo *exception)
212 {
213   char
214     *val;
215 
216   MagickBooleanType
217     status;
218 
219   status=GetMagickOpenCLEnvParam(clEnv,param,sizeof(val),&val,exception);
220   if (status != MagickFalse)
221     {
222       status=strcmp(value,val) == 0 ? MagickTrue : MagickFalse;
223       RelinquishMagickMemory(val);
224     }
225   return(status);
226 }
227 
228 /*
229 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
230 %                                                                             %
231 %                                                                             %
232 %                                                                             %
233 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
234 %                                                                             %
235 %                                                                             %
236 %                                                                             %
237 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
238 */
239 
ComputeAddNoiseImage(const Image * image,const ChannelType channel,const NoiseType noise_type,ExceptionInfo * exception)240 static Image *ComputeAddNoiseImage(const Image *image,
241   const ChannelType channel,const NoiseType noise_type,
242   ExceptionInfo *exception)
243 {
244   cl_command_queue
245     queue;
246 
247   cl_context
248     context;
249 
250   cl_int
251     inputPixelCount,
252     pixelsPerWorkitem,
253     clStatus;
254 
255   cl_uint
256     event_count,
257     seed0,
258     seed1;
259 
260   cl_kernel
261     addNoiseKernel;
262 
263   cl_event
264     event;
265 
266   cl_mem
267     filteredImageBuffer,
268     imageBuffer;
269 
270   const char
271     *option;
272 
273   cl_event
274     *events;
275 
276   float
277     attenuate;
278 
279   MagickBooleanType
280     outputReady;
281 
282   MagickCLEnv
283     clEnv;
284 
285   Image
286     *filteredImage;
287 
288   RandomInfo
289     **magick_restrict random_info;
290 
291   size_t
292     global_work_size[1],
293     local_work_size[1];
294 
295   unsigned int
296     k,
297     numRandomNumberPerPixel;
298 
299 #if defined(MAGICKCORE_OPENMP_SUPPORT)
300   unsigned long
301     key;
302 #endif
303 
304   outputReady = MagickFalse;
305   clEnv = NULL;
306   filteredImage = NULL;
307   context = NULL;
308   imageBuffer = NULL;
309   filteredImageBuffer = NULL;
310   queue = NULL;
311   addNoiseKernel = NULL;
312 
313   clEnv = GetDefaultOpenCLEnv();
314   context = GetOpenCLContext(clEnv);
315   queue = AcquireOpenCLCommandQueue(clEnv);
316 
317   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
318   if (filteredImage == (Image *) NULL)
319     goto cleanup;
320 
321   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
322   if (imageBuffer == (cl_mem) NULL)
323   {
324     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
325       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
326     goto cleanup;
327   }
328   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage,exception);
329   if (filteredImageBuffer == (cl_mem) NULL)
330   {
331     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
332       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
333     goto cleanup;
334   }
335 
336   /* find out how many random numbers needed by pixel */
337   numRandomNumberPerPixel = 0;
338   {
339     unsigned int numRandPerChannel = 0;
340     switch (noise_type)
341     {
342     case UniformNoise:
343     case ImpulseNoise:
344     case LaplacianNoise:
345     case RandomNoise:
346     default:
347       numRandPerChannel = 1;
348       break;
349     case GaussianNoise:
350     case MultiplicativeGaussianNoise:
351     case PoissonNoise:
352       numRandPerChannel = 2;
353       break;
354     };
355 
356     if ((channel & RedChannel) != 0)
357       numRandomNumberPerPixel+=numRandPerChannel;
358     if ((channel & GreenChannel) != 0)
359       numRandomNumberPerPixel+=numRandPerChannel;
360     if ((channel & BlueChannel) != 0)
361       numRandomNumberPerPixel+=numRandPerChannel;
362     if ((channel & OpacityChannel) != 0)
363       numRandomNumberPerPixel+=numRandPerChannel;
364   }
365 
366   /* set up the random number generators */
367   attenuate=1.0;
368   option=GetImageArtifact(image,"attenuate");
369   if (option != (char *) NULL)
370     attenuate=StringToDouble(option,(char **) NULL);
371   random_info=AcquireRandomInfoThreadSet();
372 #if defined(MAGICKCORE_OPENMP_SUPPORT)
373   key=GetRandomSecretKey(random_info[0]);
374   (void) key;
375 #endif
376 
377   addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoise");
378 
379   {
380     cl_uint computeUnitCount;
381     cl_uint workItemCount;
382     clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &computeUnitCount, NULL);
383     workItemCount = computeUnitCount * 2 * 256;			// 256 work items per group, 2 groups per CU
384     inputPixelCount = (cl_int) (image->columns * image->rows);
385     pixelsPerWorkitem = (inputPixelCount + workItemCount - 1) / workItemCount;
386     pixelsPerWorkitem = ((pixelsPerWorkitem + 3) / 4) * 4;
387 
388     local_work_size[0] = 256;
389     global_work_size[0] = workItemCount;
390   }
391   {
392     RandomInfo* randomInfo = AcquireRandomInfo();
393 	const unsigned long* s = GetRandomInfoSeed(randomInfo);
394 	seed0 = s[0];
395 	GetPseudoRandomValue(randomInfo);
396 	seed1 = s[0];
397 	randomInfo = DestroyRandomInfo(randomInfo);
398   }
399 
400   k = 0;
401   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&imageBuffer);
402   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
403   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&inputPixelCount);
404   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
405   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
406   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
407   attenuate=1.0f;
408   option=GetImageArtifact(image,"attenuate");
409   if (option != (char *) NULL)
410     attenuate=(float)StringToDouble(option,(char **) NULL);
411   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
412   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&seed0);
413   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&seed1);
414   clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
415 
416   events=GetOpenCLEvents(image,&event_count);
417   clStatus=clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,1,NULL,global_work_size,NULL,event_count,events,&event);
418   events=(cl_event *) RelinquishMagickMemory(events);
419   if (clStatus != CL_SUCCESS)
420   {
421     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
422     goto cleanup;
423   }
424   if (RecordProfileData(clEnv,AddNoiseKernel,event) == MagickFalse)
425     {
426       AddOpenCLEvent(image,event);
427       AddOpenCLEvent(filteredImage,event);
428     }
429   clEnv->library->clReleaseEvent(event);
430   outputReady=MagickTrue;
431 
432 cleanup:
433   OpenCLLogException(__FUNCTION__,__LINE__,exception);
434 
435   if (imageBuffer != (cl_mem) NULL)
436     clEnv->library->clReleaseMemObject(imageBuffer);
437   if (filteredImageBuffer != (cl_mem) NULL)
438     clEnv->library->clReleaseMemObject(filteredImageBuffer);
439   if (queue!=NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
440   if (addNoiseKernel!=NULL)         RelinquishOpenCLKernel(clEnv, addNoiseKernel);
441   if ((outputReady == MagickFalse) && (filteredImage != NULL))
442     filteredImage=(Image *) DestroyImage(filteredImage);
443 
444   return(filteredImage);
445 }
446 
AccelerateAddNoiseImage(const Image * image,const ChannelType channel,const NoiseType noise_type,ExceptionInfo * exception)447 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
448   const ChannelType channel,const NoiseType noise_type,
449   ExceptionInfo *exception)
450 {
451   /* Temporary disabled because of repetition.
452 
453   Image
454     *filteredImage;
455 
456   assert(image != NULL);
457   assert(exception != (ExceptionInfo *) NULL);
458 
459   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
460       (checkAccelerateCondition(image, channel) == MagickFalse))
461     return NULL;
462 
463   filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
464 
465   return(filteredImage);
466   */
467   magick_unreferenced(image);
468   magick_unreferenced(channel);
469   magick_unreferenced(noise_type);
470   magick_unreferenced(exception);
471   return((Image *)NULL);
472 }
473 
474 /*
475 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
476 %                                                                             %
477 %                                                                             %
478 %                                                                             %
479 %     A c c e l e r a t e B l u r I m a g e                                   %
480 %                                                                             %
481 %                                                                             %
482 %                                                                             %
483 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
484 */
485 
ComputeBlurImage(const Image * image,const ChannelType channel,const double radius,const double sigma,ExceptionInfo * exception)486 static Image *ComputeBlurImage(const Image* image,const ChannelType channel,
487   const double radius,const double sigma,ExceptionInfo *exception)
488 {
489   char
490     geometry[MaxTextExtent];
491 
492   cl_command_queue
493     queue;
494 
495   cl_context
496     context;
497 
498   cl_int
499     clStatus;
500 
501   cl_kernel
502     blurColumnKernel,
503     blurRowKernel;
504 
505   cl_event
506     event;
507 
508   cl_mem
509     filteredImageBuffer,
510     imageBuffer,
511     imageKernelBuffer,
512     tempImageBuffer;
513 
514   cl_uint
515     event_count;
516 
517   cl_event
518     *events;
519 
520   float
521     *kernelBufferPtr;
522 
523   Image
524     *filteredImage;
525 
526   MagickBooleanType
527     outputReady;
528 
529   MagickCLEnv
530     clEnv;
531 
532   MagickSizeType
533     length;
534 
535   KernelInfo
536     *kernel;
537 
538   unsigned int
539     i,
540     imageColumns,
541     imageRows,
542     kernelWidth;
543 
544   context = NULL;
545   filteredImage = NULL;
546   imageBuffer = NULL;
547   tempImageBuffer = NULL;
548   filteredImageBuffer = NULL;
549   imageKernelBuffer = NULL;
550   blurRowKernel = NULL;
551   blurColumnKernel = NULL;
552   queue = NULL;
553   kernel = NULL;
554 
555   outputReady = MagickFalse;
556 
557   clEnv = GetDefaultOpenCLEnv();
558   context = GetOpenCLContext(clEnv);
559   queue = AcquireOpenCLCommandQueue(clEnv);
560 
561   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
562   if (filteredImage == (Image *) NULL)
563     goto cleanup;
564 
565   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
566   if (imageBuffer == (cl_mem) NULL)
567   {
568     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
569       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
570     goto cleanup;
571   }
572   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage,exception);
573   if (filteredImageBuffer == (cl_mem) NULL)
574   {
575     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
576       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
577     goto cleanup;
578   }
579 
580   /* create processing kernel */
581   {
582     (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
583     kernel=AcquireKernelInfo(geometry);
584     if (kernel == (KernelInfo *) NULL)
585     {
586       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
587       goto cleanup;
588     }
589 
590     {
591       kernelBufferPtr = (float *)AcquireMagickMemory(kernel->width * sizeof(float));
592       if (kernelBufferPtr == (float *) NULL)
593         {
594           (void)OpenCLThrowMagickException(exception,GetMagickModule(),
595             ResourceLimitWarning,"AcquireMagickMemory failed.", "'%s'", ".");
596           goto cleanup;
597         }
598       for (i = 0; i < kernel->width; i++)
599         kernelBufferPtr[i] = (float)kernel->values[i];
600 
601       imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, kernel->width * sizeof(float), kernelBufferPtr, &clStatus);
602       RelinquishMagickMemory(kernelBufferPtr);
603       if (clStatus != CL_SUCCESS)
604       {
605         (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.", ".");
606         goto cleanup;
607       }
608     }
609   }
610 
611   {
612 
613     /* create temp buffer */
614     {
615       length = image->columns * image->rows;
616       tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
617       if (clStatus != CL_SUCCESS)
618       {
619         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
620         goto cleanup;
621       }
622     }
623 
624     /* get the OpenCL kernels */
625     {
626       blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
627       if (blurRowKernel == NULL)
628       {
629         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
630         goto cleanup;
631       };
632 
633       blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
634       if (blurColumnKernel == NULL)
635       {
636         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
637         goto cleanup;
638       };
639     }
640 
641     {
642       /* need logic to decide this value */
643       int chunkSize = 256;
644 
645       {
646         imageColumns = (unsigned int) image->columns;
647         imageRows = (unsigned int) image->rows;
648 
649         /* set the kernel arguments */
650         i = 0;
651         clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
652         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
653         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
654         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
655         kernelWidth = (unsigned int) kernel->width;
656         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
657         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
658         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
659         clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
660         if (clStatus != CL_SUCCESS)
661         {
662           (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
663           goto cleanup;
664         }
665       }
666 
667       /* launch the kernel */
668       {
669         size_t gsize[2];
670         size_t wsize[2];
671 
672         gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
673         gsize[1] = image->rows;
674         wsize[0] = chunkSize;
675         wsize[1] = 1;
676 
677         events=GetOpenCLEvents(image,&event_count);
678         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, event_count, events, &event);
679         events=(cl_event *) RelinquishMagickMemory(events);
680         if (clStatus != CL_SUCCESS)
681         {
682           (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
683           goto cleanup;
684         }
685         if (RecordProfileData(clEnv,BlurRowKernel,event) == MagickFalse)
686           {
687             AddOpenCLEvent(image,event);
688             AddOpenCLEvent(filteredImage,event);
689           }
690         clEnv->library->clReleaseEvent(event);
691       }
692     }
693 
694     {
695       /* need logic to decide this value */
696       int chunkSize = 256;
697 
698       {
699         imageColumns = (unsigned int) image->columns;
700         imageRows = (unsigned int) image->rows;
701 
702         /* set the kernel arguments */
703         i = 0;
704         clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
705         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
706         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
707         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
708         kernelWidth = (unsigned int) kernel->width;
709         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
710         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
711         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
712         clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *) NULL);
713         if (clStatus != CL_SUCCESS)
714         {
715           (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
716           goto cleanup;
717         }
718       }
719 
720       /* launch the kernel */
721       {
722         size_t gsize[2];
723         size_t wsize[2];
724 
725         gsize[0] = image->columns;
726         gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
727         wsize[0] = 1;
728         wsize[1] = chunkSize;
729 
730         events=GetOpenCLEvents(image,&event_count);
731         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, event_count, events, &event);
732         events=(cl_event *) RelinquishMagickMemory(events);
733         if (clStatus != CL_SUCCESS)
734         {
735           (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
736           goto cleanup;
737         }
738         if (RecordProfileData(clEnv,BlurColumnKernel,event) == MagickFalse)
739           {
740             AddOpenCLEvent(image,event);
741             AddOpenCLEvent(filteredImage,event);
742           }
743         clEnv->library->clReleaseEvent(event);
744       }
745     }
746 
747   }
748 
749   outputReady=MagickTrue;
750 
751 cleanup:
752   OpenCLLogException(__FUNCTION__,__LINE__,exception);
753 
754   if (imageBuffer != (cl_mem) NULL)
755     clEnv->library->clReleaseMemObject(imageBuffer);
756   if (filteredImageBuffer != (cl_mem) NULL)
757     clEnv->library->clReleaseMemObject(filteredImageBuffer);
758   if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
759   if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
760   if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
761   if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
762   if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
763   if (kernel!=NULL)               DestroyKernelInfo(kernel);
764   if ((outputReady == MagickFalse) && (filteredImage != NULL))
765     filteredImage=(Image *) DestroyImage(filteredImage);
766   return(filteredImage);
767 }
768 
AccelerateBlurImage(const Image * image,const ChannelType channel,const double radius,const double sigma,ExceptionInfo * exception)769 MagickPrivate Image* AccelerateBlurImage(const Image *image,
770   const ChannelType channel,const double radius,const double sigma,
771   ExceptionInfo *exception)
772 {
773   Image
774     *filteredImage;
775 
776   assert(image != NULL);
777   assert(exception != (ExceptionInfo *) NULL);
778 
779   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
780       (checkAccelerateCondition(image, channel) == MagickFalse))
781     return NULL;
782 
783   filteredImage=ComputeBlurImage(image, channel, radius, sigma, exception);
784   return(filteredImage);
785 }
786 
787 /*
788 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
789 %                                                                             %
790 %                                                                             %
791 %                                                                             %
792 %     A c c e l e r a t e C o m p o s i t e I m a g e                         %
793 %                                                                             %
794 %                                                                             %
795 %                                                                             %
796 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
797 */
798 
LaunchCompositeKernel(const Image * image,MagickCLEnv clEnv,cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth,const unsigned int inputHeight,const unsigned int inputMatte,const ChannelType channel,const CompositeOperator compose,const cl_mem compositeImageBuffer,const unsigned int compositeWidth,const unsigned int compositeHeight,const unsigned int compositeMatte,const float destination_dissolve,const float source_dissolve)799 static MagickBooleanType LaunchCompositeKernel(const Image *image,
800   MagickCLEnv clEnv,cl_command_queue queue,cl_mem imageBuffer,
801   const unsigned int inputWidth,const unsigned int inputHeight,
802   const unsigned int inputMatte,const ChannelType channel,
803   const CompositeOperator compose,const cl_mem compositeImageBuffer,
804   const unsigned int compositeWidth,const unsigned int compositeHeight,
805   const unsigned int compositeMatte,const float destination_dissolve,
806   const float source_dissolve)
807 {
808   cl_int
809     clStatus;
810 
811   cl_kernel
812     compositeKernel;
813 
814   cl_event
815     event;
816 
817   cl_uint
818     event_count;
819 
820   cl_event
821     *events;
822 
823   int
824     k;
825 
826   size_t
827     global_work_size[2],
828     local_work_size[2];
829 
830   unsigned int
831     composeOp;
832 
833   compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
834     "Composite");
835 
836   k = 0;
837   clStatus = clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(cl_mem), (void*)&imageBuffer);
838   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&inputWidth);
839   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&inputHeight);
840   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&inputMatte);
841   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(cl_mem), (void*)&compositeImageBuffer);
842   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&compositeWidth);
843   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&compositeHeight);
844   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&compositeMatte);
845   composeOp = (unsigned int)compose;
846   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(unsigned int), (void*)&composeOp);
847   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(ChannelType), (void*)&channel);
848   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(float), (void*)&destination_dissolve);
849   clStatus |= clEnv->library->clSetKernelArg(compositeKernel, k++, sizeof(float), (void*)&source_dissolve);
850 
851   if (clStatus != CL_SUCCESS)
852     return MagickFalse;
853 
854   local_work_size[0] = 64;
855   local_work_size[1] = 1;
856 
857   global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth,
858     (unsigned int)local_work_size[0]);
859   global_work_size[1] = inputHeight;
860   events=GetOpenCLEvents(image,&event_count);
861   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL,
862     global_work_size, local_work_size, event_count, events, &event);
863   events=(cl_event *) RelinquishMagickMemory(events);
864   if (clStatus == CL_SUCCESS)
865     AddOpenCLEvent(image,event);
866   clEnv->library->clReleaseEvent(event);
867 
868   RelinquishOpenCLKernel(clEnv, compositeKernel);
869 
870   return((clStatus == CL_SUCCESS) ? MagickTrue : MagickFalse);
871 }
872 
ComputeCompositeImage(Image * image,const ChannelType channel,const CompositeOperator compose,const Image * compositeImage,const ssize_t magick_unused (x_offset),const ssize_t magick_unused (y_offset),const float destination_dissolve,const float source_dissolve,ExceptionInfo * exception)873 static MagickBooleanType ComputeCompositeImage(Image *image,
874   const ChannelType channel, const CompositeOperator compose,
875   const Image *compositeImage, const ssize_t magick_unused(x_offset),
876   const ssize_t magick_unused(y_offset), const float destination_dissolve,
877   const float source_dissolve, ExceptionInfo *exception)
878 {
879   cl_command_queue
880     queue;
881 
882   cl_context
883     context;
884 
885   cl_mem
886     compositeImageBuffer,
887     imageBuffer;
888 
889   MagickBooleanType
890     outputReady,
891     status;
892 
893   MagickCLEnv
894     clEnv;
895 
896   magick_unreferenced(x_offset);
897   magick_unreferenced(y_offset);
898 
899   status = MagickFalse;
900   outputReady = MagickFalse;
901   imageBuffer = NULL;
902   compositeImageBuffer = NULL;
903 
904   clEnv = GetDefaultOpenCLEnv();
905   context = GetOpenCLContext(clEnv);
906   queue = AcquireOpenCLCommandQueue(clEnv);
907 
908   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
909   if (imageBuffer == (cl_mem) NULL)
910   {
911     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
912       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
913     goto cleanup;
914   }
915 
916   compositeImageBuffer = GetAuthenticOpenCLBuffer(compositeImage,exception);
917   if (compositeImageBuffer == (cl_mem) NULL)
918   {
919     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
920       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
921     goto cleanup;
922   }
923 
924   status = LaunchCompositeKernel(image,clEnv, queue, imageBuffer,
925     (unsigned int)image->columns,
926     (unsigned int)image->rows,
927     (unsigned int)image->matte,
928     channel, compose, compositeImageBuffer,
929     (unsigned int)compositeImage->columns,
930     (unsigned int)compositeImage->rows,
931     (unsigned int)compositeImage->matte,
932     destination_dissolve, source_dissolve);
933 
934   if (status == MagickFalse)
935     goto cleanup;
936 
937   outputReady = MagickTrue;
938 
939 cleanup:
940 
941   if (imageBuffer != (cl_mem) NULL)
942     clEnv->library->clReleaseMemObject(imageBuffer);
943   if (compositeImageBuffer != (cl_mem) NULL)
944     clEnv->library->clReleaseMemObject(compositeImageBuffer);
945   if (queue != NULL)
946     RelinquishOpenCLCommandQueue(clEnv, queue);
947 
948   return(outputReady);
949 }
950 
AccelerateCompositeImage(Image * image,const ChannelType channel,const CompositeOperator compose,const Image * composite,const ssize_t x_offset,const ssize_t y_offset,const float destination_dissolve,const float source_dissolve,ExceptionInfo * exception)951 MagickPrivate MagickBooleanType AccelerateCompositeImage(Image *image,
952   const ChannelType channel, const CompositeOperator compose,
953   const Image *composite, const ssize_t x_offset, const ssize_t y_offset,
954   const float destination_dissolve, const float source_dissolve,
955   ExceptionInfo *exception)
956 {
957   MagickBooleanType
958     status;
959 
960   assert(image != NULL);
961   assert(exception != (ExceptionInfo *)NULL);
962 
963   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
964     (checkAccelerateCondition(image, channel) == MagickFalse))
965     return(MagickFalse);
966 
967   /* only support zero offset and
968   images with the size for now */
969   if (x_offset != 0
970     || y_offset != 0
971     || image->columns != composite->columns
972     || image->rows != composite->rows)
973     return MagickFalse;
974 
975   switch (compose) {
976   case ColorDodgeCompositeOp:
977   case BlendCompositeOp:
978     break;
979   default:
980     /* unsupported compose operator, quit */
981     return MagickFalse;
982   };
983 
984   status = ComputeCompositeImage(image, channel, compose, composite,
985     x_offset, y_offset, destination_dissolve, source_dissolve, exception);
986 
987   return(status);
988 }
989 
990 /*
991 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
992 %                                                                             %
993 %                                                                             %
994 %                                                                             %
995 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
996 %                                                                             %
997 %                                                                             %
998 %                                                                             %
999 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1000 */
1001 
ComputeContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)1002 static MagickBooleanType ComputeContrastImage(Image *image,
1003   const MagickBooleanType sharpen,ExceptionInfo *exception)
1004 {
1005   cl_command_queue
1006     queue;
1007 
1008   cl_context
1009     context;
1010 
1011   cl_int
1012     clStatus;
1013 
1014   cl_kernel
1015     filterKernel;
1016 
1017   cl_event
1018     event;
1019 
1020   cl_mem
1021     imageBuffer;
1022 
1023   cl_uint
1024     event_count;
1025 
1026   cl_event
1027     *events;
1028 
1029   MagickBooleanType
1030     outputReady;
1031 
1032   MagickCLEnv
1033     clEnv;
1034 
1035   size_t
1036     global_work_size[2];
1037 
1038   unsigned int
1039     i,
1040     uSharpen;
1041 
1042   outputReady = MagickFalse;
1043   clEnv = NULL;
1044   context = NULL;
1045   imageBuffer = NULL;
1046   filterKernel = NULL;
1047   queue = NULL;
1048 
1049   clEnv = GetDefaultOpenCLEnv();
1050   context = GetOpenCLContext(clEnv);
1051 
1052   imageBuffer=GetAuthenticOpenCLBuffer(image,exception);
1053   if (imageBuffer == (cl_mem) NULL)
1054   {
1055     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
1056       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
1057     goto cleanup;
1058   }
1059 
1060   filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
1061   if (filterKernel == NULL)
1062   {
1063     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1064     goto cleanup;
1065   }
1066 
1067   i = 0;
1068   clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1069 
1070   uSharpen = (sharpen == MagickFalse)?0:1;
1071   clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
1072   if (clStatus != CL_SUCCESS)
1073   {
1074     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1075     goto cleanup;
1076   }
1077 
1078   global_work_size[0] = image->columns;
1079   global_work_size[1] = image->rows;
1080   /* launch the kernel */
1081   queue = AcquireOpenCLCommandQueue(clEnv);
1082   events=GetOpenCLEvents(image,&event_count);
1083   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, event_count, events, &event);
1084   events=(cl_event *) RelinquishMagickMemory(events);
1085   if (clStatus != CL_SUCCESS)
1086   {
1087     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1088     goto cleanup;
1089   }
1090   if (RecordProfileData(clEnv,ContrastKernel,event) == MagickFalse)
1091     AddOpenCLEvent(image,event);
1092   clEnv->library->clReleaseEvent(event);
1093   outputReady=MagickTrue;
1094 
1095 cleanup:
1096   OpenCLLogException(__FUNCTION__,__LINE__,exception);
1097 
1098 
1099   if (imageBuffer != (cl_mem) NULL)
1100     clEnv->library->clReleaseMemObject(imageBuffer);
1101   if (filterKernel!=NULL)                     RelinquishOpenCLKernel(clEnv, filterKernel);
1102   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
1103   return(outputReady);
1104 }
1105 
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)1106 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
1107   const MagickBooleanType sharpen,ExceptionInfo *exception)
1108 {
1109   MagickBooleanType
1110     status;
1111 
1112   assert(image != NULL);
1113   assert(exception != (ExceptionInfo *) NULL);
1114 
1115   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1116       (checkAccelerateCondition(image, AllChannels) == MagickFalse))
1117     return(MagickFalse);
1118 
1119   status = ComputeContrastImage(image,sharpen,exception);
1120   return(status);
1121 }
1122 
1123 /*
1124 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1125 %                                                                             %
1126 %                                                                             %
1127 %                                                                             %
1128 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
1129 %                                                                             %
1130 %                                                                             %
1131 %                                                                             %
1132 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1133 */
1134 
LaunchHistogramKernel(MagickCLEnv clEnv,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)1135 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
1136   cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,
1137   Image *image,const ChannelType channel,ExceptionInfo *exception)
1138 {
1139   MagickBooleanType
1140     outputReady;
1141 
1142   cl_event
1143     event;
1144 
1145   cl_int
1146     clStatus,
1147     colorspace,
1148     method;
1149 
1150   cl_kernel
1151     histogramKernel;
1152 
1153   cl_uint
1154     event_count;
1155 
1156   cl_event
1157     *events;
1158 
1159   ssize_t
1160     i;
1161 
1162   size_t
1163     global_work_size[2];
1164 
1165   histogramKernel = NULL;
1166 
1167   outputReady = MagickFalse;
1168   method = image->intensity;
1169   colorspace = image->colorspace;
1170 
1171   /* get the OpenCL kernel */
1172   histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
1173   if (histogramKernel == NULL)
1174   {
1175     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1176     goto cleanup;
1177   }
1178 
1179   /* set the kernel arguments */
1180   i = 0;
1181   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1182   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
1183   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&method);
1184   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
1185   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
1186   if (clStatus != CL_SUCCESS)
1187   {
1188     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1189     goto cleanup;
1190   }
1191 
1192   /* launch the kernel */
1193   global_work_size[0] = image->columns;
1194   global_work_size[1] = image->rows;
1195 
1196   events=GetOpenCLEvents(image,&event_count);
1197   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, event_count, events, &event);
1198   events=(cl_event *) RelinquishMagickMemory(events);
1199 
1200   if (clStatus != CL_SUCCESS)
1201   {
1202     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1203     goto cleanup;
1204   }
1205   if (RecordProfileData(clEnv,HistogramKernel,event) == MagickFalse)
1206     AddOpenCLEvent(image,event);
1207   clEnv->library->clReleaseEvent(event);
1208 
1209   outputReady = MagickTrue;
1210 
1211 cleanup:
1212   OpenCLLogException(__FUNCTION__,__LINE__,exception);
1213 
1214   if (histogramKernel!=NULL)
1215     RelinquishOpenCLKernel(clEnv, histogramKernel);
1216 
1217   return(outputReady);
1218 }
1219 
ComputeContrastStretchImageChannel(Image * image,const ChannelType channel,const double black_point,const double white_point,ExceptionInfo * exception)1220 MagickPrivate MagickBooleanType ComputeContrastStretchImageChannel(Image *image,
1221   const ChannelType channel,const double black_point,const double white_point,
1222   ExceptionInfo *exception)
1223 {
1224 #define ContrastStretchImageTag  "ContrastStretch/Image"
1225 #define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
1226   cl_command_queue
1227     queue;
1228 
1229   cl_context
1230     context;
1231 
1232   cl_int
1233     clStatus;
1234 
1235   cl_mem
1236     histogramBuffer,
1237     imageBuffer,
1238     stretchMapBuffer;
1239 
1240   cl_kernel
1241     histogramKernel,
1242     stretchKernel;
1243 
1244   cl_event
1245     event;
1246 
1247   cl_uint
1248     event_count;
1249 
1250   cl_uint4
1251     *histogram;
1252 
1253   cl_event
1254     *events;
1255 
1256   double
1257     intensity;
1258 
1259   cl_float4
1260     black,
1261     white;
1262 
1263   MagickBooleanType
1264     outputReady,
1265     status;
1266 
1267   MagickCLEnv
1268     clEnv;
1269 
1270   MagickSizeType
1271     length;
1272 
1273   PixelPacket
1274     *stretch_map;
1275 
1276   ssize_t
1277     i;
1278 
1279   size_t
1280     global_work_size[2];
1281 
1282   histogram=NULL;
1283   stretch_map=NULL;
1284   imageBuffer = NULL;
1285   histogramBuffer = NULL;
1286   stretchMapBuffer = NULL;
1287   histogramKernel = NULL;
1288   stretchKernel = NULL;
1289   context = NULL;
1290   queue = NULL;
1291   outputReady = MagickFalse;
1292 
1293 
1294   assert(image != (Image *) NULL);
1295   assert(image->signature == MagickCoreSignature);
1296   if (image->debug != MagickFalse)
1297     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1298 
1299   /* exception=(&image->exception); */
1300 
1301   /*
1302    * initialize opencl env
1303    */
1304   clEnv = GetDefaultOpenCLEnv();
1305   context = GetOpenCLContext(clEnv);
1306   queue = AcquireOpenCLCommandQueue(clEnv);
1307 
1308   /*
1309     Allocate and initialize histogram arrays.
1310   */
1311   length = (MaxMap+1);
1312   histogram=(cl_uint4 *) AcquireQuantumMemory(length, sizeof(*histogram));
1313 
1314   if (histogram == (cl_uint4 *) NULL)
1315     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1316 
1317   /* reset histogram */
1318   (void) memset(histogram,0,length*sizeof(*histogram));
1319 
1320   /*
1321   if (SetImageGray(image,exception) != MagickFalse)
1322     (void) SetImageColorspace(image,GRAYColorspace);
1323   */
1324 
1325   status=MagickTrue;
1326 
1327   imageBuffer=GetAuthenticOpenCLBuffer(image,exception);
1328   if (imageBuffer == (cl_mem) NULL)
1329   {
1330     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
1331       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
1332     goto cleanup;
1333   }
1334 
1335   /* create a CL buffer for histogram  */
1336   histogramBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, length * sizeof(cl_uint4), histogram, &clStatus);
1337   if (clStatus != CL_SUCCESS)
1338   {
1339     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1340     goto cleanup;
1341   }
1342 
1343   status = LaunchHistogramKernel(clEnv, queue, imageBuffer, histogramBuffer, image, channel, exception);
1344   if (status == MagickFalse)
1345     goto cleanup;
1346 
1347   /* this blocks, should be fixed it in the future */
1348   events=GetOpenCLEvents(image,&event_count);
1349   clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), event_count, events, NULL, &clStatus);
1350   events=(cl_event *) RelinquishMagickMemory(events);
1351   if (clStatus != CL_SUCCESS)
1352   {
1353     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
1354     goto cleanup;
1355   }
1356 
1357   /* unmap, don't block gpu to use this buffer again.  */
1358   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1359   if (clStatus != CL_SUCCESS)
1360   {
1361     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
1362     goto cleanup;
1363   }
1364 
1365   /* CPU stuff */
1366   /*
1367      Find the histogram boundaries by locating the black/white levels.
1368   */
1369   black.z=0.0;
1370   white.z=MaxRange(QuantumRange);
1371   if ((channel & RedChannel) != 0)
1372   {
1373     intensity=0.0;
1374     for (i=0; i <= (ssize_t) MaxMap; i++)
1375     {
1376       intensity+=histogram[i].s[2];
1377       if (intensity > black_point)
1378         break;
1379     }
1380     black.z=(MagickRealType) i;
1381     intensity=0.0;
1382     for (i=(ssize_t) MaxMap; i != 0; i--)
1383     {
1384       intensity+=histogram[i].s[2];
1385       if (intensity > ((double) image->columns*image->rows-white_point))
1386         break;
1387     }
1388     white.z=(MagickRealType) i;
1389   }
1390   black.y=0.0;
1391   white.y=MaxRange(QuantumRange);
1392   if ((channel & GreenChannel) != 0)
1393   {
1394     intensity=0.0;
1395     for (i=0; i <= (ssize_t) MaxMap; i++)
1396     {
1397       intensity+=histogram[i].s[2];
1398       if (intensity > black_point)
1399         break;
1400     }
1401     black.y=(MagickRealType) i;
1402     intensity=0.0;
1403     for (i=(ssize_t) MaxMap; i != 0; i--)
1404     {
1405       intensity+=histogram[i].s[2];
1406       if (intensity > ((double) image->columns*image->rows-white_point))
1407         break;
1408     }
1409     white.y=(MagickRealType) i;
1410   }
1411   black.x=0.0;
1412   white.x=MaxRange(QuantumRange);
1413   if ((channel & BlueChannel) != 0)
1414   {
1415     intensity=0.0;
1416     for (i=0; i <= (ssize_t) MaxMap; i++)
1417     {
1418       intensity+=histogram[i].s[2];
1419       if (intensity > black_point)
1420         break;
1421     }
1422     black.x=(MagickRealType) i;
1423     intensity=0.0;
1424     for (i=(ssize_t) MaxMap; i != 0; i--)
1425     {
1426       intensity+=histogram[i].s[2];
1427       if (intensity > ((double) image->columns*image->rows-white_point))
1428         break;
1429     }
1430     white.x=(MagickRealType) i;
1431   }
1432   black.w=0.0;
1433   white.w=MaxRange(QuantumRange);
1434   if ((channel & OpacityChannel) != 0)
1435   {
1436     intensity=0.0;
1437     for (i=0; i <= (ssize_t) MaxMap; i++)
1438     {
1439       intensity+=histogram[i].s[2];
1440       if (intensity > black_point)
1441         break;
1442     }
1443     black.w=(MagickRealType) i;
1444     intensity=0.0;
1445     for (i=(ssize_t) MaxMap; i != 0; i--)
1446     {
1447       intensity+=histogram[i].s[2];
1448       if (intensity > ((double) image->columns*image->rows-white_point))
1449         break;
1450     }
1451     white.w=(MagickRealType) i;
1452   }
1453   /*
1454   black.index=0.0;
1455   white.index=MaxRange(QuantumRange);
1456   if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
1457   {
1458     intensity=0.0;
1459     for (i=0; i <= (ssize_t) MaxMap; i++)
1460     {
1461       intensity+=histogram[i].index;
1462       if (intensity > black_point)
1463         break;
1464     }
1465     black.index=(MagickRealType) i;
1466     intensity=0.0;
1467     for (i=(ssize_t) MaxMap; i != 0; i--)
1468     {
1469       intensity+=histogram[i].index;
1470       if (intensity > ((double) image->columns*image->rows-white_point))
1471         break;
1472     }
1473     white.index=(MagickRealType) i;
1474   }
1475   */
1476 
1477 
1478   stretch_map=(PixelPacket *) AcquireQuantumMemory(length,
1479     sizeof(*stretch_map));
1480 
1481   if (stretch_map == (PixelPacket *) NULL)
1482     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1483       image->filename);
1484 
1485   /*
1486     Stretch the histogram to create the stretched image mapping.
1487   */
1488   (void) memset(stretch_map,0,length*sizeof(*stretch_map));
1489   for (i=0; i <= (ssize_t) MaxMap; i++)
1490   {
1491     if ((channel & RedChannel) != 0)
1492     {
1493       if (i < (ssize_t) black.z)
1494         stretch_map[i].red=(Quantum) 0;
1495       else
1496         if (i > (ssize_t) white.z)
1497           stretch_map[i].red=QuantumRange;
1498         else
1499           if (black.z != white.z)
1500             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1501                   (i-black.z)/(white.z-black.z)));
1502     }
1503     if ((channel & GreenChannel) != 0)
1504     {
1505       if (i < (ssize_t) black.y)
1506         stretch_map[i].green=0;
1507       else
1508         if (i > (ssize_t) white.y)
1509           stretch_map[i].green=QuantumRange;
1510         else
1511           if (black.y != white.y)
1512             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1513                   (i-black.y)/(white.y-black.y)));
1514     }
1515     if ((channel & BlueChannel) != 0)
1516     {
1517       if (i < (ssize_t) black.x)
1518         stretch_map[i].blue=0;
1519       else
1520         if (i > (ssize_t) white.x)
1521           stretch_map[i].blue= QuantumRange;
1522         else
1523           if (black.x != white.x)
1524             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1525                   (i-black.x)/(white.x-black.x)));
1526     }
1527     if ((channel & OpacityChannel) != 0)
1528     {
1529       if (i < (ssize_t) black.w)
1530         stretch_map[i].opacity=0;
1531       else
1532         if (i > (ssize_t) white.w)
1533           stretch_map[i].opacity=QuantumRange;
1534         else
1535           if (black.w != white.w)
1536             stretch_map[i].opacity=ScaleMapToQuantum((MagickRealType) (MaxMap*
1537                   (i-black.w)/(white.w-black.w)));
1538     }
1539     /*
1540     if (((channel & IndexChannel) != 0) &&
1541         (image->colorspace == CMYKColorspace))
1542     {
1543       if (i < (ssize_t) black.index)
1544         stretch_map[i].index=0;
1545       else
1546         if (i > (ssize_t) white.index)
1547           stretch_map[i].index=QuantumRange;
1548         else
1549           if (black.index != white.index)
1550             stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
1551                   (i-black.index)/(white.index-black.index)));
1552     }
1553     */
1554   }
1555 
1556   /*
1557     Stretch the image.
1558   */
1559   if (((channel & OpacityChannel) != 0) || (((channel & IndexChannel) != 0) &&
1560       (image->colorspace == CMYKColorspace)))
1561     image->storage_class=DirectClass;
1562   if (image->storage_class == PseudoClass)
1563   {
1564     /*
1565        Stretch colormap.
1566        */
1567     for (i=0; i < (ssize_t) image->colors; i++)
1568     {
1569       if ((channel & RedChannel) != 0)
1570       {
1571         if (black.z != white.z)
1572           image->colormap[i].red=stretch_map[
1573             ScaleQuantumToMap(image->colormap[i].red)].red;
1574       }
1575       if ((channel & GreenChannel) != 0)
1576       {
1577         if (black.y != white.y)
1578           image->colormap[i].green=stretch_map[
1579             ScaleQuantumToMap(image->colormap[i].green)].green;
1580       }
1581       if ((channel & BlueChannel) != 0)
1582       {
1583         if (black.x != white.x)
1584           image->colormap[i].blue=stretch_map[
1585             ScaleQuantumToMap(image->colormap[i].blue)].blue;
1586       }
1587       if ((channel & OpacityChannel) != 0)
1588       {
1589         if (black.w != white.w)
1590           image->colormap[i].opacity=stretch_map[
1591             ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
1592       }
1593     }
1594   }
1595 
1596 
1597   /* create a CL buffer for stretch_map  */
1598   stretchMapBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, length, stretch_map, &clStatus);
1599   if (clStatus != CL_SUCCESS)
1600   {
1601     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1602     goto cleanup;
1603   }
1604 
1605   /* get the OpenCL kernel */
1606   stretchKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ContrastStretch");
1607   if (stretchKernel == NULL)
1608   {
1609     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1610     goto cleanup;
1611   }
1612 
1613   /* set the kernel arguments */
1614   i = 0;
1615   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1616   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&channel);
1617   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1618   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
1619   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
1620   if (clStatus != CL_SUCCESS)
1621   {
1622     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1623     goto cleanup;
1624   }
1625 
1626   /* launch the kernel */
1627   global_work_size[0] = image->columns;
1628   global_work_size[1] = image->rows;
1629 
1630   events=GetOpenCLEvents(image,&event_count);
1631   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, event_count, events, &event);
1632   events=(cl_event *) RelinquishMagickMemory(events);
1633 
1634   if (clStatus != CL_SUCCESS)
1635   {
1636     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1637     goto cleanup;
1638   }
1639 
1640   if (RecordProfileData(clEnv,ContrastStretchKernel,event) == MagickFalse)
1641     AddOpenCLEvent(image, event);
1642   clEnv->library->clReleaseEvent(event);
1643 
1644   outputReady=MagickTrue;
1645 
1646 cleanup:
1647   OpenCLLogException(__FUNCTION__,__LINE__,exception);
1648 
1649   if (imageBuffer != (cl_mem) NULL)
1650     clEnv->library->clReleaseMemObject(imageBuffer);
1651 
1652   if (stretchMapBuffer!=NULL)
1653     clEnv->library->clReleaseMemObject(stretchMapBuffer);
1654   if (stretch_map!=NULL)
1655     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1656 
1657 
1658   if (histogramBuffer!=NULL)
1659     clEnv->library->clReleaseMemObject(histogramBuffer);
1660   if (histogram!=NULL)
1661     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1662 
1663 
1664   if (histogramKernel!=NULL)
1665     RelinquishOpenCLKernel(clEnv, histogramKernel);
1666   if (stretchKernel!=NULL)
1667     RelinquishOpenCLKernel(clEnv, stretchKernel);
1668 
1669   if (queue != NULL)
1670     RelinquishOpenCLCommandQueue(clEnv, queue);
1671 
1672   return(outputReady);
1673 }
1674 
AccelerateContrastStretchImageChannel(Image * image,const ChannelType channel,const double black_point,const double white_point,ExceptionInfo * exception)1675 MagickPrivate MagickBooleanType AccelerateContrastStretchImageChannel(
1676   Image *image,const ChannelType channel,const double black_point,
1677   const double white_point,ExceptionInfo *exception)
1678 {
1679   MagickBooleanType
1680     status;
1681 
1682   assert(image != NULL);
1683   assert(exception != (ExceptionInfo *) NULL);
1684 
1685   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1686       (checkAccelerateCondition(image, channel) == MagickFalse) ||
1687       (checkHistogramCondition(image, channel) == MagickFalse))
1688     return(MagickFalse);
1689 
1690   status=ComputeContrastStretchImageChannel(image,channel, black_point, white_point, exception);
1691   return(status);
1692 }
1693 
1694 /*
1695 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1696 %                                                                             %
1697 %                                                                             %
1698 %                                                                             %
1699 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
1700 %                                                                             %
1701 %                                                                             %
1702 %                                                                             %
1703 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1704 */
1705 
ComputeConvolveImage(const Image * image,const ChannelType channel,const KernelInfo * kernel,ExceptionInfo * exception)1706 static Image *ComputeConvolveImage(const Image* image,
1707   const ChannelType channel,const KernelInfo *kernel,ExceptionInfo *exception)
1708 {
1709   cl_command_queue
1710     queue;
1711 
1712   cl_context
1713     context;
1714 
1715   cl_kernel
1716     clkernel;
1717 
1718   cl_event
1719     event;
1720 
1721   cl_int
1722     clStatus;
1723 
1724   cl_mem
1725     convolutionKernel,
1726     filteredImageBuffer,
1727     imageBuffer;
1728 
1729   cl_uint
1730     event_count;
1731 
1732   cl_ulong
1733     deviceLocalMemorySize;
1734 
1735   cl_event
1736     *events;
1737 
1738   float
1739     *kernelBufferPtr;
1740 
1741   Image
1742     *filteredImage;
1743 
1744   MagickBooleanType
1745     outputReady;
1746 
1747   MagickCLEnv
1748     clEnv;
1749 
1750   size_t
1751     global_work_size[3],
1752     localGroupSize[3],
1753     localMemoryRequirement;
1754 
1755   unsigned
1756     kernelSize;
1757 
1758   unsigned int
1759     filterHeight,
1760     filterWidth,
1761     i,
1762     imageHeight,
1763     imageWidth,
1764     matte;
1765 
1766   /* intialize all CL objects to NULL */
1767   context = NULL;
1768   imageBuffer = NULL;
1769   filteredImageBuffer = NULL;
1770   convolutionKernel = NULL;
1771   clkernel = NULL;
1772   queue = NULL;
1773 
1774   filteredImage = NULL;
1775   outputReady = MagickFalse;
1776 
1777   clEnv = GetDefaultOpenCLEnv();
1778 
1779   context = GetOpenCLContext(clEnv);
1780 
1781   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1782   if (filteredImage == (Image *) NULL)
1783     goto cleanup;
1784 
1785   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
1786   if (imageBuffer == (cl_mem) NULL)
1787   {
1788     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
1789       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
1790     goto cleanup;
1791   }
1792   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,exception);
1793   if (filteredImageBuffer == (cl_mem) NULL)
1794   {
1795     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
1796       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
1797     goto cleanup;
1798   }
1799 
1800   kernelSize = (unsigned int) (kernel->width * kernel->height);
1801   convolutionKernel = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1802   if (clStatus != CL_SUCCESS)
1803   {
1804     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1805     goto cleanup;
1806   }
1807 
1808   queue = AcquireOpenCLCommandQueue(clEnv);
1809 
1810   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1811           , 0, NULL, NULL, &clStatus);
1812   if (clStatus != CL_SUCCESS)
1813   {
1814     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1815     goto cleanup;
1816   }
1817   for (i = 0; i < kernelSize; i++)
1818   {
1819     kernelBufferPtr[i] = (float) kernel->values[i];
1820   }
1821   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1822   if (clStatus != CL_SUCCESS)
1823   {
1824     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
1825     goto cleanup;
1826   }
1827 
1828   deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
1829 
1830   /* Compute the local memory requirement for a 16x16 workgroup.
1831      If it's larger than 16k, reduce the workgroup size to 8x8 */
1832   localGroupSize[0] = 16;
1833   localGroupSize[1] = 16;
1834   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1835     + kernel->width*kernel->height*sizeof(float);
1836 
1837   if (localMemoryRequirement > deviceLocalMemorySize)
1838   {
1839     localGroupSize[0] = 8;
1840     localGroupSize[1] = 8;
1841     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1842       + kernel->width*kernel->height*sizeof(float);
1843   }
1844   if (localMemoryRequirement <= deviceLocalMemorySize)
1845   {
1846     /* get the OpenCL kernel */
1847     clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ConvolveOptimized");
1848     if (clkernel == NULL)
1849     {
1850       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1851       goto cleanup;
1852     }
1853 
1854     /* set the kernel arguments */
1855     i = 0;
1856     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1857     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1858     imageWidth = (unsigned int) image->columns;
1859     imageHeight = (unsigned int) image->rows;
1860     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1861     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1862     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1863     filterWidth = (unsigned int) kernel->width;
1864     filterHeight = (unsigned int) kernel->height;
1865     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1866     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1867     matte = (image->matte==MagickTrue)?1:0;
1868     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1869     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
1870     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1871     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1872     if (clStatus != CL_SUCCESS)
1873     {
1874       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1875       goto cleanup;
1876     }
1877 
1878     /* pad the global size to a multiple of the local work size dimension */
1879     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1880     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1881 
1882     /* launch the kernel */
1883     events = GetOpenCLEvents(image, &event_count);
1884     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, event_count, events, &event);
1885     events=(cl_event *) RelinquishMagickMemory(events);
1886     if (clStatus != CL_SUCCESS)
1887     {
1888       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1889       goto cleanup;
1890     }
1891     if (RecordProfileData(clEnv,ConvolveKernel,event) == MagickFalse)
1892       {
1893         AddOpenCLEvent(image, event);
1894         AddOpenCLEvent(filteredImage, event);
1895       }
1896     clEnv->library->clReleaseEvent(event);
1897   }
1898   else
1899   {
1900     /* get the OpenCL kernel */
1901     clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
1902     if (clkernel == NULL)
1903     {
1904       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1905       goto cleanup;
1906     }
1907 
1908     /* set the kernel arguments */
1909     i = 0;
1910     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1911     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1912     imageWidth = (unsigned int) image->columns;
1913     imageHeight = (unsigned int) image->rows;
1914     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1915     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1916     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1917     filterWidth = (unsigned int) kernel->width;
1918     filterHeight = (unsigned int) kernel->height;
1919     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1920     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1921     matte = (image->matte==MagickTrue)?1:0;
1922     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1923     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
1924     if (clStatus != CL_SUCCESS)
1925     {
1926       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1927       goto cleanup;
1928     }
1929 
1930     localGroupSize[0] = 8;
1931     localGroupSize[1] = 8;
1932     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1933     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1934     events=GetOpenCLEvents(image,&event_count);
1935     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, event_count, events, &event);
1936     events=(cl_event *) RelinquishMagickMemory(events);
1937 
1938     if (clStatus != CL_SUCCESS)
1939     {
1940       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1941       goto cleanup;
1942     }
1943     if (RecordProfileData(clEnv,ConvolveKernel,event) == MagickFalse)
1944       {
1945         AddOpenCLEvent(image,event);
1946         AddOpenCLEvent(filteredImage,event);
1947       }
1948     clEnv->library->clReleaseEvent(event);
1949   }
1950 
1951   outputReady = MagickTrue;
1952 
1953 cleanup:
1954   OpenCLLogException(__FUNCTION__,__LINE__,exception);
1955 
1956   if (imageBuffer != (cl_mem) NULL)
1957     clEnv->library->clReleaseMemObject(imageBuffer);
1958 
1959   if (filteredImageBuffer != (cl_mem) NULL)
1960     clEnv->library->clReleaseMemObject(filteredImageBuffer);
1961 
1962   if (convolutionKernel != NULL)
1963     clEnv->library->clReleaseMemObject(convolutionKernel);
1964 
1965   if (clkernel != NULL)
1966     RelinquishOpenCLKernel(clEnv, clkernel);
1967 
1968   if (queue != NULL)
1969     RelinquishOpenCLCommandQueue(clEnv, queue);
1970 
1971   if ((outputReady == MagickFalse) && (filteredImage != NULL))
1972     filteredImage=(Image *) DestroyImage(filteredImage);
1973 
1974   return(filteredImage);
1975 }
1976 
AccelerateConvolveImageChannel(const Image * image,const ChannelType channel,const KernelInfo * kernel,ExceptionInfo * exception)1977 MagickPrivate Image *AccelerateConvolveImageChannel(const Image *image,
1978   const ChannelType channel,const KernelInfo *kernel,ExceptionInfo *exception)
1979 {
1980   Image
1981     *filteredImage;
1982 
1983   assert(image != NULL);
1984   assert(kernel != (KernelInfo *) NULL);
1985   assert(exception != (ExceptionInfo *) NULL);
1986 
1987   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1988       (checkAccelerateCondition(image, channel) == MagickFalse))
1989     return NULL;
1990 
1991   filteredImage=ComputeConvolveImage(image, channel, kernel, exception);
1992   return(filteredImage);
1993 }
1994 
1995 /*
1996 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1997 %                                                                             %
1998 %                                                                             %
1999 %                                                                             %
2000 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
2001 %                                                                             %
2002 %                                                                             %
2003 %                                                                             %
2004 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2005 */
2006 
ComputeDespeckleImage(const Image * image,ExceptionInfo * exception)2007 static Image *ComputeDespeckleImage(const Image *image,
2008   ExceptionInfo*exception)
2009 {
2010   static const int
2011     X[4] = {0, 1, 1,-1},
2012     Y[4] = {1, 0, 1, 1};
2013 
2014   cl_command_queue
2015     queue;
2016 
2017   cl_context
2018     context;
2019 
2020   cl_int
2021     clStatus;
2022 
2023   cl_kernel
2024     hullPass1,
2025     hullPass2;
2026 
2027   cl_event
2028     event;
2029 
2030   cl_mem
2031     filteredImageBuffer,
2032     imageBuffer,
2033     tempImageBuffer[2];
2034 
2035   cl_uint
2036     event_count;
2037 
2038   cl_event
2039     *events;
2040 
2041   Image
2042     *filteredImage;
2043 
2044   int
2045     k,
2046     matte;
2047 
2048   MagickBooleanType
2049     outputReady;
2050 
2051   MagickCLEnv
2052     clEnv;
2053 
2054   size_t
2055     global_work_size[2];
2056 
2057   unsigned int
2058     imageHeight,
2059     imageWidth;
2060 
2061   outputReady = MagickFalse;
2062   clEnv = NULL;
2063   filteredImage = NULL;
2064   context = NULL;
2065   imageBuffer = NULL;
2066   filteredImageBuffer = NULL;
2067   hullPass1 = NULL;
2068   hullPass2 = NULL;
2069   queue = NULL;
2070   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
2071   clEnv = GetDefaultOpenCLEnv();
2072   context = GetOpenCLContext(clEnv);
2073   queue = AcquireOpenCLCommandQueue(clEnv);
2074   events = NULL;
2075 
2076   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
2077   if (filteredImage == (Image *) NULL)
2078     goto cleanup;
2079 
2080   imageBuffer = GetAuthenticOpenCLBuffer(image, exception);
2081   if (imageBuffer == (cl_mem) NULL)
2082   {
2083     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
2084       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
2085     goto cleanup;
2086   }
2087   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage, exception);
2088   if (filteredImageBuffer == (cl_mem) NULL)
2089   {
2090     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
2091       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
2092     goto cleanup;
2093   }
2094 
2095   hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
2096   hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
2097 
2098   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2099   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2100   imageWidth = (unsigned int) image->columns;
2101   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2102   imageHeight = (unsigned int) image->rows;
2103   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2104   matte = (image->matte==MagickFalse)?0:1;
2105   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2106   if (clStatus != CL_SUCCESS)
2107   {
2108     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2109     goto cleanup;
2110   }
2111 
2112   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2113   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2114   imageWidth = (unsigned int) image->columns;
2115   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2116   imageHeight = (unsigned int) image->rows;
2117   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2118   matte = (image->matte==MagickFalse)?0:1;
2119   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2120   if (clStatus != CL_SUCCESS)
2121   {
2122     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2123     goto cleanup;
2124   }
2125 
2126 
2127   global_work_size[0] = image->columns;
2128   global_work_size[1] = image->rows;
2129 
2130   events=GetOpenCLEvents(image,&event_count);
2131   for (k = 0; k < 4; k++)
2132   {
2133     cl_int2 offset;
2134     int polarity;
2135 
2136 
2137     offset.s[0] = X[k];
2138     offset.s[1] = Y[k];
2139     polarity = 1;
2140     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2141     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2142     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2143     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2144     if (clStatus != CL_SUCCESS)
2145     {
2146       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2147       goto cleanup;
2148     }
2149     /* launch the kernel */
2150     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, event_count, events, &event);
2151     if (clStatus != CL_SUCCESS)
2152     {
2153       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2154       goto cleanup;
2155     }
2156     RecordProfileData(clEnv,HullPass1Kernel,event);
2157     clEnv->library->clReleaseEvent(event);
2158     /* launch the kernel */
2159     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, event_count, events, &event);
2160     if (clStatus != CL_SUCCESS)
2161     {
2162       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2163       goto cleanup;
2164     }
2165     RecordProfileData(clEnv,HullPass2Kernel,event);
2166     clEnv->library->clReleaseEvent(event);
2167 
2168 
2169     if (k == 0)
2170       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2171     offset.s[0] = -X[k];
2172     offset.s[1] = -Y[k];
2173     polarity = 1;
2174     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2175     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2176     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2177     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2178     if (clStatus != CL_SUCCESS)
2179     {
2180       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2181       goto cleanup;
2182     }
2183     /* launch the kernel */
2184     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, event_count, events, &event);
2185     if (clStatus != CL_SUCCESS)
2186     {
2187       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2188       goto cleanup;
2189     }
2190     RecordProfileData(clEnv,HullPass1Kernel,event);
2191     clEnv->library->clReleaseEvent(event);
2192     /* launch the kernel */
2193     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, event_count, events, &event);
2194     if (clStatus != CL_SUCCESS)
2195     {
2196       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2197       goto cleanup;
2198     }
2199     RecordProfileData(clEnv,HullPass2Kernel,event);
2200     clEnv->library->clReleaseEvent(event);
2201 
2202     offset.s[0] = -X[k];
2203     offset.s[1] = -Y[k];
2204     polarity = -1;
2205     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2206     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2207     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2208     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2209     if (clStatus != CL_SUCCESS)
2210     {
2211       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2212       goto cleanup;
2213     }
2214     /* launch the kernel */
2215     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, event_count, events, &event);
2216     if (clStatus != CL_SUCCESS)
2217     {
2218       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2219       goto cleanup;
2220     }
2221     RecordProfileData(clEnv,HullPass1Kernel,event);
2222     clEnv->library->clReleaseEvent(event);
2223     /* launch the kernel */
2224     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, event_count, events, &event);
2225     if (clStatus != CL_SUCCESS)
2226     {
2227       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2228       goto cleanup;
2229     }
2230     RecordProfileData(clEnv,HullPass2Kernel,event);
2231     clEnv->library->clReleaseEvent(event);
2232 
2233     offset.s[0] = X[k];
2234     offset.s[1] = Y[k];
2235     polarity = -1;
2236     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2237     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2238     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2239     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2240 
2241     if (k == 3)
2242       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2243 
2244     if (clStatus != CL_SUCCESS)
2245     {
2246       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2247       goto cleanup;
2248     }
2249     /* launch the kernel */
2250     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, event_count, events, &event);
2251     if (clStatus != CL_SUCCESS)
2252     {
2253       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2254       goto cleanup;
2255     }
2256     RecordProfileData(clEnv,HullPass1Kernel,event);
2257     clEnv->library->clReleaseEvent(event);
2258     /* launch the kernel */
2259     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, event_count, events, &event);
2260     if (clStatus != CL_SUCCESS)
2261     {
2262       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2263       goto cleanup;
2264     }
2265     if ((k == 3) && (RecordProfileData(clEnv,HullPass2Kernel,event) == MagickFalse))
2266       {
2267         AddOpenCLEvent(image,event);
2268         AddOpenCLEvent(filteredImage,event);
2269       }
2270     clEnv->library->clReleaseEvent(event);
2271   }
2272 
2273   outputReady=MagickTrue;
2274 
2275 cleanup:
2276   OpenCLLogException(__FUNCTION__,__LINE__,exception);
2277 
2278   if (imageBuffer != (cl_mem) NULL)
2279     clEnv->library->clReleaseMemObject(imageBuffer);
2280   if (filteredImageBuffer != (cl_mem) NULL)
2281     clEnv->library->clReleaseMemObject(filteredImageBuffer);
2282   events=(cl_event *) RelinquishMagickMemory(events);
2283   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
2284   for (k = 0; k < 2; k++)
2285   {
2286     if (tempImageBuffer[k]!=NULL)	      clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2287   }
2288   if (hullPass1!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass1);
2289   if (hullPass2!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass2);
2290   if ((outputReady == MagickFalse) && (filteredImage != NULL))
2291     filteredImage=(Image *) DestroyImage(filteredImage);
2292   return(filteredImage);
2293 }
2294 
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2295 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2296   ExceptionInfo* exception)
2297 {
2298   Image
2299     *filteredImage;
2300 
2301   assert(image != NULL);
2302   assert(exception != (ExceptionInfo *) NULL);
2303 
2304   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
2305       (checkAccelerateCondition(image, AllChannels) == MagickFalse))
2306     return NULL;
2307 
2308   filteredImage=ComputeDespeckleImage(image,exception);
2309   return(filteredImage);
2310 }
2311 
2312 /*
2313 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2314 %                                                                             %
2315 %                                                                             %
2316 %                                                                             %
2317 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
2318 %                                                                             %
2319 %                                                                             %
2320 %                                                                             %
2321 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2322 */
2323 
ComputeEqualizeImage(Image * image,const ChannelType channel,ExceptionInfo * exception)2324 MagickPrivate MagickBooleanType ComputeEqualizeImage(Image *image,
2325   const ChannelType channel,ExceptionInfo *exception)
2326 {
2327 #define EqualizeImageTag  "Equalize/Image"
2328 
2329   cl_command_queue
2330     queue;
2331 
2332   cl_context
2333     context;
2334 
2335   cl_int
2336     clStatus;
2337 
2338   cl_mem
2339     equalizeMapBuffer,
2340     histogramBuffer,
2341     imageBuffer;
2342 
2343   cl_kernel
2344     equalizeKernel,
2345     histogramKernel;
2346 
2347   cl_event
2348     event;
2349 
2350   cl_uint
2351     event_count;
2352 
2353   cl_uint4
2354     *histogram;
2355 
2356   cl_event
2357     *events;
2358 
2359   cl_float4
2360     white,
2361     black,
2362     intensity,
2363     *map;
2364 
2365   MagickBooleanType
2366     outputReady,
2367     status;
2368 
2369   MagickCLEnv
2370     clEnv;
2371 
2372   MagickSizeType
2373     length;
2374 
2375   PixelPacket
2376     *equalize_map;
2377 
2378   ssize_t
2379     i;
2380 
2381   size_t
2382     global_work_size[2];
2383 
2384   map=NULL;
2385   histogram=NULL;
2386   equalize_map=NULL;
2387   imageBuffer = NULL;
2388   histogramBuffer = NULL;
2389   equalizeMapBuffer = NULL;
2390   histogramKernel = NULL;
2391   equalizeKernel = NULL;
2392   context = NULL;
2393   queue = NULL;
2394   outputReady = MagickFalse;
2395 
2396   assert(image != (Image *) NULL);
2397   assert(image->signature == MagickCoreSignature);
2398   if (image->debug != MagickFalse)
2399     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2400 
2401   /*
2402    * initialize opencl env
2403    */
2404   clEnv = GetDefaultOpenCLEnv();
2405   context = GetOpenCLContext(clEnv);
2406   queue = AcquireOpenCLCommandQueue(clEnv);
2407 
2408   /*
2409     Allocate and initialize histogram arrays.
2410   */
2411   length=MaxMap+1UL;
2412   histogram=(cl_uint4 *) AcquireQuantumMemory(length, sizeof(*histogram));
2413   if (histogram == (cl_uint4 *) NULL)
2414       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2415 
2416   /* reset histogram */
2417   (void) memset(histogram,0,length*sizeof(*histogram));
2418 
2419   imageBuffer = GetAuthenticOpenCLBuffer(image, exception);
2420   if (imageBuffer == (cl_mem) NULL)
2421   {
2422     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
2423       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
2424     goto cleanup;
2425   }
2426 
2427   /* create a CL buffer for histogram  */
2428   histogramBuffer = clEnv->library->clCreateBuffer(context,  CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, length * sizeof(cl_uint4), histogram, &clStatus);
2429   if (clStatus != CL_SUCCESS)
2430   {
2431     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2432     goto cleanup;
2433   }
2434 
2435   status = LaunchHistogramKernel(clEnv, queue, imageBuffer, histogramBuffer, image, channel, exception);
2436   if (status == MagickFalse)
2437     goto cleanup;
2438 
2439   /* this blocks, should be fixed it in the future */
2440   events=GetOpenCLEvents(image,&event_count);
2441   clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), event_count, events, NULL, &clStatus);
2442   events=(cl_event *) RelinquishMagickMemory(events);
2443   if (clStatus != CL_SUCCESS)
2444   {
2445     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2446     goto cleanup;
2447   }
2448 
2449   /* unmap, don't block gpu to use this buffer again.  */
2450   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2451   if (clStatus != CL_SUCCESS)
2452   {
2453     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
2454     goto cleanup;
2455   }
2456 
2457   /* CPU stuff */
2458   equalize_map=(PixelPacket *) AcquireQuantumMemory(length, sizeof(*equalize_map));
2459   if (equalize_map == (PixelPacket *) NULL)
2460     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2461 
2462   map=(cl_float4 *) AcquireQuantumMemory(length,sizeof(*map));
2463   if (map == (cl_float4 *) NULL)
2464     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2465 
2466   /*
2467     Integrate the histogram to get the equalization map.
2468   */
2469   (void) memset(&intensity,0,sizeof(intensity));
2470   for (i=0; i <= (ssize_t) MaxMap; i++)
2471   {
2472     if ((channel & SyncChannels) != 0)
2473     {
2474       intensity.z+=histogram[i].s[2];
2475       map[i]=intensity;
2476       continue;
2477     }
2478     if ((channel & RedChannel) != 0)
2479       intensity.z+=histogram[i].s[2];
2480     if ((channel & GreenChannel) != 0)
2481       intensity.y+=histogram[i].s[1];
2482     if ((channel & BlueChannel) != 0)
2483       intensity.x+=histogram[i].s[0];
2484     if ((channel & OpacityChannel) != 0)
2485       intensity.w+=histogram[i].s[3];
2486     /*
2487     if (((channel & IndexChannel) != 0) &&
2488         (image->colorspace == CMYKColorspace))
2489     {
2490       intensity.index+=histogram[i].index;
2491     }
2492     */
2493     map[i]=intensity;
2494   }
2495   black=map[0];
2496   white=map[(int) MaxMap];
2497   (void) memset(equalize_map,0,length*sizeof(*equalize_map));
2498   for (i=0; i <= (ssize_t) MaxMap; i++)
2499   {
2500     if ((channel & SyncChannels) != 0)
2501     {
2502       if (white.z != black.z)
2503         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2504                 (map[i].z-black.z))/(white.z-black.z)));
2505       continue;
2506     }
2507     if (((channel & RedChannel) != 0) && (white.z != black.z))
2508       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2509               (map[i].z-black.z))/(white.z-black.z)));
2510     if (((channel & GreenChannel) != 0) && (white.y != black.y))
2511       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2512               (map[i].y-black.y))/(white.y-black.y)));
2513     if (((channel & BlueChannel) != 0) && (white.x != black.x))
2514       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2515               (map[i].x-black.x))/(white.x-black.x)));
2516     if (((channel & OpacityChannel) != 0) && (white.w != black.w))
2517       equalize_map[i].opacity=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2518               (map[i].w-black.w))/(white.w-black.w)));
2519     /*
2520     if ((((channel & IndexChannel) != 0) &&
2521           (image->colorspace == CMYKColorspace)) &&
2522         (white.index != black.index))
2523       equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2524               (map[i].index-black.index))/(white.index-black.index)));
2525     */
2526   }
2527 
2528   if (image->storage_class == PseudoClass)
2529   {
2530     /*
2531        Equalize colormap.
2532        */
2533     for (i=0; i < (ssize_t) image->colors; i++)
2534     {
2535       if ((channel & SyncChannels) != 0)
2536       {
2537         if (white.z != black.z)
2538         {
2539           image->colormap[i].red=equalize_map[
2540             ScaleQuantumToMap(image->colormap[i].red)].red;
2541           image->colormap[i].green=equalize_map[
2542             ScaleQuantumToMap(image->colormap[i].green)].red;
2543           image->colormap[i].blue=equalize_map[
2544             ScaleQuantumToMap(image->colormap[i].blue)].red;
2545           image->colormap[i].opacity=equalize_map[
2546             ScaleQuantumToMap(image->colormap[i].opacity)].red;
2547         }
2548         continue;
2549       }
2550       if (((channel & RedChannel) != 0) && (white.z != black.z))
2551         image->colormap[i].red=equalize_map[
2552           ScaleQuantumToMap(image->colormap[i].red)].red;
2553       if (((channel & GreenChannel) != 0) && (white.y != black.y))
2554         image->colormap[i].green=equalize_map[
2555           ScaleQuantumToMap(image->colormap[i].green)].green;
2556       if (((channel & BlueChannel) != 0) && (white.x != black.x))
2557         image->colormap[i].blue=equalize_map[
2558           ScaleQuantumToMap(image->colormap[i].blue)].blue;
2559       if (((channel & OpacityChannel) != 0) &&
2560           (white.w != black.w))
2561         image->colormap[i].opacity=equalize_map[
2562           ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
2563     }
2564   }
2565 
2566   /* create a CL buffer for eqaulize_map  */
2567   equalizeMapBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, length * sizeof(PixelPacket), equalize_map, &clStatus);
2568   if (clStatus != CL_SUCCESS)
2569   {
2570     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2571     goto cleanup;
2572   }
2573 
2574   /* get the OpenCL kernel */
2575   equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
2576   if (equalizeKernel == NULL)
2577   {
2578     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2579     goto cleanup;
2580   }
2581 
2582   /* set the kernel arguments */
2583   i = 0;
2584   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2585   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
2586   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2587   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
2588   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
2589   if (clStatus != CL_SUCCESS)
2590   {
2591     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2592     goto cleanup;
2593   }
2594 
2595   /* launch the kernel */
2596   global_work_size[0] = image->columns;
2597   global_work_size[1] = image->rows;
2598 
2599   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2600 
2601   if (clStatus != CL_SUCCESS)
2602   {
2603     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2604     goto cleanup;
2605   }
2606   if (RecordProfileData(clEnv,EqualizeKernel,event) == MagickFalse)
2607     AddOpenCLEvent(image,event);
2608   clEnv->library->clReleaseEvent(event);
2609 
2610 cleanup:
2611   OpenCLLogException(__FUNCTION__,__LINE__,exception);
2612 
2613   if (imageBuffer != (cl_mem) NULL)
2614     clEnv->library->clReleaseMemObject(imageBuffer);
2615 
2616   if (map!=NULL)
2617     map=(cl_float4 *) RelinquishMagickMemory(map);
2618 
2619   if (equalizeMapBuffer!=NULL)
2620     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2621   if (equalize_map!=NULL)
2622     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2623 
2624   if (histogramBuffer!=NULL)
2625     clEnv->library->clReleaseMemObject(histogramBuffer);
2626   if (histogram!=NULL)
2627     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2628 
2629   if (histogramKernel!=NULL)
2630     RelinquishOpenCLKernel(clEnv, histogramKernel);
2631   if (equalizeKernel!=NULL)
2632     RelinquishOpenCLKernel(clEnv, equalizeKernel);
2633 
2634   if (queue != NULL)
2635     RelinquishOpenCLCommandQueue(clEnv, queue);
2636 
2637   return(outputReady);
2638 }
2639 
AccelerateEqualizeImage(Image * image,const ChannelType channel,ExceptionInfo * exception)2640 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2641   const ChannelType channel,ExceptionInfo *exception)
2642 {
2643   MagickBooleanType
2644     status;
2645 
2646   assert(image != NULL);
2647   assert(exception != (ExceptionInfo *) NULL);
2648 
2649   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
2650       (checkAccelerateCondition(image, channel) == MagickFalse) ||
2651       (checkHistogramCondition(image, channel) == MagickFalse))
2652     return(MagickFalse);
2653 
2654   status=ComputeEqualizeImage(image,channel,exception);
2655   return(status);
2656 }
2657 
2658 /*
2659 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2660 %                                                                             %
2661 %                                                                             %
2662 %                                                                             %
2663 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
2664 %                                                                             %
2665 %                                                                             %
2666 %                                                                             %
2667 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2668 */
2669 
ComputeFunctionImage(Image * image,const ChannelType channel,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2670 static MagickBooleanType ComputeFunctionImage(Image *image,
2671   const ChannelType channel,const MagickFunction function,
2672   const size_t number_parameters,const double *parameters,
2673   ExceptionInfo *exception)
2674 {
2675   cl_command_queue
2676     queue;
2677 
2678   cl_context
2679     context;
2680 
2681   cl_int
2682     clStatus;
2683 
2684   cl_kernel
2685     clkernel;
2686 
2687   cl_event
2688     event;
2689 
2690   cl_mem
2691     imageBuffer,
2692     parametersBuffer;
2693 
2694   cl_event
2695     *events;
2696 
2697   float
2698     *parametersBufferPtr;
2699 
2700   MagickBooleanType
2701     status;
2702 
2703   MagickCLEnv
2704     clEnv;
2705 
2706   size_t
2707     globalWorkSize[2];
2708 
2709   unsigned int
2710     event_count,
2711     i;
2712 
2713   status = MagickFalse;
2714 
2715   context = NULL;
2716   clkernel = NULL;
2717   queue = NULL;
2718   imageBuffer = NULL;
2719   parametersBuffer = NULL;
2720 
2721   clEnv = GetDefaultOpenCLEnv();
2722   context = GetOpenCLContext(clEnv);
2723 
2724   queue = AcquireOpenCLCommandQueue(clEnv);
2725 
2726   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
2727   if (imageBuffer == (cl_mem) NULL)
2728   {
2729     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
2730       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
2731     goto cleanup;
2732   }
2733 
2734 
2735   {
2736 	  parametersBufferPtr = (float*)AcquireMagickMemory(number_parameters * sizeof(float));
2737 
2738 	  for (i = 0; i < number_parameters; i++)
2739 		  parametersBufferPtr[i] = (float)parameters[i];
2740 
2741 	  parametersBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, number_parameters * sizeof(float), parametersBufferPtr, &clStatus);
2742 	  parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2743   }
2744 
2745   clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ComputeFunction");
2746   if (clkernel == NULL)
2747   {
2748     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2749     goto cleanup;
2750   }
2751 
2752   /* set the kernel arguments */
2753   i = 0;
2754   clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2755   clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
2756   clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
2757   clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
2758   clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2759   if (clStatus != CL_SUCCESS)
2760   {
2761     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2762     goto cleanup;
2763   }
2764 
2765   globalWorkSize[0] = image->columns;
2766   globalWorkSize[1] = image->rows;
2767   /* launch the kernel */
2768   events=GetOpenCLEvents(image,&event_count);
2769   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, event_count, events, &event);
2770   events=(cl_event *) RelinquishMagickMemory(events);
2771   if (clStatus != CL_SUCCESS)
2772   {
2773     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2774     goto cleanup;
2775   }
2776   if (RecordProfileData(clEnv,ComputeFunctionKernel,event) == MagickFalse)
2777     AddOpenCLEvent(image,event);
2778   clEnv->library->clReleaseEvent(event);
2779   status = MagickTrue;
2780 
2781 cleanup:
2782   OpenCLLogException(__FUNCTION__,__LINE__,exception);
2783 
2784   if (imageBuffer != (cl_mem) NULL)
2785     clEnv->library->clReleaseMemObject(imageBuffer);
2786   if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
2787   if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2788   if (parametersBuffer != NULL) clEnv->library->clReleaseMemObject(parametersBuffer);
2789 
2790   return(status);
2791 }
2792 
AccelerateFunctionImage(Image * image,const ChannelType channel,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2793 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2794   const ChannelType channel,const MagickFunction function,
2795   const size_t number_parameters,const double *parameters,
2796   ExceptionInfo *exception)
2797 {
2798   MagickBooleanType
2799     status;
2800 
2801   assert(image != NULL);
2802   assert(exception != (ExceptionInfo *) NULL);
2803 
2804   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
2805       (checkAccelerateCondition(image, channel) == MagickFalse))
2806     return(MagickFalse);
2807 
2808   status=ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
2809   return(status);
2810 }
2811 
2812 /*
2813 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2814 %                                                                             %
2815 %                                                                             %
2816 %                                                                             %
2817 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2818 %                                                                             %
2819 %                                                                             %
2820 %                                                                             %
2821 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2822 */
2823 
ComputeGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)2824 MagickBooleanType ComputeGrayscaleImage(Image *image,
2825   const PixelIntensityMethod method,ExceptionInfo *exception)
2826 {
2827   cl_command_queue
2828     queue;
2829 
2830   cl_context
2831     context;
2832 
2833   cl_int
2834     clStatus,
2835     intensityMethod;
2836 
2837   cl_int
2838     colorspace;
2839 
2840   cl_kernel
2841     grayscaleKernel;
2842 
2843   cl_event
2844     event;
2845 
2846   cl_mem
2847     imageBuffer;
2848 
2849   cl_uint
2850     event_count;
2851 
2852   cl_event
2853     *events;
2854 
2855   MagickBooleanType
2856     outputReady;
2857 
2858   MagickCLEnv
2859     clEnv;
2860 
2861   ssize_t
2862     i;
2863 
2864   imageBuffer = NULL;
2865   grayscaleKernel = NULL;
2866 
2867   assert(image != (Image *) NULL);
2868   assert(image->signature == MagickCoreSignature);
2869   if (image->debug != MagickFalse)
2870     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2871 
2872   /*
2873    * initialize opencl env
2874    */
2875   clEnv = GetDefaultOpenCLEnv();
2876   context = GetOpenCLContext(clEnv);
2877   queue = AcquireOpenCLCommandQueue(clEnv);
2878 
2879   outputReady = MagickFalse;
2880 
2881   imageBuffer = GetAuthenticOpenCLBuffer(image, exception);
2882   if (imageBuffer == (cl_mem) NULL)
2883   {
2884     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
2885       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
2886     goto cleanup;
2887   }
2888 
2889   intensityMethod = method;
2890   colorspace = image->colorspace;
2891 
2892   grayscaleKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Grayscale");
2893   if (grayscaleKernel == NULL)
2894   {
2895     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2896     goto cleanup;
2897   }
2898 
2899   i = 0;
2900   clStatus=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2901   clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&intensityMethod);
2902   clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&colorspace);
2903   if (clStatus != CL_SUCCESS)
2904   {
2905     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2906     printf("no kernel\n");
2907     goto cleanup;
2908   }
2909 
2910   {
2911     size_t global_work_size[2];
2912     global_work_size[0] = image->columns;
2913     global_work_size[1] = image->rows;
2914     /* launch the kernel */
2915     events=GetOpenCLEvents(image,&event_count);
2916     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, grayscaleKernel, 2, NULL, global_work_size, NULL, event_count, events, &event);
2917     events=(cl_event *) RelinquishMagickMemory(events);
2918     if (clStatus != CL_SUCCESS)
2919     {
2920       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2921       goto cleanup;
2922     }
2923     if (RecordProfileData(clEnv,GrayScaleKernel,event) == MagickFalse)
2924       AddOpenCLEvent(image,event);
2925     clEnv->library->clReleaseEvent(event);
2926   }
2927 
2928   outputReady=MagickTrue;
2929 
2930 cleanup:
2931   OpenCLLogException(__FUNCTION__,__LINE__,exception);
2932 
2933   if (imageBuffer != (cl_mem) NULL)
2934     clEnv->library->clReleaseMemObject(imageBuffer);
2935   if (grayscaleKernel!=NULL)
2936     RelinquishOpenCLKernel(clEnv, grayscaleKernel);
2937   if (queue != NULL)
2938     RelinquishOpenCLCommandQueue(clEnv, queue);
2939 
2940   return(outputReady);
2941 }
2942 
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)2943 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
2944   const PixelIntensityMethod method,ExceptionInfo *exception)
2945 {
2946   MagickBooleanType
2947     status;
2948 
2949   assert(image != NULL);
2950   assert(exception != (ExceptionInfo *) NULL);
2951 
2952   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
2953       (checkAccelerateCondition(image, AllChannels) == MagickFalse))
2954     return(MagickFalse);
2955 
2956   if (method == Rec601LuminancePixelIntensityMethod || method == Rec709LuminancePixelIntensityMethod)
2957     return(MagickFalse);
2958 
2959   if (image->colorspace != sRGBColorspace)
2960     return(MagickFalse);
2961 
2962   status=ComputeGrayscaleImage(image,method,exception);
2963   return(status);
2964 }
2965 
2966 /*
2967 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2968 %                                                                             %
2969 %                                                                             %
2970 %                                                                             %
2971 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
2972 %                                                                             %
2973 %                                                                             %
2974 %                                                                             %
2975 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2976 */
2977 
ComputeLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)2978 static Image *ComputeLocalContrastImage(const Image *image,
2979   const double radius,const double strength,ExceptionInfo *exception)
2980 {
2981   cl_command_queue
2982     queue;
2983 
2984   cl_context
2985     context;
2986 
2987   cl_int
2988     clStatus,
2989     iRadius;
2990 
2991   cl_kernel
2992     blurRowKernel,
2993     blurColumnKernel;
2994 
2995   cl_event
2996     event;
2997 
2998   cl_mem
2999     filteredImageBuffer,
3000     imageBuffer,
3001     tempImageBuffer;
3002 
3003   cl_event
3004     *events;
3005 
3006   Image
3007     *filteredImage;
3008 
3009   MagickBooleanType
3010     outputReady;
3011 
3012   MagickCLEnv
3013     clEnv;
3014 
3015   MagickSizeType
3016     length;
3017 
3018   unsigned int
3019     event_count,
3020     i,
3021     imageColumns,
3022     imageRows,
3023     passes;
3024 
3025   clEnv = NULL;
3026   filteredImage = NULL;
3027   context = NULL;
3028   imageBuffer = NULL;
3029   filteredImageBuffer = NULL;
3030   tempImageBuffer = NULL;
3031   blurRowKernel = NULL;
3032   blurColumnKernel = NULL;
3033   queue = NULL;
3034   outputReady = MagickFalse;
3035 
3036   clEnv = GetDefaultOpenCLEnv();
3037   context = GetOpenCLContext(clEnv);
3038   queue = AcquireOpenCLCommandQueue(clEnv);
3039 
3040   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3041   if (filteredImage == (Image *) NULL)
3042     goto cleanup;
3043 
3044   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
3045   if (imageBuffer == (cl_mem) NULL)
3046   {
3047     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3048       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3049     goto cleanup;
3050   }
3051   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage,exception);
3052   if (filteredImageBuffer == (cl_mem) NULL)
3053   {
3054     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3055     ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3056     goto cleanup;
3057   }
3058 
3059   {
3060     /* create temp buffer */
3061     {
3062       length = image->columns * image->rows;
3063       tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3064       if (clStatus != CL_SUCCESS)
3065       {
3066         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3067         goto cleanup;
3068       }
3069     }
3070 
3071     /* get the opencl kernel */
3072     {
3073       blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "LocalContrastBlurRow");
3074       if (blurRowKernel == NULL)
3075       {
3076         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3077         goto cleanup;
3078       };
3079 
3080       blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "LocalContrastBlurApplyColumn");
3081       if (blurColumnKernel == NULL)
3082       {
3083         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3084         goto cleanup;
3085       };
3086     }
3087 
3088     {
3089       imageColumns = (unsigned int) image->columns;
3090       imageRows = (unsigned int) image->rows;
3091       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius);  /*Normalized radius, 100% gives blur radius of 20% of the largest dimension */
3092 
3093       passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3094       passes = (passes < 1) ? 1: passes;
3095 
3096       /* set the kernel arguments */
3097       i = 0;
3098       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3099       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3100       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3101       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3102       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3103       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3104 
3105       if (clStatus != CL_SUCCESS)
3106       {
3107         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3108         goto cleanup;
3109       }
3110     }
3111 
3112     /* launch the kernel */
3113     {
3114       int x;
3115       for (x = 0; x < passes; ++x) {
3116         size_t gsize[2];
3117         size_t wsize[2];
3118         size_t goffset[2];
3119 
3120         gsize[0] = 256;
3121         gsize[1] = (image->rows + passes - 1) / passes;
3122         wsize[0] = 256;
3123         wsize[1] = 1;
3124         goffset[0] = 0;
3125         goffset[1] = x * gsize[1];
3126 
3127         events=GetOpenCLEvents(image,&event_count);
3128         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, event_count, events, &event);
3129         events=(cl_event *) RelinquishMagickMemory(events);
3130         if (clStatus != CL_SUCCESS)
3131         {
3132           (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3133           goto cleanup;
3134         }
3135         clEnv->library->clFlush(queue);
3136         if (RecordProfileData(clEnv,LocalContrastBlurRowKernel,event) == MagickFalse)
3137           {
3138             AddOpenCLEvent(image,event);
3139             AddOpenCLEvent(filteredImage, event);
3140           }
3141         clEnv->library->clReleaseEvent(event);
3142       }
3143     }
3144 
3145     {
3146       cl_float FStrength = strength;
3147       i = 0;
3148       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3149       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3150       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3151       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3152       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3153       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3154       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3155 
3156       if (clStatus != CL_SUCCESS)
3157       {
3158         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3159         goto cleanup;
3160       }
3161     }
3162 
3163     /* launch the kernel */
3164     {
3165       int x;
3166       for (x = 0; x < passes; ++x) {
3167         size_t gsize[2];
3168         size_t wsize[2];
3169         size_t goffset[2];
3170 
3171         gsize[0] = ((image->columns + 3) / 4) * 4;
3172         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3173         wsize[0] = 4;
3174         wsize[1] = 64;
3175         goffset[0] = 0;
3176         goffset[1] = x * gsize[1];
3177 
3178         events=GetOpenCLEvents(image,&event_count);
3179         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, event_count, events, &event);
3180         events=(cl_event *) RelinquishMagickMemory(events);
3181         if (clStatus != CL_SUCCESS)
3182         {
3183           (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3184           goto cleanup;
3185         }
3186         clEnv->library->clFlush(queue);
3187         if (RecordProfileData(clEnv, LocalContrastBlurApplyColumnKernel, event) == MagickFalse)
3188           {
3189             AddOpenCLEvent(image,event);
3190             AddOpenCLEvent(filteredImage,event);
3191           }
3192         clEnv->library->clReleaseEvent(event);
3193       }
3194     }
3195   }
3196 
3197   outputReady = MagickTrue;
3198 
3199 
3200 cleanup:
3201   OpenCLLogException(__FUNCTION__,__LINE__,exception);
3202 
3203   if (imageBuffer != (cl_mem) NULL)
3204     clEnv->library->clReleaseMemObject(imageBuffer);
3205   if (filteredImageBuffer != (cl_mem) NULL)
3206     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3207   if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
3208   if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
3209   if (blurColumnKernel!=NULL)                 RelinquishOpenCLKernel(clEnv, blurColumnKernel);
3210   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
3211   if ((outputReady == MagickFalse) && (filteredImage != NULL))
3212     filteredImage=(Image *) DestroyImage(filteredImage);
3213   return(filteredImage);
3214 }
3215 
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3216 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3217   const double radius,const double strength,ExceptionInfo *exception)
3218 {
3219   Image
3220     *filteredImage;
3221 
3222   assert(image != NULL);
3223   assert(exception != (ExceptionInfo *) NULL);
3224 
3225   if ((checkOpenCLEnvironment(exception) == MagickFalse))
3226     return NULL;
3227 
3228   filteredImage=ComputeLocalContrastImage(image,radius,strength,exception);
3229 
3230   return(filteredImage);
3231 }
3232 
3233 /*
3234 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3235 %                                                                             %
3236 %                                                                             %
3237 %                                                                             %
3238 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
3239 %                                                                             %
3240 %                                                                             %
3241 %                                                                             %
3242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3243 */
3244 
ComputeModulateImage(Image * image,double percent_brightness,double percent_hue,double percent_saturation,ColorspaceType colorspace,ExceptionInfo * exception)3245 MagickBooleanType ComputeModulateImage(Image *image,
3246   double percent_brightness, double percent_hue, double percent_saturation,
3247   ColorspaceType colorspace, ExceptionInfo *exception)
3248 {
3249   cl_float
3250     bright,
3251     hue,
3252     saturation;
3253 
3254   cl_context
3255     context;
3256 
3257   cl_command_queue
3258     queue;
3259 
3260   cl_int
3261     color,
3262     clStatus;
3263 
3264   cl_kernel
3265     modulateKernel;
3266 
3267   cl_event
3268     event;
3269 
3270   cl_mem
3271     imageBuffer;
3272 
3273   cl_event
3274     *events;
3275 
3276   MagickBooleanType
3277     outputReady;
3278 
3279   MagickCLEnv
3280     clEnv;
3281 
3282   ssize_t
3283     i;
3284 
3285   unsigned int
3286     event_count;
3287 
3288   imageBuffer = NULL;
3289   modulateKernel = NULL;
3290   event_count = 0;
3291 
3292   assert(image != (Image *)NULL);
3293   assert(image->signature == MagickCoreSignature);
3294   if (image->debug != MagickFalse)
3295     (void) LogMagickEvent(TraceEvent, GetMagickModule(), "%s", image->filename);
3296 
3297   /*
3298   * initialize opencl env
3299   */
3300   clEnv = GetDefaultOpenCLEnv();
3301   context = GetOpenCLContext(clEnv);
3302   queue = AcquireOpenCLCommandQueue(clEnv);
3303 
3304   outputReady = MagickFalse;
3305 
3306   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
3307   if (imageBuffer == (cl_mem) NULL)
3308   {
3309     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3310       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3311     goto cleanup;
3312   }
3313 
3314   modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
3315   if (modulateKernel == NULL)
3316   {
3317     (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3318     goto cleanup;
3319   }
3320 
3321   bright = percent_brightness;
3322   hue = percent_hue;
3323   saturation = percent_saturation;
3324   color = colorspace;
3325 
3326   i = 0;
3327   clStatus = clEnv->library->clSetKernelArg(modulateKernel, i++, sizeof(cl_mem), (void *)&imageBuffer);
3328   clStatus |= clEnv->library->clSetKernelArg(modulateKernel, i++, sizeof(cl_float), &bright);
3329   clStatus |= clEnv->library->clSetKernelArg(modulateKernel, i++, sizeof(cl_float), &hue);
3330   clStatus |= clEnv->library->clSetKernelArg(modulateKernel, i++, sizeof(cl_float), &saturation);
3331   clStatus |= clEnv->library->clSetKernelArg(modulateKernel, i++, sizeof(cl_float), &color);
3332   if (clStatus != CL_SUCCESS)
3333   {
3334     (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3335     printf("no kernel\n");
3336     goto cleanup;
3337   }
3338 
3339   {
3340     size_t global_work_size[2];
3341     global_work_size[0] = image->columns;
3342     global_work_size[1] = image->rows;
3343     /* launch the kernel */
3344     events=GetOpenCLEvents(image,&event_count);
3345     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, event_count, events, &event);
3346     events=(cl_event *) RelinquishMagickMemory(events);
3347     if (clStatus != CL_SUCCESS)
3348     {
3349       (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3350       goto cleanup;
3351     }
3352     if (RecordProfileData(clEnv, ModulateKernel, event) == MagickFalse)
3353       AddOpenCLEvent(image,event);
3354     clEnv->library->clReleaseEvent(event);
3355   }
3356 
3357   outputReady=MagickTrue;
3358 
3359 cleanup:
3360   OpenCLLogException(__FUNCTION__, __LINE__, exception);
3361 
3362   if (imageBuffer != (cl_mem) NULL)
3363     clEnv->library->clReleaseMemObject(imageBuffer);
3364   if (modulateKernel != NULL)
3365     RelinquishOpenCLKernel(clEnv, modulateKernel);
3366   if (queue != NULL)
3367     RelinquishOpenCLCommandQueue(clEnv, queue);
3368 
3369   return(outputReady);
3370 }
3371 
AccelerateModulateImage(Image * image,double percent_brightness,double percent_hue,double percent_saturation,ColorspaceType colorspace,ExceptionInfo * exception)3372 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3373   double percent_brightness, double percent_hue, double percent_saturation,
3374   ColorspaceType colorspace, ExceptionInfo *exception)
3375 {
3376   MagickBooleanType
3377     status;
3378 
3379   assert(image != NULL);
3380   assert(exception != (ExceptionInfo *)NULL);
3381 
3382   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3383     (checkAccelerateCondition(image, AllChannels) == MagickFalse))
3384     return(MagickFalse);
3385 
3386   if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
3387     return(MagickFalse);
3388 
3389   status = ComputeModulateImage(image, percent_brightness, percent_hue, percent_saturation, colorspace, exception);
3390   return(status);
3391 }
3392 
3393 /*
3394 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3395 %                                                                             %
3396 %                                                                             %
3397 %                                                                             %
3398 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3399 %                                                                             %
3400 %                                                                             %
3401 %                                                                             %
3402 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3403 */
3404 
ComputeMotionBlurImage(const Image * image,const ChannelType channel,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3405 static Image* ComputeMotionBlurImage(const Image *image,
3406   const ChannelType channel,const double *kernel,const size_t width,
3407   const OffsetInfo *offset,ExceptionInfo *exception)
3408 {
3409   cl_command_queue
3410     queue;
3411 
3412   cl_context
3413     context;
3414 
3415   cl_float4
3416     biasPixel;
3417 
3418   cl_int
3419     clStatus;
3420 
3421   cl_kernel
3422     motionBlurKernel;
3423 
3424   cl_event
3425     event;
3426 
3427   cl_mem
3428     filteredImageBuffer,
3429     imageBuffer,
3430     imageKernelBuffer,
3431     offsetBuffer;
3432 
3433   cl_uint
3434     event_count;
3435 
3436   cl_event
3437     *events;
3438 
3439   float
3440     *kernelBufferPtr;
3441 
3442   Image
3443     *filteredImage;
3444 
3445   int
3446     *offsetBufferPtr;
3447 
3448   MagickBooleanType
3449     outputReady;
3450 
3451   MagickCLEnv
3452    clEnv;
3453 
3454   MagickPixelPacket
3455     bias;
3456 
3457   size_t
3458     global_work_size[2],
3459     local_work_size[2];
3460 
3461   unsigned int
3462     i,
3463     imageHeight,
3464     imageWidth,
3465     matte;
3466 
3467   outputReady = MagickFalse;
3468   context = NULL;
3469   filteredImage = NULL;
3470   imageBuffer = NULL;
3471   filteredImageBuffer = NULL;
3472   imageKernelBuffer = NULL;
3473   motionBlurKernel = NULL;
3474   queue = NULL;
3475 
3476   clEnv = GetDefaultOpenCLEnv();
3477   context = GetOpenCLContext(clEnv);
3478 
3479   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3480   if (filteredImage == (Image *) NULL)
3481     goto cleanup;
3482 
3483   imageBuffer = GetAuthenticOpenCLBuffer(image, exception);
3484   if (imageBuffer == (cl_mem) NULL)
3485   {
3486     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3487       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3488     goto cleanup;
3489   }
3490   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage, exception);
3491   if (filteredImageBuffer == (cl_mem) NULL)
3492   {
3493     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3494       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3495     goto cleanup;
3496   }
3497 
3498   imageKernelBuffer = clEnv->library->clCreateBuffer(context,
3499     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3500     &clStatus);
3501   if (clStatus != CL_SUCCESS)
3502   {
3503     (void) ThrowMagickException(exception, GetMagickModule(),
3504       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3505     goto cleanup;
3506   }
3507 
3508   queue = AcquireOpenCLCommandQueue(clEnv);
3509   events=GetOpenCLEvents(image,&event_count);
3510   /* this blocks, should be fixed it in the future */
3511   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3512     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), event_count, events, NULL, &clStatus);
3513   events=(cl_event *) RelinquishMagickMemory(events);
3514   if (clStatus != CL_SUCCESS)
3515   {
3516     (void) ThrowMagickException(exception, GetMagickModule(),
3517       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3518     goto cleanup;
3519   }
3520   for (i = 0; i < width; i++)
3521   {
3522     kernelBufferPtr[i] = (float) kernel[i];
3523   }
3524   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3525     0, NULL, NULL);
3526  if (clStatus != CL_SUCCESS)
3527   {
3528     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3529       "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
3530     goto cleanup;
3531   }
3532 
3533   offsetBuffer = clEnv->library->clCreateBuffer(context,
3534     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3535     &clStatus);
3536   if (clStatus != CL_SUCCESS)
3537   {
3538     (void) ThrowMagickException(exception, GetMagickModule(),
3539       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3540     goto cleanup;
3541   }
3542 
3543   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3544     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3545   if (clStatus != CL_SUCCESS)
3546   {
3547     (void) ThrowMagickException(exception, GetMagickModule(),
3548       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3549     goto cleanup;
3550   }
3551   for (i = 0; i < width; i++)
3552   {
3553     offsetBufferPtr[2*i] = (int)offset[i].x;
3554     offsetBufferPtr[2*i+1] = (int)offset[i].y;
3555   }
3556   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3557     NULL, NULL);
3558  if (clStatus != CL_SUCCESS)
3559   {
3560     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3561       "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
3562     goto cleanup;
3563   }
3564 
3565 
3566   /*
3567     Get the OpenCL kernel.
3568   */
3569   motionBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
3570     "MotionBlur");
3571   if (motionBlurKernel == NULL)
3572   {
3573     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3574       "AcquireOpenCLKernel failed.", "'%s'", ".");
3575     goto cleanup;
3576   }
3577 
3578   /*
3579     Set the kernel arguments.
3580   */
3581   i = 0;
3582   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3583     (void *)&imageBuffer);
3584   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3585     (void *)&filteredImageBuffer);
3586   imageWidth = (unsigned int) image->columns;
3587   imageHeight = (unsigned int) image->rows;
3588   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3589     &imageWidth);
3590   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3591     &imageHeight);
3592   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3593     (void *)&imageKernelBuffer);
3594   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3595     &width);
3596   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3597     (void *)&offsetBuffer);
3598 
3599   GetMagickPixelPacket(image,&bias);
3600   biasPixel.s[0] = bias.red;
3601   biasPixel.s[1] = bias.green;
3602   biasPixel.s[2] = bias.blue;
3603   biasPixel.s[3] = bias.opacity;
3604   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3605 
3606   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &channel);
3607   matte = (image->matte != MagickFalse)?1:0;
3608   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3609   if (clStatus != CL_SUCCESS)
3610   {
3611     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3612       "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3613     goto cleanup;
3614   }
3615 
3616   /*
3617     Launch the kernel.
3618   */
3619   local_work_size[0] = 16;
3620   local_work_size[1] = 16;
3621   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3622                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3623   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3624                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3625   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3626     global_work_size, local_work_size, 0, NULL, &event);
3627 
3628   if (clStatus != CL_SUCCESS)
3629   {
3630     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3631       "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3632     goto cleanup;
3633   }
3634   if (RecordProfileData(clEnv,MotionBlurKernel,event) == MagickFalse)
3635     {
3636       AddOpenCLEvent(image, event);
3637       AddOpenCLEvent(filteredImage, event);
3638     }
3639   clEnv->library->clReleaseEvent(event);
3640 
3641   outputReady = MagickTrue;
3642 
3643 cleanup:
3644 
3645   if (imageBuffer != (cl_mem) NULL)
3646     clEnv->library->clReleaseMemObject(imageBuffer);
3647   if (filteredImageBuffer != (cl_mem) NULL)
3648     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3649   if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
3650   if (motionBlurKernel!=NULL)  RelinquishOpenCLKernel(clEnv, motionBlurKernel);
3651   if (queue != NULL)           RelinquishOpenCLCommandQueue(clEnv, queue);
3652   if ((outputReady == MagickFalse) && (filteredImage != NULL))
3653     filteredImage=(Image *) DestroyImage(filteredImage);
3654 
3655   return(filteredImage);
3656 }
3657 
AccelerateMotionBlurImage(const Image * image,const ChannelType channel,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3658 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3659   const ChannelType channel,const double* kernel,const size_t width,
3660   const OffsetInfo *offset,ExceptionInfo *exception)
3661 {
3662   Image
3663     *filteredImage;
3664 
3665   assert(image != NULL);
3666   assert(kernel != (double *) NULL);
3667   assert(offset != (OffsetInfo *) NULL);
3668   assert(exception != (ExceptionInfo *) NULL);
3669 
3670   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3671       (checkAccelerateCondition(image, channel) == MagickFalse))
3672     return NULL;
3673 
3674   filteredImage=ComputeMotionBlurImage(image, channel, kernel, width,
3675     offset, exception);
3676   return(filteredImage);
3677 }
3678 
3679 /*
3680 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3681 %                                                                             %
3682 %                                                                             %
3683 %                                                                             %
3684 %     A c c e l e r a t e R a d i a l B l u r I m a g e                       %
3685 %                                                                             %
3686 %                                                                             %
3687 %                                                                             %
3688 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3689 */
3690 
ComputeRadialBlurImage(const Image * image,const ChannelType channel,const double angle,ExceptionInfo * exception)3691 static Image *ComputeRadialBlurImage(const Image *image,
3692   const ChannelType channel,const double angle,ExceptionInfo *exception)
3693 {
3694   cl_command_queue
3695     queue;
3696 
3697   cl_context
3698     context;
3699 
3700   cl_float2
3701     blurCenter;
3702 
3703   cl_float4
3704     biasPixel;
3705 
3706   cl_int
3707     clStatus;
3708 
3709   cl_mem
3710     cosThetaBuffer,
3711     filteredImageBuffer,
3712     imageBuffer,
3713     sinThetaBuffer;
3714 
3715   cl_kernel
3716     radialBlurKernel;
3717 
3718   cl_event
3719     event;
3720 
3721   cl_uint
3722     event_count;
3723 
3724   cl_event
3725     *events;
3726 
3727   float
3728     blurRadius,
3729     *cosThetaPtr,
3730     offset,
3731     *sinThetaPtr,
3732     theta;
3733 
3734   Image
3735     *filteredImage;
3736 
3737   MagickBooleanType
3738     outputReady;
3739 
3740   MagickCLEnv
3741     clEnv;
3742 
3743   MagickPixelPacket
3744     bias;
3745 
3746   size_t
3747     global_work_size[2];
3748 
3749   unsigned int
3750     cossin_theta_size,
3751     i,
3752     matte;
3753 
3754   outputReady = MagickFalse;
3755   context = NULL;
3756   filteredImage = NULL;
3757   imageBuffer = NULL;
3758   filteredImageBuffer = NULL;
3759   sinThetaBuffer = NULL;
3760   cosThetaBuffer = NULL;
3761   queue = NULL;
3762   radialBlurKernel = NULL;
3763 
3764 
3765   clEnv = GetDefaultOpenCLEnv();
3766   context = GetOpenCLContext(clEnv);
3767 
3768   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3769   if (filteredImage == (Image *) NULL)
3770     goto cleanup;
3771 
3772   imageBuffer = GetAuthenticOpenCLBuffer(image, exception);
3773   if (imageBuffer == (cl_mem) NULL)
3774   {
3775     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3776       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3777     goto cleanup;
3778   }
3779   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage, exception);
3780   if (filteredImageBuffer == (cl_mem) NULL)
3781   {
3782     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
3783       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
3784     goto cleanup;
3785   }
3786 
3787   blurCenter.s[0] = (float) (image->columns-1)/2.0;
3788   blurCenter.s[1] = (float) (image->rows-1)/2.0;
3789   blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
3790   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
3791 
3792   /* create a buffer for sin_theta and cos_theta */
3793   sinThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
3794   if (clStatus != CL_SUCCESS)
3795   {
3796     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3797     goto cleanup;
3798   }
3799   cosThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
3800   if (clStatus != CL_SUCCESS)
3801   {
3802     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3803     goto cleanup;
3804   }
3805 
3806   queue = AcquireOpenCLCommandQueue(clEnv);
3807   events=GetOpenCLEvents(image,&event_count);
3808   /* this blocks, should be fixed it in the future */
3809   sinThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), event_count, events, NULL, &clStatus);
3810   events=(cl_event *) RelinquishMagickMemory(events);
3811   if (clStatus != CL_SUCCESS)
3812   {
3813     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
3814     goto cleanup;
3815   }
3816 
3817   cosThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
3818   if (clStatus != CL_SUCCESS)
3819   {
3820     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
3821     goto cleanup;
3822   }
3823 
3824   theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
3825   offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
3826   for (i=0; i < (ssize_t) cossin_theta_size; i++)
3827   {
3828     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
3829     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
3830   }
3831 
3832   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
3833   clStatus |= clEnv->library->clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
3834   if (clStatus != CL_SUCCESS)
3835   {
3836     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
3837     goto cleanup;
3838   }
3839 
3840   /* get the OpenCL kernel */
3841   radialBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RadialBlur");
3842   if (radialBlurKernel == NULL)
3843   {
3844     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3845     goto cleanup;
3846   }
3847 
3848 
3849   /* set the kernel arguments */
3850   i = 0;
3851   clStatus=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3852   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3853 
3854   GetMagickPixelPacket(image,&bias);
3855   biasPixel.s[0] = bias.red;
3856   biasPixel.s[1] = bias.green;
3857   biasPixel.s[2] = bias.blue;
3858   biasPixel.s[3] = bias.opacity;
3859   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3860   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
3861 
3862   matte = (image->matte != MagickFalse)?1:0;
3863   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
3864 
3865   clStatus=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
3866 
3867   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
3868   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
3869   clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
3870   if (clStatus != CL_SUCCESS)
3871   {
3872     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3873     goto cleanup;
3874   }
3875 
3876 
3877   global_work_size[0] = image->columns;
3878   global_work_size[1] = image->rows;
3879   /* launch the kernel */
3880   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3881   if (clStatus != CL_SUCCESS)
3882   {
3883     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3884     goto cleanup;
3885   }
3886   if (RecordProfileData(clEnv,RadialBlurKernel,event) == MagickFalse)
3887     {
3888       AddOpenCLEvent(image,event);
3889       AddOpenCLEvent(filteredImage,event);
3890     }
3891   clEnv->library->clReleaseEvent(event);
3892 
3893   outputReady = MagickTrue;
3894 
3895 cleanup:
3896   OpenCLLogException(__FUNCTION__,__LINE__,exception);
3897 
3898   if (imageBuffer != (cl_mem) NULL)
3899     clEnv->library->clReleaseMemObject(imageBuffer);
3900   if (filteredImageBuffer != (cl_mem) NULL)
3901     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3902   if (sinThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(sinThetaBuffer);
3903   if (cosThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(cosThetaBuffer);
3904   if (radialBlurKernel!=NULL)     RelinquishOpenCLKernel(clEnv, radialBlurKernel);
3905   if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
3906   if ((outputReady == MagickFalse) && (filteredImage != NULL))
3907     filteredImage=(Image *) DestroyImage(filteredImage);
3908   return filteredImage;
3909 }
3910 
AccelerateRadialBlurImage(const Image * image,const ChannelType channel,const double angle,ExceptionInfo * exception)3911 MagickPrivate Image *AccelerateRadialBlurImage(const Image *image,
3912   const ChannelType channel,const double angle,ExceptionInfo *exception)
3913 {
3914   Image
3915     *filteredImage;
3916 
3917   assert(image != NULL);
3918   assert(exception != (ExceptionInfo *) NULL);
3919 
3920   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3921       (checkAccelerateCondition(image, channel) == MagickFalse))
3922     return NULL;
3923 
3924   filteredImage=ComputeRadialBlurImage(image, channel, angle, exception);
3925   return filteredImage;
3926 }
3927 
3928 /*
3929 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3930 %                                                                             %
3931 %                                                                             %
3932 %                                                                             %
3933 %     A c c e l e r a t e R e s i z e I m a g e                               %
3934 %                                                                             %
3935 %                                                                             %
3936 %                                                                             %
3937 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3938 */
3939 
resizeHorizontalFilter(const Image * image,const Image * filteredImage,cl_mem imageBuffer,const unsigned int imageColumns,const unsigned int imageRows,const unsigned int matte,cl_mem resizedImage,const unsigned int resizedColumns,const unsigned int resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,MagickCLEnv clEnv,cl_command_queue queue,ExceptionInfo * exception)3940 static MagickBooleanType resizeHorizontalFilter(const Image *image,
3941   const Image *filteredImage,cl_mem imageBuffer,const unsigned int imageColumns,
3942   const unsigned int imageRows,const unsigned int matte,cl_mem resizedImage,
3943   const unsigned int resizedColumns,const unsigned int resizedRows,
3944   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3945   const float xFactor,MagickCLEnv clEnv,cl_command_queue queue,
3946   ExceptionInfo *exception)
3947 {
3948   cl_kernel
3949     horizontalKernel;
3950 
3951   cl_event
3952     event;
3953 
3954   cl_int
3955     clStatus;
3956 
3957   cl_uint
3958     event_count;
3959 
3960   cl_event
3961     *events;
3962 
3963   const unsigned int
3964     workgroupSize = 256;
3965 
3966   float
3967     resizeFilterScale,
3968     resizeFilterSupport,
3969     resizeFilterWindowSupport,
3970     resizeFilterBlur,
3971     scale,
3972     support;
3973 
3974   int
3975     cacheRangeStart,
3976     cacheRangeEnd,
3977     numCachedPixels,
3978     resizeFilterType,
3979     resizeWindowType;
3980 
3981   MagickBooleanType
3982     status = MagickFalse;
3983 
3984   size_t
3985     deviceLocalMemorySize,
3986     gammaAccumulatorLocalMemorySize,
3987     global_work_size[2],
3988     imageCacheLocalMemorySize,
3989     pixelAccumulatorLocalMemorySize,
3990     local_work_size[2],
3991     totalLocalMemorySize,
3992     weightAccumulatorLocalMemorySize;
3993 
3994   unsigned int
3995     chunkSize,
3996     i,
3997     pixelPerWorkgroup;
3998 
3999   horizontalKernel = NULL;
4000   status = MagickFalse;
4001 
4002   /*
4003   Apply filter to resize vertically from image to resize image.
4004   */
4005   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4006   support=scale*GetResizeFilterSupport(resizeFilter);
4007   if (support < 0.5)
4008   {
4009     /*
4010     Support too small even for nearest neighbour: Reduce to point
4011     sampling.
4012     */
4013     support=(MagickRealType) 0.5;
4014     scale=1.0;
4015   }
4016   scale=PerceptibleReciprocal(scale);
4017 
4018   if (resizedColumns < workgroupSize)
4019   {
4020     chunkSize = 32;
4021     pixelPerWorkgroup = 32;
4022   }
4023   else
4024   {
4025     chunkSize = workgroupSize;
4026     pixelPerWorkgroup = workgroupSize;
4027   }
4028 
4029   /* get the local memory size supported by the device */
4030   deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
4031 
4032 DisableMSCWarning(4127)
4033   while(1)
4034 RestoreMSCWarning
4035   {
4036     /* calculate the local memory size needed per workgroup */
4037     cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4038     cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
4039     numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
4040     imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
4041     totalLocalMemorySize = imageCacheLocalMemorySize;
4042 
4043     /* local size for the pixel accumulator */
4044     pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
4045     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4046 
4047     /* local memory size for the weight accumulator */
4048     weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
4049     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4050 
4051     /* local memory size for the gamma accumulator */
4052     if (matte == 0)
4053       gammaAccumulatorLocalMemorySize = sizeof(float);
4054     else
4055       gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
4056     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4057 
4058     if (totalLocalMemorySize <= deviceLocalMemorySize)
4059       break;
4060     else
4061     {
4062       pixelPerWorkgroup = pixelPerWorkgroup/2;
4063       chunkSize = chunkSize/2;
4064       if (pixelPerWorkgroup == 0
4065           || chunkSize == 0)
4066       {
4067         /* quit, fallback to CPU */
4068         goto cleanup;
4069       }
4070     }
4071   }
4072 
4073   resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
4074   resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
4075 
4076   horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
4077   if (horizontalKernel == NULL)
4078   {
4079     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4080     goto cleanup;
4081   }
4082 
4083   i = 0;
4084   clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&imageBuffer);
4085   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
4086   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
4087   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
4088   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
4089   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
4090 
4091   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
4092   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
4093 
4094   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
4095   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
4096   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
4097 
4098   resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
4099   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
4100 
4101   resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
4102   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
4103 
4104   resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
4105   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
4106 
4107   resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
4108   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
4109 
4110 
4111   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
4112   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
4113   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
4114   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
4115 
4116 
4117   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
4118   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
4119   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
4120 
4121   if (clStatus != CL_SUCCESS)
4122   {
4123     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4124     goto cleanup;
4125   }
4126 
4127   global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
4128   global_work_size[1] = resizedRows;
4129 
4130   local_work_size[0] = workgroupSize;
4131   local_work_size[1] = 1;
4132   events=GetOpenCLEvents(image,&event_count);
4133   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, event_count, events, &event);
4134   events=(cl_event *) RelinquishMagickMemory(events);
4135   if (clStatus != CL_SUCCESS)
4136   {
4137     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4138     goto cleanup;
4139   }
4140   if (RecordProfileData(clEnv,ResizeHorizontalKernel,event) == MagickFalse)
4141     {
4142       AddOpenCLEvent(image,event);
4143       AddOpenCLEvent(filteredImage,event);
4144     }
4145   clEnv->library->clReleaseEvent(event);
4146   status = MagickTrue;
4147 
4148 
4149 cleanup:
4150   OpenCLLogException(__FUNCTION__,__LINE__,exception);
4151 
4152   if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
4153 
4154   return(status);
4155 }
4156 
resizeVerticalFilter(const Image * image,const Image * filteredImage,cl_mem imageBuffer,const unsigned int imageColumns,const unsigned int imageRows,const unsigned int matte,cl_mem resizedImage,const unsigned int resizedColumns,const unsigned int resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,MagickCLEnv clEnv,cl_command_queue queue,ExceptionInfo * exception)4157 static MagickBooleanType resizeVerticalFilter(const Image *image,
4158   const Image *filteredImage,cl_mem imageBuffer,const unsigned int imageColumns,
4159   const unsigned int imageRows,const unsigned int matte,cl_mem resizedImage,
4160   const unsigned int resizedColumns,const unsigned int resizedRows,
4161   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4162   const float yFactor,MagickCLEnv clEnv,cl_command_queue queue,
4163   ExceptionInfo *exception)
4164 {
4165   cl_kernel
4166     horizontalKernel;
4167 
4168   cl_event
4169     event;
4170 
4171   cl_int
4172     clStatus;
4173 
4174   cl_uint
4175     event_count;
4176 
4177   cl_event
4178     *events;
4179 
4180   const unsigned int
4181     workgroupSize = 256;
4182 
4183   float
4184     resizeFilterScale,
4185     resizeFilterSupport,
4186     resizeFilterWindowSupport,
4187     resizeFilterBlur,
4188     scale,
4189     support;
4190 
4191   int
4192     cacheRangeStart,
4193     cacheRangeEnd,
4194     numCachedPixels,
4195     resizeFilterType,
4196     resizeWindowType;
4197 
4198   MagickBooleanType
4199     status = MagickFalse;
4200 
4201   size_t
4202     deviceLocalMemorySize,
4203     gammaAccumulatorLocalMemorySize,
4204     global_work_size[2],
4205     imageCacheLocalMemorySize,
4206     pixelAccumulatorLocalMemorySize,
4207     local_work_size[2],
4208     totalLocalMemorySize,
4209     weightAccumulatorLocalMemorySize;
4210 
4211   unsigned int
4212     chunkSize,
4213     i,
4214     pixelPerWorkgroup;
4215 
4216   horizontalKernel = NULL;
4217   status = MagickFalse;
4218 
4219   /*
4220   Apply filter to resize vertically from image to resize image.
4221   */
4222   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4223   support=scale*GetResizeFilterSupport(resizeFilter);
4224   if (support < 0.5)
4225   {
4226     /*
4227     Support too small even for nearest neighbour: Reduce to point
4228     sampling.
4229     */
4230     support=(MagickRealType) 0.5;
4231     scale=1.0;
4232   }
4233   scale=PerceptibleReciprocal(scale);
4234 
4235   if (resizedRows < workgroupSize)
4236   {
4237     chunkSize = 32;
4238     pixelPerWorkgroup = 32;
4239   }
4240   else
4241   {
4242     chunkSize = workgroupSize;
4243     pixelPerWorkgroup = workgroupSize;
4244   }
4245 
4246   /* get the local memory size supported by the device */
4247   deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
4248 
4249 DisableMSCWarning(4127)
4250   while(1)
4251 RestoreMSCWarning
4252   {
4253     /* calculate the local memory size needed per workgroup */
4254     cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4255     cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
4256     numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
4257     imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
4258     totalLocalMemorySize = imageCacheLocalMemorySize;
4259 
4260     /* local size for the pixel accumulator */
4261     pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
4262     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4263 
4264     /* local memory size for the weight accumulator */
4265     weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
4266     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4267 
4268     /* local memory size for the gamma accumulator */
4269     if (matte == 0)
4270       gammaAccumulatorLocalMemorySize = sizeof(float);
4271     else
4272       gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
4273     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4274 
4275     if (totalLocalMemorySize <= deviceLocalMemorySize)
4276       break;
4277     else
4278     {
4279       pixelPerWorkgroup = pixelPerWorkgroup/2;
4280       chunkSize = chunkSize/2;
4281       if (pixelPerWorkgroup == 0
4282           || chunkSize == 0)
4283       {
4284         /* quit, fallback to CPU */
4285         goto cleanup;
4286       }
4287     }
4288   }
4289 
4290   resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
4291   resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
4292 
4293   horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
4294   if (horizontalKernel == NULL)
4295   {
4296     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4297     goto cleanup;
4298   }
4299 
4300   i = 0;
4301   clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&imageBuffer);
4302   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
4303   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
4304   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
4305   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
4306   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
4307 
4308   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
4309   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
4310 
4311   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
4312   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
4313   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
4314 
4315   resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
4316   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
4317 
4318   resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
4319   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
4320 
4321   resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
4322   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
4323 
4324   resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
4325   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
4326 
4327 
4328   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
4329   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
4330   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
4331   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
4332 
4333 
4334   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
4335   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
4336   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
4337 
4338   if (clStatus != CL_SUCCESS)
4339   {
4340     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4341     goto cleanup;
4342   }
4343 
4344   global_work_size[0] = resizedColumns;
4345   global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
4346 
4347   local_work_size[0] = 1;
4348   local_work_size[1] = workgroupSize;
4349   events=GetOpenCLEvents(image,&event_count);
4350   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, event_count, events, &event);
4351   events=(cl_event *) RelinquishMagickMemory(events);
4352   if (clStatus != CL_SUCCESS)
4353   {
4354     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4355     goto cleanup;
4356   }
4357   if (RecordProfileData(clEnv,ResizeVerticalKernel,event) == MagickFalse)
4358     {
4359       AddOpenCLEvent(image,event);
4360       AddOpenCLEvent(filteredImage,event);
4361     }
4362   clEnv->library->clReleaseEvent(event);
4363   status = MagickTrue;
4364 
4365 
4366 cleanup:
4367   OpenCLLogException(__FUNCTION__,__LINE__,exception);
4368 
4369   if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
4370 
4371   return(status);
4372 }
4373 
ComputeResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4374 static Image *ComputeResizeImage(const Image* image,
4375   const size_t resizedColumns,const size_t resizedRows,
4376   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4377 {
4378   cl_command_queue
4379     queue;
4380 
4381   cl_int
4382     clStatus;
4383 
4384   cl_context
4385     context;
4386 
4387   cl_mem
4388     cubicCoefficientsBuffer,
4389     filteredImageBuffer,
4390     imageBuffer,
4391     tempImageBuffer;
4392 
4393   const MagickRealType
4394     *resizeFilterCoefficient;
4395 
4396   float
4397     coefficientBuffer[7],
4398     xFactor,
4399     yFactor;
4400 
4401   MagickBooleanType
4402     outputReady,
4403     status;
4404 
4405   MagickCLEnv
4406     clEnv;
4407 
4408   MagickSizeType
4409     length;
4410 
4411   Image
4412     *filteredImage;
4413 
4414   size_t
4415     i;
4416 
4417   outputReady = MagickFalse;
4418   filteredImage = NULL;
4419   clEnv = NULL;
4420   context = NULL;
4421   imageBuffer = NULL;
4422   tempImageBuffer = NULL;
4423   filteredImageBuffer = NULL;
4424   cubicCoefficientsBuffer = NULL;
4425   queue = NULL;
4426 
4427   clEnv = GetDefaultOpenCLEnv();
4428   context = GetOpenCLContext(clEnv);
4429   queue = AcquireOpenCLCommandQueue(clEnv);
4430 
4431   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,exception);
4432   if (filteredImage == (Image *) NULL)
4433     goto cleanup;
4434 
4435   imageBuffer=GetAuthenticOpenCLBuffer(image,exception);
4436   if (imageBuffer == (cl_mem) NULL)
4437   {
4438     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
4439       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
4440     goto cleanup;
4441   }
4442   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,exception);
4443   if (filteredImageBuffer == (cl_mem) NULL)
4444   {
4445     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
4446       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
4447     goto cleanup;
4448   }
4449 
4450   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4451   for (i = 0; i < 7; i++)
4452     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4453 
4454   cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(coefficientBuffer), coefficientBuffer, &clStatus);
4455   if (clStatus != CL_SUCCESS)
4456   {
4457     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4458     goto cleanup;
4459   }
4460 
4461   xFactor=(float) resizedColumns/(float) image->columns;
4462   yFactor=(float) resizedRows/(float) image->rows;
4463   if (xFactor > yFactor)
4464   {
4465 
4466     length = resizedColumns*image->rows;
4467     tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
4468     if (clStatus != CL_SUCCESS)
4469     {
4470       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4471       goto cleanup;
4472     }
4473 
4474     status = resizeHorizontalFilter(image,filteredImage,imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, (image->matte != MagickFalse)?1:0
4475           , tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows
4476           , resizeFilter, cubicCoefficientsBuffer
4477           , xFactor, clEnv, queue, exception);
4478     if (status != MagickTrue)
4479       goto cleanup;
4480 
4481     status = resizeVerticalFilter(image,filteredImage,tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows, (image->matte != MagickFalse)?1:0
4482        , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
4483        , resizeFilter, cubicCoefficientsBuffer
4484        , yFactor, clEnv, queue, exception);
4485     if (status != MagickTrue)
4486       goto cleanup;
4487   }
4488   else
4489   {
4490     length = image->columns*resizedRows;
4491     tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
4492     if (clStatus != CL_SUCCESS)
4493     {
4494       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4495       goto cleanup;
4496     }
4497 
4498     status = resizeVerticalFilter(image,filteredImage,imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, (image->matte != MagickFalse)?1:0
4499        , tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows
4500        , resizeFilter, cubicCoefficientsBuffer
4501        , yFactor, clEnv, queue, exception);
4502     if (status != MagickTrue)
4503       goto cleanup;
4504 
4505     status = resizeHorizontalFilter(image,filteredImage,tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows, (image->matte != MagickFalse)?1:0
4506        , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
4507        , resizeFilter, cubicCoefficientsBuffer
4508        , xFactor, clEnv, queue, exception);
4509     if (status != MagickTrue)
4510       goto cleanup;
4511   }
4512   outputReady=MagickTrue;
4513 
4514 cleanup:
4515   OpenCLLogException(__FUNCTION__,__LINE__,exception);
4516 
4517   if (imageBuffer != (cl_mem) NULL)
4518     clEnv->library->clReleaseMemObject(imageBuffer);
4519   if (filteredImageBuffer != (cl_mem) NULL)
4520     clEnv->library->clReleaseMemObject(filteredImageBuffer);
4521   if (tempImageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(tempImageBuffer);
4522   if (cubicCoefficientsBuffer!=NULL)      clEnv->library->clReleaseMemObject(cubicCoefficientsBuffer);
4523   if (queue != NULL)  	                  RelinquishOpenCLCommandQueue(clEnv, queue);
4524   if ((outputReady == MagickFalse) && (filteredImage != NULL))
4525     filteredImage=(Image *) DestroyImage(filteredImage);
4526   return(filteredImage);
4527 }
4528 
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4529 static MagickBooleanType gpuSupportedResizeWeighting(
4530   ResizeWeightingFunctionType f)
4531 {
4532   unsigned int
4533     i;
4534 
4535   for (i = 0; ;i++)
4536   {
4537     if (supportedResizeWeighting[i] == LastWeightingFunction)
4538       break;
4539     if (supportedResizeWeighting[i] == f)
4540       return(MagickTrue);
4541   }
4542   return(MagickFalse);
4543 }
4544 
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4545 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4546   const size_t resizedColumns,const size_t resizedRows,
4547   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4548 {
4549   Image
4550     *filteredImage;
4551 
4552   assert(image != NULL);
4553   assert(exception != (ExceptionInfo *) NULL);
4554 
4555   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4556       (checkAccelerateCondition(image, AllChannels) == MagickFalse))
4557     return NULL;
4558 
4559   if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse ||
4560       gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
4561     return NULL;
4562 
4563   filteredImage=ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
4564   return(filteredImage);
4565 }
4566 
4567 /*
4568 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4569 %                                                                             %
4570 %                                                                             %
4571 %                                                                             %
4572 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4573 %                                                                             %
4574 %                                                                             %
4575 %                                                                             %
4576 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4577 */
4578 
ComputeUnsharpMaskImage(const Image * image,const ChannelType channel,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4579 static Image *ComputeUnsharpMaskImage(const Image *image,
4580   const ChannelType channel,const double radius,const double sigma,
4581   const double gain,const double threshold,ExceptionInfo *exception)
4582 {
4583   char
4584     geometry[MaxTextExtent];
4585 
4586   cl_command_queue
4587     queue;
4588 
4589   cl_context
4590     context;
4591 
4592   cl_event
4593     event;
4594 
4595   cl_int
4596     clStatus;
4597 
4598   cl_kernel
4599     blurRowKernel,
4600     unsharpMaskBlurColumnKernel;
4601 
4602   cl_mem
4603     filteredImageBuffer,
4604     imageBuffer,
4605     imageKernelBuffer,
4606     tempImageBuffer;
4607 
4608   cl_uint
4609     event_count;
4610 
4611   cl_event
4612     *events;
4613 
4614   float
4615     fGain,
4616     fThreshold,
4617     *kernelBufferPtr;
4618 
4619   Image
4620     *filteredImage;
4621 
4622   int
4623     chunkSize;
4624 
4625   KernelInfo
4626     *kernel;
4627 
4628   MagickBooleanType
4629     outputReady;
4630 
4631   MagickCLEnv
4632     clEnv;
4633 
4634   MagickSizeType
4635     length;
4636 
4637   unsigned int
4638     imageColumns,
4639     imageRows,
4640     kernelWidth;
4641 
4642   size_t
4643     i;
4644 
4645   clEnv = NULL;
4646   filteredImage = NULL;
4647   kernel = NULL;
4648   context = NULL;
4649   imageBuffer = NULL;
4650   filteredImageBuffer = NULL;
4651   tempImageBuffer = NULL;
4652   imageKernelBuffer = NULL;
4653   blurRowKernel = NULL;
4654   unsharpMaskBlurColumnKernel = NULL;
4655   queue = NULL;
4656   outputReady = MagickFalse;
4657 
4658   clEnv = GetDefaultOpenCLEnv();
4659   context = GetOpenCLContext(clEnv);
4660   queue = AcquireOpenCLCommandQueue(clEnv);
4661 
4662   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
4663   if (filteredImage == (Image *) NULL)
4664     goto cleanup;
4665 
4666   imageBuffer=GetAuthenticOpenCLBuffer(image,exception);
4667   if (imageBuffer == (cl_mem) NULL)
4668   {
4669     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
4670       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
4671     goto cleanup;
4672   }
4673   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,exception);
4674   if (filteredImageBuffer == (cl_mem) NULL)
4675   {
4676     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
4677       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
4678     goto cleanup;
4679   }
4680 
4681   /* create the blur kernel */
4682   {
4683     (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
4684     kernel=AcquireKernelInfo(geometry);
4685     if (kernel == (KernelInfo *) NULL)
4686     {
4687       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
4688       goto cleanup;
4689     }
4690 
4691     kernelBufferPtr=AcquireQuantumMemory(kernel->width,sizeof(float));
4692     if (kernelBufferPtr == (float *) NULL)
4693     {
4694       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Memory allocation failed.",".");
4695       goto cleanup;
4696     }
4697     for (i = 0; i < kernel->width; i++)
4698       kernelBufferPtr[i]=(float) kernel->values[i];
4699 
4700     imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, kernel->width * sizeof(float), kernelBufferPtr, &clStatus);
4701     kernelBufferPtr=RelinquishMagickMemory(kernelBufferPtr);
4702     if (clStatus != CL_SUCCESS)
4703     {
4704       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4705       goto cleanup;
4706     }
4707   }
4708 
4709   {
4710     /* create temp buffer */
4711     {
4712       length = image->columns * image->rows;
4713       tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
4714       if (clStatus != CL_SUCCESS)
4715       {
4716         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4717         goto cleanup;
4718       }
4719     }
4720 
4721     /* get the opencl kernel */
4722     {
4723       blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
4724       if (blurRowKernel == NULL)
4725       {
4726         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4727         goto cleanup;
4728       };
4729 
4730       unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
4731       if (unsharpMaskBlurColumnKernel == NULL)
4732       {
4733         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4734         goto cleanup;
4735       };
4736     }
4737 
4738     {
4739       chunkSize = 256;
4740 
4741       imageColumns = (unsigned int) image->columns;
4742       imageRows = (unsigned int) image->rows;
4743 
4744       kernelWidth = (unsigned int) kernel->width;
4745 
4746       /* set the kernel arguments */
4747       i = 0;
4748       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4749       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4750       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
4751       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4752       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
4753       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
4754       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
4755       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
4756       if (clStatus != CL_SUCCESS)
4757       {
4758         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4759         goto cleanup;
4760       }
4761     }
4762 
4763     /* launch the kernel */
4764     {
4765       size_t gsize[2];
4766       size_t wsize[2];
4767 
4768       gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
4769       gsize[1] = image->rows;
4770       wsize[0] = chunkSize;
4771       wsize[1] = 1;
4772 
4773       events=GetOpenCLEvents(image,&event_count);
4774       clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, event_count, events, NULL);
4775       events=(cl_event *) RelinquishMagickMemory(events);
4776       if (clStatus != CL_SUCCESS)
4777       {
4778         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4779         goto cleanup;
4780       }
4781     }
4782 
4783 
4784     {
4785       chunkSize = 256;
4786       imageColumns = (unsigned int) image->columns;
4787       imageRows = (unsigned int) image->rows;
4788       kernelWidth = (unsigned int) kernel->width;
4789       fGain = (float) gain;
4790       fThreshold = (float) threshold;
4791 
4792       i = 0;
4793       clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4794       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4795       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4796       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
4797       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
4798       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4799       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
4800       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
4801       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4802       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
4803       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4804       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4805 
4806       if (clStatus != CL_SUCCESS)
4807       {
4808         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4809         goto cleanup;
4810       }
4811     }
4812 
4813     /* launch the kernel */
4814     {
4815       size_t gsize[2];
4816       size_t wsize[2];
4817 
4818       gsize[0] = image->columns;
4819       gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
4820       wsize[0] = 1;
4821       wsize[1] = chunkSize;
4822 
4823       clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, event_count, events, &event);
4824       if (clStatus != CL_SUCCESS)
4825       {
4826         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4827         goto cleanup;
4828       }
4829       if (RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event) == MagickFalse)
4830         {
4831           AddOpenCLEvent(image,event);
4832           AddOpenCLEvent(filteredImage,event);
4833         }
4834       clEnv->library->clReleaseEvent(event);
4835     }
4836 
4837   }
4838 
4839   outputReady=MagickTrue;
4840 
4841 cleanup:
4842   OpenCLLogException(__FUNCTION__,__LINE__,exception);
4843 
4844   if (imageBuffer != (cl_mem) NULL)
4845     clEnv->library->clReleaseMemObject(imageBuffer);
4846   if (filteredImageBuffer != (cl_mem) NULL)
4847     clEnv->library->clReleaseMemObject(filteredImageBuffer);
4848   if (kernel != NULL)                         kernel=DestroyKernelInfo(kernel);
4849   if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
4850   if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
4851   if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
4852   if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
4853   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
4854   if ((outputReady == MagickFalse) && (filteredImage != NULL))
4855     filteredImage=(Image *) DestroyImage(filteredImage);
4856   return(filteredImage);
4857 }
4858 
ComputeUnsharpMaskImageSingle(const Image * image,const double radius,const double sigma,const double gain,const double threshold,int blurOnly,ExceptionInfo * exception)4859 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4860   const double radius,const double sigma,const double gain,
4861   const double threshold,int blurOnly, ExceptionInfo *exception)
4862 {
4863   char
4864     geometry[MaxTextExtent];
4865 
4866   cl_command_queue
4867     queue;
4868 
4869   cl_context
4870     context;
4871 
4872   cl_int
4873     justBlur,
4874     clStatus;
4875 
4876   cl_kernel
4877     unsharpMaskKernel;
4878 
4879   cl_event
4880     event;
4881 
4882   cl_mem
4883     filteredImageBuffer,
4884     imageBuffer,
4885     imageKernelBuffer;
4886 
4887   cl_event
4888     *events;
4889 
4890   float
4891     fGain,
4892     fThreshold;
4893 
4894   Image
4895     *filteredImage;
4896 
4897   KernelInfo
4898     *kernel;
4899 
4900   MagickBooleanType
4901     outputReady;
4902 
4903   MagickCLEnv
4904     clEnv;
4905 
4906   unsigned int
4907     event_count,
4908     i,
4909     imageColumns,
4910     imageRows,
4911     kernelWidth;
4912 
4913   clEnv = NULL;
4914   filteredImage = NULL;
4915   kernel = NULL;
4916   context = NULL;
4917   imageBuffer = NULL;
4918   filteredImageBuffer = NULL;
4919   imageKernelBuffer = NULL;
4920   unsharpMaskKernel = NULL;
4921   queue = NULL;
4922   outputReady = MagickFalse;
4923 
4924   clEnv = GetDefaultOpenCLEnv();
4925   context = GetOpenCLContext(clEnv);
4926   queue = AcquireOpenCLCommandQueue(clEnv);
4927 
4928   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
4929   if (filteredImage == (Image *) NULL)
4930     goto cleanup;
4931 
4932   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
4933   if (imageBuffer == (cl_mem) NULL)
4934   {
4935     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
4936       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
4937     goto cleanup;
4938   }
4939   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage,exception);
4940   if (filteredImageBuffer == (cl_mem) NULL)
4941   {
4942     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
4943       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
4944     goto cleanup;
4945   }
4946 
4947   /* create the blur kernel */
4948   {
4949     (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
4950     kernel=AcquireKernelInfo(geometry);
4951     if (kernel == (KernelInfo *) NULL)
4952     {
4953       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
4954       goto cleanup;
4955     }
4956 
4957 	{
4958 		float *kernelBufferPtr = (float *) AcquireQuantumMemory(kernel->width, sizeof(float));
4959 		for (i = 0; i < kernel->width; i++)
4960 			kernelBufferPtr[i] = (float)kernel->values[i];
4961 
4962 		imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, kernel->width * sizeof(float), kernelBufferPtr, &clStatus);
4963 		RelinquishMagickMemory(kernelBufferPtr);
4964 		if (clStatus != CL_SUCCESS)
4965 		{
4966 		  (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4967 		  goto cleanup;
4968 		}
4969 	}
4970   }
4971 
4972   {
4973     /* get the opencl kernel */
4974     {
4975       unsharpMaskKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMask");
4976       if (unsharpMaskKernel == NULL)
4977       {
4978         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4979         goto cleanup;
4980       };
4981     }
4982 
4983     {
4984       imageColumns = (unsigned int) image->columns;
4985       imageRows = (unsigned int) image->rows;
4986       kernelWidth = (unsigned int) kernel->width;
4987       fGain = (float) gain;
4988       fThreshold = (float) threshold;
4989 	  justBlur = blurOnly;
4990 
4991       /* set the kernel arguments */
4992       i = 0;
4993       clStatus=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4994       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4995       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4996       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
4997       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
4998       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageRows);
4999       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernel->width)),(void *) NULL);
5000       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5001       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5002       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&justBlur);
5003       if (clStatus != CL_SUCCESS)
5004       {
5005         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5006         goto cleanup;
5007       }
5008     }
5009 
5010     /* launch the kernel */
5011     {
5012       size_t gsize[2];
5013       size_t wsize[2];
5014 
5015       gsize[0] = ((image->columns + 7) / 8) * 8;
5016       gsize[1] = ((image->rows + 31) / 32) * 32;
5017       wsize[0] = 8;
5018       wsize[1] = 32;
5019 
5020       events=GetOpenCLEvents(image,&event_count);
5021       clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskKernel, 2, NULL, gsize, wsize, event_count, events, &event);
5022       events=(cl_event *) RelinquishMagickMemory(events);
5023       if (clStatus != CL_SUCCESS)
5024       {
5025         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
5026         goto cleanup;
5027       }
5028       if (RecordProfileData(clEnv,UnsharpMaskKernel,event) == MagickFalse)
5029         {
5030           AddOpenCLEvent(image,event);
5031           AddOpenCLEvent(filteredImage, event);
5032         }
5033       clEnv->library->clReleaseEvent(event);
5034     }
5035   }
5036 
5037   outputReady=MagickTrue;
5038 
5039 cleanup:
5040   OpenCLLogException(__FUNCTION__,__LINE__,exception);
5041 
5042   if (imageBuffer != (cl_mem) NULL)
5043     clEnv->library->clReleaseMemObject(imageBuffer);
5044   if (filteredImageBuffer != (cl_mem) NULL)
5045     clEnv->library->clReleaseMemObject(filteredImageBuffer);
5046   if (kernel != NULL)                         kernel=DestroyKernelInfo(kernel);
5047   if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
5048   if (unsharpMaskKernel!=NULL)                RelinquishOpenCLKernel(clEnv, unsharpMaskKernel);
5049   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
5050   if ((outputReady == MagickFalse) && (filteredImage != NULL))
5051     filteredImage=(Image *) DestroyImage(filteredImage);
5052   return(filteredImage);
5053 }
5054 
AccelerateUnsharpMaskImage(const Image * image,const ChannelType channel,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5055 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5056   const ChannelType channel,const double radius,const double sigma,
5057   const double gain,const double threshold,ExceptionInfo *exception)
5058 {
5059   Image
5060     *filteredImage;
5061 
5062   assert(image != NULL);
5063   assert(exception != (ExceptionInfo *) NULL);
5064 
5065   if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
5066       (checkAccelerateCondition(image, channel) == MagickFalse))
5067     return NULL;
5068 
5069   if (radius < 12.1)
5070     filteredImage = ComputeUnsharpMaskImageSingle(image,radius,sigma,gain,threshold, 0, exception);
5071   else
5072     filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
5073 
5074   return(filteredImage);
5075 }
5076 
ComputeWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5077 static Image *ComputeWaveletDenoiseImage(const Image *image,
5078   const double threshold,ExceptionInfo *exception)
5079 {
5080   cl_command_queue
5081     queue;
5082 
5083   cl_context
5084     context;
5085 
5086   cl_int
5087     clStatus;
5088 
5089   cl_kernel
5090     denoiseKernel;
5091 
5092   cl_event
5093     event;
5094 
5095   cl_mem
5096     filteredImageBuffer,
5097     imageBuffer;
5098 
5099   cl_event
5100     *events;
5101 
5102   Image
5103     *filteredImage;
5104 
5105   MagickBooleanType
5106     outputReady;
5107 
5108   MagickCLEnv
5109     clEnv;
5110 
5111   unsigned int
5112     event_count,
5113     i,
5114     passes;
5115 
5116   clEnv = NULL;
5117   filteredImage = NULL;
5118   context = NULL;
5119   imageBuffer = NULL;
5120   filteredImageBuffer = NULL;
5121   denoiseKernel = NULL;
5122   queue = NULL;
5123   outputReady = MagickFalse;
5124 
5125   clEnv = GetDefaultOpenCLEnv();
5126 
5127   /* Work around an issue on low end Intel devices */
5128   if (paramMatchesValue(clEnv,MAGICK_OPENCL_ENV_PARAM_DEVICE_NAME,
5129       "Intel(R) HD Graphics",exception) != MagickFalse)
5130     goto cleanup;
5131 
5132   context = GetOpenCLContext(clEnv);
5133   queue = AcquireOpenCLCommandQueue(clEnv);
5134 
5135   filteredImage = CloneImage(image,0,0,MagickTrue, exception);
5136   if (filteredImage == (Image *) NULL)
5137     goto cleanup;
5138 
5139   imageBuffer = GetAuthenticOpenCLBuffer(image,exception);
5140   if (imageBuffer == (cl_mem) NULL)
5141   {
5142     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
5143       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
5144     goto cleanup;
5145   }
5146   filteredImageBuffer = GetAuthenticOpenCLBuffer(filteredImage,exception);
5147   if (filteredImageBuffer == (cl_mem) NULL)
5148   {
5149     (void) OpenCLThrowMagickException(exception,GetMagickModule(),
5150       ResourceLimitWarning,"GetAuthenticOpenCLBuffer failed.",".");
5151     goto cleanup;
5152   }
5153 
5154   /* get the opencl kernel */
5155   denoiseKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "WaveletDenoise");
5156   if (denoiseKernel == NULL)
5157   {
5158     (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
5159     goto cleanup;
5160   };
5161 
5162   /*
5163     Process image.
5164   */
5165   {
5166     int x;
5167     const int PASSES = 5;
5168     cl_int width = (cl_int)image->columns;
5169     cl_int height = (cl_int)image->rows;
5170     cl_float thresh = threshold;
5171 
5172     passes = (((1.0f * image->columns) * image->rows) + 1999999.0f) / 2000000.0f;
5173     passes = (passes < 1) ? 1 : passes;
5174 
5175     /* set the kernel arguments */
5176     i = 0;
5177     clStatus |= clEnv->library->clSetKernelArg(denoiseKernel, i++, sizeof(cl_mem), (void *)&imageBuffer);
5178     clStatus |= clEnv->library->clSetKernelArg(denoiseKernel, i++, sizeof(cl_mem), (void *)&filteredImageBuffer);
5179     clStatus |= clEnv->library->clSetKernelArg(denoiseKernel, i++, sizeof(cl_float), (void *)&thresh);
5180     clStatus |= clEnv->library->clSetKernelArg(denoiseKernel, i++, sizeof(cl_int), (void *)&PASSES);
5181     clStatus |= clEnv->library->clSetKernelArg(denoiseKernel, i++, sizeof(cl_int), (void *)&width);
5182     clStatus |= clEnv->library->clSetKernelArg(denoiseKernel, i++, sizeof(cl_int), (void *)&height);
5183 
5184     for (x = 0; x < passes; ++x)
5185     {
5186       const int TILESIZE = 64;
5187       const int PAD = 1 << (PASSES - 1);
5188       const int SIZE = TILESIZE - 2 * PAD;
5189 
5190       size_t gsize[2];
5191       size_t wsize[2];
5192       size_t goffset[2];
5193 
5194       gsize[0] = ((width + (SIZE - 1)) / SIZE) * TILESIZE;
5195       gsize[1] = ((((height + (SIZE - 1)) / SIZE) + passes - 1) / passes) * 4;
5196       wsize[0] = TILESIZE;
5197       wsize[1] = 4;
5198       goffset[0] = 0;
5199       goffset[1] = x * gsize[1];
5200 
5201       events=GetOpenCLEvents(image,&event_count);
5202       clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, denoiseKernel, 2, goffset, gsize, wsize, event_count, events, &event);
5203       events=(cl_event *) RelinquishMagickMemory(events);
5204       if (clStatus != CL_SUCCESS)
5205       {
5206         (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
5207         goto cleanup;
5208       }
5209       clEnv->library->clFlush(queue);
5210       if (RecordProfileData(clEnv, WaveletDenoiseKernel, event) == MagickFalse)
5211         {
5212           AddOpenCLEvent(image, event);
5213           AddOpenCLEvent(filteredImage, event);
5214         }
5215       clEnv->library->clReleaseEvent(event);
5216     }
5217   }
5218 
5219   outputReady=MagickTrue;
5220 
5221 cleanup:
5222   OpenCLLogException(__FUNCTION__, __LINE__, exception);
5223 
5224   if (imageBuffer != (cl_mem) NULL)
5225     clEnv->library->clReleaseMemObject(imageBuffer);
5226   if (filteredImageBuffer != (cl_mem) NULL)
5227     clEnv->library->clReleaseMemObject(filteredImageBuffer);
5228   if (denoiseKernel != NULL)
5229     RelinquishOpenCLKernel(clEnv, denoiseKernel);
5230   if (queue != NULL)
5231     RelinquishOpenCLCommandQueue(clEnv, queue);
5232   if ((outputReady == MagickFalse) && (filteredImage != NULL))
5233     filteredImage=(Image *) DestroyImage(filteredImage);
5234   return(filteredImage);
5235 }
5236 
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5237 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5238   const double threshold,ExceptionInfo *exception)
5239 {
5240   Image
5241   *filteredImage;
5242 
5243   assert(image != NULL);
5244   assert(exception != (ExceptionInfo *)NULL);
5245 
5246   if ((checkAccelerateCondition(image,DefaultChannels) == MagickFalse) ||
5247       (checkOpenCLEnvironment(exception) == MagickFalse))
5248     return (Image *) NULL;
5249 
5250   filteredImage=ComputeWaveletDenoiseImage(image,threshold,exception);
5251 
5252   return(filteredImage);
5253 }
5254 
5255 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5256