1 //
2 // ConvExecution.cpp
3 // MNN
4 //
5 // Created by MNN on 2019/02/28.
6 // Copyright © 2018, Alibaba Group Holding Limited
7 //
8
9 #include "ConvExecution.hpp"
10 #include "ConvWinograd.hpp"
11 #include "core/ConvolutionCommon.hpp"
12 #include "core/Macro.h"
13 #include "core/TensorUtils.hpp"
14 #include "backend/opencl/core/OpenCLBackend.hpp"
15 #include "backend/opencl/core/OpenCLRunningUtils.hpp"
16
17 #define UNIT 4
18 namespace MNN {
19 namespace OpenCL {
20
// Uploads the convolution bias to the GPU: copies the host bias into a staging
// cl::Buffer padded up to a multiple of 4 channels, then converts that buffer
// into the OpenCL image backing mBias.
ConvCommonExecution::ConvCommonExecution(const Convolution2D *conv2dParams, Backend *backend) : Execution(backend) {
    auto openclBackend = (OpenCLBackend *)backend;
    int biasSize = conv2dParams->bias()->size();
    const float *biasDataPtr = conv2dParams->bias()->data();

    // Staging buffer holds ALIGN_UP4(biasSize) elements; the element width
    // depends on whether the runtime converts weights to half on the CPU.
    int buffer_size = ALIGN_UP4(biasSize);
    if(openclBackend->getOpenCLRuntime()->isWeightCpuTransHalf()) {
        buffer_size *= sizeof(half_float::half);
    } else {
        buffer_size *= sizeof(float);
    }
    cl::Buffer biasBuffer(openclBackend->getOpenCLRuntime()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size);
    cl_int error;
    // Blocking map so the host can fill the staging buffer directly.
    auto biasPtrCL = openclBackend->getOpenCLRuntime()->commandQueue().enqueueMapBuffer(
        biasBuffer, true, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &error);
    if(biasPtrCL != nullptr && error == CL_SUCCESS){
        if(openclBackend->getOpenCLRuntime()->isWeightCpuTransHalf()){
            // Convert each float bias value to half, then zero-fill the
            // channel padding up to the next multiple of 4.
            for(int i=0; i<biasSize; i++) {
                ((half_float::half*)biasPtrCL)[i] = (half_float::half)(biasDataPtr[i]);
            }
            for(int i=biasSize; i<ALIGN_UP4(biasSize); i++) {
                ((half_float::half*)biasPtrCL)[i] = (half_float::half)(0.0f);
            }
        }else{
            // Zero the whole padded region first, then copy the real values.
            ::memset(biasPtrCL, 0, ALIGN_UP4(biasSize) * sizeof(float));
            ::memcpy(biasPtrCL, biasDataPtr, biasSize * sizeof(float));
        }
    }else{
        MNN_ERROR("Map error biasPtrCL == nullptr \n");
    }
    // NOTE(review): the unmap is issued even when the map above failed
    // (biasPtrCL may be nullptr here) — confirm this is benign per the CL spec.
    openclBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(biasBuffer, biasPtrCL);
    mBias.reset(Tensor::createDevice<float>({1, 1, 1, biasSize}));
    backend->onAcquireBuffer(mBias.get(), Backend::STATIC);
    copyBufferToImage(openclBackend->getOpenCLRuntime(), biasBuffer, openCLImage(mBias.get()), UP_DIV(biasSize, 4), 1);
}
~ConvCommonExecution()56 ConvCommonExecution::~ConvCommonExecution() {
57 MNN_ASSERT(nullptr != mBias);
58 backend()->onReleaseBuffer(mBias.get(), Backend::STATIC);
59 }
60
// Builds the OpenCL convolution execution: decodes (possibly quantized)
// weights on the host, uploads them either as a raw cl::Buffer (Mali 1x1 fast
// path) or as a filter image, uploads the bias for the buffer path, and
// compiles the kernel variant selected from the convolution shape.
ConvExecution::ConvExecution(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs, const MNN::Op *op, Backend *backend)
    : ConvCommonExecution(op->main_as_Convolution2D(), backend) {
#ifdef LOG_VERBOSE
    MNN_PRINT("Start ConvExecution init !\n");
#endif
    mOpenCLBackend = static_cast<OpenCLBackend *>(backend);
    const auto *conv2dParams = op->main_as_Convolution2D();
    const auto *conv2dCommonParams = conv2dParams->common();
    mConv2dCommonParams = conv2dCommonParams;
    // Stored as {Y, X} pairs throughout this class.
    mStrides = {conv2dCommonParams->strideY(), conv2dCommonParams->strideX()};
    mDilations = {conv2dCommonParams->dilateY(), conv2dCommonParams->dilateX()};

    // NOTE(review): pad.first/pad.second are deliberately swapped here —
    // presumably mPaddings is {padY, padX}; confirm against convolutionPad().
    auto pad = ConvolutionCommon::convolutionPad(inputs[0], outputs[0], mConv2dCommonParams);
    mPaddings[0] = pad.second;
    mPaddings[1] = pad.first;

    int kernelWidth = conv2dCommonParams->kernelX();
    int kernelHeight = conv2dCommonParams->kernelY();
    int outputChannel = conv2dCommonParams->outputCount();

    int weightSize = 0;
    const float *filterDataPtr = nullptr;

    // Quantized models: decode the weights back to float on the host.
    std::shared_ptr<MNN::ConvolutionCommon::Int8Common> quanCommon;
    if (nullptr != conv2dParams->quanParameter()) {
        quanCommon = ConvolutionCommon::load(conv2dParams->quanParameter(), true);
        if (nullptr == quanCommon) {
            // NOTE(review): MNN_ERROR does not return — a null quanCommon is
            // dereferenced just below; confirm load() failure is unreachable here.
            MNN_ERROR("Memory not Enough, can't extract IDST Convolution: %s \n", op->name()->c_str());
        }
        if (quanCommon->weightFloat.get() == nullptr) {
            MNN_PRINT("quanCommon->weightFloat.get() == nullptr \n");
        }
        // Back to float
        filterDataPtr = quanCommon->weightFloat.get();
        weightSize = quanCommon->weightFloat.size();
    } else if (nullptr == conv2dParams->weight() || nullptr == conv2dParams->bias()) {
        MNN_ERROR("%s has no weight or bias. The model may be benchmark model, please revert the weight/bias firstly\n", op->name()->c_str());
    }

    // Non-quantized (or decode produced nothing): use the raw float weights.
    if (nullptr == filterDataPtr) {
        weightSize = conv2dParams->weight()->size();
        filterDataPtr = conv2dParams->weight()->data();
    }
    // Weights are laid out [outputChannel, inputChannel, kH, kW].
    int inputChannel = weightSize / (kernelWidth * kernelHeight * outputChannel);

    auto gpuType = mOpenCLBackend->getOpenCLRuntime()->getGpuType();

    //select opt conv method
    std::string kernelName = "conv_2d";
    if (kernelHeight == kernelWidth && kernelHeight == 1 && mPaddings[0] == 0 &&
        mPaddings[1] == 0) {
        // 1x1, no padding: buffer-based fast path is only enabled for Mali
        // GPUs with unit stride.
        mConv1x1Opt = (mStrides[0] == 1 && mStrides[1] == 1 && gpuType == GpuType::MALI);
#if 0
        // Disabled Adreno local-memory variant; mUseLocalMem stays false.
        if((gpuType == GpuType::ADRENO)){
            uint64_t useLocalSize = UNIT*UNIT*4*sizeof(float)*4;
            if(useLocalSize >= mOpenCLBackend->getOpenCLRuntime()->getMaxLocalMem()){
                mUseLocalMem = false;
            }else{
                kernelName = "conv_2d_1x1_local";
                mUseLocalMem=true;
            }
        }
#endif
        if(!mUseLocalMem){
            if(mConv1x1Opt){
                kernelName = "conv_2d_1x1_mali";
            }else{
                kernelName = "conv_2d_1x1";
            }
        }
    }

    if(mConv1x1Opt && !mUseLocalMem){
        // Mali 1x1 path: weights and bias live in plain cl::Buffers instead
        // of images.
        cl_int error;
        std::shared_ptr<Tensor> filterBuffer(Tensor::createDevice<float>({UP_DIV(outputChannel, 4)*4, UP_DIV(inputChannel, 4)*4, kernelWidth, kernelHeight}));

        int buffer_size = filterBuffer->elementSize();
        if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()) {
            buffer_size *= sizeof(half_float::half);
        } else {
            buffer_size *= sizeof(float);
        }

        mKernelBuffer.reset(new cl::Buffer(mOpenCLBackend->getOpenCLRuntime()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size));
        auto kernelBufferPtr = mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueMapBuffer(*(mKernelBuffer.get()), true, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &error);
        if(kernelBufferPtr != nullptr && error == CL_SUCCESS){
            ::memset(kernelBufferPtr, 0, buffer_size);
            // Repack [o][i] weights into 4x4 channel tiles:
            // tile-of-4 output channels x tile-of-4 input channels, with the
            // input-channel dimension rounded up to a multiple of 4.
            for(int o = 0; o < outputChannel; o++){
                for(int i = 0 ; i < inputChannel; i++){
                    int bufferIdx = (o/4) * ROUND_UP(inputChannel, 4)*4 + (i/4)*16 + (o%4)*4 + (i%4);
                    int filterIdx = o*inputChannel + i;
                    if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()){
                        ((half_float::half*)kernelBufferPtr)[bufferIdx] = (half_float::half)(filterDataPtr[filterIdx]);
                    }else{
                        ((float*)kernelBufferPtr)[bufferIdx] = (float)(filterDataPtr[filterIdx]);
                    }
                }
            }
        }else{
            MNN_ERROR("Map error ptrCL == nullptr \n");
        }
        mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(*(mKernelBuffer.get()), kernelBufferPtr);

        //bias
        // The buffer-path kernel reads bias from mBiasBuffer (the base class
        // already uploaded an image copy into mBias for the image kernels).
        int biasSize = conv2dParams->bias()->size();
        const float *biasDataPtr = conv2dParams->bias()->data();

        buffer_size = ALIGN_UP4(biasSize);
        if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()) {
            buffer_size *= sizeof(half_float::half);
        } else {
            buffer_size *= sizeof(float);
        }

        mBiasBuffer.reset(new cl::Buffer(mOpenCLBackend->getOpenCLRuntime()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size));
        auto biasPtrCL = mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueMapBuffer(
            *(mBiasBuffer.get()), true, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &error);
        if(biasPtrCL != nullptr && error == CL_SUCCESS){
            if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()){
                // Convert to half and zero-fill the channel padding.
                for (int i = 0; i < biasSize; i++)
                {
                    ((half_float::half*)biasPtrCL)[i] = (half_float::half)(biasDataPtr[i]);
                }
                for(int i=biasSize; i<ALIGN_UP4(biasSize); i++) {
                    ((half_float::half*)biasPtrCL)[i] = (half_float::half)(0.0f);
                }
            }else{
                ::memset(biasPtrCL, 0, ALIGN_UP4(biasSize) * sizeof(float));
                ::memcpy(biasPtrCL, biasDataPtr, biasSize * sizeof(float));
            }
        }else{
            MNN_ERROR("Map error biasPtrCL == nullptr \n");
        }
        mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(*(mBiasBuffer.get()), biasPtrCL);

    }else{
        // General path: stage the raw weights in a buffer, then convert them
        // to the CONV2D_FILTER image layout expected by the image kernels.
        std::vector<int> filterImageShape{(int)inputChannel, (int)(UP_DIV(outputChannel, 4) * kernelWidth * kernelHeight)};
        std::shared_ptr<Tensor> filterBuffer(
            Tensor::createDevice<float>({outputChannel, inputChannel, kernelWidth, kernelHeight}));

        int buffer_size = filterBuffer->elementSize();
        if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf()) {
            buffer_size *= sizeof(half_float::half);
        } else {
            buffer_size *= sizeof(float);
        }
        cl::Buffer filterBufferCL(mOpenCLBackend->getOpenCLRuntime()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size);
        // The converter below reads the staging buffer through the tensor's
        // device handle.
        filterBuffer->buffer().device = (uint64_t)(&filterBufferCL);

        cl_int error;
        auto ptrCL = mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueMapBuffer(filterBufferCL, true, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &error);
        if(ptrCL != nullptr && error == CL_SUCCESS) {
            ::memset(ptrCL, 0, buffer_size);
            if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf()){
                for(int i = 0 ; i < filterBuffer->elementSize(); i++){
                    ((half_float::half*)ptrCL)[i] = (half_float::half)(filterDataPtr[i]);
                }
            }else{
                ::memcpy(ptrCL, filterDataPtr, filterBuffer->size());
            }
        }else{
            MNN_ERROR("Map error ptrCL == nullptr \n");
        }
        mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(filterBufferCL, ptrCL);

        mFilter.reset(Tensor::createDevice<float>({1, filterImageShape[1], 1, 4 * filterImageShape[0]}));
        mOpenCLBackend->onAcquireBuffer(mFilter.get(), Backend::STATIC);
        MNN::OpenCL::ImageBufferConvertor imageBufferConvertor{mOpenCLBackend->getOpenCLRuntime()};

        // Tell the converter kernel whether the staging buffer holds fp32.
        std::string buildOption = "";
        if(mOpenCLBackend->getOpenCLRuntime()->isWeightCpuTransHalf() == false){
            buildOption = "-DBUFFER_INP_FP32";
        }
        imageBufferConvertor.convertBufferToImage(filterBuffer.get(), MNN::OpenCL::CONV2D_FILTER, mFilter.get(), false, buildOption);
    }

    // Create Kernel
    // Fused ops are selected at compile time via build options.
    std::set<std::string> buildOptions;
    buildOptions.emplace("-DBIAS");
    if (mConv2dCommonParams->relu()) {
        buildOptions.emplace("-DRELU");
    } else if (mConv2dCommonParams->relu6()) {
        buildOptions.emplace("-DRELU6");
    }


    mKernel = mOpenCLBackend->getOpenCLRuntime()->buildKernel("conv_2d", kernelName, buildOptions);
    mMaxWorkGroupSize = static_cast<uint32_t>(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mKernel));

#ifdef LOG_VERBOSE
    MNN_PRINT("end ConvExecution init !\n");
#endif
}
254
~ConvExecution()255 ConvExecution::~ConvExecution() {
256 if(mUseLocalMem || !mConv1x1Opt){
257 mOpenCLBackend->onReleaseBuffer(mFilter.get(), Backend::STATIC);
258 }
259 }
260
// Recomputes padding, work sizes and kernel arguments for the current input /
// output shapes. The setArg order in each branch is the contract with the
// corresponding .cl kernel and must not be reordered.
ErrorCode ConvExecution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
#ifdef LOG_VERBOSE
    MNN_PRINT("Start ConvExecution onResize !\n");
#endif
    auto input = inputs[0];
    auto output = outputs[0];

    // tensorShapeFormat yields {N, H, W, C}.
    std::vector<int> inputShape = tensorShapeFormat(input);
    std::vector<int> outputShape = tensorShapeFormat(output);
    const int height = outputShape.at(1);
    const int width = outputShape.at(2);

    const int inputHeight = inputShape.at(1);
    const int inputWidth = inputShape.at(2);
    const int inputChannels = inputShape.at(3);

    const int inputChannelBlocks = UP_DIV(inputChannels, 4);
    int kernelHeight = mConv2dCommonParams->kernelY();
    int kernelWidth = mConv2dCommonParams->kernelX();

    // Padding can depend on the runtime shapes (e.g. SAME padding), so it is
    // recomputed on every resize. Same pair swap as in the constructor.
    auto pad = ConvolutionCommon::convolutionPad(input, output, mConv2dCommonParams);
    mPaddings[0] = pad.second;
    mPaddings[1] = pad.first;

    if (kernelHeight == kernelWidth && kernelHeight == 1 && mPaddings[0] == 0 && mPaddings[1] == 0) {
        if(mConv1x1Opt){

            auto kernel = &mKernel;
            uint32_t idx = 0;

            if(mUseLocalMem){
                // Local-memory 1x1 variant: 3D dispatch with a fixed
                // UNIT x UNIT x 1 workgroup.
                mGlobalWorkSize = {static_cast<uint32_t>(UP_DIV(outputShape.at(3), 4)), static_cast<uint32_t>(UP_DIV(outputShape.at(2), 4)),
                                   static_cast<uint32_t>(outputShape.at(0) * outputShape.at(1))};
                std::vector<uint32_t> lws{UNIT, UNIT, 1};
                mLocalWorkSize = lws;
                kernel->setArg(idx++, mGlobalWorkSize[0]);
                kernel->setArg(idx++, mGlobalWorkSize[1]);
                kernel->setArg(idx++, mGlobalWorkSize[2]);
                kernel->setArg(idx++, openCLImage(input));
                kernel->setArg(idx++, openCLImage(mFilter.get()));
                kernel->setArg(idx++, openCLImage(mBias.get()));
                kernel->setArg(idx++, openCLImage(output));
                kernel->setArg(idx++, static_cast<int>(inputChannelBlocks));
                kernel->setArg(idx++, height);
                kernel->setArg(idx++, width);
            }else{
                // Mali 1x1 buffer variant: 2D dispatch; weights and bias are
                // passed as raw buffers rather than images.
                mGlobalWorkSize = {static_cast<uint32_t>(UP_DIV(outputShape.at(3), 4) * UP_DIV(outputShape.at(2), 4)),
                                   static_cast<uint32_t>(outputShape.at(0) * outputShape.at(1))};
                kernel->setArg(idx++, mGlobalWorkSize[0]);
                kernel->setArg(idx++, mGlobalWorkSize[1]);
                kernel->setArg(idx++, UP_DIV(width, 4));
                kernel->setArg(idx++, openCLImage(input));
                kernel->setArg(idx++, *mKernelBuffer.get());
                kernel->setArg(idx++, *mBiasBuffer.get());
                kernel->setArg(idx++, openCLImage(output));
                kernel->setArg(idx++, static_cast<int>(inputChannelBlocks));
                kernel->setArg(idx++, height);
                kernel->setArg(idx++, width);

                std::string kernelName = "conv_2d_1x1_mali";
                mLocalWorkSize = localWS2DDefault(mGlobalWorkSize, mMaxWorkGroupSize, mOpenCLBackend->getOpenCLRuntime(), kernelName, mKernel).first;
            }


        }else{
            // Generic 1x1 image variant.
            mGlobalWorkSize = {
                static_cast<uint32_t>(UP_DIV(outputShape.at(3), 4) * static_cast<uint32_t>(UP_DIV(outputShape.at(2), 4))),
                static_cast<uint32_t>(outputShape.at(0) * outputShape.at(1))};

            auto kernel = &mKernel;
            uint32_t idx = 0;
            int inputImageShape[2] = {inputHeight, inputWidth};
            int outputImageShape[2] = {height, width};
            int stideShape[2] = {mStrides[0], mStrides[1]};
            kernel->setArg(idx++, mGlobalWorkSize[0]);
            kernel->setArg(idx++, mGlobalWorkSize[1]);
            kernel->setArg(idx++, openCLImage(input));
            kernel->setArg(idx++, openCLImage(mFilter.get()));
            kernel->setArg(idx++, openCLImage(mBias.get()));
            kernel->setArg(idx++, openCLImage(output));
            kernel->setArg(idx++, sizeof(inputImageShape), inputImageShape);
            kernel->setArg(idx++, static_cast<int>(inputChannelBlocks));
            kernel->setArg(idx++, sizeof(outputImageShape), outputImageShape);
            kernel->setArg(idx++, sizeof(stideShape), stideShape);
            kernel->setArg(idx++, UP_DIV(width, 4));
            std::string kernelName = "conv_2d_1x1";
            mLocalWorkSize = localWS2DDefault(mGlobalWorkSize, mMaxWorkGroupSize, mOpenCLBackend->getOpenCLRuntime(), kernelName, mKernel).first;
        }
    } else {
        // General conv_2d kernel: full kernel/stride/padding/dilation args.
        mGlobalWorkSize = {static_cast<uint32_t>(UP_DIV(outputShape.at(3), 4) * UP_DIV(outputShape.at(2), 4)),
                           static_cast<uint32_t>(outputShape.at(0) * outputShape.at(1))};

        int inputImageShape[2] = {inputHeight, inputWidth};
        int outputImageShape[2] = {height, width};
        int kernelShape[2] = {kernelHeight, kernelWidth};
        int strideShape[2] = {mStrides[0], mStrides[1]};
        int paddingShape[2] = {mPaddings[0], mPaddings[1]};
        int dilationShape[2] = {mDilations[0], mDilations[1]};
        uint32_t idx = 0;
        auto kernel = &mKernel;
        kernel->setArg(idx++, mGlobalWorkSize[0]);
        kernel->setArg(idx++, mGlobalWorkSize[1]);
        kernel->setArg(idx++, openCLImage(input));
        kernel->setArg(idx++, openCLImage(mFilter.get()));
        kernel->setArg(idx++, openCLImage(mBias.get()));
        kernel->setArg(idx++, openCLImage(output));
        kernel->setArg(idx++, sizeof(inputImageShape), inputImageShape);
        kernel->setArg(idx++, inputChannelBlocks);
        kernel->setArg(idx++, sizeof(outputImageShape), outputImageShape);
        kernel->setArg(idx++, sizeof(kernelShape), kernelShape);
        kernel->setArg(idx++, sizeof(strideShape), strideShape);
        kernel->setArg(idx++, sizeof(paddingShape), paddingShape);
        kernel->setArg(idx++, sizeof(dilationShape), dilationShape);
        kernel->setArg(idx++, UP_DIV(width, 4));

        std::string kernelName = "conv_2d";
        mLocalWorkSize = localWS2DDefault(mGlobalWorkSize, mMaxWorkGroupSize, mOpenCLBackend->getOpenCLRuntime(), kernelName, mKernel).first;
    }

#ifdef LOG_VERBOSE
    MNN_PRINT("end ConvExecution onResize !\n");
#endif
    return NO_ERROR;
}
385
onExecute(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)386 ErrorCode ConvExecution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
387 #ifdef LOG_VERBOSE
388 MNN_PRINT("Start ConvExecution onExecute !\n");
389 #endif
390 if(mUseLocalMem){
391 #ifdef ENABLE_OPENCL_TIME_PROFILER
392 cl::Event event;
393 run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
394 mOpenCLBackend->getOpenCLRuntime(), &event);
395
396 float costTime = mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
397 MNN_PRINT("kernel cost:%f us Conv UseLocalMem\n",costTime);
398 #else
399 run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
400 mOpenCLBackend->getOpenCLRuntime());
401 #endif
402 }
403
404 #ifdef ENABLE_OPENCL_TIME_PROFILER
405 cl::Event event;
406 runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
407 mOpenCLBackend->getOpenCLRuntime(), &event);
408
409 int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
410 MNN_PRINT("kernel cost:%d us Conv2D\n",costTime);
411 #else
412 runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
413 mOpenCLBackend->getOpenCLRuntime());
414 #endif
415
416 #ifdef LOG_VERBOSE
417 MNN_PRINT("end ConvExecution onExecute !\n");
418 #endif
419 return NO_ERROR;
420 }
421
422 class ConvolutionCreator : public OpenCLBackend::Creator {
423 public:
424 virtual ~ConvolutionCreator() = default;
onCreate(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const MNN::Op * op,Backend * backend) const425 virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
426 const MNN::Op *op, Backend *backend) const override {
427 if (inputs.size() > 1) {
428 return nullptr;
429 }
430 if (nullptr != op->main_as_Convolution2D()->quanParameter()) {
431 auto quan = op->main_as_Convolution2D()->quanParameter();
432 if (1 == quan->type() || 2 == quan->type()) {
433 if (quan->has_scaleInt()) {
434 // Don't support IDST-int8 because of error
435 return nullptr;
436 }
437 }
438 }
439
440 auto conv2D = op->main_as_Convolution2D();
441 if (ConvWinograd::valid(conv2D->common(), inputs[0])) {
442 return new ConvWinograd(conv2D, backend);
443 }
444
445 return new ConvExecution(inputs, outputs, op, backend);
446 }
447 };
448
// Registers the image-based OpenCL convolution creator for OpType_Convolution.
OpenCLCreatorRegister<ConvolutionCreator> __conv_op(OpType_Convolution, IMAGE);
450
451 } // namespace OpenCL
452 } // namespace MNN
453