/dports/misc/mnn/MNN-1.2.0/source/backend/cpu/compute/ |
H A D | Convolution1x1Strassen.cpp | 85 auto icC4 = UP_DIV(ic, core->pack); in onResize() local 103 …mTempInputBatch.reset(Tensor::createDevice<float>(std::vector<int>{icC4, matrixSizeE, core->pack})… in onResize() 122 MNN_CONCURRENCY_BEGIN(y, icC4) { in onResize() 126 auto srcX = srcY + x * outputPlane * icC4 * core->pack * core->bytes; in onResize() 137 ::memset(dstOrigin, 0, outputPlane * batch * unitBytes * icC4); in onResize() 138 MNN_CONCURRENCY_BEGIN(z, icC4) { in onResize() 142 auto srcBatch = srcZ + b * iw * ih * icC4 * unitBytes; in onResize() 169 …mPretreatFunction = [outputPlane, padY, padX, strideX, strideY, ow, oh, iw, ih, icC4, oxStart, oyS… in onResize() 171 ::memset(dstOrigin, 0, outputPlane * batch * core->bytes * core->pack * icC4); in onResize() 176 MNN_CONCURRENCY_BEGIN(z, icC4) { in onResize() [all …]
|
H A D | ConvolutionTiledExecutor.hpp | 87 …auto icC4 = UP_DIV(input->channel(), unit); … 150 …auto inputBatchStride = src_width * src_height * icC4 * unit; …
|
/dports/misc/mnn/MNN-1.2.0/source/backend/cpu/ |
H A D | CPUDeconvolution.cpp | 133 auto icC4 = UP_DIV(input->channel(), core->pack); in onResize() local 161 Tensor::create<float>({icC4, plane, core->pack}, inputPtr)); in onResize() 162 AutoRelease<Tensor> tempInput(Tensor::createDevice<float>({icC4, plane, core->pack})); in onResize() 171 …mPreFunctions.emplace_back(std::make_pair([newInputPtr, icC4, plane, threadNumber, core](const flo… in onResize() 172 for (int c = tId; c<icC4; c+=threadNumber) { in onResize() 233 auto icC4 = UP_DIV(input->channel(), core->pack); in onExecute() local 239 …auto inputPtr = inputs[0]->host<uint8_t>() + i * src_width * src_height * icC4 * core->pack * core… in onExecute()
|
/dports/misc/mnn/MNN-1.2.0/source/backend/opencl/execution/buffer/ |
H A D | ConvBufWinograd.cpp | 52 auto icC4 = UP_DIV(ic, 4); in ConvBufWinograd() local 107 mWeight.reset(Tensor::createDevice<float>({1, ocC4 * alpha * alpha, icC4 * 4, 4}));//NHWC in ConvBufWinograd() 163 auto icC4 = UP_DIV(input->channel(), 4); in onResize() local 213 mGWS_S[b] = {static_cast<uint32_t>(wCount * hCount), static_cast<uint32_t>(icC4)}; in onResize() 225 mSourceTransform[b].setArg(index++, icC4); in onResize() 263 kernel[knl_idx].setArg(index++, icC4); in onResize() 288 mMatMul[b].setArg(index++, icC4); in onResize()
|
/dports/misc/mnn/MNN-1.2.0/source/backend/vulkan/execution/ |
H A D | VulkanConvolutionWinograd.cpp | 116 auto icC4 = UP_DIV(src->channel(), 4); in onEncode() local 148 value->inputSize[2] = icC4; in onEncode() 204 … UP_DIV(hCount, mTransformLocalSize[1]), UP_DIV(icC4, mTransformLocalSize[2])); in onEncode()
|
/dports/misc/mnn/MNN-1.2.0/source/backend/opencl/execution/image/ |
H A D | ConvWinograd.cpp | 216 auto icC4 = UP_DIV(input->channel(), 4); in onResize() local 290 mSourceTransform[index].setArg(8, icC4); in onResize() 295 mMatMul[index].setArg(5, icC4); in onResize() 324 … mGWS_S[index] = {static_cast<uint32_t>(wCount * hCount), static_cast<uint32_t>(icC4)}; in onResize()
|
/dports/misc/mnn/MNN-1.2.0/source/backend/opencl/execution/cl/ |
H A D | deconv_2d.cl | 77 //weights NC4HW4 [1, 4*icC4, ocC4*kh*kw, 1] xic4
|
H A D | conv_2d_buf.cl | 67 //weights NC4HW4 [1, 4*icC4, ocC4*kh*kw, 1] xic4 148 //weights NC4HW4 [1, 4*icC4, ocC4*kh*kw, 1] xic4 237 //weights NC4HW4 [1, 4*icC4, ocC4*kh*kw, 1] xic4 330 //weights NC4HW4 [1, 4*icC4, ocC4*kh*kw, 1] xic4
|