1 //
2 //  CPUScale.cpp
3 //  MNN
4 //
5 //  Created by MNN on 2018/08/07.
6 //  Copyright © 2018, Alibaba Group Holding Limited
7 //
8 
9 #include "CPUScale.hpp"
10 #include "CPUBackend.hpp"
11 #include "core/Macro.h"
12 #include "core/TensorUtils.hpp"
13 #include "core/Concurrency.h"
14 #include "compute/CommonOptFunction.h"
15 
16 namespace MNN {
CPUScale(const Op * op,Backend * bn)17 CPUScale::CPUScale(const Op* op, Backend* bn) : MNN::Execution(bn) {
18     auto scale      = op->main_as_Scale();
19     int outputCount = scale->scaleData()->size();
20     auto core = static_cast<CPUBackend*>(bn)->functions();
21     mScaleBias.reset(
22                      Tensor::createDevice<uint8_t>(
23                                            {2, UP_DIV(outputCount, core->pack) * core->pack * core->bytes}
24                                            ));
25     auto res = bn->onAcquireBuffer(mScaleBias.get(), Backend::STATIC);
26     if (!res) {
27         MNN_ERROR("Error for alloc buffer for CPUScale\n");
28         mScaleBias = nullptr;
29         mValid = false;
30         return;
31     }
32     ::memset(mScaleBias->host<float>(), 0, mScaleBias->size());
33     if (core->bytes < 4) {
34         core->MNNFp32ToLowp(scale->scaleData()->data(), mScaleBias->host<int16_t>(), outputCount);
35     } else {
36         ::memcpy(mScaleBias->host<float>(), scale->scaleData()->data(), outputCount * sizeof(float));
37     }
38     if (nullptr != scale->biasData() && nullptr != scale->biasData()->data()) {
39         if (core->bytes < 4) {
40             core->MNNFp32ToLowp(scale->biasData()->data(), (int16_t*)(mScaleBias->host<uint8_t>() + 1 * mScaleBias->length(1)), outputCount);
41         } else {
42             ::memcpy(mScaleBias->host<float>() + ALIGN_UP4(outputCount), scale->biasData()->data(), outputCount * sizeof(float));
43         }
44     }
45 }
~CPUScale()46 CPUScale::~CPUScale() {
47     if (nullptr != mScaleBias) {
48         backend()->onReleaseBuffer(mScaleBias.get(), Backend::STATIC);
49     }
50 }
onExecute(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)51 ErrorCode CPUScale::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
52     auto input  = inputs[0];
53     auto output = outputs[0];
54     auto core = static_cast<CPUBackend*>(backend())->functions();
55     auto scalePtr = mScaleBias->host<uint8_t>();
56     auto biasPtr = mScaleBias->host<uint8_t>() + 1 * mScaleBias->length(1);
57     //FUNC_PRINT(TensorUtils::getDescribe(input)->dimensionFormat);
58     auto batch       = input->buffer().dim[0].extent;
59     auto depthQuad   = UP_DIV(input->channel(), core->pack);
60     int planeNumber = 1;
61     for (int i = 2; i < input->buffer().dimensions; ++i) {
62         planeNumber *= input->length(i);
63     }
64     auto depthStride = planeNumber * core->pack;
65     auto totalDepth = batch * depthQuad;
66     int numberThread = ((CPUBackend*)backend())->threadNumber();
67     MNN_CONCURRENCY_BEGIN(tId, numberThread) {
68         for (int i = tId; i < totalDepth; i+=numberThread) {
69             auto depthIndex = i % depthQuad;
70             core->MNNScaleAndAddBias((float*)(output->host<uint8_t>() + depthStride * i * core->bytes), (const float*)(input->host<uint8_t>() + depthStride * i * core->bytes), (const float*)(biasPtr + core->pack * core->bytes * depthIndex),
71                                      (const float*)(scalePtr + core->pack * core->bytes * depthIndex), planeNumber, 1);
72         }
73     }
74     MNN_CONCURRENCY_END();
75     return NO_ERROR;
76 }
77 class CPUScaleCreator : public CPUBackend::Creator {
78 public:
onCreate(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const MNN::Op * op,Backend * backend) const79     virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
80                                 const MNN::Op* op, Backend* backend) const override {
81         return new CPUScale(op, backend);
82     }
83 };
84 
85 REGISTER_CPU_OP_CREATOR(CPUScaleCreator, OpType_Scale);
86 } // namespace MNN
87