//
//  ConvolutionCommon.cpp
//  MNN
//
//  Created by MNN on 2020/03/02.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "ConvolutionCommon.hpp"
#include <math.h>
#include <string.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>
#include "half.hpp"
namespace MNN {
static inline void *MNNMemoryAllocAlignZeroAlign(size_t size) {
    return MNNMemoryCallocAlign(size, MNN_MEMORY_ALIGN_DEFAULT);
}
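// Reads a blob's shape header from the stream: one byte holding the dimension
// count (at most 4) followed by that many uint16 extents. Advances `myfile`
// past the copied extents and returns the number of dimensions written into `shape`.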
static int ReadBlobDim(unsigned char *&myfile, unsigned short *shape, int shapeBufCnt) {
    int uSize = myfile[0];
    myfile++;
    if (uSize > 4) {
        printf("Read shape error!\n");
        return 0;
    }
    int copyLength = uSize;
    if (copyLength > shapeBufCnt) {
        copyLength = shapeBufCnt;
    }
    ::memcpy(shape, myfile, sizeof(unsigned short) * copyLength);
    myfile += copyLength * sizeof(unsigned short);
    return copyLength;
}

static double _log2(double x) {
    return log(x) / log(2);
}

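// Minimum bit width needed to index n distinct values, i.e. ceil(log2(n)):
// scans for the highest set bit and subtracts one when n is an exact power of
// two, so atLestBitsCnt(256) == 8, matching 8-bit indexes 0..255.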
static uint32_t atLestBitsCnt(uint32_t n) {
    for (uint32_t i = 0; i < 32; i++) {
        int32_t t = n << i;
        if (t < 0)
            return 32 - i - (((t << 1) == 0) ? 1 : 0);
    }
    return 0;
}

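// Unpacks bufLen bytes of MSB-first, iNeedBits-wide fields from buf into one
// byte per element of arr. A field may straddle a byte boundary, which the
// negative-shift branch below handles. For example, with iNeedBits = 2 the
// single byte 0b00011011 unpacks to arr = {0, 1, 2, 3}.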
static void SplitBufToArray(uint8_t *buf, size_t bufLen, uint8_t *arr, size_t arrLen, size_t iNeedBits) {
    unsigned char cMask = (1 << (iNeedBits)) - 1;
    unsigned char *tmp  = (unsigned char *)buf;
    int iOffset         = 0;
    for (unsigned int i = 0; i < arrLen; i++) {
        unsigned char idx = 0;
        long uShift       = 8 - (long)iNeedBits - iOffset % 8;
        if (uShift < 0) {
            idx = (tmp[iOffset / 8] << (0 - uShift)) & cMask;
            idx |= (tmp[(iOffset / 8) + 1] >> (8 + uShift)) & cMask;
        } else {
            idx = (tmp[iOffset / 8] >> uShift) & cMask;
        }
        iOffset += iNeedBits;
        if (iOffset % 8 == 0) {
            tmp += iOffset / 8;
            iOffset = 0;
        }
        arr[i] = idx;
    }
}

// FIXME: these linear-scan set/map helpers are O(n) per operation; not efficient.
typedef struct _SIMPLE_SET {
    int8_t *UniSet;
    uint32_t UniSetSize;
    uint32_t CurUniCnt;
} SIMPLE_SET, *PSIMPLE_SET;

static PSIMPLE_SET CreateSimpleSet(uint32_t maxSize) {
    PSIMPLE_SET set = (PSIMPLE_SET)calloc(1, sizeof(SIMPLE_SET));
    if (set == nullptr)
        return nullptr;
    set->UniSet     = (int8_t *)calloc(maxSize, sizeof(int8_t));
    if (set->UniSet == nullptr) {
        free(set);
        return nullptr;
    }
    set->UniSetSize = maxSize;
    set->CurUniCnt  = 0;
    return set;
}

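// Simple O(n^2) exchange sort over cnt int8 values: ascending when up != 0,
// descending otherwise.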
static void SimpleRank(int8_t *data, uint32_t cnt, int up) {
    if (up) {
        for (uint32_t i = 0; i < cnt; i++) {
            for (uint32_t j = i + 1; j < cnt; j++) {
                if (data[i] > data[j]) {
                    int8_t tmp = data[i];
                    data[i]    = data[j];
                    data[j]    = tmp;
                }
            }
        }
    } else {
        for (uint32_t i = 0; i < cnt; i++) {
            for (uint32_t j = i + 1; j < cnt; j++) {
                if (data[i] < data[j]) {
                    int8_t tmp = data[i];
                    data[i]    = data[j];
                    data[j]    = tmp;
                }
            }
        }
    }
}

static void InsertSimpleSet(PSIMPLE_SET set, int8_t value) {
    if (set->CurUniCnt >= set->UniSetSize)
        return;
    for (uint32_t i = 0; i < set->CurUniCnt; i++) {
        if (set->UniSet[i] == value)
            return;
    }
    set->UniSet[set->CurUniCnt++] = value;
    //    SimpleRank(set->UniSet, set->CurUniCnt, 1);
}

static void DestorySimpleSet(PSIMPLE_SET set) {
    if (set->UniSet != nullptr)
        free(set->UniSet);
    free(set);
}

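// Minimal flat int8 -> int8 map: key/value pairs stored interleaved as
// [k0, v0, k1, v1, ...] in CharCharMap, with linear-scan lookup.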
typedef struct _SIMPLE_MAP {
    int8_t *CharCharMap;
    uint32_t CharMapSize;
    uint32_t CurMapCnt;
} SIMPLE_MAP, *PSIMPLE_MAP;

static PSIMPLE_MAP CreateSimpleMap(uint32_t MaxCnt) {
    PSIMPLE_MAP map = (PSIMPLE_MAP)calloc(1, sizeof(SIMPLE_MAP));
    if (map == nullptr)
        return nullptr;
    map->CharMapSize = MaxCnt * sizeof(int8_t);
    map->CurMapCnt   = 0;
    map->CharCharMap = (int8_t *)calloc(1, MaxCnt * 2);
    if (map->CharCharMap == nullptr) {
        free(map);
        return nullptr;
    }
    return map;
}

static void DestroySimpleMap(PSIMPLE_MAP map) {
    if (map->CharCharMap)
        free(map->CharCharMap);
    free(map);
}

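// Inserts (k, v), overwriting the value when the key already exists; the pair
// is silently dropped once the map is full.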
static void InsertMap(PSIMPLE_MAP map, int8_t k, int8_t v) {
    for (uint32_t i = 0; i < map->CurMapCnt; i++) {
        if (map->CharCharMap[i * 2] == k) {
            map->CharCharMap[i * 2 + 1] = v;
            return;
        }
    }
    if (map->CurMapCnt >= map->CharMapSize)
        return;
    map->CharCharMap[map->CurMapCnt * 2]     = k;
    map->CharCharMap[map->CurMapCnt * 2 + 1] = v;
    map->CurMapCnt++;
}

static int8_t FindInMap(PSIMPLE_MAP map, int8_t k, int *found) {
    for (uint32_t i = 0; i < map->CurMapCnt; i++) {
        if (map->CharCharMap[i * 2] == k) {
            if (found != nullptr)
                *found = 1;
            return map->CharCharMap[i * 2 + 1];
        }
    }
    if (found != nullptr)
        *found = 0;
    return 0;
}

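// fread-style helper over an in-memory stream: copies unit * count bytes into
// dst and advances the `file` cursor accordingly.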
static void StreamSizeRead(void *dst, int unit, size_t count, unsigned char *&file) {
    ::memcpy(dst, file, unit * count);
    file += (unit * count);
}

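// Decodes a type-1 (dense lookup-table) quantized weight blob laid out as:
//   shape header (see ReadBlobDim), a sample count byte (0 means 256), the
//   int8 sample table, then dataCnt bit-packed indexes into the sorted samples.
// Returns an aligned buffer of dataCnt int8 weights that the caller frees with
// MNNMemoryFreeAlign, or nullptr on failure.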
static int8_t *ReadQuanData_c(unsigned char *&s, uint32_t *len) {
    int8_t *blob      = nullptr;
    int8_t *samples   = nullptr;
    uint8_t *idxBuf   = nullptr;
    uint8_t *idxBytes = nullptr;
    uint32_t dataCnt  = 1;

    do {
        // blob shape
        unsigned short shape[64] = {0};
        uint32_t shapeDim        = (uint32_t)ReadBlobDim(s, shape, 64);
        if (shapeDim == 0 || shapeDim > 64)
            break;
        for (uint32_t i = 0; i < shapeDim; i++)
            dataCnt *= shape[i];

        // sample
        uint32_t sampleCnt = 0;
        StreamSizeRead(&sampleCnt, 1, 1, s);
        if (0 == sampleCnt) {
            sampleCnt = 256;
        }
        samples = (int8_t *)MNNMemoryAllocAlignZeroAlign(sampleCnt);
        if (samples == nullptr)
            break;
        StreamSizeRead(samples, 1, sampleCnt, s);
        SimpleRank(samples, sampleCnt, 1);
        // index
        uint32_t idxBitsCnt = atLestBitsCnt(sampleCnt);
        idxBitsCnt = idxBitsCnt < 1 ? 1 : idxBitsCnt;
        size_t idxBufSize   = ceil(idxBitsCnt * dataCnt * 0.125);
        idxBuf              = (uint8_t *)MNNMemoryAllocAlignZeroAlign(idxBufSize);
        if (nullptr == idxBuf) {
            MNN_ERROR("Not enough memory\n");
            break;
        }
        StreamSizeRead(idxBuf, 1, idxBufSize, s);
        // split index value into bytes
        idxBytes = (uint8_t *)MNNMemoryAllocAlignZeroAlign(dataCnt * sizeof(uint8_t));
        if (idxBitsCnt == 0 || nullptr == idxBytes) {
            break;
        }
        SplitBufToArray(idxBuf, (uint32_t)idxBufSize, idxBytes, (uint32_t)dataCnt, (uint32_t)idxBitsCnt);
        uint32_t i = 0;
        blob  = (int8_t *)MNNMemoryAllocAlignZeroAlign((size_t)dataCnt);
        if (nullptr == blob) {
            break;
        }
        for (i = 0; i < dataCnt; i++) {
            if (idxBytes[i] >= sampleCnt) {
                MNN_PRINT("iNeedBits is %u\nRead quan weights error with idx:%d\n", idxBitsCnt, (int)idxBytes[i]);
                break;
            }
            blob[i] = samples[idxBytes[i]];
        }
        if (i < dataCnt) {
            MNNMemoryFreeAlign(blob);
            blob = nullptr;
            break;
        }
    } while (0);

    if (samples != nullptr)
        MNNMemoryFreeAlign(samples);
    if (idxBuf != nullptr)
        MNNMemoryFreeAlign(idxBuf);
    if (idxBytes != nullptr)
        MNNMemoryFreeAlign(idxBytes);
    if (len)
        *len = blob ? dataCnt : 0;
    return blob;
}

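// Decodes a type-2 (sparse) quantized weight blob. The stream layout is
// documented inline below (steps 1-7): shape header, non-zero count, bit-packed
// index deltas, and a bit-packed value table. Entries that are not stored
// default to each output channel's quantized zero point when asymmetric alpha
// (min/scale pairs) is present, and to 0 otherwise.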
static int8_t *ReadSparseQuanData_c(unsigned char *&myfile, uint32_t *len, const flatbuffers::Vector<float> *alpha) {
    // MNN_ERROR("sparse:%d\n", 1);
    unsigned short shape[64] = {0};
    uint32_t ucMapSize = 0;
    PSIMPLE_SET setWeight = CreateSimpleSet(256);
    if (setWeight == nullptr) {
        return nullptr;
    }
    std::shared_ptr<unsigned int> __autoReleaseSetWeight(nullptr, [setWeight](void *) { DestorySimpleSet(setWeight); });
    unsigned int nnz;
    unsigned char iIdxNeedBits;
    int8_t *blob = nullptr;
    // 1. weights blob shape (dim count byte + uint16 extents)
    int ShapeDim = ReadBlobDim(myfile, shape, 64);
    int Size     = sizeof(int8_t);
    for (int i = 0; i < ShapeDim; i++)
        Size *= shape[i];
    blob = (int8_t *)MNNMemoryAllocAlignZeroAlign((size_t)Size);
    if (blob == nullptr)
        return nullptr;
    // 2. number of non-zero weights (unsigned int32)
    StreamSizeRead(&nnz, 4, 1, myfile);
    // 3. bit width used to encode each index step (unsigned char)
    StreamSizeRead(&iIdxNeedBits, 1, 1, myfile);
    // 4. read the bit-packed index-step array: ceil(nnz * iIdxNeedBits / 8) bytes
    AutoStorage<unsigned char> arrIdxBuffer(nnz);
    unsigned char *arrIdx = arrIdxBuffer.get();
    if (nullptr == arrIdx) {
        MNNMemoryFreeAlign(blob);
        return nullptr;
    }
    {
        size_t bufLen = (size_t)(ceil(0.125 * iIdxNeedBits * nnz));
        char *buf     = (char *)MNNMemoryAllocAlignZeroAlign(bufLen * sizeof(char));
        if (nullptr == buf) {
            MNNMemoryFreeAlign(blob);
            return nullptr;
        }
        StreamSizeRead(buf, 1, bufLen, myfile);
        SplitBufToArray((uint8_t *)buf, (uint32_t)bufLen, (uint8_t *)arrIdx, (uint32_t)nnz, (uint32_t)iIdxNeedBits);
        MNNMemoryFreeAlign(buf);
    }
    // 5. available values count (unsigned char, 0 means 256)
    StreamSizeRead(&ucMapSize, 1, 1, myfile);
    if (0 == ucMapSize) {
        ucMapSize = 256;
    }
    // 6. value set (int8_t * available_values_count)
    for (uint32_t i = 0; i < ucMapSize; i++) {
        int8_t tmp;
        StreamSizeRead(&tmp, 1, 1, myfile);
        InsertSimpleSet(setWeight, tmp);
    }
    SimpleRank(setWeight->UniSet, setWeight->CurUniCnt, 1);
    // map<unsigned char, signed char> mapWeight;
    PSIMPLE_MAP mapWeight = CreateSimpleMap(256);
    if (mapWeight == nullptr) {
        MNNMemoryFreeAlign(blob);
        return nullptr;
    }
    std::shared_ptr<unsigned int> __autoReleaseMapWeight(nullptr, [mapWeight](void *) { DestroySimpleMap(mapWeight); });

    for (uint32_t i = 0; i < setWeight->CurUniCnt; i++) {
        InsertMap(mapWeight, i, setWeight->UniSet[i]);
    }
    //    unsigned char iIdx = 0;
    // 7. non-zero weight value indexes: ceil(nnz * ceil(log2(available_values_count)) / 8) bytes
    AutoStorage<unsigned char> arrWeightIdxBuffer(nnz);
    unsigned char *arrWeightIdx = arrWeightIdxBuffer.get();
    if (nullptr == arrWeightIdx) {
        MNNMemoryFreeAlign(blob);
        return nullptr;
    }
    {
        int iDataNeedBits = (int)ceil(_log2(ucMapSize));
        iDataNeedBits = iDataNeedBits < 1 ? 1 : iDataNeedBits;
        size_t bufLen     = (size_t)(ceil(0.125 * iDataNeedBits * nnz));
        char *buf         = (char *)MNNMemoryAllocAlignZeroAlign(bufLen * sizeof(char));
        if (nullptr == buf) {
            MNNMemoryFreeAlign(blob);
            return nullptr;
        }
        StreamSizeRead(buf, 1, bufLen, myfile);
        SplitBufToArray((uint8_t *)buf, (uint32_t)bufLen, (uint8_t *)arrWeightIdx, (uint32_t)nnz,
                        (uint32_t)iDataNeedBits);
        MNNMemoryFreeAlign(buf);
    }
    // set blob data with idx and weight idx
    {
        if (alpha != nullptr && alpha->size() == 2 * shape[0]) {
            auto alphaPtr = alpha->data();
            int area = Size / shape[0];
            for (int i = 0; i < shape[0]; i++) {
                float min = alphaPtr[2 * i];
                float scale = alphaPtr[2 * i + 1];
                int zeroQuant = -128;
                if (scale > 1e-6) {
                    zeroQuant = round((0.0f - min) / scale) + (-128);
                }
                memset(blob + area * i, zeroQuant, area * sizeof(signed char));
            }
        } else {
            memset(blob, 0, Size * sizeof(signed char)); // backward compatibility with previous symmetric weight quant
        }
        int iPreIdx = 0;
        for (unsigned int i = 0; i < nnz; i++) {
            iPreIdx += arrIdx[i];
            if (iPreIdx >= Size) {
                MNN_ERROR("Read quan weights error with invalid sparse index:%d\n", iPreIdx);
                MNNMemoryFreeAlign(blob);
                return nullptr;
            }
            int found    = 0;
            int8_t value = FindInMap(mapWeight, arrWeightIdx[i], &found);
            if (!found) {
                MNN_ERROR("Read quan weights error with idx:%d\n", arrWeightIdx[i]);
                MNNMemoryFreeAlign(blob);
                return nullptr;
            }
            blob[iPreIdx] = value;
        }
    }
    *len = Size;
    return blob;
}
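// Decodes the quantized weights stored in an IDSTQuan record. quan->type()
// selects the container: 1 = dense lookup-table quantization, 2 = sparse
// quantization, 3 = raw fp16, 4 = raw int8. Unless the caller forces int8 or
// the model carries int8 scales (quan->has_scaleInt()), the weights are
// dequantized back to float using the per-channel alpha table.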
std::shared_ptr<ConvolutionCommon::Int8Common> ConvolutionCommon::load(const IDSTQuan *quan, bool forceFloat, bool forceInt8) {
    auto result           = std::make_shared<Int8Common>();
    uint32_t weightLength = 0;
    int8_t *buffer        = nullptr;
    auto originBuffer     = (unsigned char *)quan->buffer()->data();
    if (1 == quan->type()) {
        buffer = ReadQuanData_c(originBuffer, &weightLength);
    }
    if (2 == quan->type()) {
        buffer = ReadSparseQuanData_c(originBuffer, &weightLength, quan->alpha());
    }
    // read fp16 data
    if (3 == quan->type()) {
        weightLength = quan->buffer()->size() / sizeof(half_float::half);
        std::vector<int8_t> tempHalfWeight(quan->buffer()->size());
        ::memcpy(tempHalfWeight.data(), quan->buffer()->data(), quan->buffer()->size());
        auto halfWeight = reinterpret_cast<half_float::half *>(tempHalfWeight.data());
        result->weightFloat.reset(weightLength);
        if (nullptr == result->weightFloat.get()) {
            MNN_PRINT("Alloc memory error when extracting fp16 back to float\n");
            return nullptr;
        }
        std::transform(halfWeight, halfWeight + weightLength, result->weightFloat.get(),
                       [](half_float::half h) { return float(h); });
        return result;
    }

    // weight int8 only
    if (4 == quan->type()) {
        weightLength = quan->buffer()->size();
        result->weight.reset(weightLength);
        ::memcpy(result->weight.get(), quan->buffer()->data(), weightLength);
    }

    if (result->weight.get() == nullptr) {
        if (nullptr == buffer) {
            MNN_PRINT("Alloc memory error when extracting IDST int8\n");
            return nullptr;
        }
        result->weight.set(buffer, weightLength);
    }
    result->quan = quan;
    result->alpha.reset(quan->alpha()->size());
    if (nullptr == result->alpha.get()) {
        MNN_PRINT("Alloc memory error when extracting IDST int8\n");
        return nullptr;
    }
    ::memcpy(result->alpha.get(), quan->alpha()->data(), quan->alpha()->size() * sizeof(float));
    if (forceInt8) {
        return result;
    }
    if (!quan->has_scaleInt() || forceFloat) {
        // Back to float
        result->weightFloat.reset(weightLength);
        if (nullptr == result->weightFloat.get()) {
            MNN_PRINT("Alloc memory error when converting IDST int8 back to float\n");
            return nullptr;
        }
        int outputCount = 0;
        bool oldType4 = (quan->type() == 4 && quan->aMin() == 0 && std::abs(quan->quantScale()) < 1e-6);
        if (quan->readType() != 0 || oldType4) {
            outputCount   = result->alpha.size() / 2;
        } else {
            outputCount   = result->alpha.size(); // backward compatibility with previous symmetric quantization
        }
        int partWeightSize = weightLength / outputCount;
        for (int o = 0; o < outputCount; ++o) {
            auto dstW   = result->weightFloat.get() + o * partWeightSize;
            auto srcW   = result->weight.get() + o * partWeightSize;
            float extraFactor = quan->quantScale();
            // old type-4 models store quan->quantScale() == 0, which would zero the weights here
            if (oldType4) {
                extraFactor = 1.0f;
            }
            if (result->alpha.size() == 2 * outputCount) {
                float min = result->alpha.get()[2 * o];
                float alpha = result->alpha.get()[2 * o + 1];
                // clampMin is the minimum quantized value in asymmetric quant, i.e. -(2^(bit-1));
                // old models stored aMin == 0, which maps to the legacy clampMin of -128
                float clampMin = quan->aMin() == 0 ? -128 : quan->aMin();
                for (int j = 0; j < partWeightSize; ++j) {
                    dstW[j] = (((float)srcW[j] - clampMin) * alpha + min) * extraFactor;
                }
            } else {
                float alpha = result->alpha.get()[o];
                for (int j = 0; j < partWeightSize; ++j) {
                    dstW[j] = ((float)srcW[j]) * alpha * extraFactor;
                }
            }
        }

        result->weight.release();
        result->alpha.release();
    }

    return result;
}

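// Resolves a convolution's float weights: when a quanParameter is present the
// weights are dequantized through load(); otherwise (or when dequantization
// yields nothing) the raw float weight array from the model is used.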
void ConvolutionCommon::getConvParameters(std::shared_ptr<Int8Common> *quanCommon, const MNN::Convolution2D *conv2d, const float **originWeight, int *originWeightSize) {
    *originWeight = nullptr;
    *originWeightSize = 0;
    if (nullptr != conv2d->quanParameter()) {
        *quanCommon = load(conv2d->quanParameter(), false);
        *originWeight     = (*quanCommon)->weightFloat.get();
        *originWeightSize = (*quanCommon)->weightFloat.size();
    }
    if (*originWeight == nullptr) {
        *originWeight = conv2d->weight()->data();
        *originWeightSize = conv2d->weight()->size();
    }
}

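// Gathers int8 inference parameters (weight / per-channel scale / int32 bias)
// for a quantized convolution. Explicit symmetricQuan tables win; otherwise the
// parameters are derived from the float bias and per-channel alpha, folding the
// input/output scales and zero points into the bias as sketched in the
// reference comment below.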
bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d, std::shared_ptr<Int8Common>& quanCommon,
                                              const int8_t*& weight, float*& scale, int32_t*& bias,
                                              float inputScale, float outputScale, int inputZeroPoint, int outputZeroPoint) {
    int outputCount = conv2d->common()->outputCount();
    weight = nullptr;
    if (conv2d->symmetricQuan() && conv2d->symmetricQuan()->weight()) {
        weight = conv2d->symmetricQuan()->weight()->data();
    }
    if (conv2d->quanParameter() && conv2d->quanParameter()->buffer()) {
        quanCommon = ConvolutionCommon::load(conv2d->quanParameter(), false, true);
        weight = quanCommon->weight.get();
    }
    if (weight == nullptr) {
        MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No weight data!");
        return false;
    }
    if (conv2d->symmetricQuan() && conv2d->symmetricQuan()->bias() && conv2d->symmetricQuan()->scale()) {
        MNN_ASSERT(conv2d->symmetricQuan()->bias()->size() == outputCount && conv2d->symmetricQuan()->scale()->size() == outputCount);
        ::memcpy(bias, conv2d->symmetricQuan()->bias()->data(), outputCount * sizeof(int32_t));
        ::memcpy(scale, conv2d->symmetricQuan()->scale()->data(), outputCount * sizeof(float));
        return true;
    }
    if (conv2d->bias() && conv2d->quanParameter() && conv2d->quanParameter()->alpha()) {
        const int kernelNum = conv2d->common()->outputCount();
        int kernelChannel = conv2d->common()->inputCount();
        int group = conv2d->common()->group();
        if ((kernelChannel == kernelNum) && (group == kernelChannel)) {
            kernelChannel = 1; // depthwise
        }
        const int kernelSize = kernelChannel * conv2d->common()->kernelX() * conv2d->common()->kernelY();

        // reference for how to get the quantized bias:
        // auto remains = _ReduceSum(_Cast<int32_t>(mInputZeroPoint) * _Cast<int32_t>(quanWeight), {1, 2, 3}, true);
        // MNN_ASSERT((mOutputZeroPoint->getInfo()->dim.size() == 0) && (mOutputZeroPoint->getInfo()->size == 1)); // only support per-tensor, per-channel is removed.
        // auto outputZeroPointFused = _Cast<int32_t>(_Cast<float>(mOutputZeroPoint) * _Reciprocal(convScale));
        // auto quanBias = _Cast<int32_t>(fusedBias * _Reciprocal(weightScale * mInputScale)) - remains + outputZeroPointFused;

        // compute remains used in asymmetric quant
        std::vector<int> remains;
        for (int i = 0; i < kernelNum; i++) {
            int temp = 0;
            int offset = i * kernelSize;
            for (int j = 0; j < kernelSize; j++) {
                temp += inputZeroPoint * weight[offset + j];
            }
            remains.emplace_back(temp);
        }

        inputScale  = inputScale == 0.f ? conv2d->quanParameter()->scaleIn() : inputScale;
        outputScale = outputScale == 0.f ? conv2d->quanParameter()->scaleOut() : outputScale;
        auto biasData    = conv2d->bias()->data();
        auto alphaData   = conv2d->quanParameter()->alpha()->data();
        auto alphaScale  = inputScale / outputScale;
        for (int i = 0; i < outputCount; i++) {
            scale[i] = alphaData[i] * alphaScale;
            // compute outputZeroPointFused in asymmetric quant
            int outputZeroPointFused = static_cast<int32_t>(outputZeroPoint / scale[i]);
            bias[i] = static_cast<int32_t>(biasData[i] / (inputScale * alphaData[i])) - remains[i] + outputZeroPointFused;
        }
        return true;
    }
    MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No bias & scale data!");
    return false;
}

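// Computes the (padX, padY) origin offsets for a convolution. For SAME padding
// this is half of the total padding needed to keep the output size, e.g.
//   padNeededWidth = (outW - 1) * strideX + dilatedKernelW - inW
// For other modes, an explicit pads() list (stored as {padY, padX, ...}) takes
// precedence over the scalar padX()/padY() fields.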
std::pair<int, int> ConvolutionCommon::convolutionPad(const Tensor *input, const Tensor *output,
                                                      const Convolution2DCommon *mCommon) {
    if (mCommon->padMode() == PadMode_SAME) {
        int kernelWidthSize  = (mCommon->kernelX() - 1) * mCommon->dilateX() + 1;
        int kernelHeightSize = (mCommon->kernelY() - 1) * mCommon->dilateY() + 1;

        int padNeededWidth  = (output->width() - 1) * mCommon->strideX() + kernelWidthSize - input->width();
        int padNeededHeight = (output->height() - 1) * mCommon->strideY() + kernelHeightSize - input->height();
        auto mPadX          = padNeededWidth / 2;
        auto mPadY          = padNeededHeight / 2;
        return std::make_pair(mPadX, mPadY);
    }
    auto mPadX = mCommon->padX();
    auto mPadY = mCommon->padY();
    if (nullptr != mCommon->pads() && mCommon->pads()->size() >= 2) {
        mPadX = mCommon->pads()->data()[1];
        mPadY = mCommon->pads()->data()[0];
    }
    return std::make_pair(mPadX, mPadY);
}

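// Extends convolutionPad with the right/bottom remainders and returns
// {padLeft, padTop, padRight, padBottom}. The right/bottom components cover the
// last kernel window, which may extend past the input even after left/top padding:
//   right = (outW - 1) * strideX + (kernelX - 1) * dilateX - padLeft
//   padRight = max(0, right - inW + 1)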
std::tuple<int, int, int, int> ConvolutionCommon::convolutionPadFull(const Tensor* input, const Tensor* output,
                                                         const Convolution2DCommon* common) {
    auto pad = convolutionPad(input, output, common);
    int iw = input->width();
    int ih = input->height();
    int ow = output->width();
    int oh = output->height();

    int right = (ow - 1) * common->strideX() + (common->kernelX() - 1) * common->dilateX() - pad.first;
    int padRight = 0;
    if (right >= iw) {
        padRight = right - iw + 1;
    }
    int bottom = (oh - 1) * common->strideY() + (common->kernelY() - 1) * common->dilateY() - pad.second;
    int padBottom = 0;
    if (bottom >= ih) {
        padBottom = bottom - ih + 1;
    }
    return std::make_tuple(pad.first, pad.second, padRight, padBottom);
}

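// Padding for a transposed convolution. Under SAME mode the pad is half the
// difference between the "full" output size (in - 1) * stride + kernel and the
// requested output size; otherwise explicit pads() ({padY, padX, ...}) or the
// scalar padX()/padY() fields are used, as in convolutionPad.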
std::pair<int, int> ConvolutionCommon::convolutionTransposePad(const Tensor *input, const Tensor *output,
                                                               const Convolution2DCommon *mCommon) {
    if (mCommon->padMode() == PadMode_SAME) {
        const int outputWidth  = output->width();
        const int outputHeight = output->height();

        const int outputWidthPadded  = (input->width() - 1) * mCommon->strideX() + mCommon->kernelX();
        const int outputHeightPadded = (input->height() - 1) * mCommon->strideY() + mCommon->kernelY();

        const int padNeededWidth  = outputWidthPadded - outputWidth;
        const int padNeededHeight = outputHeightPadded - outputHeight;

        auto mPadX = padNeededWidth / 2;
        auto mPadY = padNeededHeight / 2;
        return std::make_pair(mPadX, mPadY);
    }
    auto mPadX = mCommon->padX();
    auto mPadY = mCommon->padY();
    if (nullptr != mCommon->pads() && mCommon->pads()->size() >= 2) {
        mPadY = mCommon->pads()->data()[0];
        mPadX = mCommon->pads()->data()[1];
    }
    return std::make_pair(mPadX, mPadY);
}

} // namespace MNN