/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolutiondepthwise_5x5_packn.h | 196 r0 += packn; in convdw5x5s1_packn_rvv() 197 r1 += packn; in convdw5x5s1_packn_rvv() 198 r2 += packn; in convdw5x5s1_packn_rvv() 199 r3 += packn; in convdw5x5s1_packn_rvv() 200 r4 += packn; in convdw5x5s1_packn_rvv() 204 r0 += 4 * packn + w * packn; in convdw5x5s1_packn_rvv() 205 r1 += 4 * packn + w * packn; in convdw5x5s1_packn_rvv() 206 r2 += 4 * packn + w * packn; in convdw5x5s1_packn_rvv() 207 r3 += 4 * packn + w * packn; in convdw5x5s1_packn_rvv() 208 r4 += 4 * packn + w * packn; in convdw5x5s1_packn_rvv() [all …]
|
H A D | convolutiondepthwise_5x5_packn_fp16s.h | 196 r0 += packn; in convdw5x5s1_packn_fp16sa_rvv() 197 r1 += packn; in convdw5x5s1_packn_fp16sa_rvv() 198 r2 += packn; in convdw5x5s1_packn_fp16sa_rvv() 199 r3 += packn; in convdw5x5s1_packn_fp16sa_rvv() 200 r4 += packn; in convdw5x5s1_packn_fp16sa_rvv() 204 r0 += 4 * packn + w * packn; in convdw5x5s1_packn_fp16sa_rvv() 205 r1 += 4 * packn + w * packn; in convdw5x5s1_packn_fp16sa_rvv() 206 r2 += 4 * packn + w * packn; in convdw5x5s1_packn_fp16sa_rvv() 207 r3 += 4 * packn + w * packn; in convdw5x5s1_packn_fp16sa_rvv() 208 r4 += 4 * packn + w * packn; in convdw5x5s1_packn_fp16sa_rvv() [all …]
|
H A D | convolution_sgemm_packn.h | 37 tmp.create(2 * maxk, inch, size / 2 + size % 2, 4u * packn, packn, opt.workspace_allocator); in im2col_sgemm_packn_rvv() 39 tmp.create(maxk, inch, size, 4u * packn, packn, opt.workspace_allocator); in im2col_sgemm_packn_rvv() 245 kptr0 += packn; in im2col_sgemm_packn_rvv() 257 outptr0 += packn * 8; in im2col_sgemm_packn_rvv() 291 kptr0 += packn; in im2col_sgemm_packn_rvv() 299 outptr0 += packn * 4; in im2col_sgemm_packn_rvv() 325 kptr0 += packn; in im2col_sgemm_packn_rvv() 331 outptr0 += packn * 2; in im2col_sgemm_packn_rvv() 353 kptr0 += packn; in im2col_sgemm_packn_rvv() 358 outptr0 += packn; in im2col_sgemm_packn_rvv() [all …]
|
H A D | convolutiondepthwise_3x3_packn.h | 191 r0 += packn; in convdw3x3s1_packn_rvv() 192 r1 += packn; in convdw3x3s1_packn_rvv() 193 r2 += packn; in convdw3x3s1_packn_rvv() 194 r3 += packn; in convdw3x3s1_packn_rvv() 197 r0 += 2 * packn + w * packn; in convdw3x3s1_packn_rvv() 198 r1 += 2 * packn + w * packn; in convdw3x3s1_packn_rvv() 199 r2 += 2 * packn + w * packn; in convdw3x3s1_packn_rvv() 200 r3 += 2 * packn + w * packn; in convdw3x3s1_packn_rvv() 290 r0 += packn; in convdw3x3s1_packn_rvv() 291 r1 += packn; in convdw3x3s1_packn_rvv() [all …]
|
H A D | convolution_sgemm_packn_fp16s.h | 17 const int packn = csrr_vlenb() / 2; in im2col_sgemm_packn_fp16sa_rvv() local 37 tmp.create(2 * maxk, inch, size / 2 + size % 2, 2u * packn, packn, opt.workspace_allocator); in im2col_sgemm_packn_fp16sa_rvv() 39 tmp.create(maxk, inch, size, 2u * packn, packn, opt.workspace_allocator); in im2col_sgemm_packn_fp16sa_rvv() 277 kptr0 += packn; in im2col_sgemm_packn_fp16sa_rvv() 289 outptr0 += packn * 8; in im2col_sgemm_packn_fp16sa_rvv() 323 kptr0 += packn; in im2col_sgemm_packn_fp16sa_rvv() 331 outptr0 += packn * 4; in im2col_sgemm_packn_fp16sa_rvv() 357 kptr0 += packn; in im2col_sgemm_packn_fp16sa_rvv() 385 kptr0 += packn; in im2col_sgemm_packn_fp16sa_rvv() 390 outptr0 += packn; in im2col_sgemm_packn_fp16sa_rvv() [all …]
|
H A D | convolutiondepthwise_3x3_packn_fp16s.h | 191 r0 += packn; in convdw3x3s1_packn_fp16sa_rvv() 192 r1 += packn; in convdw3x3s1_packn_fp16sa_rvv() 193 r2 += packn; in convdw3x3s1_packn_fp16sa_rvv() 194 r3 += packn; in convdw3x3s1_packn_fp16sa_rvv() 197 r0 += 2 * packn + w * packn; in convdw3x3s1_packn_fp16sa_rvv() 198 r1 += 2 * packn + w * packn; in convdw3x3s1_packn_fp16sa_rvv() 199 r2 += 2 * packn + w * packn; in convdw3x3s1_packn_fp16sa_rvv() 200 r3 += 2 * packn + w * packn; in convdw3x3s1_packn_fp16sa_rvv() 290 r0 += packn; in convdw3x3s1_packn_fp16sa_rvv() 291 r1 += packn; in convdw3x3s1_packn_fp16sa_rvv() [all …]
|
H A D | crop_riscv.cpp | 122 int out_elempack = _outw % packn == 0 ? packn : 1; in forward() 131 if (_woffset % packn == 0 && out_elempack == packn) in forward() 148 int out_elempack = _outh % packn == 0 ? packn : 1; in forward() 157 if (_hoffset % packn == 0 && out_elempack == packn) in forward() 174 int out_elempack = _outc % packn == 0 ? packn : 1; in forward() 183 if (_coffset % packn == 0 && out_elempack == packn) in forward() 216 int out_elempack = _outc % packn == 0 ? packn : 1; in forward() 225 if (_coffset % packn == 0 && out_elempack == packn) in forward() 312 int out_elempack = _outw % packn == 0 ? packn : 1; in forward() 338 int out_elempack = _outh % packn == 0 ? packn : 1; in forward() [all …]
|
H A D | interp_bicubic_packn_fp16s.h | 24 Mat rowsbuf0(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn_fp16s() 25 Mat rowsbuf1(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn_fp16s() 26 Mat rowsbuf2(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn_fp16s() 27 Mat rowsbuf3(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn_fp16s() 233 Dp += packn; in resize_bicubic_image_packn_fp16s() 234 rows0p += packn; in resize_bicubic_image_packn_fp16s() 253 Mat rowsbuf0(w, (size_t)packn * 2u, packn); in resize_bicubic_image_packn_fp16sa() 254 Mat rowsbuf1(w, (size_t)packn * 2u, packn); in resize_bicubic_image_packn_fp16sa() 255 Mat rowsbuf2(w, (size_t)packn * 2u, packn); in resize_bicubic_image_packn_fp16sa() 256 Mat rowsbuf3(w, (size_t)packn * 2u, packn); in resize_bicubic_image_packn_fp16sa() [all …]
|
H A D | convolution_3x3_packn.h | 17 const int packn = csrr_vlenb() / 4; in conv3x3s1_winograd64_transform_kernel_packn_rvv() local 72 …kernel_tm_packn.create(inch / packn, 64, outch / packn, (size_t)4u * packn * packn, packn * packn); in conv3x3s1_winograd64_transform_kernel_packn_rvv() 74 for (int q = 0; q + (packn - 1) < outch; q += packn) in conv3x3s1_winograd64_transform_kernel_packn_rvv() 82 for (int p = 0; p + (packn - 1) < inch; p += packn) in conv3x3s1_winograd64_transform_kernel_packn_rvv() 100 const int packn = csrr_vlenb() / 4; in conv3x3s1_winograd64_packn_rvv() local 172 float tmp[8][8][packn]; in conv3x3s1_winograd64_packn_rvv() 434 tmpptr += packn; in conv3x3s1_winograd64_packn_rvv() 635 float tmp[6][8][packn]; in conv3x3s1_winograd64_packn_rvv() 800 …kernel_tm_packn.create(inch / packn, 36, outch / packn, (size_t)4u * packn * packn, packn * packn); in conv3x3s1_winograd42_transform_kernel_packn_rvv() 802 for (int q = 0; q + (packn - 1) < outch; q += packn) in conv3x3s1_winograd42_transform_kernel_packn_rvv() [all …]
|
H A D | convolution_sgemm_packnto1.h | 38 tmp.create(maxk, inch, size, 4u * packn, packn, opt.workspace_allocator); in im2col_sgemm_packnto1_rvv() 196 int p = pp * packn; in im2col_sgemm_packnto1_rvv() 358 const float* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_rvv() 450 const float* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_rvv() 510 const float* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_rvv() 554 const float* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_rvv() 600 kernel_tm.create(packn * packn * maxk, inch / packn, outch / packn + outch % packn); in convolution_im2col_sgemm_transform_kernel_packnto1_rvv() 603 for (; q + (packn - 1) < outch; q += packn) in convolution_im2col_sgemm_transform_kernel_packnto1_rvv() 607 for (int p = 0; p + (packn - 1) < inch; p += packn) in convolution_im2col_sgemm_transform_kernel_packnto1_rvv() 629 float* g00 = kernel_tm.channel(q / packn + q % packn); in convolution_im2col_sgemm_transform_kernel_packnto1_rvv() [all …]
|
H A D | interp_bicubic_packn.h | 17 const int packn = csrr_vlenb() / 4; in resize_bicubic_image_packn() local 24 Mat rowsbuf0(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn() 25 Mat rowsbuf1(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn() 26 Mat rowsbuf2(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn() 27 Mat rowsbuf3(w, (size_t)packn * 4u, packn); in resize_bicubic_image_packn() 57 int sx = xofs[dx] * packn; in resize_bicubic_image_packn() 233 Dp += packn; in resize_bicubic_image_packn() 234 rows0p += packn; in resize_bicubic_image_packn() 235 rows1p += packn; in resize_bicubic_image_packn() 236 rows2p += packn; in resize_bicubic_image_packn() [all …]
|
H A D | convolution_3x3_packn_fp16s.h | 17 const int packn = csrr_vlenb() / 2; in conv3x3s1_winograd64_transform_kernel_packn_fp16sa_rvv() local 72 …kernel_tm_packn.create(inch / packn, 64, outch / packn, (size_t)2u * packn * packn, packn * packn); in conv3x3s1_winograd64_transform_kernel_packn_fp16sa_rvv() 74 for (int q = 0; q + (packn - 1) < outch; q += packn) in conv3x3s1_winograd64_transform_kernel_packn_fp16sa_rvv() 82 for (int p = 0; p + (packn - 1) < inch; p += packn) in conv3x3s1_winograd64_transform_kernel_packn_fp16sa_rvv() 100 const int packn = csrr_vlenb() / 2; in conv3x3s1_winograd64_packn_fp16sa_rvv() local 172 __fp16 tmp[8][8][packn]; in conv3x3s1_winograd64_packn_fp16sa_rvv() 434 tmpptr += packn; in conv3x3s1_winograd64_packn_fp16sa_rvv() 492 k0 += packn; in conv3x3s1_winograd64_packn_fp16sa_rvv() 800 …kernel_tm_packn.create(inch / packn, 36, outch / packn, (size_t)2u * packn * packn, packn * packn); in conv3x3s1_winograd42_transform_kernel_packn_fp16sa_rvv() 802 for (int q = 0; q + (packn - 1) < outch; q += packn) in conv3x3s1_winograd42_transform_kernel_packn_fp16sa_rvv() [all …]
|
H A D | convolution_sgemm_packnto1_fp16s.h | 38 tmp.create(maxk, inch, size, 2u * packn, packn, opt.workspace_allocator); in im2col_sgemm_packnto1_fp16sa_rvv() 196 int p = pp * packn; in im2col_sgemm_packnto1_fp16sa_rvv() 358 const __fp16* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_fp16sa_rvv() 450 const __fp16* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_fp16sa_rvv() 510 const __fp16* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_fp16sa_rvv() 554 const __fp16* kptr0 = kernel.channel(p / packn + p % packn); in im2col_sgemm_packnto1_fp16sa_rvv() 600 kernel_tm.create(packn * packn * maxk, inch / packn, outch / packn + outch % packn, (size_t)2u); in convolution_im2col_sgemm_transform_kernel_packnto1_fp16sa_rvv() 603 for (; q + (packn - 1) < outch; q += packn) in convolution_im2col_sgemm_transform_kernel_packnto1_fp16sa_rvv() 607 for (int p = 0; p + (packn - 1) < inch; p += packn) in convolution_im2col_sgemm_transform_kernel_packnto1_fp16sa_rvv() 629 __fp16* g00 = kernel_tm.channel(q / packn + q % packn); in convolution_im2col_sgemm_transform_kernel_packnto1_fp16sa_rvv() [all …]
|
H A D | padding_packn.h | 18 …const int packn = csrr_vlenb() / sizeof(T); … 19 …const word_type vl = vsetvl_e##SEW##m##LMUL(packn); … 30 …outptr += packn; … 39 …outptr += packn; … 45 …ptr += packn; … 46 …outptr += packn; … 51 …outptr += packn; … 60 …outptr += packn; … 172 …v##VT##m##LMUL##_t _p = vle##SEW##_v_##TSEW##m##LMUL(ptr0 - packn * 2 - x * packn, vl); … 196 …v##VT##m##LMUL##_t _p = vle##SEW##_v_##TSEW##m##LMUL(ptr - packn * 2 - x * packn, vl); … [all …]
|
H A D | padding_riscv.cpp | 116 int out_elempack = outw % packn == 0 ? packn : 1; in forward() 137 int out_elempack = outh % packn == 0 ? packn : 1; in forward() 140 if (top % packn == 0 && out_elempack == packn && type == 0) in forward() 159 int out_elempack = outc % packn == 0 ? packn : 1; in forward() 255 out_elempack = top_blob_unpacked.c % packn == 0 ? packn : 1; in forward() 286 int out_elempack = outw % packn == 0 ? packn : 1; in forward_bf16s_fp16s() 327 int out_elempack = outh % packn == 0 ? packn : 1; in forward_bf16s_fp16s() 369 int out_elempack = outc % packn == 0 ? packn : 1; in forward_bf16s_fp16s() 536 int out_elempack = outw % packn == 0 ? packn : 1; in forward_int8() 557 int out_elempack = outh % packn == 0 ? packn : 1; in forward_int8() [all …]
|
H A D | interp_bilinear_packn_fp16s.h | 17 const int packn = csrr_vlenb() / 2; in resize_bilinear_image_packn_fp16s() local 24 Mat rowsbuf0(w, (size_t)packn * 4u, packn); in resize_bilinear_image_packn_fp16s() 25 Mat rowsbuf1(w, (size_t)packn * 4u, packn); in resize_bilinear_image_packn_fp16s() 113 Dp += packn; in resize_bilinear_image_packn_fp16s() 114 rows0p += packn; in resize_bilinear_image_packn_fp16s() 115 rows1p += packn; in resize_bilinear_image_packn_fp16s() 131 Mat rowsbuf0(w, (size_t)packn * 2u, packn); in resize_bilinear_image_packn_fp16sa() 132 Mat rowsbuf1(w, (size_t)packn * 2u, packn); in resize_bilinear_image_packn_fp16sa() 220 Dp += packn; in resize_bilinear_image_packn_fp16sa() 221 rows0p += packn; in resize_bilinear_image_packn_fp16sa() [all …]
|
H A D | convolution1d_riscv.cpp | 52 const int packn = csrr_vlenb() / 4; in create_pipeline() local 62 elempack = num_input % packn == 0 ? packn : 1; in create_pipeline() 63 out_elempack = num_output % packn == 0 ? packn : 1; in create_pipeline() 146 out_elempack = num_output % packn == 0 ? packn : 1; in forward() 159 if (elempack == packn && out_elempack == packn) in forward() 353 elempack = num_input % packn == 0 ? packn : 1; in create_pipeline_fp16s() 354 out_elempack = num_output % packn == 0 ? packn : 1; in create_pipeline_fp16s() 415 int out_elempack = (opt.use_packing_layout && num_output % packn == 0) ? packn : 1; in forward_fp16s() 425 if (elempack == packn && out_elempack == packn) in forward_fp16s() 636 int out_elempack = (opt.use_packing_layout && num_output % packn == 0) ? packn : 1; in forward_fp16sa() [all …]
|
H A D | innerproduct_riscv.cpp | 219 if (elempack == packn) in forward() 256 int p = pp * packn; in forward() 338 out_elempack = num_output % packn == 0 ? packn : 1; in create_pipeline_fp16s() 389 if (elempack == packn && num_output_elempack == packn) in forward_fp16s() 548 int out_elempack = opt.use_packing_layout && num_output % packn == 0 ? packn : 1; in forward_fp16s() 555 if (out_elempack == packn) in forward_fp16s() 581 kptr += packn; in forward_fp16s() 651 if (elempack == packn && num_output_elempack == packn) in forward_fp16sa() 810 int out_elempack = opt.use_packing_layout && num_output % packn == 0 ? packn : 1; in forward_fp16sa() 817 if (out_elempack == packn) in forward_fp16sa() [all …]
|
H A D | deconvolution_packn_fp16s.h | 17 const int packn = csrr_vlenb() / 2; in deconvolution_packn_fp16s_rvv() local 18 const word_type vl = vsetvl_e16m1(packn); in deconvolution_packn_fp16s_rvv() 83 for (int l = 0; l < packn; l++) in deconvolution_packn_fp16s_rvv() 86 … vfloat16m1_t _w0 = vle16_v_f16m1(kptr + k * packn * packn + packn * l, vl); in deconvolution_packn_fp16s_rvv() 92 kptr += maxk * packn * packn; in deconvolution_packn_fp16s_rvv() 100 outptr += outw * packn; in deconvolution_packn_fp16s_rvv() 107 const int packn = csrr_vlenb() / 2; in deconvolution_packn_fp16sa_rvv() local 108 const word_type vl = vsetvl_e16m1(packn); in deconvolution_packn_fp16sa_rvv() 176 … vfloat16m1_t _w0 = vle16_v_f16m1(kptr + k * packn * packn + packn * l, vl); in deconvolution_packn_fp16sa_rvv() 182 kptr += maxk * packn * packn; in deconvolution_packn_fp16sa_rvv() [all …]
|
H A D | interp_bilinear_packn.h | 17 const int packn = csrr_vlenb() / 4; in resize_bilinear_image_packn() local 18 const word_type vl = vsetvl_e32m1(packn); in resize_bilinear_image_packn() 24 Mat rowsbuf0(w, (size_t)packn * 4u, packn); in resize_bilinear_image_packn() 25 Mat rowsbuf1(w, (size_t)packn * 4u, packn); in resize_bilinear_image_packn() 52 int sx = xofs[dx] * packn; in resize_bilinear_image_packn() 59 vse32_v_f32m1(rows1p + dx * packn, _rows1, vl); in resize_bilinear_image_packn() 76 int sx = xofs[dx] * packn; in resize_bilinear_image_packn() 87 vse32_v_f32m1(rows0p + dx * packn, _rows0, vl); in resize_bilinear_image_packn() 113 Dp += packn; in resize_bilinear_image_packn() 114 rows0p += packn; in resize_bilinear_image_packn() [all …]
|
H A D | convolution_7x7_pack1ton.h | 17 const int packn = csrr_vlenb() / 4; in conv7x7s2_pack1ton_rvv() local 18 const word_type vl = vsetvl_e32m1(packn); in conv7x7s2_pack1ton_rvv() 79 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 146 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 213 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 280 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 347 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 414 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 574 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() 613 kptr += packn * 7; in conv7x7s2_pack1ton_rvv() [all …]
|
H A D | convolution_7x7_pack1ton_fp16s.h | 17 const int packn = csrr_vlenb() / 2; in conv7x7s2_pack1ton_fp16sa_rvv() local 18 const word_type vl = vsetvl_e16m1(packn); in conv7x7s2_pack1ton_fp16sa_rvv() 79 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 146 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 213 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 280 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 347 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 414 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 574 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() 613 kptr += packn * 7; in conv7x7s2_pack1ton_fp16sa_rvv() [all …]
|
H A D | deconvolution_riscv.cpp | 93 elempack = num_input % packn == 0 ? packn : 1; in create_pipeline() 94 out_elempack = num_output % packn == 0 ? packn : 1; in create_pipeline() 133 if (elempack == packn && out_elempack == packn) in create_pipeline() 199 out_elempack = num_output % packn == 0 ? packn : 1; in forward() 220 if (elempack == packn && out_elempack == packn) in forward() 335 elempack = num_input % packn == 0 ? packn : 1; in create_pipeline_fp16s() 336 out_elempack = num_output % packn == 0 ? packn : 1; in create_pipeline_fp16s() 390 if (elempack == packn && out_elempack == packn) in create_pipeline_fp16s() 434 int out_elempack = (opt.use_packing_layout && num_output % packn == 0) ? packn : 1; in forward_fp16s() 450 if (elempack == packn && out_elempack == packn) in forward_fp16s() [all …]
|
H A D | convolution_sgemm.h | 35 if (size >= packn) in im2col_sgemm_rvv() 36 tmp.create(packn * maxk, inch, size / packn + size % packn, 4u, 1, opt.workspace_allocator); in im2col_sgemm_rvv() 45 int i = ii * packn; in im2col_sgemm_rvv() 67 float* tmpptr = tmp.channel(i / packn + i % packn); in im2col_sgemm_rvv() 127 for (; i + (packn - 1) < size; i += packn) in im2col_sgemm_rvv() 167 outptr0 += packn; in im2col_sgemm_rvv() 178 const float* tmpptr = tmp.channel(i / packn + i % packn); in im2col_sgemm_rvv() 242 for (; i + (packn - 1) < size; i += packn) in im2col_sgemm_rvv() 277 const float* tmpptr = tmp.channel(i / packn + i % packn); in im2col_sgemm_rvv() 319 for (; i + (packn - 1) < size; i += packn) in im2col_sgemm_rvv() [all …]
|
H A D | convolution_3x3_pack1ton.h | 17 const int packn = csrr_vlenb() / 4; in conv3x3s1_pack1ton_rvv() local 18 const word_type vl = vsetvl_e32m1(packn); in conv3x3s1_pack1ton_rvv() 157 outptr0 += packn * 8; in conv3x3s1_pack1ton_rvv() 214 outptr0 += packn * 4; in conv3x3s1_pack1ton_rvv() 249 outptr0 += packn * 2; in conv3x3s1_pack1ton_rvv() 273 outptr0 += packn; in conv3x3s1_pack1ton_rvv() 285 k0 += 9 * packn; in conv3x3s1_pack1ton_rvv() 292 const int packn = csrr_vlenb() / 4; in conv3x3s2_pack1ton_rvv() local 435 outptr0 += packn * 8; in conv3x3s2_pack1ton_rvv() 551 outptr0 += packn; in conv3x3s2_pack1ton_rvv() [all …]
|