/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/tvm_cnnlayer/libxsmm_wrapper/ |
H A D | batch_reduce_plus_init.cc | 15 …const float *weight, const float *input, float *output, int blocks, int ofmblock, int ifmblock, in… in batch_reduce_kernel_update() argument 17 …cebatch_addr batchreduce_kernela = libxsmm_smmdispatch_reducebatch_addr(ofmblock,ofw, ifmblock,NUL… in batch_reduce_kernel_update() 21 int weight_stride = ofmblock*ifmblock*r*s; in batch_reduce_kernel_update() 32 … A[k*r*s + i*s + j] = &weight[k*r*s*ofmblock*ifmblock + (i*s + j)*ofmblock*ifmblock]; in batch_reduce_kernel_update() 47 int lda = ofmblock; in batch_reduce_kernel_init_update() 48 int ldx = ofmblock; in batch_reduce_kernel_init_update() 51 …cebatch_addr batchreduce_kernela = libxsmm_smmdispatch_reducebatch_addr(ofmblock,ofw, ifmblock,&ld… in batch_reduce_kernel_init_update() 56 int weight_stride = ofmblock*ifmblock*r*s; in batch_reduce_kernel_init_update() 67 A[k*r*s + i*s + j] = &weight[k*r*s*ofmblock*ifmblock + (i*s + j)*ofmblock*ifmblock]; in batch_reduce_kernel_init_update() 79 extern "C" int batch_reduce_kernel_init(float *output, int ofmblock, int ofw){ in batch_reduce_kernel_init() argument [all …]
|
/dports/math/libxsmm/libxsmm-1.16.3/src/ |
H A D | libxsmm_dnn_convolution_forward.c | 30 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS() 31 const libxsmm_blasint ldC = handle->ofmblock; in LIBXSMM_INTRINSICS() 56 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS() 57 const libxsmm_blasint ldC = handle->ofmblock; in LIBXSMM_INTRINSICS() 92 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS() 93 const libxsmm_blasint ldC = handle->ofmblock; in LIBXSMM_INTRINSICS() 175 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS() 176 const libxsmm_blasint ldC = handle->blocksofm*handle->ofmblock; in LIBXSMM_INTRINSICS() 265 const libxsmm_blasint ldA = handle->ofmblock; in libxsmm_dnn_convolve_st_fwd_custom_custom() 266 const libxsmm_blasint ldC = handle->ofmblock; in libxsmm_dnn_convolve_st_fwd_custom_custom() [all …]
|
H A D | libxsmm_dnn_convolution_backward.c | 28 const libxsmm_blasint ldB = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS() 65 const libxsmm_blasint ldB = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS() 106 const libxsmm_blasint ldB = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS() 155 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS() 171 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS() 200 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS() 216 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS() 275 const libxsmm_blasint ldx = ((libxsmm_blasint)handle->ofmblock); in libxsmm_dnn_convolve_st_bwd_custom_custom() 332 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in libxsmm_dnn_convolve_st_bwd_nhwc_rsck() 348 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in libxsmm_dnn_convolve_st_bwd_nhwc_rsck() [all …]
|
H A D | libxsmm_dnn_convolution.c | 79 int result = handle->desc.K / handle->ofmblock; in libxsmm_dnn_convolution_setup_blocksofm() 939 _ldi = handle->ofmblock * handle->ofwp; in libxsmm_dnn_convolution_setup() 940 _ldo = handle->ofmblock * handle->ofwp; in libxsmm_dnn_convolution_setup() 947 const libxsmm_blasint ldA = handle->ofmblock; in libxsmm_dnn_convolution_setup() 948 const libxsmm_blasint ldC = handle->ofmblock; in libxsmm_dnn_convolution_setup() 988 const libxsmm_blasint ldA = handle->ofmblock; in libxsmm_dnn_convolution_setup() 989 const libxsmm_blasint ldC = handle->ofmblock; in libxsmm_dnn_convolution_setup() 1205 handle->ofmblock = 1; in libxsmm_dnn_create_conv_layer() 1505 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_create_tensor_datalayout() 1526 layout->dim_size[1] = handle->ofmblock; in libxsmm_dnn_create_tensor_datalayout() [all …]
|
H A D | libxsmm_dnn_pooling.c | 35 &(handle->ifmblock), &(handle->ofmblock), &lpb, in libxsmm_dnn_create_pooling() 39 handle->blocksofm = handle->desc.C / handle->ofmblock; in libxsmm_dnn_create_pooling() 48 * LIBXSMM_MAX( handle->ofmblock, handle->ifmblock ) in libxsmm_dnn_create_pooling() 117 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout() 123 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout() 163 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout() 169 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout()
|
H A D | libxsmm_dnn_fullyconnected_forward.c | 31 libxsmm_blasint lda = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS() 36 …gemm_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->desc.N, handle->desc.C, … in LIBXSMM_INTRINSICS() 58 libxsmm_blasint lda = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS() 65 …gemm_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->desc.N, handle->desc.C, … in LIBXSMM_INTRINSICS() 285 libxsmm_blasint lda = (libxsmm_blasint)handle->ofmblock; in libxsmm_dnn_fullyconnected_st_fwd_custom() 292 …gemm_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->desc.N, handle->desc.C, … in libxsmm_dnn_fullyconnected_st_fwd_custom()
|
H A D | libxsmm_dnn_pooling_forward.c | 211 (handle->ofmblock == 16) ) { in libxsmm_dnn_pooling_st_fwd_custom() 223 (handle->ofmblock == 32) ) { in libxsmm_dnn_pooling_st_fwd_custom() 235 (handle->ofmblock == 64) ) { in libxsmm_dnn_pooling_st_fwd_custom()
|
H A D | libxsmm_dnn_pooling_backward.c | 211 (handle->ofmblock == 16) ) { in libxsmm_dnn_pooling_st_bwd_custom() 223 (handle->ofmblock == 32) ) { in libxsmm_dnn_pooling_st_bwd_custom() 235 (handle->ofmblock == 64) ) { in libxsmm_dnn_pooling_st_bwd_custom()
|
H A D | libxsmm_dnn_fusedbatchnorm.c | 44 &(handle->ifmblock), &(handle->ofmblock), &lpb, in libxsmm_dnn_create_fusedbatchnorm() 48 handle->blocksofm = handle->desc.C / handle->ofmblock; in libxsmm_dnn_create_fusedbatchnorm() 117 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout() 153 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout() 283 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout() 304 layout->dim_size[0] = handle->ofmblock*handle->blocksofm; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout()
|
H A D | libxsmm_dnn_fusedgroupnorm.c | 35 &(handle->ifmblock), &(handle->ofmblock), &lpb, in libxsmm_dnn_create_fusedgroupnorm() 39 handle->blocksofm = handle->desc.C / handle->ofmblock; in libxsmm_dnn_create_fusedgroupnorm() 108 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout() 144 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout() 303 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout() 324 layout->dim_size[0] = handle->ofmblock*handle->blocksofm; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout()
|
H A D | libxsmm_main.h | 383 int ofmblock; member 530 int ofmblock; member 586 int ofmblock; member 606 int ofmblock; member 652 int ofmblock; member
|
/dports/math/libxsmm/libxsmm-1.16.3/src/template/ |
H A D | libxsmm_dnn_convolve_st_upd_custom_custom_generic_bf16.tpl.c | 25 src_out += 2* handle->ofmblock;\ 26 tr_out += 2*handle->ofmblock;\ 61 src_out += 2* handle->ofmblock;\ 62 tr_out += 2*handle->ofmblock;\ 131 libxsmm_blasint LDA = handle->ofmblock; 264 LDA = handle->ofmblock; 266 LDC = handle->ofmblock; 331 LDA = handle->ofmblock; 333 LDC = handle->ofmblock; 442 LDA = handle->ofmblock; [all …]
|
H A D | libxsmm_dnn_convolve_st_fwd_custom_custom_generic_bf16.tpl.c | 126 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 129 temp_ptr += handle->ofmblock; 243 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 246 temp_ptr += handle->ofmblock; 307 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 310 temp_ptr += handle->ofmblock; 432 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 435 temp_ptr += handle->ofmblock; 504 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 507 temp_ptr += handle->ofmblock; [all …]
|
H A D | libxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i32.tpl.c | 36 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 84 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 87 temp_ptr += handle->ofmblock; 100 …g, ofm1, oj, oi+1, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks); 104 …img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks); 108 …img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks); 130 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 133 temp_ptr += handle->ofmblock; 139 …img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks); 155 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { [all …]
|
H A D | libxsmm_dnn_convolve_st_fwd_nhwc_custom-rsck_generic.tpl.c | 29 …element_output_type, output, out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock); 164 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 167 temp_ptr += handle->blocksofm*handle->ofmblock; 256 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 259 temp_ptr += handle->blocksofm*handle->ofmblock; 313 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 316 temp_ptr += handle->blocksofm*handle->ofmblock; 410 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 413 temp_ptr += handle->blocksofm * handle->ofmblock; 474 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { [all …]
|
H A D | libxsmm_dnn_convolve_st_upd_nhwc_custom-rsck_generic.tpl.c | 15 libxsmm_blasint LDA = handle->blocksofm * handle->ofmblock; 18 libxsmm_blasint LDC = handle->ofmblock; 21 libxsmm_blasint LDC = handle->blocksofm * handle->ofmblock; 25 … (const element_output_type*)out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock); 224 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) { 247 …(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock), 252 …(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock), 468 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) { 521 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++ ) { 620 const int fm_blocking = (handle->ofmblock % 16 == 0) ? 16 : handle->ofmblock; [all …]
|
H A D | libxsmm_dnn_convolve_st_fwd_custom_custom_generic.tpl.c | 29 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 158 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 161 temp_ptr += handle->ofmblock; 235 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 238 temp_ptr += handle->ofmblock; 287 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 290 temp_ptr += handle->ofmblock; 369 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 372 temp_ptr += handle->ofmblock; 428 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { [all …]
|
H A D | libxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_fallback_generic.tpl.c | 33 …le->desc.pad_h_out * handle->ofwp + handle->desc.pad_w_out) * handle->blocksofm * handle->ofmblock; 37 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); 40 …r->data, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->blocksofm, handle->ofmblock); 65 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 68 … ofm2, ifm2, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 69 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); 73 …1, ofm2, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->blocksofm, handle->ofmblock); 89 …element_output_type, output, out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock); 90 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 121 …5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock), [all …]
|
H A D | libxsmm_dnn_convolve_st_upd_custom_custom_generic.tpl.c | 15 libxsmm_blasint LDA = handle->ofmblock; 17 libxsmm_blasint LDC = handle->ofmblock; 20 … (const element_output_type*)out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 99 …(5, output, img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), 197 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) { 214 …(5, output, img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), 400 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) { 438 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++ ) { 522 const int fm_blocking = (handle->ofmblock % 16 == 0) ? 16 : handle->ofmblock; 523 …cksofm * handle->blocksifm * handle->desc.R * handle->desc.S * (handle->ofmblock/fm_blocking) * ha… [all …]
|
H A D | libxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_generic.tpl.c | 39 …element_output_type, output, out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock); 43 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); 51 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 60 if (handle->ifmblock == 64 && handle->ofmblock == 64) { 69 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 71 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 73 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 75 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 92 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 96 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); [all …]
|
H A D | libxsmm_dnn_convolve_st_bwd_custom_custom_generic.tpl.c | 39 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 42 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); 46 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 54 if (handle->ifmblock == 64 && handle->ofmblock == 64) { 63 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 65 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 67 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 69 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in, 85 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 88 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); [all …]
|
H A D | libxsmm_dnn_convolve_st_bwd_custom_custom_fallback_generic.tpl.c | 33 …data + ((size_t)handle->desc.pad_h_out * handle->ofwp + handle->desc.pad_w_out) * handle->ofmblock; 36 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); 60 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 62 … ofm2, ifm2, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 63 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock); 78 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 79 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 106 …0, 0, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… 107 … img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), 154 …0, 0, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock… [all …]
|
H A D | libxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i8.tpl.c | 30 …fwp + handle->desc.pad_w_out) * handle->ofmblock, handle->blocksofm, handle->ofhp, handle->ofwp, h… 32 …handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, handle->fm_lp_bl… 43 …handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, handle->fm_lp_bl… 45 …ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks, &_sc… 55 …handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, handle->fm_lp_bl… 57 …ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks, hand…
|
H A D | libxsmm_dnn_convolve_st_bwd_custom_custom_generic_bf16.tpl.c | 21 int ofmblock_lp = handle->ofmblock/handle->fm_lp_block; 50 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 53 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, lpb); 70 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) { 73 …, ifm2%lpb, handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, lpb); 159 … oj_use + kj, oi_use + ki + 1, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 183 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 207 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 274 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock); 338 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
|
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/tvm_cnnlayer/ |
H A D | mb1_tuned_latest.py | 131 def intrin_libxsmm_hxw(ofmblock,ofw,ifmblock, stride_width,ifw,rco, ifh,r,s, ifh_stride, ifw_stride… argument 141 A = tvm.placeholder((rco,r,s,ifmblock, ofmblock), name='w') 146 (ofh,ofw,ofmblock), 169 strides=[output_width*ofmblock, ofmblock, 1], 194 A = tvm.placeholder((rco,r,s,ifmblock, ofmblock), name='w') 201 (ofw,ofmblock), 241 def conv_auto_tuned(ofmblock,ofw, ifmblock, stride_width,input_width,\ argument 245 …ceholder((math.ceil(out_channel/ofmblock), math.ceil(in_channel/ifmblock), filter_height, filter_w… 275 B1 = tvm.compute((batch, math.ceil(out_channel/ofmblock),ofh, ofw,ofmblock), 282 B1 = tvm.compute((batch, math.ceil(out_channel/ofmblock),ofh, ofw,ofmblock), [all …]
|