Home
last modified time | relevance | path

Searched refs:ofmblock (Results 1 – 25 of 35) sorted by relevance

12

/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/tvm_cnnlayer/libxsmm_wrapper/
H A Dbatch_reduce_plus_init.cc15 …const float *weight, const float *input, float *output, int blocks, int ofmblock, int ifmblock, in… in batch_reduce_kernel_update() argument
17 …cebatch_addr batchreduce_kernela = libxsmm_smmdispatch_reducebatch_addr(ofmblock,ofw, ifmblock,NUL… in batch_reduce_kernel_update()
21 int weight_stride = ofmblock*ifmblock*r*s; in batch_reduce_kernel_update()
32 … A[k*r*s + i*s + j] = &weight[k*r*s*ofmblock*ifmblock + (i*s + j)*ofmblock*ifmblock]; in batch_reduce_kernel_update()
47 int lda = ofmblock; in batch_reduce_kernel_init_update()
48 int ldx = ofmblock; in batch_reduce_kernel_init_update()
51 …cebatch_addr batchreduce_kernela = libxsmm_smmdispatch_reducebatch_addr(ofmblock,ofw, ifmblock,&ld… in batch_reduce_kernel_init_update()
56 int weight_stride = ofmblock*ifmblock*r*s; in batch_reduce_kernel_init_update()
67 A[k*r*s + i*s + j] = &weight[k*r*s*ofmblock*ifmblock + (i*s + j)*ofmblock*ifmblock]; in batch_reduce_kernel_init_update()
79 extern "C" int batch_reduce_kernel_init(float *output, int ofmblock, int ofw){ in batch_reduce_kernel_init() argument
[all …]
/dports/math/libxsmm/libxsmm-1.16.3/src/
H A Dlibxsmm_dnn_convolution_forward.c30 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS()
31 const libxsmm_blasint ldC = handle->ofmblock; in LIBXSMM_INTRINSICS()
56 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS()
57 const libxsmm_blasint ldC = handle->ofmblock; in LIBXSMM_INTRINSICS()
92 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS()
93 const libxsmm_blasint ldC = handle->ofmblock; in LIBXSMM_INTRINSICS()
175 const libxsmm_blasint ldA = handle->ofmblock; in LIBXSMM_INTRINSICS()
176 const libxsmm_blasint ldC = handle->blocksofm*handle->ofmblock; in LIBXSMM_INTRINSICS()
265 const libxsmm_blasint ldA = handle->ofmblock; in libxsmm_dnn_convolve_st_fwd_custom_custom()
266 const libxsmm_blasint ldC = handle->ofmblock; in libxsmm_dnn_convolve_st_fwd_custom_custom()
[all …]
H A Dlibxsmm_dnn_convolution_backward.c28 const libxsmm_blasint ldB = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS()
65 const libxsmm_blasint ldB = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS()
106 const libxsmm_blasint ldB = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS()
155 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS()
171 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS()
200 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS()
216 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in LIBXSMM_INTRINSICS()
275 const libxsmm_blasint ldx = ((libxsmm_blasint)handle->ofmblock); in libxsmm_dnn_convolve_st_bwd_custom_custom()
332 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in libxsmm_dnn_convolve_st_bwd_nhwc_rsck()
348 const libxsmm_blasint ldB = (libxsmm_blasint)(handle->blocksofm * handle->ofmblock); in libxsmm_dnn_convolve_st_bwd_nhwc_rsck()
[all …]
H A Dlibxsmm_dnn_convolution.c79 int result = handle->desc.K / handle->ofmblock; in libxsmm_dnn_convolution_setup_blocksofm()
939 _ldi = handle->ofmblock * handle->ofwp; in libxsmm_dnn_convolution_setup()
940 _ldo = handle->ofmblock * handle->ofwp; in libxsmm_dnn_convolution_setup()
947 const libxsmm_blasint ldA = handle->ofmblock; in libxsmm_dnn_convolution_setup()
948 const libxsmm_blasint ldC = handle->ofmblock; in libxsmm_dnn_convolution_setup()
988 const libxsmm_blasint ldA = handle->ofmblock; in libxsmm_dnn_convolution_setup()
989 const libxsmm_blasint ldC = handle->ofmblock; in libxsmm_dnn_convolution_setup()
1205 handle->ofmblock = 1; in libxsmm_dnn_create_conv_layer()
1505 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_create_tensor_datalayout()
1526 layout->dim_size[1] = handle->ofmblock; in libxsmm_dnn_create_tensor_datalayout()
[all …]
H A Dlibxsmm_dnn_pooling.c35 &(handle->ifmblock), &(handle->ofmblock), &lpb, in libxsmm_dnn_create_pooling()
39 handle->blocksofm = handle->desc.C / handle->ofmblock; in libxsmm_dnn_create_pooling()
48 * LIBXSMM_MAX( handle->ofmblock, handle->ifmblock ) in libxsmm_dnn_create_pooling()
117 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout()
123 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout()
163 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout()
169 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_pooling_create_tensor_datalayout()
H A Dlibxsmm_dnn_fullyconnected_forward.c31 libxsmm_blasint lda = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS()
36 …gemm_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->desc.N, handle->desc.C, … in LIBXSMM_INTRINSICS()
58 libxsmm_blasint lda = (libxsmm_blasint)handle->ofmblock; in LIBXSMM_INTRINSICS()
65 …gemm_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->desc.N, handle->desc.C, … in LIBXSMM_INTRINSICS()
285 libxsmm_blasint lda = (libxsmm_blasint)handle->ofmblock; in libxsmm_dnn_fullyconnected_st_fwd_custom()
292 …gemm_function gemm_kernel = libxsmm_smmdispatch(handle->ofmblock, handle->desc.N, handle->desc.C, … in libxsmm_dnn_fullyconnected_st_fwd_custom()
H A Dlibxsmm_dnn_pooling_forward.c211 (handle->ofmblock == 16) ) { in libxsmm_dnn_pooling_st_fwd_custom()
223 (handle->ofmblock == 32) ) { in libxsmm_dnn_pooling_st_fwd_custom()
235 (handle->ofmblock == 64) ) { in libxsmm_dnn_pooling_st_fwd_custom()
H A Dlibxsmm_dnn_pooling_backward.c211 (handle->ofmblock == 16) ) { in libxsmm_dnn_pooling_st_bwd_custom()
223 (handle->ofmblock == 32) ) { in libxsmm_dnn_pooling_st_bwd_custom()
235 (handle->ofmblock == 64) ) { in libxsmm_dnn_pooling_st_bwd_custom()
H A Dlibxsmm_dnn_fusedbatchnorm.c44 &(handle->ifmblock), &(handle->ofmblock), &lpb, in libxsmm_dnn_create_fusedbatchnorm()
48 handle->blocksofm = handle->desc.C / handle->ofmblock; in libxsmm_dnn_create_fusedbatchnorm()
117 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout()
153 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout()
283 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout()
304 layout->dim_size[0] = handle->ofmblock*handle->blocksofm; in libxsmm_dnn_fusedbatchnorm_create_tensor_datalayout()
H A Dlibxsmm_dnn_fusedgroupnorm.c35 &(handle->ifmblock), &(handle->ofmblock), &lpb, in libxsmm_dnn_create_fusedgroupnorm()
39 handle->blocksofm = handle->desc.C / handle->ofmblock; in libxsmm_dnn_create_fusedgroupnorm()
108 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout()
144 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout()
303 layout->dim_size[0] = handle->ofmblock; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout()
324 layout->dim_size[0] = handle->ofmblock*handle->blocksofm; in libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout()
H A Dlibxsmm_main.h383 int ofmblock; member
530 int ofmblock; member
586 int ofmblock; member
606 int ofmblock; member
652 int ofmblock; member
/dports/math/libxsmm/libxsmm-1.16.3/src/template/
H A Dlibxsmm_dnn_convolve_st_upd_custom_custom_generic_bf16.tpl.c25 src_out += 2* handle->ofmblock;\
26 tr_out += 2*handle->ofmblock;\
61 src_out += 2* handle->ofmblock;\
62 tr_out += 2*handle->ofmblock;\
131 libxsmm_blasint LDA = handle->ofmblock;
264 LDA = handle->ofmblock;
266 LDC = handle->ofmblock;
331 LDA = handle->ofmblock;
333 LDC = handle->ofmblock;
442 LDA = handle->ofmblock;
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_custom_custom_generic_bf16.tpl.c126 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
129 temp_ptr += handle->ofmblock;
243 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
246 temp_ptr += handle->ofmblock;
307 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
310 temp_ptr += handle->ofmblock;
432 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
435 temp_ptr += handle->ofmblock;
504 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
507 temp_ptr += handle->ofmblock;
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i32.tpl.c36 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
84 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
87 temp_ptr += handle->ofmblock;
100 …g, ofm1, oj, oi+1, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks);
104 …img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks);
108 …img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks);
130 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
133 temp_ptr += handle->ofmblock;
139 …img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks);
155 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_nhwc_custom-rsck_generic.tpl.c29 …element_output_type, output, out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock);
164 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
167 temp_ptr += handle->blocksofm*handle->ofmblock;
256 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
259 temp_ptr += handle->blocksofm*handle->ofmblock;
313 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
316 temp_ptr += handle->blocksofm*handle->ofmblock;
410 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
413 temp_ptr += handle->blocksofm * handle->ofmblock;
474 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
[all …]
H A Dlibxsmm_dnn_convolve_st_upd_nhwc_custom-rsck_generic.tpl.c15 libxsmm_blasint LDA = handle->blocksofm * handle->ofmblock;
18 libxsmm_blasint LDC = handle->ofmblock;
21 libxsmm_blasint LDC = handle->blocksofm * handle->ofmblock;
25 … (const element_output_type*)out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock);
224 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) {
247 …(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock),
252 …(5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock),
468 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) {
521 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++ ) {
620 const int fm_blocking = (handle->ofmblock % 16 == 0) ? 16 : handle->ofmblock;
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_custom_custom_generic.tpl.c29 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
158 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
161 temp_ptr += handle->ofmblock;
235 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
238 temp_ptr += handle->ofmblock;
287 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
290 temp_ptr += handle->ofmblock;
369 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
372 temp_ptr += handle->ofmblock;
428 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_fallback_generic.tpl.c33 …le->desc.pad_h_out * handle->ofwp + handle->desc.pad_w_out) * handle->blocksofm * handle->ofmblock;
37 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
40 …r->data, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->blocksofm, handle->ofmblock);
65 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
68 … ofm2, ifm2, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
69 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
73 …1, ofm2, handle->desc.S, handle->blocksifm, handle->ifmblock, handle->blocksofm, handle->ofmblock);
89 …element_output_type, output, out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock);
90 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
121 …5, output, img, oj, oi, ofm1, 0, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock),
[all …]
H A Dlibxsmm_dnn_convolve_st_upd_custom_custom_generic.tpl.c15 libxsmm_blasint LDA = handle->ofmblock;
17 libxsmm_blasint LDC = handle->ofmblock;
20 … (const element_output_type*)out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
99 …(5, output, img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock),
197 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) {
214 …(5, output, img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock),
400 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++) {
438 for (ofm2 = 0; ofm2 < handle->ofmblock; ofm2++ ) {
522 const int fm_blocking = (handle->ofmblock % 16 == 0) ? 16 : handle->ofmblock;
523 …cksofm * handle->blocksifm * handle->desc.R * handle->desc.S * (handle->ofmblock/fm_blocking) * ha…
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_generic.tpl.c39 …element_output_type, output, out, handle->ofhp, handle->ofwp, handle->blocksofm, handle->ofmblock);
43 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
51 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
60 if (handle->ifmblock == 64 && handle->ofmblock == 64) {
69 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
71 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
73 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
75 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
92 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
96 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_custom_custom_generic.tpl.c39 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
42 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
46 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
54 if (handle->ifmblock == 64 && handle->ofmblock == 64) {
63 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
65 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
67 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
69 … 0, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock), &ld_in,
85 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
88 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_custom_custom_fallback_generic.tpl.c33 …data + ((size_t)handle->desc.pad_h_out * handle->ofwp + handle->desc.pad_w_out) * handle->ofmblock;
36 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
60 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
62 … ofm2, ifm2, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
63 …ifm2, ofm2, handle->blocksifm, handle->desc.R, handle->desc.S, handle->ifmblock, handle->ofmblock);
78 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
79 …weight_base, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
106 …0, 0, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
107 … img, ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock),
154 …0, 0, handle->blocksofm, handle->desc.R, handle->desc.S, handle->ofmblock, handle->ifmblock…
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i8.tpl.c30 …fwp + handle->desc.pad_w_out) * handle->ofmblock, handle->blocksofm, handle->ofhp, handle->ofwp, h…
32 …handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, handle->fm_lp_bl…
43 …handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, handle->fm_lp_bl…
45 …ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks, &_sc…
55 …handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, handle->fm_lp_bl…
57 …ofm1, oj, oi, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock), &n_blocks, hand…
H A Dlibxsmm_dnn_convolve_st_bwd_custom_custom_generic_bf16.tpl.c21 int ofmblock_lp = handle->ofmblock/handle->fm_lp_block;
50 …element_output_type, output, out, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
53 …lter->data, handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, lpb);
70 for (ofm2 = 0; ofm2 < handle->ofmblock; ++ofm2) {
73 …, ifm2%lpb, handle->blocksifm, handle->desc.R, handle->desc.S, ifmblock_lp, handle->ofmblock, lpb);
159 … oj_use + kj, oi_use + ki + 1, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
183 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
207 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
274 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
338 …fm2, oj_use + kj, oi_use + ki, 0, handle->blocksofm, handle->ofhp, handle->ofwp, handle->ofmblock);
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/tvm_cnnlayer/
H A Dmb1_tuned_latest.py131 def intrin_libxsmm_hxw(ofmblock,ofw,ifmblock, stride_width,ifw,rco, ifh,r,s, ifh_stride, ifw_stride… argument
141 A = tvm.placeholder((rco,r,s,ifmblock, ofmblock), name='w')
146 (ofh,ofw,ofmblock),
169 strides=[output_width*ofmblock, ofmblock, 1],
194 A = tvm.placeholder((rco,r,s,ifmblock, ofmblock), name='w')
201 (ofw,ofmblock),
241 def conv_auto_tuned(ofmblock,ofw, ifmblock, stride_width,input_width,\ argument
245 …ceholder((math.ceil(out_channel/ofmblock), math.ceil(in_channel/ifmblock), filter_height, filter_w…
275 B1 = tvm.compute((batch, math.ceil(out_channel/ofmblock),ofh, ofw,ofmblock),
282 B1 = tvm.compute((batch, math.ceil(out_channel/ofmblock),ofh, ofw,ofmblock),
[all …]

12