/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/generic/ |
H A D | conv2d.py | 161 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh) 168 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 171 parallel_axis = s[C].fuse(batch, oc_chunk, oh) 177 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 189 oc_chunk, 204 oc_chunk, 287 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh) 293 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 304 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 316 oc_chunk, [all …]
|
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/x86/ |
H A D | conv2d_avx_common.py | 111 parallel_axis = s[W].fuse(oc_chunk, oh) 118 _, oc_chunk, oh, ow, oc_block = s[C].op.axis 121 s[C].fuse(oc_chunk, oh) 125 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 137 s[CC].fuse(oc_chunk, oh) 146 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 148 parallel_axis = s[O].fuse(batch, oc_chunk, oh) 173 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 176 parallel_axis = s[C].fuse(batch, oc_chunk, oh) 182 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis [all …]
|
H A D | conv2d_avx_1x1.py | 85 parallel_axis = s[W].fuse(oc_chunk, oh) 91 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 96 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 114 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 119 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer) 143 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 149 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer) 154 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 163 s[CC].fuse(oc_chunk, oh_outer) 170 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis [all …]
|
H A D | conv2d.py | 77 oc_chunk, k_ic_chunk, kh, kw, k_ic_bn, oc_bn = kshape 81 oc = oc_chunk*oc_bn 191 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 195 kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kh, kw, 373 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \ 375 num_filter = oc_chunk * oc_bn
|
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/x86/ |
H A D | conv2d_avx_common.py | 111 parallel_axis = s[W].fuse(oc_chunk, oh) 118 _, oc_chunk, oh, ow, oc_block = s[C].op.axis 121 s[C].fuse(oc_chunk, oh) 125 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 137 s[CC].fuse(oc_chunk, oh) 146 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 148 parallel_axis = s[O].fuse(batch, oc_chunk, oh) 173 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 176 parallel_axis = s[C].fuse(batch, oc_chunk, oh) 182 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis [all …]
|
H A D | conv2d_avx_1x1.py | 85 parallel_axis = s[W].fuse(oc_chunk, oh) 91 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 96 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 114 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 119 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer) 143 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 149 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer) 154 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 163 s[CC].fuse(oc_chunk, oh_outer) 170 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis [all …]
|
H A D | conv2d.py | 77 oc_chunk, k_ic_chunk, kh, kw, k_ic_bn, oc_bn = kshape 81 oc = oc_chunk*oc_bn 191 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 195 kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kh, kw, 373 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \ 375 num_filter = oc_chunk * oc_bn
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/x86/ |
H A D | conv2d_avx_common.py | 111 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh) 118 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 120 s[C].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) 121 parallel_axis = s[C].fuse(batch, oc_chunk, oh) 127 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 145 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis 147 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) 148 parallel_axis = s[O].fuse(batch, oc_chunk, oh) 155 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 156 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) [all …]
|
H A D | conv2d_avx_1x1.py | 89 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh) 95 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 98 s[C].reorder(oc_chunk, oh_outer, ow_outer, oh_inner, ow_inner, oc_block) 101 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer) 106 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 115 s[CC].fuse(oc_chunk, oh_outer) 124 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis 129 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer) 135 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 139 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer) [all …]
|
H A D | conv3d.py | 580 parallel_axis = s[W].fuse(oc_chunk, od, oh) 587 _, oc_chunk, od, oh, ow, oc_block = s[C].op.axis 590 s[C].fuse(oc_chunk, od, oh) 594 _, oc_chunk, od, oh, ow, oc_block = s[CC].op.axis 606 s[CC].fuse(oc_chunk, od, oh) 616 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 654 parallel_axis = s[W].fuse(oc_chunk, od, oh) 661 _, oc_chunk, od, oh, ow, oc_block = s[C].op.axis 664 s[C].fuse(oc_chunk, od, oh) 668 _, oc_chunk, od, oh, ow, oc_block = s[CC].op.axis [all …]
|
H A D | depthwise_conv2d.py | 94 oc_chunk = oc // oc_bn 103 (oc_chunk, 1, kh, kw, 1, oc_bn), 283 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis 285 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) 286 parallel_axis = s[O].fuse(oc_chunk, oh) 293 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn) 294 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) 295 parallel_axis = s[O].fuse(oc_chunk, oh)
|
H A D | conv2d_int8.py | 104 oc_chunk = oc // oc_bn 113 (oc_chunk, ic_chunk, kh, kw, ic_bn // n_elems, oc_bn, n_elems), 129 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = get_const_tuple( 132 num_filter = oc_chunk * oc_bn
|
H A D | conv2d.py | 141 oc_chunk = oc // oc_bn 160 (oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn), 175 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = get_const_tuple( 179 num_filter = oc_chunk * oc_bn
|
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/generic/ |
H A D | conv2d.py | 133 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 135 s[C].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) 136 parallel_axis = s[C].fuse(batch, oc_chunk, oh) 142 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 166 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis 169 parallel_axis = s[O].fuse(batch, oc_chunk, oh) 197 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 203 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer) 208 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 221 s[CC].fuse(oc_chunk, oh_outer) [all …]
|
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/generic/ |
H A D | conv2d.py | 133 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 135 s[C].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block) 136 parallel_axis = s[C].fuse(batch, oc_chunk, oh) 142 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 166 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis 169 parallel_axis = s[O].fuse(batch, oc_chunk, oh) 197 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis 203 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer) 208 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis 221 s[CC].fuse(oc_chunk, oh_outer) [all …]
|
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/cuda/ |
H A D | conv2d_int8.py | 89 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: 90 kernel[oc_chunk * oc_block_factor + oc_block, 100 oc_chunk, ic_chunk, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple( 124 oshape = (batch, oc_chunk, out_height, out_width, oc_block) 131 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 135 packed_kernel[oc_chunk, icc, 140 output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 141 conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), 145 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
|
H A D | group_conv2d_nchw.py | 92 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: 93 kernel[oc_chunk * oc_block_factor + oc_block, 102 oc_chunk, _, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple( 109 assert groups <= oc_chunk, \ 111 'output channel chunk size {}'.format(groups, oc_chunk)) 139 oshape = (batch, oc_chunk, out_height, out_width, oc_block) 158 tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc, 170 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \ 221 oc_chunk = get_const_int(output.shape[1]) 226 cfg.define_split("tile_f", cfg.axis(oc_chunk // groups), num_outputs=4)
|
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/cuda/ |
H A D | conv2d_int8.py | 89 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: 90 kernel[oc_chunk * oc_block_factor + oc_block, 100 oc_chunk, ic_chunk, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple( 124 oshape = (batch, oc_chunk, out_height, out_width, oc_block) 131 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 135 packed_kernel[oc_chunk, icc, 140 output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 141 conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), 145 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
|
H A D | group_conv2d_nchw.py | 92 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: 93 kernel[oc_chunk * oc_block_factor + oc_block, 102 oc_chunk, _, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple( 109 assert groups <= oc_chunk, \ 111 'output channel chunk size {}'.format(groups, oc_chunk)) 139 oshape = (batch, oc_chunk, out_height, out_width, oc_block) 158 tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc, 170 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \ 221 oc_chunk = get_const_int(output.shape[1]) 226 cfg.define_split("tile_f", cfg.axis(oc_chunk // groups), num_outputs=4)
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/cuda/ |
H A D | conv2d_int8.py | 113 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: kernel[ 114 oc_chunk * oc_block_factor + oc_block, ic_chunk * ic_block_factor + ic_block, kh, kw 124 oc_chunk, ic_chunk, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple( 148 oshape = (batch, oc_chunk, out_height, out_width, oc_block) 157 lambda n, oc_chunk, oh, ow, oc_block: te.sum( 161 * packed_kernel[oc_chunk, icc, kh, kw, oc_block, icb].astype("int32"), 168 lambda n, oc_chunk, oh, ow, oc_block: conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype), 175 * oc_chunk
|
H A D | group_conv2d_nchw.py | 249 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: kernel[ 250 oc_chunk * oc_block_factor + oc_block, ic_chunk * ic_block_factor + ic_block, kh, kw 259 oc_chunk, _, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(packed_kernel.shape) 266 groups <= oc_chunk 268 groups, oc_chunk 296 oshape = (batch, oc_chunk, out_height, out_width, oc_block) 319 occ // (oc_chunk // groups) * (ic_chunk // groups) + icc, 336 * oc_chunk 423 oc_chunk = get_const_int(output.shape[1]) 428 cfg.define_split("tile_f", cfg.axis(oc_chunk // groups), num_outputs=4)
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/nn/ |
H A D | conv2d.py | 456 num_filter = oc_chunk * oc_bn 471 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 491 lambda n, oc_chunk, oh, ow, oc_block: te.sum( 499 * kernel[oc_chunk, idxdiv(ic, ic_bn), kh, kw, idxmod(ic, ic_bn), oc_block], 557 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = get_const_tuple( 560 num_filter = oc_chunk * oc_bn 575 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 597 lambda n, oc_chunk, oh, ow, oc_block: te.sum( 619 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 931 n, oc_chunk, oh, ow, oc_bn = get_const_tuple(packed_out.shape) [all …]
|
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/nn/ |
H A D | conv2d.py | 459 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \ 461 num_filter = oc_chunk * oc_bn 470 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 486 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 492 * kernel[oc_chunk, 598 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = \ 600 num_filter = oc_chunk * oc_bn 609 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 627 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 633 * kernel[oc_chunk, [all …]
|
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/nn/ |
H A D | conv2d.py | 459 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \ 461 num_filter = oc_chunk * oc_bn 470 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 486 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 492 * kernel[oc_chunk, 598 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = \ 600 num_filter = oc_chunk * oc_bn 609 oshape = (n, oc_chunk, out_height, out_width, oc_bn) 627 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block: 633 * kernel[oc_chunk, [all …]
|
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/arm_cpu/ |
H A D | conv2d_int8.py | 53 oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn, n_elems = get_const_tuple(kernel.shape) 54 num_filter = oc_chunk * oc_bn
|