Home
last modified time | relevance | path

Searched refs:oc_chunk (Results 1 – 25 of 47) sorted by relevance

12

/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/generic/
H A Dconv2d.py161 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
168 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
171 parallel_axis = s[C].fuse(batch, oc_chunk, oh)
177 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
189 oc_chunk,
204 oc_chunk,
287 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
293 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
304 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
316 oc_chunk,
[all …]
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/x86/
H A Dconv2d_avx_common.py111 parallel_axis = s[W].fuse(oc_chunk, oh)
118 _, oc_chunk, oh, ow, oc_block = s[C].op.axis
121 s[C].fuse(oc_chunk, oh)
125 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
137 s[CC].fuse(oc_chunk, oh)
146 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
148 parallel_axis = s[O].fuse(batch, oc_chunk, oh)
173 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
176 parallel_axis = s[C].fuse(batch, oc_chunk, oh)
182 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
[all …]
H A Dconv2d_avx_1x1.py85 parallel_axis = s[W].fuse(oc_chunk, oh)
91 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
96 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
114 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
119 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer)
143 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
149 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer)
154 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
163 s[CC].fuse(oc_chunk, oh_outer)
170 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
[all …]
H A Dconv2d.py77 oc_chunk, k_ic_chunk, kh, kw, k_ic_bn, oc_bn = kshape
81 oc = oc_chunk*oc_bn
191 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
195 kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kh, kw,
373 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \
375 num_filter = oc_chunk * oc_bn
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/x86/
H A Dconv2d_avx_common.py111 parallel_axis = s[W].fuse(oc_chunk, oh)
118 _, oc_chunk, oh, ow, oc_block = s[C].op.axis
121 s[C].fuse(oc_chunk, oh)
125 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
137 s[CC].fuse(oc_chunk, oh)
146 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
148 parallel_axis = s[O].fuse(batch, oc_chunk, oh)
173 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
176 parallel_axis = s[C].fuse(batch, oc_chunk, oh)
182 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
[all …]
H A Dconv2d_avx_1x1.py85 parallel_axis = s[W].fuse(oc_chunk, oh)
91 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
96 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
114 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
119 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer)
143 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
149 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer)
154 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
163 s[CC].fuse(oc_chunk, oh_outer)
170 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
[all …]
H A Dconv2d.py77 oc_chunk, k_ic_chunk, kh, kw, k_ic_bn, oc_bn = kshape
81 oc = oc_chunk*oc_bn
191 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
195 kernel_vec[oc_chunk, idxdiv(ic, ic_bn), kh, kw,
373 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \
375 num_filter = oc_chunk * oc_bn
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/x86/
H A Dconv2d_avx_common.py111 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
118 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
120 s[C].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
121 parallel_axis = s[C].fuse(batch, oc_chunk, oh)
127 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
145 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
147 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
148 parallel_axis = s[O].fuse(batch, oc_chunk, oh)
155 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
156 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
[all …]
H A Dconv2d_avx_1x1.py89 parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
95 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
98 s[C].reorder(oc_chunk, oh_outer, ow_outer, oh_inner, ow_inner, oc_block)
101 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer)
106 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
115 s[CC].fuse(oc_chunk, oh_outer)
124 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
129 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer)
135 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
139 parallel_axis = s[O].fuse(batch, oc_chunk, oh_outer)
[all …]
H A Dconv3d.py580 parallel_axis = s[W].fuse(oc_chunk, od, oh)
587 _, oc_chunk, od, oh, ow, oc_block = s[C].op.axis
590 s[C].fuse(oc_chunk, od, oh)
594 _, oc_chunk, od, oh, ow, oc_block = s[CC].op.axis
606 s[CC].fuse(oc_chunk, od, oh)
616 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
654 parallel_axis = s[W].fuse(oc_chunk, od, oh)
661 _, oc_chunk, od, oh, ow, oc_block = s[C].op.axis
664 s[C].fuse(oc_chunk, od, oh)
668 _, oc_chunk, od, oh, ow, oc_block = s[CC].op.axis
[all …]
H A Ddepthwise_conv2d.py94 oc_chunk = oc // oc_bn
103 (oc_chunk, 1, kh, kw, 1, oc_bn),
283 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
285 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
286 parallel_axis = s[O].fuse(oc_chunk, oh)
293 oc_chunk, oc_block = s[O].split(oc, factor=oc_bn)
294 s[O].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
295 parallel_axis = s[O].fuse(oc_chunk, oh)
H A Dconv2d_int8.py104 oc_chunk = oc // oc_bn
113 (oc_chunk, ic_chunk, kh, kw, ic_bn // n_elems, oc_bn, n_elems),
129 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = get_const_tuple(
132 num_filter = oc_chunk * oc_bn
H A Dconv2d.py141 oc_chunk = oc // oc_bn
160 (oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn),
175 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = get_const_tuple(
179 num_filter = oc_chunk * oc_bn
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/generic/
H A Dconv2d.py133 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
135 s[C].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
136 parallel_axis = s[C].fuse(batch, oc_chunk, oh)
142 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
166 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
169 parallel_axis = s[O].fuse(batch, oc_chunk, oh)
197 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
203 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer)
208 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
221 s[CC].fuse(oc_chunk, oh_outer)
[all …]
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/generic/
H A Dconv2d.py133 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
135 s[C].reorder(oc_chunk, oh, ow_chunk, ow_block, oc_block)
136 parallel_axis = s[C].fuse(batch, oc_chunk, oh)
142 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
166 batch, oc_chunk, oh, ow, oc_block = s[O].op.axis
169 parallel_axis = s[O].fuse(batch, oc_chunk, oh)
197 batch, oc_chunk, oh, ow, oc_block = s[C].op.axis
203 parallel_axis = s[C].fuse(batch, oc_chunk, oh_outer)
208 _, oc_chunk, oh, ow, oc_block = s[CC].op.axis
221 s[CC].fuse(oc_chunk, oh_outer)
[all …]
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/cuda/
H A Dconv2d_int8.py89 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block:
90 kernel[oc_chunk * oc_block_factor + oc_block,
100 oc_chunk, ic_chunk, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(
124 oshape = (batch, oc_chunk, out_height, out_width, oc_block)
131 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
135 packed_kernel[oc_chunk, icc,
140 output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
141 conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype),
145 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
H A Dgroup_conv2d_nchw.py92 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block:
93 kernel[oc_chunk * oc_block_factor + oc_block,
102 oc_chunk, _, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(
109 assert groups <= oc_chunk, \
111 'output channel chunk size {}'.format(groups, oc_chunk))
139 oshape = (batch, oc_chunk, out_height, out_width, oc_block)
158 tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc,
170 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
221 oc_chunk = get_const_int(output.shape[1])
226 cfg.define_split("tile_f", cfg.axis(oc_chunk // groups), num_outputs=4)
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/cuda/
H A Dconv2d_int8.py89 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block:
90 kernel[oc_chunk * oc_block_factor + oc_block,
100 oc_chunk, ic_chunk, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(
124 oshape = (batch, oc_chunk, out_height, out_width, oc_block)
131 conv = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
135 packed_kernel[oc_chunk, icc,
140 output = tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
141 conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype),
145 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
H A Dgroup_conv2d_nchw.py92 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block:
93 kernel[oc_chunk * oc_block_factor + oc_block,
102 oc_chunk, _, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(
109 assert groups <= oc_chunk, \
111 'output channel chunk size {}'.format(groups, oc_chunk))
139 oshape = (batch, oc_chunk, out_height, out_width, oc_block)
158 tvm.sum(pad_data[n, occ//(oc_chunk//groups)*(ic_chunk//groups)+icc,
170 num_flop = batch * oc_chunk * oc_block * out_height * out_width * \
221 oc_chunk = get_const_int(output.shape[1])
226 cfg.define_split("tile_f", cfg.axis(oc_chunk // groups), num_outputs=4)
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/cuda/
H A Dconv2d_int8.py113 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: kernel[
114 oc_chunk * oc_block_factor + oc_block, ic_chunk * ic_block_factor + ic_block, kh, kw
124 oc_chunk, ic_chunk, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(
148 oshape = (batch, oc_chunk, out_height, out_width, oc_block)
157 lambda n, oc_chunk, oh, ow, oc_block: te.sum(
161 * packed_kernel[oc_chunk, icc, kh, kw, oc_block, icb].astype("int32"),
168 lambda n, oc_chunk, oh, ow, oc_block: conv[n, oc_chunk, oh, ow, oc_block].astype(out_dtype),
175 * oc_chunk
H A Dgroup_conv2d_nchw.py249 lambda oc_chunk, ic_chunk, kh, kw, oc_block, ic_block: kernel[
250 oc_chunk * oc_block_factor + oc_block, ic_chunk * ic_block_factor + ic_block, kh, kw
259 oc_chunk, _, kernel_h, kernel_w, oc_block, ic_block = get_const_tuple(packed_kernel.shape)
266 groups <= oc_chunk
268 groups, oc_chunk
296 oshape = (batch, oc_chunk, out_height, out_width, oc_block)
319 occ // (oc_chunk // groups) * (ic_chunk // groups) + icc,
336 * oc_chunk
423 oc_chunk = get_const_int(output.shape[1])
428 cfg.define_split("tile_f", cfg.axis(oc_chunk // groups), num_outputs=4)
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/tvm/python/tvm/topi/nn/
H A Dconv2d.py456 num_filter = oc_chunk * oc_bn
471 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
491 lambda n, oc_chunk, oh, ow, oc_block: te.sum(
499 * kernel[oc_chunk, idxdiv(ic, ic_bn), kh, kw, idxmod(ic, ic_bn), oc_block],
557 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = get_const_tuple(
560 num_filter = oc_chunk * oc_bn
575 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
597 lambda n, oc_chunk, oh, ow, oc_block: te.sum(
619 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
931 n, oc_chunk, oh, ow, oc_bn = get_const_tuple(packed_out.shape)
[all …]
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/nn/
H A Dconv2d.py459 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \
461 num_filter = oc_chunk * oc_bn
470 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
486 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
492 * kernel[oc_chunk,
598 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = \
600 num_filter = oc_chunk * oc_bn
609 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
627 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
633 * kernel[oc_chunk,
[all …]
/dports/misc/py-tvm/incubator-tvm-0.6.1/topi/python/topi/nn/
H A Dconv2d.py459 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn = \
461 num_filter = oc_chunk * oc_bn
470 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
486 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
492 * kernel[oc_chunk,
598 oc_chunk, ic_chunk_group, kernel_height, kernel_width, _, oc_bn, _ = \
600 num_filter = oc_chunk * oc_bn
609 oshape = (n, oc_chunk, out_height, out_width, oc_bn)
627 return tvm.compute(oshape, lambda n, oc_chunk, oh, ow, oc_block:
633 * kernel[oc_chunk,
[all …]
/dports/misc/tvm/incubator-tvm-0.6.1/topi/python/topi/arm_cpu/
H A Dconv2d_int8.py53 oc_chunk, ic_chunk, kh, kw, ic_bn, oc_bn, n_elems = get_const_tuple(kernel.shape)
54 num_filter = oc_chunk * oc_bn

12