/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/ |
H A D | jit_avx512_core_x8s8s32x_1x1_convolution.cpp | 284 if (ocb == ocb_start) { in execute_forward_thr() 305 int ocb = ocb_start; in execute_forward_thr() 320 int ocb = ocb_start; in execute_forward_thr() 342 int ocb = ocb_start; in execute_forward_thr() 357 int ocb = ocb_start; in execute_forward_thr() 379 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 395 for (int ocb = ocb_start; ocb < ocb_end; in execute_forward_thr() 443 while (ocb_start < ocb_end) { in execute_forward_thr() 468 ocb_start + load_step); in execute_forward_thr() 474 ocb_start += load_step; in execute_forward_thr() [all …]
|
H A D | jit_uni_x8s8s32x_1x1_convolution.cpp | 277 if (ocb == ocb_start) { in execute_forward_thr() 300 int ocb = ocb_start; in execute_forward_thr() 316 int ocb = ocb_start; in execute_forward_thr() 340 int ocb = ocb_start; in execute_forward_thr() 356 int ocb = ocb_start; in execute_forward_thr() 378 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 394 for (int ocb = ocb_start; ocb < ocb_end; in execute_forward_thr() 442 while (ocb_start < ocb_end) { in execute_forward_thr() 467 ocb_start + load_step); in execute_forward_thr() 473 ocb_start += load_step; in execute_forward_thr() [all …]
|
H A D | jit_sse41_1x1_convolution.cpp | 179 auto conv_1x1 = [&](int bcast_start, int bcast_end, int ocb_start, in execute_forward_thr() 181 if (bcast_start >= bcast_end || ocb_start >= ocb_end) return; in execute_forward_thr() 207 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 272 int bcast_start {0}, bcast_end {0}, ocb_start, ocb_end; in execute_forward_thr() local 274 bcast_end, nb_oc, ocb_start, ocb_end, 1); in execute_forward_thr() 276 while (ocb_start < ocb_end) { in execute_forward_thr() 278 init_load(ocb_start, ocb_end, load_step); in execute_forward_thr() 299 conv_1x1(bcast_start_1x1, bcast_end_1x1, ocb_start, in execute_forward_thr() 300 ocb_start + load_step); in execute_forward_thr() 302 ker_dw(n, g * nb_oc + ocb_start, load_step, oh_dw); in execute_forward_thr() [all …]
|
H A D | jit_avx512_common_1x1_convolution.cpp | 212 if (ocb == ocb_start) { in execute_forward_thr() 235 int ocb = ocb_start; in execute_forward_thr() 253 int ocb = ocb_start; in execute_forward_thr() 281 int ocb = ocb_start; in execute_forward_thr() 299 int ocb = ocb_start; in execute_forward_thr() 324 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 391 while (ocb_start < ocb_end) { in execute_forward_thr() 415 ocb_start + load_step); in execute_forward_thr() 421 ocb_start += load_step; in execute_forward_thr() 1016 int g_start {0}, ocb_start {0}; in execute_backward_weights() local [all …]
|
H A D | jit_avx2_1x1_convolution.cpp | 207 if (ocb == ocb_start) { in execute_forward_thr() 231 int ocb = ocb_start; in execute_forward_thr() 250 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 318 bcast_end, nb_oc, ocb_start, ocb_end, 1); in execute_forward_thr() 320 while (ocb_start < ocb_end) { in execute_forward_thr() 322 init_load(ocb_start, ocb_end, load_step); in execute_forward_thr() 344 conv_1x1(bcast_start_1x1, bcast_end_1x1, ocb_start, in execute_forward_thr() 345 ocb_start + load_step); in execute_forward_thr() 351 ocb_start += load_step; in execute_forward_thr() 782 int g_start {0}, ocb_start {0}; in execute_backward_weights() local [all …]
|
H A D | jit_avx512_core_bf16_1x1_convolution.cpp | 254 if (ocb == ocb_start) { in execute_forward_thr() 283 int ocb = ocb_start; in execute_forward_thr() 309 int ocb = ocb_start; in execute_forward_thr() 333 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 399 int bcast_start {0}, bcast_end {0}, ocb_start, ocb_end; in execute_forward_thr() local 403 while (ocb_start < ocb_end) { in execute_forward_thr() 405 init_load(ocb_start, ocb_end, load_step); in execute_forward_thr() 428 ocb_start + load_step); in execute_forward_thr() 434 ocb_start += load_step; in execute_forward_thr() 444 ocb_start, ocb_end, jcp.load_grp_count); in execute_forward_thr() [all …]
|
H A D | jit_avx2_convolution.cpp | 411 int g_start {0}, ocb_start {0}, icb_start {0}; in execute_backward_weights() local 412 nd_iterator_init(w_job_start, g_start, jcp.ngroups, ocb_start, in execute_backward_weights() 416 int g = g_start, ocb = ocb_start, icb = icb_start; in execute_backward_weights() 481 int g_start {0}, ocb_start {0}; in execute_backward_weights() local 483 b_job_start, g_start, jcp.ngroups, ocb_start, jcp.nb_oc); in execute_backward_weights() 486 int g = g_start, ocb = ocb_start; in execute_backward_weights()
|
H A D | jit_avx512_core_bf16_convolution.hpp | 254 dim_t spatial_start_offset, int ocb_start, dim_t chb_stride,
|
H A D | jit_avx512_core_amx_convolution.hpp | 282 dim_t spatial_start_offset, int ocb_start, dim_t chb_stride,
|
H A D | jit_avx512_core_amx_convolution.cpp | 1033 int spatial_start, dim_t spatial_start_offset, int ocb_start, in trans_dst_nxc() argument 1049 assert(ocb_start + ocb < jcp.nb_oc); in trans_dst_nxc() 1050 ctx.ch_work = (ocb_start + ocb + 1) == jcp.nb_oc ? oc_tail_work in trans_dst_nxc() 1637 int ocb_start = oc_b; in compute_diff_weights() local 1654 ocb_start = oc_b; in compute_diff_weights() 1680 ocb_start, ch_shift, my_work); in compute_diff_weights()
|
H A D | jit_avx512_core_bf16_convolution.cpp | 972 int spatial_start, dim_t spatial_start_offset, int ocb_start, in trans_dst_nxc() argument 988 assert(ocb_start + ocb < jcp.nb_oc); in trans_dst_nxc() 989 ctx.ch_work = (ocb_start + ocb + 1) == jcp.nb_oc ? oc_tail_work in trans_dst_nxc() 1617 int ocb_start = oc_b; in compute_diff_weights() local 1644 ocb_start = oc_b; in compute_diff_weights() 1675 sp_start_offset, ocb_start, ch_shift, my_work); in compute_diff_weights()
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/ |
H A D | jit_uni_x8s8s32x_1x1_convolution.cpp | 284 if (ocb == ocb_start) { in execute_forward_thr() 304 int ocb = ocb_start; in execute_forward_thr() 320 int ocb = ocb_start; in execute_forward_thr() 344 int ocb = ocb_start; in execute_forward_thr() 360 int ocb = ocb_start; in execute_forward_thr() 382 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 398 for (int ocb = ocb_start; ocb < ocb_end; in execute_forward_thr() 446 while (ocb_start < ocb_end) { in execute_forward_thr() 471 ocb_start + load_step); in execute_forward_thr() 477 ocb_start += load_step; in execute_forward_thr() [all …]
|
H A D | jit_avx512_core_x8s8s32x_1x1_convolution.cpp | 287 if (ocb == ocb_start) { in execute_forward_thr() 308 int ocb = ocb_start; in execute_forward_thr() 323 int ocb = ocb_start; in execute_forward_thr() 345 int ocb = ocb_start; in execute_forward_thr() 360 int ocb = ocb_start; in execute_forward_thr() 382 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 398 for (int ocb = ocb_start; ocb < ocb_end; in execute_forward_thr() 446 while (ocb_start < ocb_end) { in execute_forward_thr() 471 ocb_start + load_step); in execute_forward_thr() 477 ocb_start += load_step; in execute_forward_thr() [all …]
|
H A D | jit_sse41_1x1_convolution.cpp | 179 auto conv_1x1 = [&](int bcast_start, int bcast_end, int ocb_start, in execute_forward_thr() 181 if (bcast_start >= bcast_end || ocb_start >= ocb_end) return; in execute_forward_thr() 207 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 272 int bcast_start {0}, bcast_end {0}, ocb_start, ocb_end; in execute_forward_thr() local 274 bcast_end, nb_oc, ocb_start, ocb_end, 1); in execute_forward_thr() 276 while (ocb_start < ocb_end) { in execute_forward_thr() 278 init_load(ocb_start, ocb_end, load_step); in execute_forward_thr() 299 conv_1x1(bcast_start_1x1, bcast_end_1x1, ocb_start, in execute_forward_thr() 300 ocb_start + load_step); in execute_forward_thr() 302 ker_dw(n, g * nb_oc + ocb_start, load_step, oh_dw); in execute_forward_thr() [all …]
|
H A D | jit_avx512_common_1x1_convolution.cpp | 212 if (ocb == ocb_start) { in execute_forward_thr() 235 int ocb = ocb_start; in execute_forward_thr() 253 int ocb = ocb_start; in execute_forward_thr() 281 int ocb = ocb_start; in execute_forward_thr() 299 int ocb = ocb_start; in execute_forward_thr() 324 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 391 while (ocb_start < ocb_end) { in execute_forward_thr() 415 ocb_start + load_step); in execute_forward_thr() 421 ocb_start += load_step; in execute_forward_thr() 1018 int g_start {0}, ocb_start {0}; in execute_backward_weights() local [all …]
|
H A D | jit_avx2_1x1_convolution.cpp | 207 if (ocb == ocb_start) { in execute_forward_thr() 231 int ocb = ocb_start; in execute_forward_thr() 250 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 318 bcast_end, nb_oc, ocb_start, ocb_end, 1); in execute_forward_thr() 320 while (ocb_start < ocb_end) { in execute_forward_thr() 322 init_load(ocb_start, ocb_end, load_step); in execute_forward_thr() 344 conv_1x1(bcast_start_1x1, bcast_end_1x1, ocb_start, in execute_forward_thr() 345 ocb_start + load_step); in execute_forward_thr() 351 ocb_start += load_step; in execute_forward_thr() 782 int g_start {0}, ocb_start {0}; in execute_backward_weights() local [all …]
|
H A D | jit_avx512_core_bf16_1x1_convolution.cpp | 254 if (ocb == ocb_start) { in execute_forward_thr() 283 int ocb = ocb_start; in execute_forward_thr() 309 int ocb = ocb_start; in execute_forward_thr() 333 const auto ocb_end = ocb_start + load_step; in execute_forward_thr() 399 int bcast_start {0}, bcast_end {0}, ocb_start, ocb_end; in execute_forward_thr() local 403 while (ocb_start < ocb_end) { in execute_forward_thr() 405 init_load(ocb_start, ocb_end, load_step); in execute_forward_thr() 428 ocb_start + load_step); in execute_forward_thr() 434 ocb_start += load_step; in execute_forward_thr() 444 ocb_start, ocb_end, jcp.load_grp_count); in execute_forward_thr() [all …]
|
H A D | jit_avx2_convolution.cpp | 411 int g_start {0}, ocb_start {0}, icb_start {0}; in execute_backward_weights() local 412 nd_iterator_init(w_job_start, g_start, jcp.ngroups, ocb_start, in execute_backward_weights() 416 int g = g_start, ocb = ocb_start, icb = icb_start; in execute_backward_weights() 481 int g_start {0}, ocb_start {0}; in execute_backward_weights() local 483 b_job_start, g_start, jcp.ngroups, ocb_start, jcp.nb_oc); in execute_backward_weights() 486 int g = g_start, ocb = ocb_start; in execute_backward_weights()
|
H A D | jit_avx512_core_bf16_convolution.hpp | 251 dim_t spatial_start_offset, int ocb_start, dim_t chb_stride,
|
H A D | jit_avx512_core_amx_convolution.hpp | 279 dim_t spatial_start_offset, int ocb_start, dim_t chb_stride,
|
H A D | jit_avx512_core_amx_convolution.cpp | 1014 int spatial_start, dim_t spatial_start_offset, int ocb_start, in trans_dst_nxc() argument 1030 assert(ocb_start + ocb < jcp.nb_oc); in trans_dst_nxc() 1031 ctx.ch_work = (ocb_start + ocb + 1) == jcp.nb_oc ? oc_tail_work in trans_dst_nxc() 1618 int ocb_start = oc_b; in compute_diff_weights() local 1635 ocb_start = oc_b; in compute_diff_weights() 1661 ocb_start, ch_shift, my_work); in compute_diff_weights()
|
H A D | jit_avx512_core_bf16_convolution.cpp | 972 int spatial_start, dim_t spatial_start_offset, int ocb_start, in trans_dst_nxc() argument 988 assert(ocb_start + ocb < jcp.nb_oc); in trans_dst_nxc() 989 ctx.ch_work = (ocb_start + ocb + 1) == jcp.nb_oc ? oc_tail_work in trans_dst_nxc() 1617 int ocb_start = oc_b; in compute_diff_weights() local 1644 ocb_start = oc_b; in compute_diff_weights() 1675 sp_start_offset, ocb_start, ch_shift, my_work); in compute_diff_weights()
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/ |
H A D | jit_sve_512_1x1_convolution.cpp | 182 if (ocb == ocb_start) { in execute_forward_thr() 194 if (bcast_start >= bcast_end || ocb_start >= ocb_end) return; in execute_forward_thr() 199 int ocb = ocb_start; in execute_forward_thr() 217 int ocb = ocb_start; in execute_forward_thr() 245 int ocb = ocb_start; in execute_forward_thr() 263 int ocb = ocb_start; in execute_forward_thr() 282 int bcast_start {0}, bcast_end {0}, ocb_start {0}, ocb_end {0}; in execute_forward_thr() local 284 ocb_start, ocb_end, jcp.load_grp_count); in execute_forward_thr() 286 conv_1x1(bcast_start, bcast_end, ocb_start, ocb_end); in execute_forward_thr() 761 int g_start {0}, ocb_start {0}; in execute_backward_weights() local [all …]
|
H A D | jit_sve_512_convolution.cpp | 1666 int g_start {0}, ocb_start {0}; in compute_diff_bias() local 1667 nd_iterator_init(b_job_start, g_start, jcp.ngroups, ocb_start, jcp.nb_oc); in compute_diff_bias() 1669 int g = g_start, ocb = ocb_start; in compute_diff_bias()
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/ |
H A D | jit_sve_512_1x1_convolution.cpp | 182 if (ocb == ocb_start) { in execute_forward_thr() 194 if (bcast_start >= bcast_end || ocb_start >= ocb_end) return; in execute_forward_thr() 199 int ocb = ocb_start; in execute_forward_thr() 217 int ocb = ocb_start; in execute_forward_thr() 245 int ocb = ocb_start; in execute_forward_thr() 263 int ocb = ocb_start; in execute_forward_thr() 282 int bcast_start {0}, bcast_end {0}, ocb_start {0}, ocb_end {0}; in execute_forward_thr() local 284 ocb_start, ocb_end, jcp.load_grp_count); in execute_forward_thr() 286 conv_1x1(bcast_start, bcast_end, ocb_start, ocb_end); in execute_forward_thr() 761 int g_start {0}, ocb_start {0}; in execute_backward_weights() local [all …]
|