/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/ |
H A D | jit_avx512_core_bf16_1x1_conv_kernel.cpp | 174 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in iterate() local 340 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 417 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 630 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 646 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 666 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 920 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local 952 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local 985 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local 992 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local [all …]
|
H A D | jit_uni_x8s8s32x_1x1_conv_kernel.cpp | 138 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in iterate() local 139 f(i_ur, i_load); in iterate() 232 auto vreg_load = [&](int i_load) { in reduce_loop() argument 269 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 305 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 363 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 377 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 386 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 418 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 419 uni_vmovups(vreg_load(i_load), load_ptr(i_reduce, i_load)); in reduce_loop() [all …]
|
H A D | jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp | 145 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in iterate() local 293 = [=](int i_load) { return Vmm(ur * load_loop_blk + i_load); }; in reduce_loop() argument 298 auto bias_ptr = [=](int i_load) { in reduce_loop() argument 335 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 368 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 417 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 429 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 439 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 483 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 484 vmovups(vreg_load(i_load), load_ptr(i_reduce, i_load)); in reduce_loop() [all …]
|
H A D | jit_avx512_common_1x1_conv_kernel.cpp | 192 + (i_load ? reg_output_stride * i_load in output_ptr() 205 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in iterate() local 287 auto bias_ptr = [=](int i_load) { in reduce_loop() argument 337 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 349 for (int i_load = 0; i_load < load_loop_blk; i_load++) in reduce_loop() local 361 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 377 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 399 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 521 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 566 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local [all …]
|
H A D | jit_uni_x8s8s32x_1x1_conv_kernel.hpp | 107 const int load_loop_blk, const int i_load, const int i_ur); 108 Vmm vreg_accum(const int load_loop_blk, const int i_load, const int i_ur); 109 int output_ptr(const int i_load, const int i_ur);
|
H A D | jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp | 110 Xbyak::Address output_ptr(const int i_load, const int i_ur); 111 int vreg_accum_idx(const int load_loop_blk, int i_load, int i_ur) const; 112 Vmm vreg_accum(const int load_loop_blk, int i_load, int i_ur) const;
|
H A D | jit_avx512_common_1x1_conv_kernel.hpp | 92 const bool out_layout_nxc, const int i_load, const int i_ur);
|
H A D | jit_avx512_core_bf16_1x1_conv_kernel.hpp | 128 const int i_load, const int i_ur, const int scale = 1);
|
H A D | jit_avx512_common_conv_kernel.cpp | 141 for (int i_load = 0; i_load < nb_oc_blocking; i_load++) { in iterate() local 143 = force_masking || (oc_tail && i_load + 1 == nb_oc_blocking); in iterate() 145 fun(mask_flag, i_load, i_ur); in iterate() 162 [&](const bool mask_flag, const int i_load, const int i_ur) { in apply_postops() argument 163 const int aux_output_l_off = get_output_offset(i_ur, i_load) in apply_postops() 165 const auto vmm_idx = vmm_out_idx(i_ur, i_load); in apply_postops() 171 vmm_idx, i_load * jcp.oc_block); in apply_postops() 191 [&](const bool, const int i_load, const int i_ur) { in apply_postops() argument 192 vmm_idxs.emplace(vmm_out_idx(i_ur, i_load)); in apply_postops()
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/ |
H A D | jit_avx512_core_bf16_1x1_conv_kernel.cpp | 174 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in iterate() local 321 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 398 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 611 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 627 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 647 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 901 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local 933 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local 966 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local 973 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in compute_diff_bias() local [all …]
|
H A D | jit_uni_x8s8s32x_1x1_conv_kernel.cpp | 138 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in iterate() local 139 f(i_ur, i_load); in iterate() 236 auto vreg_load = [&](int i_load) { in reduce_loop() argument 273 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 311 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 369 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 383 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 392 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 424 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 425 uni_vmovups(vreg_load(i_load), load_ptr(i_reduce, i_load)); in reduce_loop() [all …]
|
H A D | jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp | 145 for (int i_load = 0; i_load < load_loop_blk; i_load++) { in iterate() local 285 = [=](int i_load) { return Vmm(ur * load_loop_blk + i_load); }; in reduce_loop() argument 290 auto bias_ptr = [=](int i_load) { in reduce_loop() argument 327 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 373 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 422 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 434 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 444 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 488 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 489 vmovups(vreg_load(i_load), load_ptr(i_reduce, i_load)); in reduce_loop() [all …]
|
H A D | jit_avx512_common_1x1_conv_kernel.cpp | 192 + (i_load ? reg_output_stride * i_load in output_ptr() 205 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in iterate() local 266 auto bias_ptr = [=](int i_load) { in reduce_loop() argument 316 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 328 for (int i_load = 0; i_load < load_loop_blk; i_load++) in reduce_loop() local 340 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 356 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 378 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 500 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 545 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local [all …]
|
H A D | jit_avx512_common_1x1_conv_kernel.hpp | 92 const bool is_out_layout_nxc, const int i_load, const int i_ur) { in get_output_offset() 98 return jcp.typesize_out * (i_load * i_load_shift + i_ur * i_ur_shift); in get_output_offset() 102 const bool out_layout_nxc, const int i_load, const int i_ur);
|
H A D | jit_uni_x8s8s32x_1x1_conv_kernel.hpp | 109 const int load_loop_blk, const int i_load, const int i_ur); 110 Vmm vreg_accum(const int load_loop_blk, const int i_load, const int i_ur); 111 int output_ptr(const int i_load, const int i_ur);
|
H A D | jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp | 111 Xbyak::Address output_ptr(const int i_load, const int i_ur); 112 int vreg_accum_idx(const int load_loop_blk, int i_load, int i_ur) const; 113 Vmm vreg_accum(const int load_loop_blk, int i_load, int i_ur) const;
|
H A D | jit_avx512_core_bf16_1x1_conv_kernel.hpp | 129 const int i_load, const int i_ur, const int scale = 1); 189 inline size_t get_output_offset(const int i_load, const int i_ur) { in get_output_offset() 196 return jcp.typesize_out * (i_load * i_load_shift + i_ur * i_ur_shift); in get_output_offset()
|
H A D | jit_avx512_common_conv_kernel.cpp | 141 for (int i_load = 0; i_load < nb_oc_blocking; i_load++) { in iterate() local 143 = force_masking || (oc_tail && i_load + 1 == nb_oc_blocking); in iterate() 145 fun(mask_flag, i_load, i_ur); in iterate() 161 [&](const bool mask_flag, const int i_load, const int i_ur) { in apply_postops() argument 163 = get_output_offset(i_ur, i_load); in apply_postops() 164 const auto vmm_idx = vmm_out_idx(i_ur, i_load); in apply_postops() 178 [&](const bool, const int i_load, const int i_ur) { in apply_postops() argument 179 vmm_idxs.emplace(vmm_out_idx(i_ur, i_load)); in apply_postops()
|
/dports/devel/libsimdpp/libsimdpp-2.1-400-g9dac213d/simdpp/detail/insn/ |
H A D | load.h | 28 void i_load(uint8x16& a, const char* p) in i_load() function 45 void i_load(uint16x8& a, const char* p) { uint8x16 r; i_load(r, p); a = r; } in i_load() function 47 void i_load(uint32x4& a, const char* p) { uint8x16 r; i_load(r, p); a = r; } in i_load() function 50 void i_load(uint64x2& a, const char* p) in i_load() function 56 uint8x16 r; i_load(r, p); a = r; in i_load() 61 void i_load(float32x4& a, const char* p) in i_load() function 80 void i_load(float64x2& a, const char* p) in i_load() function 100 void i_load(uint8x32& a, const char* p) in i_load() function 168 void i_load(V& a, const char* p) in i_load() function 173 i_load(a.vec(i), p); in i_load() [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/ |
H A D | jit_sve_512_1x1_conv_kernel.cpp | 202 ofs = i_load * lmul + u0 * rmul; in reduce_loop() 236 if (bwd_iload) mov(r, i_load); in reduce_loop() 284 if (bwd_iload) mov(r, i_load); in reduce_loop() 339 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 352 for (int i_load = 0; i_load < load_loop_blk; i_load++) in reduce_loop() local 361 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 377 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 398 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 422 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { // OC in reduce_loop() local 438 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/ |
H A D | jit_sve_512_1x1_conv_kernel.cpp | 202 ofs = i_load * lmul + u0 * rmul; in reduce_loop() 236 if (bwd_iload) mov(r, i_load); in reduce_loop() 284 if (bwd_iload) mov(r, i_load); in reduce_loop() 339 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 352 for (int i_load = 0; i_load < load_loop_blk; i_load++) in reduce_loop() local 361 for (int i_load = 0; i_load < load_loop_blk; ++i_load) in reduce_loop() local 377 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 398 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local 422 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { // OC in reduce_loop() local 438 for (int i_load = 0; i_load < load_loop_blk; ++i_load) { in reduce_loop() local [all …]
|
/dports/math/cppad/CppAD-20210000.8/include/cppad/local/ |
H A D | load_op.hpp | 565 { size_t i_load = size_t( load_op2var[ arg[2] ] ); in reverse_load_op() local 570 CPPAD_ASSERT_UNKNOWN( i_load < i_z ); in reverse_load_op() 572 if( i_load > 0 ) in reverse_load_op() 575 Base* py_x = partial + i_load * nc_partial; in reverse_load_op()
|
/dports/math/SCIP/scip-7.0.3/src/cppad/local/ |
H A D | load_op.hpp | 557 { size_t i_load = size_t( var_by_load_op[ arg[2] ] ); in reverse_load_op() local 562 CPPAD_ASSERT_UNKNOWN( i_load < i_z ); in reverse_load_op() 564 if( i_load > 0 ) in reverse_load_op() 567 Base* py_x = partial + i_load * nc_partial; in reverse_load_op()
|
/dports/graphics/sane-backends/sane-backends-1.0.32/backend/ |
H A D | canon_dr.h | 573 static SANE_Status object_position (struct scanner *s, int i_load);
|
/dports/cad/verilator/verilator-4.216/test_regress/t/ |
H A D | t_altera_lpm.v | 4244 wire i_load; net 4251 buf (i_load, load); 4343 else if (i_load) 4345 else if (!i_load)
|