/dports/math/openblas/OpenBLAS-0.3.18/kernel/x86_64/ |
H A D | dgemm_ncopy_8_bulldozer.S | 141 vunpcklpd %xmm1, %xmm0 , %xmm0 142 vunpcklpd %xmm3, %xmm2 , %xmm2 143 vunpcklpd %xmm5, %xmm4 , %xmm4 144 vunpcklpd %xmm7, %xmm6 , %xmm6 175 vunpcklpd %xmm1, %xmm0 , %xmm0 177 vunpcklpd %xmm3, %xmm2 , %xmm2 189 vunpcklpd %xmm5, %xmm4 , %xmm4 192 vunpcklpd %xmm7, %xmm6 , %xmm6 220 vunpcklpd %xmm1, %xmm0 , %xmm0 222 vunpcklpd %xmm3, %xmm2 , %xmm2 [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/gemm/f32/ |
H A D | jit_avx2_f32_copy_at_kern_autogen.cpp | 125 vunpcklpd(ymm0, ymm4, ymm1); in generate() 127 vunpcklpd(ymm2, ymm5, ymm3); in generate() 154 vunpcklpd(ymm0, ymm4, ymm1); in generate() 156 vunpcklpd(ymm2, ymm5, ymm3); in generate() 183 vunpcklpd(ymm0, ymm4, ymm1); in generate() 185 vunpcklpd(ymm2, ymm5, ymm3); in generate() 214 vunpcklpd(ymm0, ymm4, ymm1); in generate() 232 vunpcklpd(ymm0, ymm4, ymm1); in generate() 250 vunpcklpd(ymm0, ymm4, ymm1); in generate() 267 vunpcklpd(xmm4, xmm0, xmm2); in generate() [all …]
|
H A D | jit_avx512_core_f32_copy_at_kern_part2_autogen.cpp | 61 vunpcklpd(zmm0, zmm4, zmm1); in generate_part2() 95 vunpcklpd(zmm0, zmm4, zmm1); in generate_part2() 114 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 122 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() 133 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 141 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() 152 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 160 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() 171 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 179 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() [all …]
|
H A D | jit_avx512_core_f32_copy_at_kern_part1_autogen.cpp | 165 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 167 vunpcklpd(zmm2, zmm5, zmm3); in generate_part1() 210 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 212 vunpcklpd(zmm2, zmm5, zmm3); in generate_part1() 255 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 257 vunpcklpd(zmm2, zmm5, zmm3); in generate_part1() 300 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 332 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 364 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 381 vunpcklpd(xmm4, xmm0, xmm2); in generate_part1() [all …]
|
H A D | jit_avx_f32_copy_at_kern_autogen.cpp | 124 vunpcklpd(ymm0, ymm4, ymm1); in generate() 126 vunpcklpd(ymm2, ymm5, ymm3); in generate() 153 vunpcklpd(ymm0, ymm4, ymm1); in generate() 155 vunpcklpd(ymm2, ymm5, ymm3); in generate() 184 vunpcklpd(ymm0, ymm4, ymm1); in generate() 202 vunpcklpd(ymm0, ymm4, ymm1); in generate() 219 vunpcklpd(xmm4, xmm0, xmm2); in generate() 227 vunpcklpd(xmm0, xmm0, xmm2); in generate() 237 vunpcklpd(xmm4, xmm0, xmm2); in generate() 245 vunpcklpd(xmm0, xmm0, xmm2); in generate() [all …]
|
H A D | jit_avx512_core_f32_copy_bn_kern_autogen.cpp | 124 vunpcklpd(ymm0, ymm4, ymm1); in generate() 126 vunpcklpd(ymm2, ymm5, ymm3); in generate() 155 vunpcklpd(ymm0, ymm4, ymm1); in generate() 172 vunpcklpd(xmm4, xmm0, xmm2); in generate() 180 vunpcklpd(xmm0, xmm0, xmm2); in generate() 215 vunpcklpd(xmm0, xmm4, xmm1); in generate() 217 vunpcklpd(xmm2, xmm5, xmm3); in generate() 239 vunpcklpd(xmm0, xmm4, xmm1); in generate() 257 vunpcklpd(xmm0, xmm0, xmm2); in generate() 425 vunpcklpd(ymm0, ymm4, ymm1); in generate() [all …]
|
H A D | jit_avx2_f32_copy_bn_kern_autogen.cpp | 112 vunpcklpd(xmm0, xmm4, xmm1); in generate() 114 vunpcklpd(xmm2, xmm5, xmm3); in generate() 136 vunpcklpd(xmm0, xmm4, xmm1); in generate() 154 vunpcklpd(xmm0, xmm0, xmm2); in generate() 311 vunpcklpd(xmm0, xmm4, xmm1); in generate() 313 vunpcklpd(xmm2, xmm5, xmm3); in generate() 339 vunpcklpd(xmm0, xmm4, xmm1); in generate() 359 vunpcklpd(xmm0, xmm0, xmm2); in generate() 520 vunpcklpd(xmm0, xmm4, xmm1); in generate() 522 vunpcklpd(xmm2, xmm5, xmm3); in generate() [all …]
|
H A D | jit_avx_f32_copy_bn_kern_autogen.cpp | 111 vunpcklpd(xmm0, xmm4, xmm1); in generate() 113 vunpcklpd(xmm2, xmm5, xmm3); in generate() 135 vunpcklpd(xmm0, xmm4, xmm1); in generate() 153 vunpcklpd(xmm0, xmm0, xmm2); in generate() 310 vunpcklpd(xmm0, xmm4, xmm1); in generate() 312 vunpcklpd(xmm2, xmm5, xmm3); in generate() 338 vunpcklpd(xmm0, xmm4, xmm1); in generate() 358 vunpcklpd(xmm0, xmm0, xmm2); in generate() 519 vunpcklpd(xmm0, xmm4, xmm1); in generate() 521 vunpcklpd(xmm2, xmm5, xmm3); in generate() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/gemm/f32/ |
H A D | jit_avx2_f32_copy_at_kern_autogen.cpp | 125 vunpcklpd(ymm0, ymm4, ymm1); in generate() 127 vunpcklpd(ymm2, ymm5, ymm3); in generate() 154 vunpcklpd(ymm0, ymm4, ymm1); in generate() 156 vunpcklpd(ymm2, ymm5, ymm3); in generate() 183 vunpcklpd(ymm0, ymm4, ymm1); in generate() 185 vunpcklpd(ymm2, ymm5, ymm3); in generate() 214 vunpcklpd(ymm0, ymm4, ymm1); in generate() 232 vunpcklpd(ymm0, ymm4, ymm1); in generate() 250 vunpcklpd(ymm0, ymm4, ymm1); in generate() 267 vunpcklpd(xmm4, xmm0, xmm2); in generate() [all …]
|
H A D | jit_avx512_core_f32_copy_at_kern_part2_autogen.cpp | 61 vunpcklpd(zmm0, zmm4, zmm1); in generate_part2() 95 vunpcklpd(zmm0, zmm4, zmm1); in generate_part2() 114 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 122 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() 133 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 141 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() 152 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 160 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() 171 vunpcklpd(xmm4, xmm0, xmm2); in generate_part2() 179 vunpcklpd(xmm0, xmm0, xmm2); in generate_part2() [all …]
|
H A D | jit_avx512_core_f32_copy_at_kern_part1_autogen.cpp | 165 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 167 vunpcklpd(zmm2, zmm5, zmm3); in generate_part1() 210 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 212 vunpcklpd(zmm2, zmm5, zmm3); in generate_part1() 255 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 257 vunpcklpd(zmm2, zmm5, zmm3); in generate_part1() 300 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 332 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 364 vunpcklpd(zmm0, zmm4, zmm1); in generate_part1() 381 vunpcklpd(xmm4, xmm0, xmm2); in generate_part1() [all …]
|
H A D | jit_avx_f32_copy_at_kern_autogen.cpp | 124 vunpcklpd(ymm0, ymm4, ymm1); in generate() 126 vunpcklpd(ymm2, ymm5, ymm3); in generate() 153 vunpcklpd(ymm0, ymm4, ymm1); in generate() 155 vunpcklpd(ymm2, ymm5, ymm3); in generate() 184 vunpcklpd(ymm0, ymm4, ymm1); in generate() 202 vunpcklpd(ymm0, ymm4, ymm1); in generate() 219 vunpcklpd(xmm4, xmm0, xmm2); in generate() 227 vunpcklpd(xmm0, xmm0, xmm2); in generate() 237 vunpcklpd(xmm4, xmm0, xmm2); in generate() 245 vunpcklpd(xmm0, xmm0, xmm2); in generate() [all …]
|
H A D | jit_avx512_core_f32_copy_bn_kern_autogen.cpp | 124 vunpcklpd(ymm0, ymm4, ymm1); in generate() 126 vunpcklpd(ymm2, ymm5, ymm3); in generate() 155 vunpcklpd(ymm0, ymm4, ymm1); in generate() 172 vunpcklpd(xmm4, xmm0, xmm2); in generate() 180 vunpcklpd(xmm0, xmm0, xmm2); in generate() 215 vunpcklpd(xmm0, xmm4, xmm1); in generate() 217 vunpcklpd(xmm2, xmm5, xmm3); in generate() 239 vunpcklpd(xmm0, xmm4, xmm1); in generate() 257 vunpcklpd(xmm0, xmm0, xmm2); in generate() 425 vunpcklpd(ymm0, ymm4, ymm1); in generate() [all …]
|
H A D | jit_avx2_f32_copy_bn_kern_autogen.cpp | 112 vunpcklpd(xmm0, xmm4, xmm1); in generate() 114 vunpcklpd(xmm2, xmm5, xmm3); in generate() 136 vunpcklpd(xmm0, xmm4, xmm1); in generate() 154 vunpcklpd(xmm0, xmm0, xmm2); in generate() 311 vunpcklpd(xmm0, xmm4, xmm1); in generate() 313 vunpcklpd(xmm2, xmm5, xmm3); in generate() 339 vunpcklpd(xmm0, xmm4, xmm1); in generate() 359 vunpcklpd(xmm0, xmm0, xmm2); in generate() 520 vunpcklpd(xmm0, xmm4, xmm1); in generate() 522 vunpcklpd(xmm2, xmm5, xmm3); in generate() [all …]
|
H A D | jit_avx_f32_copy_bn_kern_autogen.cpp | 111 vunpcklpd(xmm0, xmm4, xmm1); in generate() 113 vunpcklpd(xmm2, xmm5, xmm3); in generate() 135 vunpcklpd(xmm0, xmm4, xmm1); in generate() 153 vunpcklpd(xmm0, xmm0, xmm2); in generate() 310 vunpcklpd(xmm0, xmm4, xmm1); in generate() 312 vunpcklpd(xmm2, xmm5, xmm3); in generate() 338 vunpcklpd(xmm0, xmm4, xmm1); in generate() 358 vunpcklpd(xmm0, xmm0, xmm2); in generate() 519 vunpcklpd(xmm0, xmm4, xmm1); in generate() 521 vunpcklpd(xmm2, xmm5, xmm3); in generate() [all …]
|
/dports/math/blis/blis-0.8.1/kernels/haswell/3/sup/d6x8/ |
H A D | bli_gemmsup_rv_haswell_asm_dMx6.c | 491 vunpcklpd(ymm6, ymm4, ymm0) in GEMMSUP_KER_PROT() 493 vunpcklpd(ymm10, ymm8, ymm2) in GEMMSUP_KER_PROT() 530 vunpcklpd(ymm7, ymm5, ymm0) in GEMMSUP_KER_PROT() 532 vunpcklpd(ymm11, ymm9, ymm2) in GEMMSUP_KER_PROT() 610 vunpcklpd(ymm6, ymm4, ymm0) in GEMMSUP_KER_PROT() 612 vunpcklpd(ymm10, ymm8, ymm2) in GEMMSUP_KER_PROT() 639 vunpcklpd(ymm7, ymm5, ymm0) in GEMMSUP_KER_PROT() 1076 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_5x6() 1114 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_5x6() 1188 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_5x6() [all …]
|
H A D | bli_gemmsup_rv_haswell_asm_dMx8.c | 587 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 589 vunpcklpd(ymm10, ymm8, ymm2) in bli_dgemmsup_rv_haswell_asm_6x8() 626 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 628 vunpcklpd(ymm11, ymm9, ymm2) in bli_dgemmsup_rv_haswell_asm_6x8() 718 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 720 vunpcklpd(ymm10, ymm8, ymm2) in bli_dgemmsup_rv_haswell_asm_6x8() 747 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 1195 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_5x8() 1233 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_5x8() 1319 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_5x8() [all …]
|
H A D | bli_gemmsup_rv_haswell_asm_dMx2.c | 427 vunpcklpd(xmm6, xmm4, xmm0) in GEMMSUP_KER_PROT() 429 vunpcklpd(xmm10, xmm8, xmm2) in GEMMSUP_KER_PROT() 501 vunpcklpd(xmm6, xmm4, xmm0) in GEMMSUP_KER_PROT() 503 vunpcklpd(xmm10, xmm8, xmm2) in GEMMSUP_KER_PROT() 883 vunpcklpd(xmm6, xmm4, xmm0) in bli_dgemmsup_rv_haswell_asm_5x2() 885 vunpcklpd(xmm10, xmm8, xmm2) in bli_dgemmsup_rv_haswell_asm_5x2() 952 vunpcklpd(xmm6, xmm4, xmm0) in bli_dgemmsup_rv_haswell_asm_5x2() 1311 vunpcklpd(xmm6, xmm4, xmm0) in bli_dgemmsup_rv_haswell_asm_4x2() 1367 vunpcklpd(xmm6, xmm4, xmm0) in bli_dgemmsup_rv_haswell_asm_4x2() 1702 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_3x2() [all …]
|
/dports/devel/llvm-devel/llvm-project-f05c95f10fc1d8171071735af8ad3a9e87633120/llvm/test/CodeGen/X86/ |
H A D | vector-interleaved-load-i64-stride-2.ll | 78 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2] 89 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 102 ; AVX512-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 162 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm4[0],ymm0[2],ymm4[2] 165 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm2[0],ymm5[0],ymm2[2],ymm5[2] 181 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 183 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm2[0],ymm3[0],ymm2[2],ymm3[2] 294 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm6[0],ymm8[0],ymm6[2],ymm8[2] 297 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm4[0],ymm9[0],ymm4[2],ymm9[2] 329 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm8 = ymm7[0],ymm6[0],ymm7[2],ymm6[2] [all …]
|
/dports/math/blis/blis-0.8.1/kernels/haswell/3/sup/d6x8/old/ |
H A D | bli_gemmsup_rv_haswell_asm_d6x8.c | 591 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 593 vunpcklpd(ymm10, ymm8, ymm2) in bli_dgemmsup_rv_haswell_asm_6x8() 630 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 632 vunpcklpd(ymm11, ymm9, ymm2) in bli_dgemmsup_rv_haswell_asm_6x8() 722 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 724 vunpcklpd(ymm10, ymm8, ymm2) in bli_dgemmsup_rv_haswell_asm_6x8() 751 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_6x8() 1172 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_5x8() 1226 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_5x8() 1328 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_5x8() [all …]
|
/dports/math/blis/blis-0.8.1/kernels/haswell/3/sup/ |
H A D | bli_gemmsup_rv_haswell_asm_d6x8m.c | 658 vunpcklpd(ymm6, ymm4, ymm0) in GEMMSUP_KER_PROT() 660 vunpcklpd(ymm10, ymm8, ymm2) in GEMMSUP_KER_PROT() 697 vunpcklpd(ymm7, ymm5, ymm0) in GEMMSUP_KER_PROT() 699 vunpcklpd(ymm11, ymm9, ymm2) in GEMMSUP_KER_PROT() 789 vunpcklpd(ymm6, ymm4, ymm0) in GEMMSUP_KER_PROT() 791 vunpcklpd(ymm10, ymm8, ymm2) in GEMMSUP_KER_PROT() 818 vunpcklpd(ymm7, ymm5, ymm0) in GEMMSUP_KER_PROT() 1457 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_6x6m() 1496 vunpcklpd(ymm7, ymm5, ymm0) in bli_dgemmsup_rv_haswell_asm_6x6m() 1576 vunpcklpd(ymm6, ymm4, ymm0) in bli_dgemmsup_rv_haswell_asm_6x6m() [all …]
|
/dports/java/openjdk17/jdk17u-jdk-17.0.1-12-1/src/jdk.incubator.vector/linux/native/libsvml/ |
H A D | svml_s_tanh_linux_x86.S | 94 vunpcklpd %ymm12, %ymm11, %ymm4 #476.764 99 vunpcklpd %ymm1, %ymm2, %ymm6 #475.764 102 vunpcklpd %ymm12, %ymm11, %ymm13 #478.769 105 vunpcklpd %ymm1, %ymm15, %ymm2 #477.769 256 vunpcklpd %ymm2, %ymm3, %ymm7 #546.764 258 vunpcklpd %ymm14, %ymm13, %ymm5 #547.764 263 vunpcklpd %ymm0, %ymm2, %ymm3 #548.769 265 vunpcklpd %ymm14, %ymm13, %ymm0 #549.769 841 vunpcklpd %ymm15, %ymm4, %ymm3 #779.804 860 vunpcklpd %ymm6, %ymm7, %ymm11 #780.804 [all …]
|
H A D | svml_d_tanh_linux_x86.S | 103 vunpcklpd %ymm8, %ymm9, %ymm10 #1295.759 105 vunpcklpd %ymm4, %ymm5, %ymm12 #1296.759 107 vunpcklpd %ymm14, %ymm11, %ymm8 #1297.759 115 vunpcklpd %ymm4, %ymm5, %ymm6 #1298.759 117 vunpcklpd %ymm0, %ymm11, %ymm4 #1299.759 121 vunpcklpd %ymm0, %ymm11, %ymm2 #1300.763 127 vunpcklpd %ymm0, %ymm15, %ymm14 #1301.763 306 vunpcklpd %ymm15, %ymm4, %ymm0 #1394.759 316 vunpcklpd %ymm3, %ymm2, %ymm10 #1397.759 319 vunpcklpd %ymm11, %ymm12, %ymm14 #1395.759 [all …]
|
/dports/math/blasfeo/blasfeo-0.1.2/kernel/avx2/ |
H A D | kernel_dgemm_8x8_lib.S | 164 vunpcklpd %ymm1, %ymm0, %ymm12 166 vunpcklpd %ymm3, %ymm2, %ymm14 181 vunpcklpd %ymm5, %ymm4, %ymm12 183 vunpcklpd %ymm7, %ymm6, %ymm14 198 vunpcklpd %ymm9, %ymm8, %ymm12 200 vunpcklpd %ymm11, %ymm10, %ymm14 424 vunpcklpd %ymm1, %ymm0, %ymm12 426 vunpcklpd %ymm3, %ymm2, %ymm14 441 vunpcklpd %ymm5, %ymm4, %ymm12 443 vunpcklpd %ymm7, %ymm6, %ymm14 [all …]
|
/dports/math/blis/blis-0.8.1/kernels/haswell/1m/ |
H A D | bli_packm_haswell_asm_d8xk.c | 184 vunpcklpd(ymm2, ymm0, ymm10) in PACKM_KER_PROT() 186 vunpcklpd(ymm6, ymm4, ymm12) in PACKM_KER_PROT() 205 vunpcklpd(ymm3, ymm1, ymm10) in PACKM_KER_PROT() 207 vunpcklpd(ymm7, ymm5, ymm12) in PACKM_KER_PROT()
|