/dports/math/openblas/OpenBLAS-0.3.18/kernel/arm/ |
H A D | sgemm_tcopy_4_vfp.S | 63 #define BO1 r7 macro 93 vstmia.f32 BO1, { s0 - s15 } 95 add BO1, BO1, M4 114 add BO1, BO1, M4 166 vstmia.f32 BO1, { s0 - s7 } 168 add BO1, BO1, M4 204 vstmia.f32 BO1, { s0 - s3 } 206 add BO1, BO1, M4 278 ldr BO1, B 338 ldr BO1, B [all …]
|
H A D | cgemm_tcopy_2_vfp.S | 63 #define BO1 r7 macro 81 vstmia.f32 BO1, { s0 - s7 } 83 add BO1, BO1, M4 105 vstmia.f32 BO1, { s0 - s3 } 107 add BO1, BO1, M4 166 ldr BO1, B 167 add r3, BO1, #32 // B = B + 4 * SIZE *2 206 ldr BO1, B 207 add r3, BO1, #16 // B = B + 2 * SIZE *2
|
H A D | zgemm_tcopy_2_vfp.S | 63 #define BO1 r7 macro 83 vstmia.f64 BO1, { d0 - d7 } 85 add BO1, BO1, M4 107 vstmia.f64 BO1, { d0 - d3 } 109 add BO1, BO1, M4 168 ldr BO1, B 169 add r3, BO1, #64 // B = B + 4 * SIZE *2 208 ldr BO1, B 209 add r3, BO1, #32 // B = B + 2 * SIZE *2
|
H A D | dgemm_tcopy_4_vfp.S | 63 #define BO1 r7 macro 93 vstmia.f64 BO1, { d0 - d15 } 95 add BO1, BO1, M4 148 vstmia.f64 BO1, { d0 - d7 } 150 add BO1, BO1, M4 187 vstmia.f64 BO1, { d0 - d3 } 189 add BO1, BO1, M4 261 ldr BO1, B 316 ldr BO1, B 317 add r3, BO1, #64 // B = B + 8 * SIZE [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/gemm/f32/ |
H A D | jit_avx512_common_gemm_f32.cpp | 116 auto BO1 = abi_param4; in generate() local 154 mov(BO1, A); in generate() 1332 lea(BO1, ptr[BO1 + LDB * 2]); in generate() 1336 lea(BO1, ptr[BO1 + LDB3]); in generate() 1340 lea(BO1, ptr[BO1 + LDB * 4]); in generate() 1344 lea(BO1, ptr[BO1 + LDB * 4]); in generate() 1350 lea(BO1, ptr[BO1 + LDB3 * 2]); in generate() 1354 lea(BO1, ptr[BO1 + LDB * 8]); in generate() 1360 lea(BO1, ptr[BO1 + LDB * 8]); in generate() 1423 mov(BO1, B); in generate() [all …]
|
H A D | jit_avx_gemm_f32.cpp | 816 add(BO1, LDB); in innerkernel1() 1209 lea(BO1, ptr[BO1 + LDB * 2]); in kernel() 1213 lea(BO1, ptr[BO1 + LDB3]); in kernel() 1217 lea(BO1, ptr[BO1 + LDB * 4]); in kernel() 1221 lea(BO1, ptr[BO1 + LDB * 4]); in kernel() 1227 lea(BO1, ptr[BO1 + LDB3 * 2]); in kernel() 1231 sub(BO1, rax); in kernel() 1236 sub(BO1, rax); in kernel() 1335 mov(BO1, A); in do_pack() 1606 add(BO1, LDA); in do_pack() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/gemm/f32/ |
H A D | jit_avx512_common_gemm_f32.cpp | 116 auto BO1 = abi_param4; in generate() local 154 mov(BO1, A); in generate() 1332 lea(BO1, ptr[BO1 + LDB * 2]); in generate() 1336 lea(BO1, ptr[BO1 + LDB3]); in generate() 1340 lea(BO1, ptr[BO1 + LDB * 4]); in generate() 1344 lea(BO1, ptr[BO1 + LDB * 4]); in generate() 1350 lea(BO1, ptr[BO1 + LDB3 * 2]); in generate() 1354 lea(BO1, ptr[BO1 + LDB * 8]); in generate() 1360 lea(BO1, ptr[BO1 + LDB * 8]); in generate() 1423 mov(BO1, B); in generate() [all …]
|
H A D | jit_avx_gemm_f32.cpp | 816 add(BO1, LDB); in innerkernel1() 1209 lea(BO1, ptr[BO1 + LDB * 2]); in kernel() 1213 lea(BO1, ptr[BO1 + LDB3]); in kernel() 1217 lea(BO1, ptr[BO1 + LDB * 4]); in kernel() 1221 lea(BO1, ptr[BO1 + LDB * 4]); in kernel() 1227 lea(BO1, ptr[BO1 + LDB3 * 2]); in kernel() 1231 sub(BO1, rax); in kernel() 1236 sub(BO1, rax); in kernel() 1335 mov(BO1, A); in do_pack() 1606 add(BO1, LDA); in do_pack() [all …]
|
/dports/multimedia/opentoonz/opentoonz-1.5.0/thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/x86_64/ |
H A D | gemm_tcopy_4_opteron.S | 67 #define BO1 %r13 macro 92 #define BO1 %rsi macro 145 leaq (B, %rax, SIZE), BO1 273 MOVNTQ %mm0, 0 * SIZE(BO1) 274 MOVNTQ %mm1, 1 * SIZE(BO1) 275 MOVNTQ %mm2, 2 * SIZE(BO1) 276 MOVNTQ %mm3, 3 * SIZE(BO1) 277 MOVNTQ %mm4, 4 * SIZE(BO1) 286 addq $8 * SIZE, BO1 375 addq $4 * SIZE, BO1 [all …]
|
H A D | gemm_tcopy_2.S | 64 #define BO1 %r13 macro 88 #define BO1 %rsi macro 129 leaq (B, %rax, SIZE), BO1 186 movss %xmm0, 0 * SIZE(BO1) 187 movss %xmm1, 1 * SIZE(BO1) 192 movapd %xmm0, 0 * SIZE(BO1) 195 addq $2 * SIZE, BO1 239 movss %xmm0, 0 * SIZE(BO1) 242 movsd %xmm0, 0 * SIZE(BO1) 244 addq $1 * SIZE, BO1
|
H A D | gemm_tcopy_4.S | 99 #define BO1 %r13 macro 124 #define BO1 %rsi macro 169 leaq (B, %rax, SIZE), BO1 281 movaps %xmm0, 0 * SIZE(BO1) 282 movaps %xmm1, 4 * SIZE(BO1) 294 movapd %xmm0, 0 * SIZE(BO1) 295 movapd %xmm1, 2 * SIZE(BO1) 296 movapd %xmm2, 4 * SIZE(BO1) 304 addq $8 * SIZE, BO1 424 addq $4 * SIZE, BO1 [all …]
|
H A D | zgemm_tcopy_2.S | 58 #define BO1 %r13 macro 80 #define BO1 %rdi macro 120 leaq (B, %rax, 1), BO1 277 movaps %xmm0, 0 * SIZE(BO1) 284 movapd %xmm0, 0 * SIZE(BO1) 285 movapd %xmm1, 2 * SIZE(BO1) 288 addq $4 * SIZE, BO1 396 movlps %xmm0, 0 * SIZE(BO1) 401 movapd %xmm0, 0 * SIZE(BO1)
|
/dports/cad/meshlab/meshlab-Meshlab-2020.05/src/plugins_unsupported/external/GotoBLAS2/kernel/x86_64/ |
H A D | gemm_tcopy_4_opteron.S | 50 #define BO1 %r13 macro 75 #define BO1 %rsi macro 128 leaq (B, %rax, SIZE), BO1 256 MOVNTQ %mm0, 0 * SIZE(BO1) 257 MOVNTQ %mm1, 1 * SIZE(BO1) 258 MOVNTQ %mm2, 2 * SIZE(BO1) 259 MOVNTQ %mm3, 3 * SIZE(BO1) 260 MOVNTQ %mm4, 4 * SIZE(BO1) 269 addq $8 * SIZE, BO1 358 addq $4 * SIZE, BO1 [all …]
|
H A D | gemm_tcopy_2.S | 47 #define BO1 %r13 macro 71 #define BO1 %rsi macro 112 leaq (B, %rax, SIZE), BO1 169 movss %xmm0, 0 * SIZE(BO1) 170 movss %xmm1, 1 * SIZE(BO1) 175 movapd %xmm0, 0 * SIZE(BO1) 178 addq $2 * SIZE, BO1 222 movss %xmm0, 0 * SIZE(BO1) 225 movsd %xmm0, 0 * SIZE(BO1) 227 addq $1 * SIZE, BO1
|
H A D | gemm_tcopy_4.S | 82 #define BO1 %r13 macro 107 #define BO1 %rsi macro 152 leaq (B, %rax, SIZE), BO1 264 movaps %xmm0, 0 * SIZE(BO1) 265 movaps %xmm1, 4 * SIZE(BO1) 277 movapd %xmm0, 0 * SIZE(BO1) 278 movapd %xmm1, 2 * SIZE(BO1) 279 movapd %xmm2, 4 * SIZE(BO1) 287 addq $8 * SIZE, BO1 407 addq $4 * SIZE, BO1 [all …]
|
H A D | zgemm_tcopy_2.S | 41 #define BO1 %r13 macro 63 #define BO1 %rdi macro 103 leaq (B, %rax, 1), BO1 260 movaps %xmm0, 0 * SIZE(BO1) 267 movapd %xmm0, 0 * SIZE(BO1) 268 movapd %xmm1, 2 * SIZE(BO1) 271 addq $4 * SIZE, BO1 379 movlps %xmm0, 0 * SIZE(BO1) 384 movapd %xmm0, 0 * SIZE(BO1)
|
/dports/math/openblas/OpenBLAS-0.3.18/kernel/x86_64/ |
H A D | gemm_tcopy_4_opteron.S | 67 #define BO1 %r13 macro 92 #define BO1 %rsi macro 145 leaq (B, %rax, SIZE), BO1 273 MOVNTQ %mm0, 0 * SIZE(BO1) 274 MOVNTQ %mm1, 1 * SIZE(BO1) 275 MOVNTQ %mm2, 2 * SIZE(BO1) 276 MOVNTQ %mm3, 3 * SIZE(BO1) 277 MOVNTQ %mm4, 4 * SIZE(BO1) 286 addq $8 * SIZE, BO1 375 addq $4 * SIZE, BO1 [all …]
|
H A D | gemm_tcopy_2.S | 71 #define BO1 %r13 macro 95 #define BO1 %rsi macro 136 leaq (B, %rax, SIZE), BO1 193 movss %xmm0, 0 * SIZE(BO1) 194 movss %xmm1, 1 * SIZE(BO1) 199 movapd %xmm0, 0 * SIZE(BO1) 202 addq $2 * SIZE, BO1 246 movss %xmm0, 0 * SIZE(BO1) 249 movsd %xmm0, 0 * SIZE(BO1) 251 addq $1 * SIZE, BO1
|
H A D | gemm_tcopy_4.S | 99 #define BO1 %r13 macro 124 #define BO1 %rsi macro 169 leaq (B, %rax, SIZE), BO1 281 movaps %xmm0, 0 * SIZE(BO1) 282 movaps %xmm1, 4 * SIZE(BO1) 294 movapd %xmm0, 0 * SIZE(BO1) 295 movapd %xmm1, 2 * SIZE(BO1) 296 movapd %xmm2, 4 * SIZE(BO1) 304 addq $8 * SIZE, BO1 424 addq $4 * SIZE, BO1 [all …]
|
H A D | gemm_tcopy_2_bulldozer.S | 57 #define BO1 %r13 macro 81 #define BO1 %rsi macro 122 leaq (B, %rax, SIZE), BO1 285 vmovss %xmm0, 0 * SIZE(BO1) 286 vmovss %xmm1, 1 * SIZE(BO1) 291 vmovups %xmm0, 0 * SIZE(BO1) 294 addq $2 * SIZE, BO1 337 vmovss %xmm0, 0 * SIZE(BO1) 340 vmovsd %xmm0, 0 * SIZE(BO1) 342 addq $1 * SIZE, BO1
|
H A D | zgemm_tcopy_2.S | 58 #define BO1 %r13 macro 80 #define BO1 %rdi macro 120 leaq (B, %rax, 1), BO1 277 movaps %xmm0, 0 * SIZE(BO1) 284 movapd %xmm0, 0 * SIZE(BO1) 285 movapd %xmm1, 2 * SIZE(BO1) 288 addq $4 * SIZE, BO1 396 movlps %xmm0, 0 * SIZE(BO1) 401 movapd %xmm0, 0 * SIZE(BO1)
|
/dports/math/gotoblas/GotoBLAS2/kernel/x86_64/ |
H A D | gemm_tcopy_4_opteron.S | 67 #define BO1 %r13 macro 92 #define BO1 %rsi macro 145 leaq (B, %rax, SIZE), BO1 273 MOVNTQ %mm0, 0 * SIZE(BO1) 274 MOVNTQ %mm1, 1 * SIZE(BO1) 275 MOVNTQ %mm2, 2 * SIZE(BO1) 276 MOVNTQ %mm3, 3 * SIZE(BO1) 277 MOVNTQ %mm4, 4 * SIZE(BO1) 286 addq $8 * SIZE, BO1 375 addq $4 * SIZE, BO1 [all …]
|
H A D | gemm_tcopy_2.S | 64 #define BO1 %r13 macro 88 #define BO1 %rsi macro 129 leaq (B, %rax, SIZE), BO1 186 movss %xmm0, 0 * SIZE(BO1) 187 movss %xmm1, 1 * SIZE(BO1) 192 movapd %xmm0, 0 * SIZE(BO1) 195 addq $2 * SIZE, BO1 239 movss %xmm0, 0 * SIZE(BO1) 242 movsd %xmm0, 0 * SIZE(BO1) 244 addq $1 * SIZE, BO1
|
H A D | gemm_tcopy_4.S | 99 #define BO1 %r13 macro 124 #define BO1 %rsi macro 169 leaq (B, %rax, SIZE), BO1 281 movaps %xmm0, 0 * SIZE(BO1) 282 movaps %xmm1, 4 * SIZE(BO1) 294 movapd %xmm0, 0 * SIZE(BO1) 295 movapd %xmm1, 2 * SIZE(BO1) 296 movapd %xmm2, 4 * SIZE(BO1) 304 addq $8 * SIZE, BO1 424 addq $4 * SIZE, BO1 [all …]
|
H A D | zgemm_tcopy_2.S | 58 #define BO1 %r13 macro 80 #define BO1 %rdi macro 120 leaq (B, %rax, 1), BO1 277 movaps %xmm0, 0 * SIZE(BO1) 284 movapd %xmm0, 0 * SIZE(BO1) 285 movapd %xmm1, 2 * SIZE(BO1) 288 addq $4 * SIZE, BO1 396 movlps %xmm0, 0 * SIZE(BO1) 401 movapd %xmm0, 0 * SIZE(BO1)
|