Home
last modified time | relevance | path

Searched refs:BO1 (Results 1 – 25 of 205) sorted by relevance

123456789

/dports/math/openblas/OpenBLAS-0.3.18/kernel/arm/
H A Dsgemm_tcopy_4_vfp.S63 #define BO1 r7 macro
93 vstmia.f32 BO1, { s0 - s15 }
95 add BO1, BO1, M4
114 add BO1, BO1, M4
166 vstmia.f32 BO1, { s0 - s7 }
168 add BO1, BO1, M4
204 vstmia.f32 BO1, { s0 - s3 }
206 add BO1, BO1, M4
278 ldr BO1, B
338 ldr BO1, B
[all …]
H A Dcgemm_tcopy_2_vfp.S63 #define BO1 r7 macro
81 vstmia.f32 BO1, { s0 - s7 }
83 add BO1, BO1, M4
105 vstmia.f32 BO1, { s0 - s3 }
107 add BO1, BO1, M4
166 ldr BO1, B
167 add r3, BO1, #32 // B = B + 4 * SIZE *2
206 ldr BO1, B
207 add r3, BO1, #16 // B = B + 2 * SIZE *2
H A Dzgemm_tcopy_2_vfp.S63 #define BO1 r7 macro
83 vstmia.f64 BO1, { d0 - d7 }
85 add BO1, BO1, M4
107 vstmia.f64 BO1, { d0 - d3 }
109 add BO1, BO1, M4
168 ldr BO1, B
169 add r3, BO1, #64 // B = B + 4 * SIZE *2
208 ldr BO1, B
209 add r3, BO1, #32 // B = B + 2 * SIZE *2
H A Ddgemm_tcopy_4_vfp.S63 #define BO1 r7 macro
93 vstmia.f64 BO1, { d0 - d15 }
95 add BO1, BO1, M4
148 vstmia.f64 BO1, { d0 - d7 }
150 add BO1, BO1, M4
187 vstmia.f64 BO1, { d0 - d3 }
189 add BO1, BO1, M4
261 ldr BO1, B
316 ldr BO1, B
317 add r3, BO1, #64 // B = B + 8 * SIZE
[all …]
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/gemm/f32/
H A Djit_avx512_common_gemm_f32.cpp116 auto BO1 = abi_param4; in generate() local
154 mov(BO1, A); in generate()
1332 lea(BO1, ptr[BO1 + LDB * 2]); in generate()
1336 lea(BO1, ptr[BO1 + LDB3]); in generate()
1340 lea(BO1, ptr[BO1 + LDB * 4]); in generate()
1344 lea(BO1, ptr[BO1 + LDB * 4]); in generate()
1350 lea(BO1, ptr[BO1 + LDB3 * 2]); in generate()
1354 lea(BO1, ptr[BO1 + LDB * 8]); in generate()
1360 lea(BO1, ptr[BO1 + LDB * 8]); in generate()
1423 mov(BO1, B); in generate()
[all …]
H A Djit_avx_gemm_f32.cpp816 add(BO1, LDB); in innerkernel1()
1209 lea(BO1, ptr[BO1 + LDB * 2]); in kernel()
1213 lea(BO1, ptr[BO1 + LDB3]); in kernel()
1217 lea(BO1, ptr[BO1 + LDB * 4]); in kernel()
1221 lea(BO1, ptr[BO1 + LDB * 4]); in kernel()
1227 lea(BO1, ptr[BO1 + LDB3 * 2]); in kernel()
1231 sub(BO1, rax); in kernel()
1236 sub(BO1, rax); in kernel()
1335 mov(BO1, A); in do_pack()
1606 add(BO1, LDA); in do_pack()
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/gemm/f32/
H A Djit_avx512_common_gemm_f32.cpp116 auto BO1 = abi_param4; in generate() local
154 mov(BO1, A); in generate()
1332 lea(BO1, ptr[BO1 + LDB * 2]); in generate()
1336 lea(BO1, ptr[BO1 + LDB3]); in generate()
1340 lea(BO1, ptr[BO1 + LDB * 4]); in generate()
1344 lea(BO1, ptr[BO1 + LDB * 4]); in generate()
1350 lea(BO1, ptr[BO1 + LDB3 * 2]); in generate()
1354 lea(BO1, ptr[BO1 + LDB * 8]); in generate()
1360 lea(BO1, ptr[BO1 + LDB * 8]); in generate()
1423 mov(BO1, B); in generate()
[all …]
H A Djit_avx_gemm_f32.cpp816 add(BO1, LDB); in innerkernel1()
1209 lea(BO1, ptr[BO1 + LDB * 2]); in kernel()
1213 lea(BO1, ptr[BO1 + LDB3]); in kernel()
1217 lea(BO1, ptr[BO1 + LDB * 4]); in kernel()
1221 lea(BO1, ptr[BO1 + LDB * 4]); in kernel()
1227 lea(BO1, ptr[BO1 + LDB3 * 2]); in kernel()
1231 sub(BO1, rax); in kernel()
1236 sub(BO1, rax); in kernel()
1335 mov(BO1, A); in do_pack()
1606 add(BO1, LDA); in do_pack()
[all …]
/dports/multimedia/opentoonz/opentoonz-1.5.0/thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/x86_64/
H A Dgemm_tcopy_4_opteron.S67 #define BO1 %r13 macro
92 #define BO1 %rsi macro
145 leaq (B, %rax, SIZE), BO1
273 MOVNTQ %mm0, 0 * SIZE(BO1)
274 MOVNTQ %mm1, 1 * SIZE(BO1)
275 MOVNTQ %mm2, 2 * SIZE(BO1)
276 MOVNTQ %mm3, 3 * SIZE(BO1)
277 MOVNTQ %mm4, 4 * SIZE(BO1)
286 addq $8 * SIZE, BO1
375 addq $4 * SIZE, BO1
[all …]
H A Dgemm_tcopy_2.S64 #define BO1 %r13 macro
88 #define BO1 %rsi macro
129 leaq (B, %rax, SIZE), BO1
186 movss %xmm0, 0 * SIZE(BO1)
187 movss %xmm1, 1 * SIZE(BO1)
192 movapd %xmm0, 0 * SIZE(BO1)
195 addq $2 * SIZE, BO1
239 movss %xmm0, 0 * SIZE(BO1)
242 movsd %xmm0, 0 * SIZE(BO1)
244 addq $1 * SIZE, BO1
H A Dgemm_tcopy_4.S99 #define BO1 %r13 macro
124 #define BO1 %rsi macro
169 leaq (B, %rax, SIZE), BO1
281 movaps %xmm0, 0 * SIZE(BO1)
282 movaps %xmm1, 4 * SIZE(BO1)
294 movapd %xmm0, 0 * SIZE(BO1)
295 movapd %xmm1, 2 * SIZE(BO1)
296 movapd %xmm2, 4 * SIZE(BO1)
304 addq $8 * SIZE, BO1
424 addq $4 * SIZE, BO1
[all …]
H A Dzgemm_tcopy_2.S58 #define BO1 %r13 macro
80 #define BO1 %rdi macro
120 leaq (B, %rax, 1), BO1
277 movaps %xmm0, 0 * SIZE(BO1)
284 movapd %xmm0, 0 * SIZE(BO1)
285 movapd %xmm1, 2 * SIZE(BO1)
288 addq $4 * SIZE, BO1
396 movlps %xmm0, 0 * SIZE(BO1)
401 movapd %xmm0, 0 * SIZE(BO1)
/dports/cad/meshlab/meshlab-Meshlab-2020.05/src/plugins_unsupported/external/GotoBLAS2/kernel/x86_64/
H A Dgemm_tcopy_4_opteron.S50 #define BO1 %r13 macro
75 #define BO1 %rsi macro
128 leaq (B, %rax, SIZE), BO1
256 MOVNTQ %mm0, 0 * SIZE(BO1)
257 MOVNTQ %mm1, 1 * SIZE(BO1)
258 MOVNTQ %mm2, 2 * SIZE(BO1)
259 MOVNTQ %mm3, 3 * SIZE(BO1)
260 MOVNTQ %mm4, 4 * SIZE(BO1)
269 addq $8 * SIZE, BO1
358 addq $4 * SIZE, BO1
[all …]
H A Dgemm_tcopy_2.S47 #define BO1 %r13 macro
71 #define BO1 %rsi macro
112 leaq (B, %rax, SIZE), BO1
169 movss %xmm0, 0 * SIZE(BO1)
170 movss %xmm1, 1 * SIZE(BO1)
175 movapd %xmm0, 0 * SIZE(BO1)
178 addq $2 * SIZE, BO1
222 movss %xmm0, 0 * SIZE(BO1)
225 movsd %xmm0, 0 * SIZE(BO1)
227 addq $1 * SIZE, BO1
H A Dgemm_tcopy_4.S82 #define BO1 %r13 macro
107 #define BO1 %rsi macro
152 leaq (B, %rax, SIZE), BO1
264 movaps %xmm0, 0 * SIZE(BO1)
265 movaps %xmm1, 4 * SIZE(BO1)
277 movapd %xmm0, 0 * SIZE(BO1)
278 movapd %xmm1, 2 * SIZE(BO1)
279 movapd %xmm2, 4 * SIZE(BO1)
287 addq $8 * SIZE, BO1
407 addq $4 * SIZE, BO1
[all …]
H A Dzgemm_tcopy_2.S41 #define BO1 %r13 macro
63 #define BO1 %rdi macro
103 leaq (B, %rax, 1), BO1
260 movaps %xmm0, 0 * SIZE(BO1)
267 movapd %xmm0, 0 * SIZE(BO1)
268 movapd %xmm1, 2 * SIZE(BO1)
271 addq $4 * SIZE, BO1
379 movlps %xmm0, 0 * SIZE(BO1)
384 movapd %xmm0, 0 * SIZE(BO1)
/dports/math/openblas/OpenBLAS-0.3.18/kernel/x86_64/
H A Dgemm_tcopy_4_opteron.S67 #define BO1 %r13 macro
92 #define BO1 %rsi macro
145 leaq (B, %rax, SIZE), BO1
273 MOVNTQ %mm0, 0 * SIZE(BO1)
274 MOVNTQ %mm1, 1 * SIZE(BO1)
275 MOVNTQ %mm2, 2 * SIZE(BO1)
276 MOVNTQ %mm3, 3 * SIZE(BO1)
277 MOVNTQ %mm4, 4 * SIZE(BO1)
286 addq $8 * SIZE, BO1
375 addq $4 * SIZE, BO1
[all …]
H A Dgemm_tcopy_2.S71 #define BO1 %r13 macro
95 #define BO1 %rsi macro
136 leaq (B, %rax, SIZE), BO1
193 movss %xmm0, 0 * SIZE(BO1)
194 movss %xmm1, 1 * SIZE(BO1)
199 movapd %xmm0, 0 * SIZE(BO1)
202 addq $2 * SIZE, BO1
246 movss %xmm0, 0 * SIZE(BO1)
249 movsd %xmm0, 0 * SIZE(BO1)
251 addq $1 * SIZE, BO1
H A Dgemm_tcopy_4.S99 #define BO1 %r13 macro
124 #define BO1 %rsi macro
169 leaq (B, %rax, SIZE), BO1
281 movaps %xmm0, 0 * SIZE(BO1)
282 movaps %xmm1, 4 * SIZE(BO1)
294 movapd %xmm0, 0 * SIZE(BO1)
295 movapd %xmm1, 2 * SIZE(BO1)
296 movapd %xmm2, 4 * SIZE(BO1)
304 addq $8 * SIZE, BO1
424 addq $4 * SIZE, BO1
[all …]
H A Dgemm_tcopy_2_bulldozer.S57 #define BO1 %r13 macro
81 #define BO1 %rsi macro
122 leaq (B, %rax, SIZE), BO1
285 vmovss %xmm0, 0 * SIZE(BO1)
286 vmovss %xmm1, 1 * SIZE(BO1)
291 vmovups %xmm0, 0 * SIZE(BO1)
294 addq $2 * SIZE, BO1
337 vmovss %xmm0, 0 * SIZE(BO1)
340 vmovsd %xmm0, 0 * SIZE(BO1)
342 addq $1 * SIZE, BO1
H A Dzgemm_tcopy_2.S58 #define BO1 %r13 macro
80 #define BO1 %rdi macro
120 leaq (B, %rax, 1), BO1
277 movaps %xmm0, 0 * SIZE(BO1)
284 movapd %xmm0, 0 * SIZE(BO1)
285 movapd %xmm1, 2 * SIZE(BO1)
288 addq $4 * SIZE, BO1
396 movlps %xmm0, 0 * SIZE(BO1)
401 movapd %xmm0, 0 * SIZE(BO1)
/dports/math/gotoblas/GotoBLAS2/kernel/x86_64/
H A Dgemm_tcopy_4_opteron.S67 #define BO1 %r13 macro
92 #define BO1 %rsi macro
145 leaq (B, %rax, SIZE), BO1
273 MOVNTQ %mm0, 0 * SIZE(BO1)
274 MOVNTQ %mm1, 1 * SIZE(BO1)
275 MOVNTQ %mm2, 2 * SIZE(BO1)
276 MOVNTQ %mm3, 3 * SIZE(BO1)
277 MOVNTQ %mm4, 4 * SIZE(BO1)
286 addq $8 * SIZE, BO1
375 addq $4 * SIZE, BO1
[all …]
H A Dgemm_tcopy_2.S64 #define BO1 %r13 macro
88 #define BO1 %rsi macro
129 leaq (B, %rax, SIZE), BO1
186 movss %xmm0, 0 * SIZE(BO1)
187 movss %xmm1, 1 * SIZE(BO1)
192 movapd %xmm0, 0 * SIZE(BO1)
195 addq $2 * SIZE, BO1
239 movss %xmm0, 0 * SIZE(BO1)
242 movsd %xmm0, 0 * SIZE(BO1)
244 addq $1 * SIZE, BO1
H A Dgemm_tcopy_4.S99 #define BO1 %r13 macro
124 #define BO1 %rsi macro
169 leaq (B, %rax, SIZE), BO1
281 movaps %xmm0, 0 * SIZE(BO1)
282 movaps %xmm1, 4 * SIZE(BO1)
294 movapd %xmm0, 0 * SIZE(BO1)
295 movapd %xmm1, 2 * SIZE(BO1)
296 movapd %xmm2, 4 * SIZE(BO1)
304 addq $8 * SIZE, BO1
424 addq $4 * SIZE, BO1
[all …]
H A Dzgemm_tcopy_2.S58 #define BO1 %r13 macro
80 #define BO1 %rdi macro
120 leaq (B, %rax, 1), BO1
277 movaps %xmm0, 0 * SIZE(BO1)
284 movapd %xmm0, 0 * SIZE(BO1)
285 movapd %xmm1, 2 * SIZE(BO1)
288 addq $4 * SIZE, BO1
396 movlps %xmm0, 0 * SIZE(BO1)
401 movapd %xmm0, 0 * SIZE(BO1)

123456789