Lines Matching refs:BO

38 #define BO	%rsi  macro
92 movaps -32 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm1 ;\
95 movaps -28 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
98 mulps -20 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm0 ;\
100 movaps -24 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
107 movaps 0 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm1 ;\
110 movaps -12 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
112 mulps -20 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm2 ;\
114 movaps -8 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
121 movaps -16 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm7 ;\
124 movaps -12 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
126 mulps -4 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm4 ;\
128 movaps -8 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
135 movaps 16 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm7 ;\
138 movaps 4 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
140 mulps -4 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm6 ;\
142 movaps 8 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
150 movaps 0 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm1 ;\
153 movaps 4 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
155 mulps 12 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm0 ;\
157 movaps 8 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
164 movaps 32 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm1 ;\
167 movaps 20 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
169 mulps 12 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm2 ;\
171 movaps 24 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
178 movaps 16 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm7 ;\
181 movaps 20 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
183 mulps 28 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm4 ;\
185 movaps 24 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
192 movaps 48 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm7 ;\
195 movaps 36 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
197 mulps 28 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm6 ;\
199 movaps 40 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
207 movaps -32 * SIZE + 2 * (xx) * SIZE(BO), %xmm1 ;\
210 movaps -28 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
213 mulps -20 * SIZE + 2 * (xx) * SIZE(BO), %xmm0 ;\
215 movaps -24 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
222 movaps 0 * SIZE + 2 * (xx) * SIZE(BO), %xmm1 ;\
225 movaps -12 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
227 mulps -20 * SIZE + 2 * (xx) * SIZE(BO), %xmm2 ;\
229 movaps -8 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
236 movaps -16 * SIZE + 2 * (xx) * SIZE(BO), %xmm7 ;\
239 movaps -12 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
241 mulps -4 * SIZE + 2 * (xx) * SIZE(BO), %xmm4 ;\
243 movaps -8 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
250 movaps 16 * SIZE + 2 * (xx) * SIZE(BO), %xmm7 ;\
253 movaps 4 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
255 mulps -4 * SIZE + 2 * (xx) * SIZE(BO), %xmm6 ;\
257 movaps 8 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
265 movaps 0 * SIZE + 2 * (xx) * SIZE(BO), %xmm1 ;\
268 movaps 4 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
270 mulps 12 * SIZE + 2 * (xx) * SIZE(BO), %xmm0 ;\
272 movaps 8 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
279 movaps 32 * SIZE + 2 * (xx) * SIZE(BO), %xmm1 ;\
282 movaps 20 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
284 mulps 12 * SIZE + 2 * (xx) * SIZE(BO), %xmm2 ;\
286 movaps 24 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
293 movaps 16 * SIZE + 2 * (xx) * SIZE(BO), %xmm7 ;\
296 movaps 20 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
298 mulps 28 * SIZE + 2 * (xx) * SIZE(BO), %xmm4 ;\
300 movaps 24 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
307 movaps 48 * SIZE + 2 * (xx) * SIZE(BO), %xmm7 ;\
310 movaps 36 * SIZE + 2 * (xx) * SIZE(BO), %xmm3 ;\
312 mulps 28 * SIZE + 2 * (xx) * SIZE(BO), %xmm6 ;\
314 movaps 40 * SIZE + 2 * (xx) * SIZE(BO), %xmm5 ;\
403 leaq BUFFER, BO
425 PREFETCHW (WPREFETCHSIZE + 0) * SIZE(BO)
428 movq %mm0, 0 * SIZE(BO)
429 movq %mm0, 2 * SIZE(BO)
432 movq %mm1, 4 * SIZE(BO)
433 movq %mm1, 6 * SIZE(BO)
435 movq %mm2, 8 * SIZE(BO)
436 movq %mm2, 10 * SIZE(BO)
438 movq %mm3, 12 * SIZE(BO)
439 movq %mm3, 14 * SIZE(BO)
441 PREFETCHW (WPREFETCHSIZE + 16) * SIZE(BO)
444 movq %mm4, 16 * SIZE(BO)
445 movq %mm4, 18 * SIZE(BO)
447 movq %mm5, 20 * SIZE(BO)
448 movq %mm5, 22 * SIZE(BO)
450 movq %mm6, 24 * SIZE(BO)
451 movq %mm6, 26 * SIZE(BO)
453 movq %mm7, 28 * SIZE(BO)
454 movq %mm7, 30 * SIZE(BO)
458 addq $32 * SIZE, BO
482 movq %mm0, 0 * SIZE(BO)
483 movq %mm0, 2 * SIZE(BO)
484 movq %mm1, 4 * SIZE(BO)
485 movq %mm1, 6 * SIZE(BO)
486 movq %mm2, 8 * SIZE(BO)
487 movq %mm2, 10 * SIZE(BO)
488 movq %mm3, 12 * SIZE(BO)
489 movq %mm3, 14 * SIZE(BO)
492 addq $16 * SIZE, BO
514 leaq 32 * SIZE + BUFFER, BO
516 leaq 32 * SIZE + BUFFER, BO
520 leaq (BO, %rax, 8), BO
524 movaps -32 * SIZE(BO), %xmm1
527 movaps -28 * SIZE(BO), %xmm3
530 movaps -24 * SIZE(BO), %xmm5
533 movaps -16 * SIZE(BO), %xmm7
566 leaq (BO, %rax, 8), BO
766 addq $64 * SIZE, BO
785 addq $ 64 * SIZE, BO
805 leaq (BO, %rax, 8), BO
812 movaps -28 * SIZE(BO, %rax, 8), %xmm1
815 movaps -24 * SIZE(BO, %rax, 8), %xmm1
817 mulps -20 * SIZE(BO, %rax, 8), %xmm0
819 movaps -32 * SIZE(BO, %rax, 8), %xmm1
824 movaps -28 * SIZE(BO, %rax, 8), %xmm1
827 movaps -24 * SIZE(BO, %rax, 8), %xmm1
829 mulps -20 * SIZE(BO, %rax, 8), %xmm2
831 movaps -16 * SIZE(BO, %rax, 8), %xmm1
912 leaq (BO, %rax, 8), BO
933 leaq BUFFER, BO
935 leaq BUFFER, BO
939 leaq (BO, %rax, 8), BO
945 movaps 0 * SIZE(BO), %xmm9
946 movaps 16 * SIZE(BO), %xmm11
947 movaps 32 * SIZE(BO), %xmm13
948 movaps 48 * SIZE(BO), %xmm15
980 movaps 4 * SIZE(BO), %xmm9
983 movaps 8 * SIZE(BO), %xmm9
985 mulps 12 * SIZE(BO), %xmm8
987 movaps 64 * SIZE(BO), %xmm9
993 movaps 20 * SIZE(BO), %xmm11
996 movaps 24 * SIZE(BO), %xmm11
998 mulps 28 * SIZE(BO), %xmm8
1000 movaps 80 * SIZE(BO), %xmm11
1006 movaps 36 * SIZE(BO), %xmm13
1009 movaps 40 * SIZE(BO), %xmm13
1011 mulps 44 * SIZE(BO), %xmm8
1013 movaps 96 * SIZE(BO), %xmm13
1019 movaps 52 * SIZE(BO), %xmm15
1022 movaps 56 * SIZE(BO), %xmm15
1024 mulps 60 * SIZE(BO), %xmm8
1026 movaps 112 * SIZE(BO), %xmm15
1035 movaps 68 * SIZE(BO), %xmm9
1038 movaps 72 * SIZE(BO), %xmm9
1040 mulps 76 * SIZE(BO), %xmm10
1042 movaps 128 * SIZE(BO), %xmm9
1048 movaps 84 * SIZE(BO), %xmm11
1051 movaps 88 * SIZE(BO), %xmm11
1053 mulps 92 * SIZE(BO), %xmm10
1055 movaps 144 * SIZE(BO), %xmm11
1061 movaps 100 * SIZE(BO), %xmm13
1064 movaps 104 * SIZE(BO), %xmm13
1066 mulps 108 * SIZE(BO), %xmm10
1068 movaps 160 * SIZE(BO), %xmm13
1074 movaps 116 * SIZE(BO), %xmm15
1077 movaps 120 * SIZE(BO), %xmm15
1079 mulps 124 * SIZE(BO), %xmm10
1081 movaps 176 * SIZE(BO), %xmm15
1086 addq $128 * SIZE, BO
1106 movaps 4 * SIZE(BO), %xmm9
1109 movaps 8 * SIZE(BO), %xmm9
1111 mulps 12 * SIZE(BO), %xmm8
1113 movaps 16 * SIZE(BO), %xmm9
1118 addq $16 * SIZE, BO # boffset1 += 8
1162 leaq (BO, %rax, 8), BO
1181 leaq BUFFER, BO
1183 leaq BUFFER, BO
1187 leaq (BO, %rax, 8), BO
1193 movaps 0 * SIZE(BO), %xmm9
1194 movaps 16 * SIZE(BO), %xmm11
1195 movaps 32 * SIZE(BO), %xmm13
1196 movaps 48 * SIZE(BO), %xmm15
1228 movaps 4 * SIZE(BO), %xmm9
1231 movaps 8 * SIZE(BO), %xmm9
1234 movaps 12 * SIZE(BO), %xmm9
1238 movaps 64 * SIZE(BO), %xmm9
1242 movaps 20 * SIZE(BO), %xmm11
1245 movaps 24 * SIZE(BO), %xmm11
1248 movaps 28 * SIZE(BO), %xmm11
1252 movaps 80 * SIZE(BO), %xmm11
1256 movaps 36 * SIZE(BO), %xmm13
1259 movaps 40 * SIZE(BO), %xmm13
1262 movaps 44 * SIZE(BO), %xmm13
1266 movaps 96 * SIZE(BO), %xmm13
1270 movaps 52 * SIZE(BO), %xmm15
1273 movaps 56 * SIZE(BO), %xmm15
1276 movaps 60 * SIZE(BO), %xmm15
1280 movaps 112 * SIZE(BO), %xmm15
1284 movaps 68 * SIZE(BO), %xmm9
1287 movaps 72 * SIZE(BO), %xmm9
1290 movaps 76 * SIZE(BO), %xmm9
1294 movaps 128 * SIZE(BO), %xmm9
1298 movaps 84 * SIZE(BO), %xmm11
1301 movaps 88 * SIZE(BO), %xmm11
1304 movaps 92 * SIZE(BO), %xmm11
1308 movaps 144 * SIZE(BO), %xmm11
1312 movaps 100 * SIZE(BO), %xmm13
1315 movaps 104 * SIZE(BO), %xmm13
1318 movaps 108 * SIZE(BO), %xmm13
1322 movaps 160 * SIZE(BO), %xmm13
1326 movaps 116 * SIZE(BO), %xmm15
1329 movaps 120 * SIZE(BO), %xmm15
1332 movaps 124 * SIZE(BO), %xmm15
1336 movaps 176 * SIZE(BO), %xmm15
1339 addq $128 * SIZE, BO
1359 movaps 4 * SIZE(BO), %xmm9
1362 movaps 8 * SIZE(BO), %xmm9
1365 movaps 12 * SIZE(BO), %xmm9
1369 movaps 16 * SIZE(BO), %xmm9
1372 addq $16 * SIZE, BO # boffset1 += 8
1418 leaq (BO, %rax, 8), BO
1437 leaq BUFFER, BO
1439 leaq BUFFER, BO
1443 leaq (BO, %rax, 8), BO
1444 leaq (BO, %rax, 8), BO
1450 movss 0 * SIZE(BO), %xmm9
1451 movss 16 * SIZE(BO), %xmm11
1452 movss 32 * SIZE(BO), %xmm13
1453 movss 48 * SIZE(BO), %xmm15
1485 movss 4 * SIZE(BO), %xmm9
1488 movss 8 * SIZE(BO), %xmm9
1491 movss 12 * SIZE(BO), %xmm9
1495 movss 64 * SIZE(BO), %xmm9
1499 movss 20 * SIZE(BO), %xmm11
1502 movss 24 * SIZE(BO), %xmm11
1505 movss 28 * SIZE(BO), %xmm11
1509 movss 80 * SIZE(BO), %xmm11
1513 movss 36 * SIZE(BO), %xmm13
1516 movss 40 * SIZE(BO), %xmm13
1519 movss 44 * SIZE(BO), %xmm13
1523 movss 96 * SIZE(BO), %xmm13
1527 movss 52 * SIZE(BO), %xmm15
1530 movss 56 * SIZE(BO), %xmm15
1533 movss 60 * SIZE(BO), %xmm15
1537 movss 112 * SIZE(BO), %xmm15
1541 movss 68 * SIZE(BO), %xmm9
1544 movss 72 * SIZE(BO), %xmm9
1547 movss 76 * SIZE(BO), %xmm9
1551 movss 128 * SIZE(BO), %xmm9
1555 movss 84 * SIZE(BO), %xmm11
1558 movss 88 * SIZE(BO), %xmm11
1561 movss 92 * SIZE(BO), %xmm11
1565 movss 144 * SIZE(BO), %xmm11
1569 movss 100 * SIZE(BO), %xmm13
1572 movss 104 * SIZE(BO), %xmm13
1575 movss 108 * SIZE(BO), %xmm13
1579 movss 160 * SIZE(BO), %xmm13
1583 movss 116 * SIZE(BO), %xmm15
1586 movss 120 * SIZE(BO), %xmm15
1589 movss 124 * SIZE(BO), %xmm15
1593 movss 176 * SIZE(BO), %xmm15
1596 addq $128 * SIZE, BO
1616 movss 4 * SIZE(BO), %xmm9
1619 movss 8 * SIZE(BO), %xmm9
1622 movss 12 * SIZE(BO), %xmm9
1626 movss 16 * SIZE(BO), %xmm9
1629 addq $16 * SIZE, BO # boffset1 += 8
1663 leaq (BO, %rax, 8), BO
1664 leaq (BO, %rax, 8), BO
1691 leaq BUFFER, BO
1720 movaps %xmm0, 0 * SIZE(BO)
1721 movaps %xmm1, 4 * SIZE(BO)
1722 movaps %xmm2, 8 * SIZE(BO)
1723 movaps %xmm3, 12 * SIZE(BO)
1724 movaps %xmm4, 16 * SIZE(BO)
1725 movaps %xmm5, 20 * SIZE(BO)
1726 movaps %xmm6, 24 * SIZE(BO)
1727 movaps %xmm7, 28 * SIZE(BO)
1730 addq $32 * SIZE, BO
1754 movq %mm0, 0 * SIZE(BO)
1755 movq %mm0, 2 * SIZE(BO)
1756 movq %mm1, 4 * SIZE(BO)
1757 movq %mm1, 6 * SIZE(BO)
1758 movq %mm2, 8 * SIZE(BO)
1759 movq %mm2, 10 * SIZE(BO)
1760 movq %mm3, 12 * SIZE(BO)
1761 movq %mm3, 14 * SIZE(BO)
1762 movq %mm4, 16 * SIZE(BO)
1763 movq %mm4, 18 * SIZE(BO)
1764 movq %mm5, 20 * SIZE(BO)
1765 movq %mm5, 22 * SIZE(BO)
1766 movq %mm6, 24 * SIZE(BO)
1767 movq %mm6, 26 * SIZE(BO)
1768 movq %mm7, 28 * SIZE(BO)
1769 movq %mm7, 30 * SIZE(BO)
1772 addq $32 * SIZE, BO
1794 movaps %xmm0, 0 * SIZE(BO)
1795 movaps %xmm1, 4 * SIZE(BO)
1805 movq %mm0, 0 * SIZE(BO)
1806 movq %mm0, 2 * SIZE(BO)
1807 movq %mm1, 4 * SIZE(BO)
1808 movq %mm1, 6 * SIZE(BO)
1812 addq $ 8 * SIZE, BO
1832 leaq BUFFER, BO
1834 leaq BUFFER, BO
1838 leaq (BO, %rax, 4), BO
1846 movaps 0 * SIZE(BO), %xmm9
1847 movaps 16 * SIZE(BO), %xmm11
1848 movaps 32 * SIZE(BO), %xmm13
1849 movaps 48 * SIZE(BO), %xmm15
1883 mulps 4 * SIZE(BO), %xmm8
1885 movaps 0 * SIZE(BO), %xmm9
1889 mulps 4 * SIZE(BO), %xmm8
1891 movaps 8 * SIZE(BO), %xmm9
1896 mulps 12 * SIZE(BO), %xmm8
1898 movaps 8 * SIZE(BO), %xmm9
1902 mulps 12 * SIZE(BO), %xmm8
1904 movaps 64 * SIZE(BO), %xmm9
1912 mulps 20 * SIZE(BO), %xmm10
1914 movaps 16 * SIZE(BO), %xmm11
1918 mulps 20 * SIZE(BO), %xmm10
1920 movaps 24 * SIZE(BO), %xmm11
1925 mulps 28 * SIZE(BO), %xmm10
1927 movaps 24 * SIZE(BO), %xmm11
1931 mulps 28 * SIZE(BO), %xmm10
1933 movaps 80 * SIZE(BO), %xmm11
1941 mulps 36 * SIZE(BO), %xmm12
1943 movaps 32 * SIZE(BO), %xmm13
1947 mulps 36 * SIZE(BO), %xmm12
1949 movaps 40 * SIZE(BO), %xmm13
1954 mulps 44 * SIZE(BO), %xmm12
1956 movaps 40 * SIZE(BO), %xmm13
1960 mulps 44 * SIZE(BO), %xmm12
1962 movaps 96 * SIZE(BO), %xmm13
1970 mulps 52 * SIZE(BO), %xmm14
1972 movaps 48 * SIZE(BO), %xmm15
1976 mulps 52 * SIZE(BO), %xmm14
1978 movaps 56 * SIZE(BO), %xmm15
1983 mulps 60 * SIZE(BO), %xmm14
1985 movaps 56 * SIZE(BO), %xmm15
1989 mulps 60 * SIZE(BO), %xmm14
1991 movaps 112 * SIZE(BO), %xmm15
1996 addq $64 * SIZE, BO
2015 mulps 4 * SIZE(BO), %xmm8
2017 movaps 0 * SIZE(BO), %xmm9
2021 mulps 4 * SIZE(BO), %xmm8
2023 movaps 8 * SIZE(BO), %xmm9
2028 addq $8 * SIZE, BO # boffset1 += 8
2074 leaq (BO, %rax, 4), BO
2096 leaq BUFFER, BO
2098 leaq BUFFER, BO
2102 leaq (BO, %rax, 4), BO
2108 movaps 0 * SIZE(BO), %xmm9
2109 movaps 16 * SIZE(BO), %xmm11
2110 movaps 32 * SIZE(BO), %xmm13
2111 movaps 48 * SIZE(BO), %xmm15
2143 mulps 4 * SIZE(BO), %xmm8
2145 movaps 8 * SIZE(BO), %xmm9
2150 mulps 12 * SIZE(BO), %xmm8
2152 movaps 64 * SIZE(BO), %xmm9
2157 mulps 20 * SIZE(BO), %xmm8
2159 movaps 24 * SIZE(BO), %xmm11
2164 mulps 28 * SIZE(BO), %xmm8
2166 movaps 80 * SIZE(BO), %xmm11
2171 mulps 36 * SIZE(BO), %xmm10
2173 movaps 40 * SIZE(BO), %xmm13
2178 mulps 44 * SIZE(BO), %xmm10
2180 movaps 96 * SIZE(BO), %xmm13
2185 mulps 52 * SIZE(BO), %xmm10
2187 movaps 56 * SIZE(BO), %xmm15
2192 mulps 60 * SIZE(BO), %xmm10
2194 movaps 112 * SIZE(BO), %xmm15
2199 addq $64 * SIZE, BO
2218 mulps 4 * SIZE(BO), %xmm8
2220 movaps 8 * SIZE(BO), %xmm9
2225 addq $8 * SIZE, BO # boffset1 += 8
2260 leaq (BO, %rax, 4), BO
2279 leaq BUFFER, BO
2281 leaq BUFFER, BO
2285 leaq (BO, %rax, 4), BO
2291 movaps 0 * SIZE(BO), %xmm9
2292 movaps 16 * SIZE(BO), %xmm11
2293 movaps 32 * SIZE(BO), %xmm13
2294 movaps 48 * SIZE(BO), %xmm15
2326 movaps 4 * SIZE(BO), %xmm9
2330 movaps 8 * SIZE(BO), %xmm9
2334 movaps 12 * SIZE(BO), %xmm9
2338 movaps 64 * SIZE(BO), %xmm9
2342 movaps 20 * SIZE(BO), %xmm11
2346 movaps 24 * SIZE(BO), %xmm11
2350 movaps 28 * SIZE(BO), %xmm11
2354 movaps 80 * SIZE(BO), %xmm11
2358 movaps 36 * SIZE(BO), %xmm13
2362 movaps 40 * SIZE(BO), %xmm13
2366 movaps 44 * SIZE(BO), %xmm13
2370 movaps 96 * SIZE(BO), %xmm13
2374 movaps 52 * SIZE(BO), %xmm15
2378 movaps 56 * SIZE(BO), %xmm15
2382 movaps 60 * SIZE(BO), %xmm15
2386 movaps 112 * SIZE(BO), %xmm15
2389 addq $64 * SIZE, BO
2409 movaps 4 * SIZE(BO), %xmm9
2413 movaps 8 * SIZE(BO), %xmm9
2416 addq $8 * SIZE, BO # boffset1 += 8
2453 leaq (BO, %rax, 4), BO
2472 leaq BUFFER, BO
2474 leaq BUFFER, BO
2478 leaq (BO, %rax, 8), BO
2484 movss 0 * SIZE(BO), %xmm9
2485 movss 16 * SIZE(BO), %xmm11
2486 movss 32 * SIZE(BO), %xmm13
2487 movss 48 * SIZE(BO), %xmm15
2519 movss 4 * SIZE(BO), %xmm9
2523 movss 8 * SIZE(BO), %xmm9
2527 movss 12 * SIZE(BO), %xmm9
2531 movss 64 * SIZE(BO), %xmm9
2535 movss 20 * SIZE(BO), %xmm11
2539 movss 24 * SIZE(BO), %xmm11
2543 movss 28 * SIZE(BO), %xmm11
2547 movss 80 * SIZE(BO), %xmm11
2551 movss 36 * SIZE(BO), %xmm13
2555 movss 40 * SIZE(BO), %xmm13
2559 movss 44 * SIZE(BO), %xmm13
2563 movss 96 * SIZE(BO), %xmm13
2567 movss 52 * SIZE(BO), %xmm15
2571 movss 56 * SIZE(BO), %xmm15
2575 movss 60 * SIZE(BO), %xmm15
2579 movss 112 * SIZE(BO), %xmm15
2582 addq $64 * SIZE, BO
2602 movss 4 * SIZE(BO), %xmm9
2606 movss 8 * SIZE(BO), %xmm9
2609 addq $8 * SIZE, BO # boffset1 += 8
2639 leaq (BO, %rax, 8), BO
2666 leaq BUFFER, BO
2696 movaps %xmm0, 0 * SIZE(BO)
2697 movaps %xmm1, 4 * SIZE(BO)
2698 movaps %xmm2, 8 * SIZE(BO)
2699 movaps %xmm3, 12 * SIZE(BO)
2700 movaps %xmm4, 16 * SIZE(BO)
2701 movaps %xmm5, 20 * SIZE(BO)
2702 movaps %xmm6, 24 * SIZE(BO)
2703 movaps %xmm7, 28 * SIZE(BO)
2706 addq $32 * SIZE, BO
2730 movq %mm0, 0 * SIZE(BO)
2731 movq %mm0, 2 * SIZE(BO)
2732 movq %mm1, 4 * SIZE(BO)
2733 movq %mm1, 6 * SIZE(BO)
2734 movq %mm2, 8 * SIZE(BO)
2735 movq %mm2, 10 * SIZE(BO)
2736 movq %mm3, 12 * SIZE(BO)
2737 movq %mm3, 14 * SIZE(BO)
2738 movq %mm4, 16 * SIZE(BO)
2739 movq %mm4, 18 * SIZE(BO)
2740 movq %mm5, 20 * SIZE(BO)
2741 movq %mm5, 22 * SIZE(BO)
2742 movq %mm6, 24 * SIZE(BO)
2743 movq %mm6, 26 * SIZE(BO)
2744 movq %mm7, 28 * SIZE(BO)
2745 movq %mm7, 30 * SIZE(BO)
2748 addq $32 * SIZE, BO
2766 movaps %xmm0, 0 * SIZE(BO)
2772 movq %mm0, 0 * SIZE(BO)
2773 movq %mm0, 2 * SIZE(BO)
2777 addq $ 4 * SIZE, BO
2796 leaq BUFFER, BO
2798 leaq BUFFER, BO
2802 leaq (BO, %rax, 2), BO
2810 movaps 0 * SIZE(BO), %xmm9
2811 movaps 16 * SIZE(BO), %xmm11
2812 movaps 32 * SIZE(BO), %xmm13
2813 movaps 48 * SIZE(BO), %xmm15
2851 movaps 4 * SIZE(BO), %xmm9
2858 movaps 8 * SIZE(BO), %xmm9
2868 movaps 12 * SIZE(BO), %xmm9
2875 movaps 32 * SIZE(BO), %xmm9
2885 movaps 20 * SIZE(BO), %xmm11
2892 movaps 24 * SIZE(BO), %xmm11
2902 movaps 28 * SIZE(BO), %xmm11
2909 movaps 48 * SIZE(BO), %xmm11
2912 addq $32 * SIZE, BO
2935 movaps 4 * SIZE(BO), %xmm9
2938 addq $4 * SIZE, BO # boffset1 += 8
2969 leaq (BO, %rax, 2), BO
2989 leaq BUFFER, BO
2991 leaq BUFFER, BO
2995 leaq (BO, %rax, 2), BO
3001 movaps 0 * SIZE(BO), %xmm9
3002 movaps 16 * SIZE(BO), %xmm11
3034 mulps 4 * SIZE(BO), %xmm8
3036 movaps 32 * SIZE(BO), %xmm9
3039 mulps 8 * SIZE(BO), %xmm8
3042 mulps 12 * SIZE(BO), %xmm8
3051 mulps 20 * SIZE(BO), %xmm10
3053 movaps 48 * SIZE(BO), %xmm11
3056 mulps 24 * SIZE(BO), %xmm10
3059 mulps 28 * SIZE(BO), %xmm10
3064 addq $32 * SIZE, BO
3085 movaps 4 * SIZE(BO), %xmm9
3088 addq $4 * SIZE, BO # boffset1 += 8
3117 leaq (BO, %rax, 2), BO
3135 leaq BUFFER, BO
3137 leaq BUFFER, BO
3141 leaq (BO, %rax, 2), BO
3147 movaps 0 * SIZE(BO), %xmm9
3148 movaps 16 * SIZE(BO), %xmm11
3181 movaps 4 * SIZE(BO), %xmm9
3185 movaps 8 * SIZE(BO), %xmm9
3190 movaps 12 * SIZE(BO), %xmm9
3195 movaps 32 * SIZE(BO), %xmm9
3200 movaps 20 * SIZE(BO), %xmm11
3205 movaps 24 * SIZE(BO), %xmm11
3210 movaps 28 * SIZE(BO), %xmm11
3215 movaps 48 * SIZE(BO), %xmm11
3218 addq $32 * SIZE, BO
3239 movaps 4 * SIZE(BO), %xmm9
3242 addq $4 * SIZE, BO # boffset1 += 8
3267 leaq (BO, %rax, 2), BO
3285 leaq BUFFER, BO
3287 leaq BUFFER, BO
3291 leaq (BO, %rax, 4), BO
3297 movss 0 * SIZE(BO), %xmm9
3298 movss 16 * SIZE(BO), %xmm11
3330 mulss 4 * SIZE(BO), %xmm8
3332 movss 32 * SIZE(BO), %xmm9
3335 mulss 8 * SIZE(BO), %xmm8
3338 mulss 12 * SIZE(BO), %xmm8
3343 mulss 20 * SIZE(BO), %xmm10
3345 movss 48 * SIZE(BO), %xmm11
3348 mulss 24 * SIZE(BO), %xmm10
3351 mulss 28 * SIZE(BO), %xmm10
3356 addq $32 * SIZE, BO
3377 movss 4 * SIZE(BO), %xmm9
3380 addq $4 * SIZE, BO