Lines Matching refs:BO

126 #define BO	%rsi  macro
197 vmovddup -6 * SIZE(BO, BI, 8), %xmm1 ;\
200 vmovddup -5 * SIZE(BO, BI, 8), %xmm2 ;\
202 vmovddup -4 * SIZE(BO, BI, 8), %xmm3 ;\
219 vmovddup -3 * SIZE(BO, BI, 8), %xmm1 ;\
222 vmovddup -2 * SIZE(BO, BI, 8), %xmm2 ;\
224 vmovddup -1 * SIZE(BO, BI, 8), %xmm3 ;\
241 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
244 vmovddup 1 * SIZE(BO, BI, 8), %xmm2 ;\
246 vmovddup 2 * SIZE(BO, BI, 8), %xmm3 ;\
263 vmovddup 3 * SIZE(BO, BI, 8), %xmm1 ;\
266 vmovddup 4 * SIZE(BO, BI, 8), %xmm2 ;\
268 vmovddup 5 * SIZE(BO, BI, 8), %xmm3 ;\
286 vmovddup -6 * SIZE(BO, BI, 8), %xmm1 ;\
289 vmovddup -5 * SIZE(BO, BI, 8), %xmm2 ;\
291 vmovddup -4 * SIZE(BO, BI, 8), %xmm3 ;\
310 vmovddup -6 * SIZE(BO, BI, 8), %xmm1 ;\
313 vmovddup -5 * SIZE(BO, BI, 8), %xmm2 ;\
315 vmovddup -4 * SIZE(BO, BI, 8), %xmm3 ;\
323 vmovddup -3 * SIZE(BO, BI, 8), %xmm1 ;\
326 vmovddup -2 * SIZE(BO, BI, 8), %xmm2 ;\
328 vmovddup -1 * SIZE(BO, BI, 8), %xmm3 ;\
336 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
339 vmovddup 1 * SIZE(BO, BI, 8), %xmm2 ;\
341 vmovddup 2 * SIZE(BO, BI, 8), %xmm3 ;\
349 vmovddup 3 * SIZE(BO, BI, 8), %xmm1 ;\
352 vmovddup 4 * SIZE(BO, BI, 8), %xmm2 ;\
354 vmovddup 5 * SIZE(BO, BI, 8), %xmm3 ;\
364 vmovddup -6 * SIZE(BO, BI, 8), %xmm1 ;\
367 vmovddup -5 * SIZE(BO, BI, 8), %xmm2 ;\
369 vmovddup -4 * SIZE(BO, BI, 8), %xmm3 ;\
383 vmovddup -6 * SIZE(BO, BI, 8), %xmm1 ;\
386 vmovddup -5 * SIZE(BO, BI, 8), %xmm2 ;\
388 vmovddup -4 * SIZE(BO, BI, 8), %xmm3 ;\
392 vmovddup -3 * SIZE(BO, BI, 8), %xmm1 ;\
395 vmovddup -2 * SIZE(BO, BI, 8), %xmm2 ;\
397 vmovddup -1 * SIZE(BO, BI, 8), %xmm3 ;\
401 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
404 vmovddup 1 * SIZE(BO, BI, 8), %xmm2 ;\
406 vmovddup 2 * SIZE(BO, BI, 8), %xmm3 ;\
410 vmovddup 3 * SIZE(BO, BI, 8), %xmm1 ;\
413 vmovddup 4 * SIZE(BO, BI, 8), %xmm2 ;\
415 vmovddup 5 * SIZE(BO, BI, 8), %xmm3 ;\
421 vmovddup -6 * SIZE(BO, BI, 8), %xmm1 ;\
424 vmovddup -5 * SIZE(BO, BI, 8), %xmm2 ;\
426 vmovddup -4 * SIZE(BO, BI, 8), %xmm3 ;\
432 vmovsd -6 * SIZE(BO, BI, 8), %xmm1 ;\
435 vmovsd -5 * SIZE(BO, BI, 8), %xmm2 ;\
437 vmovsd -4 * SIZE(BO, BI, 8), %xmm3 ;\
441 vmovsd -3 * SIZE(BO, BI, 8), %xmm1 ;\
444 vmovsd -2 * SIZE(BO, BI, 8), %xmm2 ;\
446 vmovsd -1 * SIZE(BO, BI, 8), %xmm3 ;\
450 vmovsd 0 * SIZE(BO, BI, 8), %xmm1 ;\
453 vmovsd 1 * SIZE(BO, BI, 8), %xmm2 ;\
455 vmovsd 2 * SIZE(BO, BI, 8), %xmm3 ;\
459 vmovsd 3 * SIZE(BO, BI, 8), %xmm1 ;\
462 vmovsd 4 * SIZE(BO, BI, 8), %xmm2 ;\
464 vmovsd 5 * SIZE(BO, BI, 8), %xmm3 ;\
470 vmovsd -6 * SIZE(BO, BI, 8), %xmm1 ;\
473 vmovsd -5 * SIZE(BO, BI, 8), %xmm2 ;\
475 vmovsd -4 * SIZE(BO, BI, 8), %xmm3 ;\
486 vmovddup -4 * SIZE(BO, BI, 8), %xmm1 ;\
489 vmovddup -3 * SIZE(BO, BI, 8), %xmm2 ;\
503 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
506 vmovddup -1 * SIZE(BO, BI, 8), %xmm2 ;\
520 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
523 vmovddup 1 * SIZE(BO, BI, 8), %xmm2 ;\
537 vmovddup 2 * SIZE(BO, BI, 8), %xmm1 ;\
540 vmovddup 3 * SIZE(BO, BI, 8), %xmm2 ;\
555 vmovddup -4 * SIZE(BO, BI, 8), %xmm1 ;\
558 vmovddup -3 * SIZE(BO, BI, 8), %xmm2 ;\
574 vmovddup -4 * SIZE(BO, BI, 8), %xmm1 ;\
577 vmovddup -3 * SIZE(BO, BI, 8), %xmm2 ;\
584 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
587 vmovddup -1 * SIZE(BO, BI, 8), %xmm2 ;\
594 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
597 vmovddup 1 * SIZE(BO, BI, 8), %xmm2 ;\
604 vmovddup 2 * SIZE(BO, BI, 8), %xmm1 ;\
607 vmovddup 3 * SIZE(BO, BI, 8), %xmm2 ;\
616 vmovddup -4 * SIZE(BO, BI, 8), %xmm1 ;\
619 vmovddup -3 * SIZE(BO, BI, 8), %xmm2 ;\
629 vmovddup -4 * SIZE(BO, BI, 8), %xmm1 ;\
632 vmovddup -3 * SIZE(BO, BI, 8), %xmm2 ;\
636 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
639 vmovddup -1 * SIZE(BO, BI, 8), %xmm2 ;\
643 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
646 vmovddup 1 * SIZE(BO, BI, 8), %xmm2 ;\
650 vmovddup 2 * SIZE(BO, BI, 8), %xmm1 ;\
653 vmovddup 3 * SIZE(BO, BI, 8), %xmm2 ;\
659 vmovddup -4 * SIZE(BO, BI, 8), %xmm1 ;\
662 vmovddup -3 * SIZE(BO, BI, 8), %xmm2 ;\
668 vmovsd -4 * SIZE(BO, BI, 8), %xmm1 ;\
671 vmovsd -3 * SIZE(BO, BI, 8), %xmm2 ;\
675 vmovsd -2 * SIZE(BO, BI, 8), %xmm1 ;\
678 vmovsd -1 * SIZE(BO, BI, 8), %xmm2 ;\
682 vmovsd 0 * SIZE(BO, BI, 8), %xmm1 ;\
685 vmovsd 1 * SIZE(BO, BI, 8), %xmm2 ;\
689 vmovsd 2 * SIZE(BO, BI, 8), %xmm1 ;\
692 vmovsd 3 * SIZE(BO, BI, 8), %xmm2 ;\
698 vmovsd -4 * SIZE(BO, BI, 8), %xmm1 ;\
701 vmovsd -3 * SIZE(BO, BI, 8), %xmm2 ;\
712 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
724 vmovddup -1 * SIZE(BO, BI, 8), %xmm1 ;\
736 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
748 vmovddup 1 * SIZE(BO, BI, 8), %xmm1 ;\
761 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
775 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
782 vmovddup -1 * SIZE(BO, BI, 8), %xmm1 ;\
789 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
796 vmovddup 1 * SIZE(BO, BI, 8), %xmm1 ;\
805 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
815 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
820 vmovddup -1 * SIZE(BO, BI, 8), %xmm1 ;\
825 vmovddup 0 * SIZE(BO, BI, 8), %xmm1 ;\
830 vmovddup 1 * SIZE(BO, BI, 8), %xmm1 ;\
837 vmovddup -2 * SIZE(BO, BI, 8), %xmm1 ;\
844 vmovsd -2 * SIZE(BO, BI, 8), %xmm1 ;\
849 vmovsd -1 * SIZE(BO, BI, 8), %xmm1 ;\
854 vmovsd 0 * SIZE(BO, BI, 8), %xmm1 ;\
859 vmovsd 1 * SIZE(BO, BI, 8), %xmm1 ;\
866 vmovsd -2 * SIZE(BO, BI, 8), %xmm1 ;\
962 leaq BUFFER1, BO // first buffer to BO
971 prefetchw 512(BO)
980 vmovups %xmm0, (BO)
981 vmovsd %xmm1, 2*SIZE(BO)
982 vmovups %xmm2, 3*SIZE(BO)
983 vmovsd %xmm3, 5*SIZE(BO)
984 vmovups %xmm4, 6*SIZE(BO)
985 vmovsd %xmm5, 8*SIZE(BO)
986 vmovups %xmm6, 9*SIZE(BO)
987 vmovsd %xmm7,11*SIZE(BO)
990 addq $12*SIZE,BO
1005 vmovups %xmm0, (BO)
1006 vmovsd %xmm1, 2*SIZE(BO)
1009 addq $3*SIZE,BO
1019 leaq BUFFER2, BO // second buffer to BO
1029 prefetchw 512(BO)
1038 vmovsd %xmm1, 0*SIZE(BO)
1039 vmovups %xmm0, 1*SIZE(BO)
1040 vmovsd %xmm3, 3*SIZE(BO)
1041 vmovups %xmm2, 4*SIZE(BO)
1042 vmovsd %xmm5, 6*SIZE(BO)
1043 vmovups %xmm4, 7*SIZE(BO)
1044 vmovsd %xmm7, 9*SIZE(BO)
1045 vmovups %xmm6,10*SIZE(BO)
1048 addq $12*SIZE,BO
1064 vmovsd %xmm0, (BO)
1065 vmovups %xmm1, 1*SIZE(BO)
1068 addq $3*SIZE,BO
1093 leaq BUFFER1, BO // first buffer to BO
1094 addq $6 * SIZE, BO
1107 leaq (BO, BI, 8), BO
1114 prefetcht0 B_PR1(BO,BI,8)
1118 prefetcht0 B_PR1+64(BO,BI,8)
1123 prefetcht0 B_PR1+32(BO,BI,8)
1129 prefetcht0 B_PR1(BO,BI,8)
1133 prefetcht0 B_PR1+64(BO,BI,8)
1138 prefetcht0 B_PR1+32(BO,BI,8)
1158 leaq (BO, BI, 8), BO
1228 leaq BUFFER1, BO // first buffer to BO
1229 addq $6 * SIZE, BO
1242 leaq (BO, BI, 8), BO
1249 prefetcht0 B_PR1(BO,BI,8)
1253 prefetcht0 B_PR1+64(BO,BI,8)
1258 prefetcht0 B_PR1+32(BO,BI,8)
1264 prefetcht0 B_PR1(BO,BI,8)
1268 prefetcht0 B_PR1+64(BO,BI,8)
1273 prefetcht0 B_PR1+32(BO,BI,8)
1293 leaq (BO, BI, 8), BO
1343 leaq BUFFER1, BO // first buffer to BO
1344 addq $6 * SIZE, BO
1358 leaq (BO, BI, 8), BO
1365 prefetcht0 B_PR1(BO,BI,8)
1369 prefetcht0 B_PR1+64(BO,BI,8)
1374 prefetcht0 B_PR1+32(BO,BI,8)
1380 prefetcht0 B_PR1(BO,BI,8)
1384 prefetcht0 B_PR1+64(BO,BI,8)
1389 prefetcht0 B_PR1+32(BO,BI,8)
1409 leaq (BO, BI, 8), BO
1448 leaq BUFFER1, BO // first buffer to BO
1449 addq $6 * SIZE, BO
1461 leaq (BO, BI, 8), BO
1468 prefetcht0 B_PR1(BO,BI,8)
1472 prefetcht0 B_PR1+64(BO,BI,8)
1477 prefetcht0 B_PR1+32(BO,BI,8)
1483 prefetcht0 B_PR1(BO,BI,8)
1487 prefetcht0 B_PR1+64(BO,BI,8)
1492 prefetcht0 B_PR1+32(BO,BI,8)
1511 leaq (BO, BI, 8), BO
1562 leaq BUFFER2, BO // second buffer to BO
1563 addq $6 * SIZE, BO
1578 leaq (BO, BI, 8), BO
1586 prefetcht0 B_PR1(BO,BI,8)
1590 prefetcht0 B_PR1+64(BO,BI,8)
1595 prefetcht0 B_PR1+32(BO,BI,8)
1601 prefetcht0 B_PR1(BO,BI,8)
1605 prefetcht0 B_PR1+64(BO,BI,8)
1610 prefetcht0 B_PR1+32(BO,BI,8)
1630 leaq (BO, BI, 8), BO
1699 leaq BUFFER2, BO // second buffer to BO
1700 addq $6 * SIZE, BO
1714 leaq (BO, BI, 8), BO
1721 prefetcht0 B_PR1(BO,BI,8)
1725 prefetcht0 B_PR1+64(BO,BI,8)
1730 prefetcht0 B_PR1+32(BO,BI,8)
1736 prefetcht0 B_PR1(BO,BI,8)
1740 prefetcht0 B_PR1+64(BO,BI,8)
1745 prefetcht0 B_PR1+32(BO,BI,8)
1765 leaq (BO, BI, 8), BO
1815 leaq BUFFER2, BO // second buffer to BO
1816 addq $6 * SIZE, BO
1829 leaq (BO, BI, 8), BO
1836 prefetcht0 B_PR1(BO,BI,8)
1840 prefetcht0 B_PR1+64(BO,BI,8)
1845 prefetcht0 B_PR1+32(BO,BI,8)
1851 prefetcht0 B_PR1(BO,BI,8)
1855 prefetcht0 B_PR1+64(BO,BI,8)
1860 prefetcht0 B_PR1+32(BO,BI,8)
1880 leaq (BO, BI, 8), BO
1923 leaq BUFFER2, BO // second buffer to BO
1924 addq $6 * SIZE, BO
1937 leaq (BO, BI, 8), BO
1944 prefetcht0 B_PR1(BO,BI,8)
1948 prefetcht0 B_PR1+64(BO,BI,8)
1953 prefetcht0 B_PR1+32(BO,BI,8)
1959 prefetcht0 B_PR1(BO,BI,8)
1963 prefetcht0 B_PR1+64(BO,BI,8)
1968 prefetcht0 B_PR1+32(BO,BI,8)
1987 leaq (BO, BI, 8), BO
2040 leaq BUFFER1, BO // first buffer to BO
2047 vmovups %xmm0, (BO)
2049 addq $2*SIZE,BO
2073 leaq BUFFER1, BO // first buffer to BO
2074 addq $4 * SIZE, BO
2087 leaq (BO, BI, 8), BO
2094 prefetcht0 B_PR1(BO,BI,8)
2100 prefetcht0 B_PR1(BO,BI,8)
2108 prefetcht0 B_PR1(BO,BI,8)
2114 prefetcht0 B_PR1(BO,BI,8)
2136 leaq (BO, BI, 8), BO
2195 leaq BUFFER1, BO // first buffer to BO
2196 addq $4 * SIZE, BO
2209 leaq (BO, BI, 8), BO
2216 prefetcht0 B_PR1(BO,BI,8)
2222 prefetcht0 B_PR1(BO,BI,8)
2230 prefetcht0 B_PR1(BO,BI,8)
2236 prefetcht0 B_PR1(BO,BI,8)
2258 leaq (BO, BI, 8), BO
2299 leaq BUFFER1, BO // first buffer to BO
2300 addq $4 * SIZE, BO
2313 leaq (BO, BI, 8), BO
2320 prefetcht0 B_PR1(BO,BI,8)
2326 prefetcht0 B_PR1(BO,BI,8)
2334 prefetcht0 B_PR1(BO,BI,8)
2340 prefetcht0 B_PR1(BO,BI,8)
2362 leaq (BO, BI, 8), BO
2397 leaq BUFFER1, BO // first buffer to BO
2398 addq $4 * SIZE, BO
2410 leaq (BO, BI, 8), BO
2417 prefetcht0 B_PR1(BO,BI,8)
2423 prefetcht0 B_PR1(BO,BI,8)
2431 prefetcht0 B_PR1(BO,BI,8)
2437 prefetcht0 B_PR1(BO,BI,8)
2458 leaq (BO, BI, 8), BO
2509 leaq BUFFER1, BO // first buffer to BO
2516 vmovsd %xmm0, (BO)
2518 addq $1*SIZE,BO
2540 leaq BUFFER1, BO // first buffer to BO
2541 addq $2 * SIZE, BO
2553 leaq (BO, BI, 8), BO
2560 prefetcht0 B_PR1(BO,BI,8)
2573 prefetcht0 B_PR1(BO,BI,8)
2599 leaq (BO, BI, 8), BO
2647 leaq BUFFER1, BO // first buffer to BO
2648 addq $2 * SIZE, BO
2661 leaq (BO, BI, 8), BO
2668 prefetcht0 B_PR1(BO,BI,8)
2681 prefetcht0 B_PR1(BO,BI,8)
2707 leaq (BO, BI, 8), BO
2742 leaq BUFFER1, BO // first buffer to BO
2743 addq $2 * SIZE, BO
2756 leaq (BO, BI, 8), BO
2763 prefetcht0 B_PR1(BO,BI,8)
2801 leaq (BO, BI, 8), BO
2834 leaq BUFFER1, BO // first buffer to BO
2835 addq $2 * SIZE, BO
2846 leaq (BO, BI, 8), BO
2853 prefetcht0 B_PR1(BO,BI,8)
2866 prefetcht0 B_PR1(BO,BI,8)
2891 leaq (BO, BI, 8), BO
3048 leaq BUFFER1, BO // first buffer to BO
3055 vmovups %xmm0, (BO)
3057 addq $2*SIZE,BO
3088 leaq BUFFER1, BO // first buffer to BO
3089 addq $4 * SIZE, BO
3092 leaq BUFFER1, BO // first buffer to BO
3093 addq $4 * SIZE, BO
3096 leaq (BO, BI, 8), BO
3127 leaq (BO, BI, 8), BO
3134 prefetcht0 B_PR1(BO,BI,8)
3140 prefetcht0 B_PR1(BO,BI,8)
3148 prefetcht0 B_PR1(BO,BI,8)
3154 prefetcht0 B_PR1(BO,BI,8)
3180 leaq (BO, BI, 8), BO
3240 leaq (BO, BI, 8), BO
3273 leaq BUFFER1, BO // first buffer to BO
3274 addq $4 * SIZE, BO
3277 leaq BUFFER1, BO // first buffer to BO
3278 addq $4 * SIZE, BO
3281 leaq (BO, BI, 8), BO
3312 leaq (BO, BI, 8), BO
3319 prefetcht0 B_PR1(BO,BI,8)
3325 prefetcht0 B_PR1(BO,BI,8)
3333 prefetcht0 B_PR1(BO,BI,8)
3339 prefetcht0 B_PR1(BO,BI,8)
3365 leaq (BO, BI, 8), BO
3413 leaq (BO, BI, 8), BO
3437 leaq BUFFER1, BO // first buffer to BO
3438 addq $4 * SIZE, BO
3441 leaq BUFFER1, BO // first buffer to BO
3442 addq $4 * SIZE, BO
3445 leaq (BO, BI, 8), BO
3477 leaq (BO, BI, 8), BO
3484 prefetcht0 B_PR1(BO,BI,8)
3490 prefetcht0 B_PR1(BO,BI,8)
3498 prefetcht0 B_PR1(BO,BI,8)
3504 prefetcht0 B_PR1(BO,BI,8)
3530 leaq (BO, BI, 8), BO
3568 leaq (BO, BI, 8), BO
3592 leaq BUFFER1, BO // first buffer to BO
3593 addq $4 * SIZE, BO
3596 leaq BUFFER1, BO // first buffer to BO
3597 addq $4 * SIZE, BO
3600 leaq (BO, BI, 8), BO
3628 leaq (BO, BI, 8), BO
3635 prefetcht0 B_PR1(BO,BI,8)
3641 prefetcht0 B_PR1(BO,BI,8)
3649 prefetcht0 B_PR1(BO,BI,8)
3655 prefetcht0 B_PR1(BO,BI,8)
3680 leaq (BO, BI, 8), BO
3718 leaq (BO, BI, 8), BO
3757 leaq BUFFER1, BO // first buffer to BO
3764 vmovsd %xmm0, (BO)
3766 addq $1*SIZE,BO
3796 leaq BUFFER1, BO // first buffer to BO
3797 addq $2 * SIZE, BO
3800 leaq BUFFER1, BO // first buffer to BO
3801 addq $2 * SIZE, BO
3803 leaq (BO, BI, 8), BO
3833 leaq (BO, BI, 8), BO
3840 prefetcht0 B_PR1(BO,BI,8)
3853 prefetcht0 B_PR1(BO,BI,8)
3883 leaq (BO, BI, 8), BO
3926 leaq (BO, BI, 8), BO
3958 leaq BUFFER1, BO // first buffer to BO
3959 addq $2 * SIZE, BO
3962 leaq BUFFER1, BO // first buffer to BO
3963 addq $2 * SIZE, BO
3965 leaq (BO, BI, 8), BO
3996 leaq (BO, BI, 8), BO
4003 prefetcht0 B_PR1(BO,BI,8)
4016 prefetcht0 B_PR1(BO,BI,8)
4046 leaq (BO, BI, 8), BO
4083 leaq (BO, BI, 8), BO
4107 leaq BUFFER1, BO // first buffer to BO
4108 addq $2 * SIZE, BO
4111 leaq BUFFER1, BO // first buffer to BO
4112 addq $2 * SIZE, BO
4114 leaq (BO, BI, 8), BO
4145 leaq (BO, BI, 8), BO
4152 prefetcht0 B_PR1(BO,BI,8)
4194 leaq (BO, BI, 8), BO
4228 leaq (BO, BI, 8), BO
4251 leaq BUFFER1, BO // first buffer to BO
4252 addq $2 * SIZE, BO
4255 leaq BUFFER1, BO // first buffer to BO
4256 addq $2 * SIZE, BO
4258 leaq (BO, BI, 8), BO
4286 leaq (BO, BI, 8), BO
4293 prefetcht0 B_PR1(BO,BI,8)
4306 prefetcht0 B_PR1(BO,BI,8)
4335 leaq (BO, BI, 8), BO
4369 leaq (BO, BI, 8), BO