1; PROLOGUE(mpn_lshift1)
2
3;  Copyright 2008 Jason Moxham
4;
5;  Windows Conversion Copyright 2008 Brian Gladman
6;
7;  This file is part of the MPIR Library.
8;
9;  The MPIR Library is free software; you can redistribute it and/or modify
10;  it under the terms of the GNU Lesser General Public License as published
11;  by the Free Software Foundation; either version 2.1 of the License, or (at
12;  your option) any later version.
13;  The MPIR Library is distributed in the hope that it will be useful, but
14;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16;  License for more details.
17;  You should have received a copy of the GNU Lesser General Public License
18;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
19;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20;  Boston, MA 02110-1301, USA.
21;
22;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)
23;  rax                      rdi     rsi        rdx
24;  rax                      rcx     rdx         r8
25
26%include "yasm_mac.inc"
27
28    CPU  Athlon64
29    BITS 64
30
;  mpn_lshift1 -- shift a vector of 64-bit limbs left by one bit.
;
;  Register use on entry (Win64 row of the prototype comment above):
;      rcx = destination limb pointer
;      rdx = source limb pointer
;      r8  = number of limbs (n)
;  Result:
;      rax = bit shifted out of the last limb processed (0 or 1)
;  Scratch: rax, rcx, rdx, r8-r11, flags, and the qword at [rsp+0x18].
;
;  Method: "adc r, r" computes 2*r + CF, so the carry flag transports the
;  top bit of each limb into the bottom bit of the next one.  CF therefore
;  has to stay intact from the first adc to the final sbb; every
;  instruction placed between two adc's (mov, lea, dec, jcc) leaves CF
;  alone.  Do not insert anything that writes CF inside the loop or tail.

    LEAF_PROC mpn_lshift1
    mov     rax, r8
    and     r8, 7
    inc     r8                      ; r8 = (n mod 8) + 1; the +1 bias lets
                                    ; the tail test with dec/jz up front
    mov     [rsp+0x18], r8          ; park tail count; all scratch regs are
                                    ; needed below.  Presumably r8's home
                                    ; slot in the caller's shadow space,
                                    ; assuming LEAF_PROC leaves rsp as is
    shr     rax, 3                  ; rax = number of full 8-limb blocks
    test    rax, rax                ; CF := 0 (shr left a shifted-out bit
                                    ; in it); ZF := no full blocks
    jz      .tail

;   Main loop: eight limbs per pass, handled as two groups of four
;   (load four, double-with-carry four, store four).
    xalign  16
.block:
%assign LSH1_OFF 0
%rep 2
    mov     r8,  [rdx+LSH1_OFF]
    mov     r9,  [rdx+LSH1_OFF+8]
    mov     r10, [rdx+LSH1_OFF+16]
    mov     r11, [rdx+LSH1_OFF+24]
    adc     r8,  r8
    adc     r9,  r9
    adc     r10, r10
    adc     r11, r11
    mov     [rcx+LSH1_OFF],    r8
    mov     [rcx+LSH1_OFF+8],  r9
    mov     [rcx+LSH1_OFF+16], r10
    mov     [rcx+LSH1_OFF+24], r11
%assign LSH1_OFF LSH1_OFF+32
%endrep
    lea     rcx, [rcx+64]           ; lea, not add: pointer bump keeps CF
    dec     rax                     ; dec sets ZF but preserves CF
    lea     rdx, [rdx+64]
    jnz     .block

;   Tail: up to seven remaining limbs, one at a time.  rax starts at
;   (n mod 8) + 1, so each dec/jz pair exits once the leftovers are done.
;   Could still have cache-bank conflicts in this tail part
.tail:
    mov     rax, [rsp+0x18]
%assign LSH1_OFF 0
%rep 7
    dec     rax
    jz      .done
    mov     r8, [rdx+LSH1_OFF]
    adc     r8, r8
    mov     [rcx+LSH1_OFF], r8
%assign LSH1_OFF LSH1_OFF+8
%endrep

.done:
    sbb     rax, rax                ; rax = -CF  (0 or all ones)
    neg     rax                     ; rax =  CF  (0 or 1)
    ret
109
110	end
111