1dnl  mpn_rshift2
2
3dnl  Copyright 2009 Jason Moxham
4
5dnl  This file is part of the MPIR Library.
6
7dnl  The MPIR Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The MPIR Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
19dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20dnl  Boston, MA 02110-1301, USA.
21
22include(`../config.m4')
23
24C	ret mpn_rshift2(mp_ptr,mp_ptr,mp_size_t)
25C	rax                 rdi,   rsi,      rdx
26
27ASM_START()
C mpn_rshift2: shift a multi-limb number right by 2 bits.
C
C Inputs, per the register comment above:
C   rdi = destination limb pointer, only written through
C   rsi = source limb pointer, only read through
C   rdx = limb count n; limbs are 8 bytes, every access uses scale 8
C Output:
C   rax = the two bits shifted out of the lowest-addressed limb, in bits
C         63..62, all other bits zero.  With n = 0 no memory is touched and
C         0 is returned.
C Clobbers: rcx, rdx, rsi, rdi, r8-r11 and the flags.  The stack is not used.
C
C Method: the 2-bit shift is done as two 1-bit rotate-through-carry passes
C over each group of limbs, working from the highest-addressed limb down.
C Between groups the carry of pass 1 is parked in rax and the carry of pass 2
C in rdx, each as 0 or all-ones produced by sbb reg,reg.  An add reg,reg on
C such a value puts it back into CF.  Both start at 0, so zeros are shifted in
C at the top.
C
C Within each group every limb is loaded before any limb of that group is
C stored.
C NOTE(review): presumably this permits in-place use and dst below src, as
C with the generic right shift - confirm against the callers.
28PROLOGUE(mpn_rshift2)
C rcx = n.  Both pointers are biased by 24 bytes, so after the sub below
C (base,rcx,8) addresses the highest remaining limb, index rcx+3.
29mov %rdx,%rcx
30lea 24(%rsi),%rsi
31lea 24(%rdi),%rdi
C rax = saved carry of pass 1, rdx = saved carry of pass 2; both start clear.
32xor %eax,%eax
33xor %edx,%edx
C rcx = n-4.  A borrow means fewer than 4 limbs, so go straight to the tail.
34sub $4,%rcx
35jc skiplp
36ALIGN(16)
C Main loop: 4 limbs per iteration.  On entry rcx = limbs remaining - 4.
37lp:
C r8..r11 = the four highest remaining limbs, highest first.
38	mov (%rsi,%rcx,8),%r8
39	mov -8(%rsi,%rcx,8),%r9
40	mov -16(%rsi,%rcx,8),%r10
41	mov -24(%rsi,%rcx,8),%r11
C Pass 1: CF = saved carry, rotate the group right one bit, save CF in rax.
42	add %rax,%rax
43	rcr $1,%r8
44	rcr $1,%r9
45	rcr $1,%r10
46	rcr $1,%r11
47	sbb %rax,%rax
C Pass 2: same again with the carry kept in rdx.
48	add %rdx,%rdx
49	rcr $1,%r8
50	rcr $1,%r9
51	rcr $1,%r10
52	rcr $1,%r11
C The stores are interleaved with the flag bookkeeping.  mov leaves the flags
C alone, so CF from the last rcr is still intact at the sbb.
53	mov %r11,-24(%rdi,%rcx,8)
54	sbb %rdx,%rdx
55	mov %r8,(%rdi,%rcx,8)
C Step down 4 limbs.  The two stores below run with the new rcx, so their
C +24 and +16 displacements land on the old -8 and -16 slots.
56	sub $4,%rcx
57	mov %r9,24(%rdi,%rcx,8)
58	mov %r10,16(%rdi,%rcx,8)
C CF here is the borrow of the sub above: loop while 4 or more limbs remain.
59	jnc lp
C Tail dispatch.  rcx is -1, -2, -3 or -4 for 3, 2, 1 or 0 limbs left.
C cmp computes rcx+2: above zero only for -1, equal for -2.  For the last
C two the low byte of rcx+2 is 0xFF for -3 and 0xFE for -4, so the parity
C flag is set only for -3, which is what jp tests.
60skiplp:
61cmp $-2,%rcx
62ja case3
63je case2
64jp case1
C No limbs left.  rax + 2*rdx is minus (c1 + 2*c2) with c1, c2 the carries
C out of pass 1 and pass 2, i.e. original bits 0 and 1 of the lowest limb.
C neg makes it positive and shl 62 moves c1 to bit 62 and c2 to bit 63.
65case0:
66	lea (%rax,%rdx,2),%rax
67	neg %rax
68	shl $62,%rax
69	ret
70ALIGN(16)
C Three limbs left: rcx = -1, so the loads fetch limbs 2, 1 and 0.
C Same two passes as the main loop, then build the return value as in case0.
71case3:
72	mov (%rsi,%rcx,8),%r8
73	mov -8(%rsi,%rcx,8),%r9
74	mov -16(%rsi,%rcx,8),%r10
75	add %rax,%rax
76	rcr $1,%r8
77	rcr $1,%r9
78	rcr $1,%r10
79	sbb %rax,%rax
80	add %rdx,%rdx
81	rcr $1,%r8
82	rcr $1,%r9
83	rcr $1,%r10
84	sbb %rdx,%rdx
85	mov %r8,(%rdi,%rcx,8)
86	mov %r9,-8(%rdi,%rcx,8)
87	mov %r10,-16(%rdi,%rcx,8)
88	lea (%rax,%rdx,2),%rax
89	neg %rax
90	shl $62,%rax
91	ret
92ALIGN(16)
C Two limbs left: rcx = -2, so the loads fetch limbs 1 and 0.
93case2:
94	mov (%rsi,%rcx,8),%r8
95	mov -8(%rsi,%rcx,8),%r9
96	add %rax,%rax
97	rcr $1,%r8
98	rcr $1,%r9
99	sbb %rax,%rax
100	add %rdx,%rdx
101	rcr $1,%r8
102	rcr $1,%r9
103	sbb %rdx,%rdx
104	mov %r8,(%rdi,%rcx,8)
105	mov %r9,-8(%rdi,%rcx,8)
106	lea (%rax,%rdx,2),%rax
107	neg %rax
108	shl $62,%rax
109	ret
110ALIGN(16)
C One limb left: rcx = -3, so the load fetches limb 0.
111case1:
112	mov (%rsi,%rcx,8),%r8
113	add %rax,%rax
114	rcr $1,%r8
115	sbb %rax,%rax
116	add %rdx,%rdx
117	rcr $1,%r8
118	sbb %rdx,%rdx
119	mov %r8,(%rdi,%rcx,8)
120	lea (%rax,%rdx,2),%rax
121	neg %rax
122	shl $62,%rax
123	ret
124EPILOGUE()
125