1dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Contributed to the GNU project by Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C			    cycles/limb
36C P5
37C P6 model 0-8,10-12
38C P6 model 9  (Banias)
39C P6 model 13 (Dothan)
40C P4 model 0  (Willamette)
41C P4 model 1  (?)
42C P4 model 2  (Northwood)
43C P4 model 3  (Prescott)
44C P4 model 4  (Nocona)
45C Intel Atom			 3
46C AMD K6
47C AMD K7
48C AMD K8
49C AMD K10
50
dnl  Select the operation when this file is run through m4.  One of
dnl  OPERATION_add_n or OPERATION_sub_n is expected to be defined:
dnl    M4_inst         instruction applied to every limb pair
dnl                    (adcl for add, sbbl for subtract)
dnl    M4_function_n   name of the entry point without a carry argument
dnl    M4_function_nc  name of the entry point with a carry argument
dnl    M4_description  word naming the operation (add or subtract)
dnl  If neither symbol is defined, m4_error is invoked with a message.
51ifdef(`OPERATION_add_n', `
52	define(M4_inst,        adcl)
53	define(M4_function_n,  mpn_add_n)
54	define(M4_function_nc, mpn_add_nc)
55	define(M4_description, add)
56',`ifdef(`OPERATION_sub_n', `
57	define(M4_inst,        sbbl)
58	define(M4_function_n,  mpn_sub_n)
59	define(M4_function_nc, mpn_sub_nc)
60	define(M4_description, subtract)
61',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
62')')')
63
dnl  NOTE(review): the macro invoked on the next line is defined outside
dnl  this file.  It presumably lists every function name this source can
dnl  be built as (both add and subtract variants) - confirm at its definition.
64MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
65
66C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
67C                         mp_size_t size);
68C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
69C	                   mp_size_t size, mp_limb_t carry);
70C
71C Calculate src1,size M4_description src2,size, and store the result in
72C dst,size.  The return value is the carry bit from the top of the result (1
73C or 0).
74C
75C The _nc version accepts 1 or 0 for an initial carry into the low limb of
76C the calculation.  Note values other than 1 or 0 here will lead to garbage
77C results.
78
dnl  Stack argument slots, one per C parameter in prototype order
dnl  (dst, src1, src2, size, carry), spaced 4 apart.
dnl  NOTE(review): defframe is defined outside this file; the numbers are
dnl  presumably byte offsets from the stack pointer at entry - confirm.
79defframe(PARAM_CARRY,20)
80defframe(PARAM_SIZE, 16)
81defframe(PARAM_SRC2, 12)
82defframe(PARAM_SRC1, 8)
83defframe(PARAM_DST,  4)
84
85dnl  re-use parameter space
dnl  Each SAVE_x slot is written only after the argument it originally
dnl  held has been loaded into a register, so no extra stack is needed:
dnl    SAVE_RP  keeps the incoming %edi in the PARAM_SIZE slot
dnl    SAVE_VP  keeps the incoming %ebx in the PARAM_SRC1 slot
dnl    SAVE_UP  keeps the incoming %esi in the PARAM_DST slot
86define(SAVE_RP,`PARAM_SIZE')
87define(SAVE_VP,`PARAM_SRC1')
88define(SAVE_UP,`PARAM_DST')
89
dnl  Register roles used by the code below:
dnl    rp  %edi  destination pointer (loaded from PARAM_DST)
dnl    up  %esi  first source pointer (loaded from PARAM_SRC1)
dnl    vp  %ebx  second source pointer (loaded from PARAM_SRC2)
dnl    cy  %ecx  carry-in, shifted into the carry flag before the loop
dnl    r1  %ecx  limb register for the first half of the loop body
dnl    r2  %edx  limb register for the second half of the loop body
dnl  cy and r1 share %ecx: the carry-in is moved into the carry flag
dnl  before r1 is first loaded, so the two uses never overlap.
90define(`rp',  `%edi')
91define(`up',  `%esi')
92define(`vp',  `%ebx')
93define(`cy',  `%ecx')
94define(`r1',  `%ecx')
95define(`r2',  `%edx')
96
C NOTE(review): ASM_START, TEXT, ALIGN and deflit are defined outside this
C file.  Presumably these open the assembly output, select the code
C section and align the first entry point to 16 bytes - confirm there.
97ASM_START()
98	TEXT
99	ALIGN(16)
C FRAME is given the value 0.  No instruction written in this file pushes
C onto the stack or adjusts the stack pointer; registers are saved in the
C argument slots instead.
100deflit(`FRAME',0)
101
C ----------------------------------------------------------------------
C M4_function_n (dst, src1, src2, size)
C
C Applies M4_inst (add or subtract with carry) limb by limb to the
C size-limb operands at src1 and src2 and stores the result at dst.
C This entry starts with a carry of 0; the carry-in entry point joins at
C L(start) with its carry argument already in cy.
C
C In:    stack slots PARAM_DST, PARAM_SRC1, PARAM_SRC2, PARAM_SIZE
C Out:   %eax = carry flag left by the last limb operation (0 or 1)
C Uses:  %ecx and %edx are overwritten and not restored.
C        %edi, %esi and %ebx are saved in the PARAM_SIZE, PARAM_DST and
C        PARAM_SRC1 slots and restored before returning, so those three
C        slots no longer hold the arguments afterwards.
C Note:  a size of 0 takes the L(one) path and so still reads and writes
C        one limb.
C
C Flag discipline: once shr cy has put the carry-in into CF, only mov,
C lea and dec sit between the M4_inst instructions.  mov and lea leave
C the flags alone and dec leaves CF alone, so CF chains limb to limb.
C ----------------------------------------------------------------------
102PROLOGUE(M4_function_n)
103	xor	cy, cy			C carry-in is 0 for this entry point
104L(start):
C Common code for both entry points; cy (%ecx) holds the carry-in.
105	mov	PARAM_SIZE, %eax	C size
106	mov	rp, SAVE_RP		C save %edi; PARAM_SIZE already read
107	mov	PARAM_DST, rp
108	mov	up, SAVE_UP		C save %esi; PARAM_DST already read
109	mov	PARAM_SRC1, up
110	shr	%eax			C size >> 1; CF = size & 1, ZF if result 0
111	mov	vp, SAVE_VP		C save %ebx; PARAM_SRC1 already read
112	mov	PARAM_SRC2, vp		C the movs keep the flags from shr intact
113	jz	L(one)			C size == 1
114	jc	L(three)		C size % 2 == 1
115
C Even size: limb 0 is handled by the second half of the loop body.
116	shr	cy			C CF = bit 0 of carry-in
117	mov	(up), r2		C r2 = first limb of src1
118	lea	4(up), up		C advance up and vp by one limb so that
119	lea	4(vp), vp		C -4(vp) at L(entry) is the first limb
120	lea	-4(rp), rp		C L(entry) adds 8 to rp then stores at -4
121	jmp	L(entry)
122L(one):
C Half-size is 0: %eax is already zero, go straight to the tail.
123	shr	cy			C CF = bit 0 of carry-in
124	mov	(up), r1		C r1 = the only limb of src1
125	jmp	L(end)
126L(three):
C Odd size of 3 or more: fall into the top of the loop with r1 loaded.
127	shr	cy			C CF = bit 0 of carry-in
128	mov	(up), r1		C r1 = first limb of src1
129
C NOTE(review): ALIGN is defined outside this file; presumably it pads to
C a 16-byte boundary, which the L(three) path falls through - confirm.
130	ALIGN(16)
131L(oop):
C Two limbs per iteration; %eax counts the iterations still to run.
132	M4_inst	(vp), r1		C first half: r1 op= limb at vp, with CF
133	lea	8(up), up
134	mov	-4(up), r2		C load src1 limb for the second half
135	lea	8(vp), vp
136	mov	r1, (rp)		C store first-half result
137L(entry):
138	M4_inst	-4(vp), r2		C second half: r2 op= limb at vp-4, with CF
139	lea	8(rp), rp
140	dec	%eax			C sets ZF for the jnz, leaves CF unchanged
141	mov	(up), r1		C preload limb for next pass or for L(end)
142	mov	r2, -4(rp)		C store second-half result
143	jnz	L(oop)
144
145L(end):					C %eax is zero here
C Every path arrives with the last src1 limb in r1 and CF still live.
146	mov	SAVE_UP, up		C restore %esi
147	M4_inst	(vp), r1		C last limb
148	mov	SAVE_VP, vp		C restore %ebx; vp is no longer needed
149	mov	r1, (rp)		C store last result limb
150	adc	%eax, %eax		C %eax = 0 + 0 + CF, the return value
151	mov	SAVE_RP, rp		C restore %edi
152	ret
153EPILOGUE()
154
C ----------------------------------------------------------------------
C M4_function_nc (dst, src1, src2, size, carry)
C
C Same operation as the entry point above, but with a caller-supplied
C initial carry.  Loads the carry argument into cy and jumps to L(start),
C where the shared code does all remaining work and executes the ret.
C The shared code uses shr cy, which moves bit 0 of the carry argument
C into the carry flag.
C ----------------------------------------------------------------------
155PROLOGUE(M4_function_nc)
156	mov	PARAM_CARRY, cy		C carry
157	jmp	L(start)		C join the shared code; no return here
158EPILOGUE()
C NOTE(review): ASM_END is defined outside this file; presumably it closes
C what ASM_START opened - confirm at its definition.
159ASM_END()
160