; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

;  This file is part of the GNU MP Library.
;
;  The GNU MP Library is free software; you can redistribute it and/or modify
;  it under the terms of either:
;
;    * the GNU Lesser General Public License as published by the Free
;      Software Foundation; either version 3 of the License, or (at your
;      option) any later version.
;
;  or
;
;    * the GNU General Public License as published by the Free Software
;      Foundation; either version 2 of the License, or (at your option) any
;      later version.
;
;  or both in parallel, as here.
;
;  The GNU MP Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;  for more details.
;
;  You should have received copies of the GNU General Public License and the
;  GNU Lesser General Public License along with the GNU MP Library.  If not,
;  see https://www.gnu.org/licenses/.


; INPUT PARAMETERS
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5
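
; Reference semantics, as a hedged C sketch (illustrative only, not part of
; this file's build; mp_limb_t and mp_size_t are GMP's usual types):
/*
mp_limb_t
__gmpn_sub_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
	      const mp_limb_t *s2_ptr, mp_size_t size)
{
  mp_limb_t cy = 0;			// borrow in, 0 or 1
  mp_size_t i;
  for (i = 0; i < size; i++)
    {
      mp_limb_t a = s1_ptr[i], b = s2_ptr[i];
      res_ptr[i] = a - b - cy;
      cy = (a < b) || (a == b && cy);	// borrow out of this limb
    }
  return cy;				// borrow from most significant limb
}
*/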

#include "sysdep.h"

	text
	align	16
	global	C_SYMBOL_NAME(__gmpn_sub_n)
C_SYMBOL_NAME(__gmpn_sub_n):
	subu.co	 r0,r0,r0		; set cy flag (88k carry = no borrow)
	xor	 r12,s2_ptr,res_ptr
	bb1	 2,r12,L1
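; The xor/bb1 pair dispatches on address bit 2 (the bit that decides whether
; a 4-byte-aligned pointer is also 8-byte aligned): ld.d/st.d double-word
; accesses are only used on pointers that agree in that bit.  A hedged C
; sketch of the dispatch (casts illustrative):
/*
  if (((unsigned long) s2_ptr ^ (unsigned long) res_ptr) & 4)
    goto L1;	// s2_ptr/res_ptr disagree; try pairing s1_ptr (V1b or V2)
  // else fall through to V1a, pairing s2_ptr with res_ptr
*/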
; **  V1a  **
L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is aligned
/* Subtract least significant limb separately to align res_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	subu.co	 r6,r10,r8
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2

	ld	 r10,s1_ptr,0
	ld	 r12,s1_ptr,4
	ld.d	 r8,s2_ptr,0
	subu	 size,size,10
	bcnd	 lt0,size,Lfin1
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
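; The loop below is software pipelined: two limbs of each operand are
; preloaded above, and each iteration issues the loads for the next block
; between the subu.cio instructions of the current one.  The bias of 10
; subtracted from size covers the 8-limb unroll plus those 2 limbs in flight.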
	align	 8
Loop1:	subu	 size,size,8
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,16
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,20
	ld.d	 r8,s2_ptr,16
	st.d	 r6,res_ptr,8
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,24
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,28
	ld.d	 r8,s2_ptr,24
	st.d	 r6,res_ptr,16
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,32
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,36
	addu	 s1_ptr,s1_ptr,32
	ld.d	 r8,s2_ptr,32
	addu	 s2_ptr,s2_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1

Lfin1:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend1
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope1:	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1
Lend1:	subu.cio r6,r10,r8
	subu.cio r7,r12,r9
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1
/* Subtract last limb */
	ld	 r10,s1_ptr,8
	ld	 r8,s2_ptr,8
	subu.cio r6,r10,r8
	st	 r6,res_ptr,8

Lret1:	addu.ci r2,r0,r0		; get carry from most sign. limb
	jmp.n	 r1
	 xor	r2,r2,1			; flip carry into borrow for return
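; A hedged C picture of this return sequence: the 88k carry bit after a
; subtract is the complement of borrow, so the materialized bit is flipped
; to give the usual mpn return value.
;	cy = carry_flag;	// 1 = no borrow
;	return cy ^ 1;		// 1 = borrow out of the most significant limb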
L1:	xor	 r12,s1_ptr,res_ptr
	bb1	 2,r12,L2
; **  V1b  **
	bb0	 2,res_ptr,L_v1b	; branch if res_ptr is aligned
/* Subtract least significant limb separately to align res_ptr and s1_ptr */
	ld	 r10,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	ld	 r8,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	subu	 size,size,1
	subu.co	 r6,r8,r10
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1b:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2

	ld	 r10,s2_ptr,0
	ld	 r12,s2_ptr,4
	ld.d	 r8,s1_ptr,0
	subu	 size,size,10
	bcnd	 lt0,size,Lfin1b
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
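; V1b mirrors V1a with the register roles of the sources swapped: r8/r9 are
; now loaded from s1_ptr and r10/r12 from s2_ptr, so the operand order of
; the subu.cio instructions below still computes s1 - s2.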
	align	 8
Loop1b:	subu	 size,size,8
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,8
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,12
	ld.d	 r8,s1_ptr,8
	st.d	 r6,res_ptr,0
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,16
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,20
	ld.d	 r8,s1_ptr,16
	st.d	 r6,res_ptr,8
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,24
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,28
	ld.d	 r8,s1_ptr,24
	st.d	 r6,res_ptr,16
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,32
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,36
	addu	 s2_ptr,s2_ptr,32
	ld.d	 r8,s1_ptr,32
	addu	 s1_ptr,s1_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1b

Lfin1b:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend1b
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope1b:subu.cio r6,r8,r10
	ld	 r10,s2_ptr,8
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,12
	ld.d	 r8,s1_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1b
Lend1b:	subu.cio r6,r8,r10
	subu.cio r7,r9,r12
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1b
/* Subtract last limb */
	ld	 r10,s2_ptr,8
	ld	 r8,s1_ptr,8
	subu.cio r6,r8,r10
	st	 r6,res_ptr,8

Lret1b:	addu.ci r2,r0,r0		; get carry from most sign. limb
	jmp.n	 r1
	 xor	r2,r2,1			; flip carry into borrow for return

; **  V2  **
/* If we come here, s1_ptr is misaligned with respect to res_ptr, and so is
   s2_ptr.  Since there are only two alignments we care about (address bit 2
   clear or set), the alignments of s1_ptr and s2_ptr must be the same.  */
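; In bit terms: ((s1_ptr ^ res_ptr) & 4) != 0 and ((s2_ptr ^ res_ptr) & 4) != 0
; together imply ((s1_ptr ^ s2_ptr) & 4) == 0, so the two source pointers can
; be paired for ld.d after peeling at most one leading limb.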
L2:	cmp	 r12,size,1
	bb1	 eq,r12,Ljone
	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	subu.co	 r6,r10,r8
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4

L_v2:	subu	 size,size,8
	bcnd	 lt0,size,Lfin2
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
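; Both source pointers are 8-byte aligned here, so ld.d pairs their loads;
; res_ptr has the opposite word alignment, hence the single-word st stores.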
	align	 8
Loop2:	subu	 size,size,8
	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	subu.cio r8,r8,r6
	st	 r8,res_ptr,0
	subu.cio r9,r9,r7
	st	 r9,res_ptr,4
	ld.d	 r8,s1_ptr,8
	ld.d	 r6,s2_ptr,8
	subu.cio r8,r8,r6
	st	 r8,res_ptr,8
	subu.cio r9,r9,r7
	st	 r9,res_ptr,12
	ld.d	 r8,s1_ptr,16
	ld.d	 r6,s2_ptr,16
	subu.cio r8,r8,r6
	st	 r8,res_ptr,16
	subu.cio r9,r9,r7
	st	 r9,res_ptr,20
	ld.d	 r8,s1_ptr,24
	ld.d	 r6,s2_ptr,24
	subu.cio r8,r8,r6
	st	 r8,res_ptr,24
	subu.cio r9,r9,r7
	st	 r9,res_ptr,28
	addu	 s1_ptr,s1_ptr,32
	addu	 s2_ptr,s2_ptr,32
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop2

Lfin2:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend2
Loope2:	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	subu.cio r8,r8,r6
	st	 r8,res_ptr,0
	subu.cio r9,r9,r7
	st	 r9,res_ptr,4
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope2
Lend2:	bb0	 0,size,Lret2
/* Subtract last limb */
Ljone:	ld	 r10,s1_ptr,0
	ld	 r8,s2_ptr,0
	subu.cio r6,r10,r8
	st	 r6,res_ptr,0

Lret2:	addu.ci r2,r0,r0		; get carry from most sign. limb
	jmp.n	 r1
	 xor	r2,r2,1			; flip carry into borrow for return
