1/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
2   for the sparc processor.
3
4   These routines are derived from the SPARC Architecture Manual, version 8,
5   slightly edited to match the desired calling convention, and also to
6   optimize them for our purposes.  */
7
8#ifdef L_mulsi3
/*
 * .umul -- 32x32 multiply built from mulscc multiply steps.
 *
 * In:   %o0 = multiplier (copied to %y), %o1 = multiplicand.
 * Out:  %o0 = product.  The long path returns %y only, i.e. the low
 *       32 bits; the short path rebuilds the product from %o4 and %y.
 * Uses: %o4, %o5, %y and the integer condition codes.
 *
 * If neither operand has a bit set above bit 11 (tested on %o0 | %o1),
 * 12 multiply steps plus one shift-only step are run; otherwise 32
 * multiply steps plus one shift-only step are run.
 * The instruction that directly follows a branch or retl is its delay slot.
 */
9.text
10	.align 4
11	.global .umul
12	.proc 4
13.umul:
14	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
15	mov	%o0, %y		! multiplier to Y register
16	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
17	be	mul_shortway	! can do it the short way
18	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
19	!
20	! long multiply
21	!
22	mulscc	%o4, %o1, %o4	! first iteration of 33
23	mulscc	%o4, %o1, %o4
24	mulscc	%o4, %o1, %o4
25	mulscc	%o4, %o1, %o4
26	mulscc	%o4, %o1, %o4
27	mulscc	%o4, %o1, %o4
28	mulscc	%o4, %o1, %o4
29	mulscc	%o4, %o1, %o4
30	mulscc	%o4, %o1, %o4
31	mulscc	%o4, %o1, %o4
32	mulscc	%o4, %o1, %o4
33	mulscc	%o4, %o1, %o4
34	mulscc	%o4, %o1, %o4
35	mulscc	%o4, %o1, %o4
36	mulscc	%o4, %o1, %o4
37	mulscc	%o4, %o1, %o4
38	mulscc	%o4, %o1, %o4
39	mulscc	%o4, %o1, %o4
40	mulscc	%o4, %o1, %o4
41	mulscc	%o4, %o1, %o4
42	mulscc	%o4, %o1, %o4
43	mulscc	%o4, %o1, %o4
44	mulscc	%o4, %o1, %o4
45	mulscc	%o4, %o1, %o4
46	mulscc	%o4, %o1, %o4
47	mulscc	%o4, %o1, %o4
48	mulscc	%o4, %o1, %o4
49	mulscc	%o4, %o1, %o4
50	mulscc	%o4, %o1, %o4
51	mulscc	%o4, %o1, %o4
52	mulscc	%o4, %o1, %o4
53	mulscc	%o4, %o1, %o4	! 32nd iteration
54	mulscc	%o4, %g0, %o4	! last iteration only shifts
55	! the upper 32 bits of product are wrong, but we do not care
/* The rd below is in the retl delay slot: %o0 = %y (low word). */
56	retl
57	rd	%y, %o0
58	!
59	! short multiply
60	!
61mul_shortway:
62	mulscc	%o4, %o1, %o4	! first iteration of 13
63	mulscc	%o4, %o1, %o4
64	mulscc	%o4, %o1, %o4
65	mulscc	%o4, %o1, %o4
66	mulscc	%o4, %o1, %o4
67	mulscc	%o4, %o1, %o4
68	mulscc	%o4, %o1, %o4
69	mulscc	%o4, %o1, %o4
70	mulscc	%o4, %o1, %o4
71	mulscc	%o4, %o1, %o4
72	mulscc	%o4, %o1, %o4
73	mulscc	%o4, %o1, %o4	! 12th iteration
74	mulscc	%o4, %g0, %o4	! last iteration only shifts
/* Recombine the two halves: %o0 = (%o4 << 12) | (%y >> 20). */
75	rd	%y, %o5
76	sll	%o4, 12, %o4	! left shift partial product by 12 bits
77	srl	%o5, 20, %o5	! right shift partial product by 20 bits
78	retl
79	or	%o5, %o4, %o0	! merge for true product
80#endif
81
82#ifdef L_divsi3
83/*
84 * Division and remainder, from Appendix E of the SPARC Version 8
85 * Architecture Manual, with fixes from Gordon Irlam.
86 */
87
88/*
89 * Input: dividend and divisor in %o0 and %o1 respectively.
90 *
91 * m4 parameters (the names below are the macro placeholders; this copy
 *                was generated with NAME=.div, OP=div, S=true):
92 *  NAME	name of function to generate
93 *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1
94 *  S		S=true => signed; S=false => unsigned
95 *
96 * Algorithm parameters:
97 *  N		how many bits per iteration we try to get (4)
98 *  WORDSIZE	total number of bits (32)
99 *
100 * Derived constants:
101 *  TOPBITS	number of bits in the top decade of a number
102 *
103 * Important variables:
104 *  Q		the partial quotient under development (initially 0)
105 *  R		the remainder so far, initially the dividend
106 *  ITER	number of main division loop iterations required;
107 *		equal to ceil(log2(quotient) / N).  Note that this
108 *		is the log base (2^N) of the quotient.
109 *  V		the current comparand, initially divisor*2^(ITER*N-1)
110 *
111 * Cost:
112 *  Current estimate for non-large dividend is
113 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
114 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
115 *  different path, as the upper bits of the quotient must be developed
116 *  one bit at a time.
117 */
/*
 * .udiv -- unsigned entry point: %o0 / %o1, quotient returned in %o0.
 * Clears the sign word %g3 in the branch delay slot, so the shared code at
 * ready_to_divide returns the quotient without negating it.
 */
118        .global .udiv
119        .align 4
120        .proc 4
121        .text
122.udiv:
123         b ready_to_divide
124         mov 0, %g3             ! result is always positive
125
/*
 * .div -- signed entry point: %o0 / %o1, quotient returned in %o0.
 * %g3 = %o1 ^ %o0, so its sign bit is set exactly when the operand signs
 * differ; got_result tests it to decide whether to negate the quotient.
 * Each negative operand is negated (0 - x) before the divide.  Control
 * falls through from the end of this block into ready_to_divide.
 * The instruction after each branch is its delay slot and always executes.
 */
126        .global .div
127        .align 4
128        .proc 4
129        .text
130.div:
131	! compute sign of result; if neither is negative, no problem
132	orcc	%o1, %o0, %g0	! either negative?
133	bge	ready_to_divide	! no, go do the divide
134	xor	%o1, %o0, %g3	! compute sign in any case
135	tst	%o1
136	bge	1f
137	tst	%o0
138	! %o1 is definitely negative; %o0 might also be negative
139	bge	ready_to_divide	! if %o0 not negative...
140	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1411:	! %o0 is negative, %o1 is nonnegative
142	sub	%g0, %o0, %o0	! make %o0 nonnegative
143
144
/*
 * ready_to_divide -- common divide body reached from .udiv and .div.
 * Register roles as set up in this block:
 *   %o3 = copy of the dividend %o0 (the running remainder)
 *   %o5 = copy of the divisor %o1 (the comparand that gets scaled)
 *   %o2 = quotient, cleared here
 *   %o4 = iteration counter, cleared here
 *   %g1 = 1 << 27, the threshold that selects the large-dividend path
 * A zero divisor raises trap 2 and, if the trap returns, yields 0.
 * A dividend smaller than the divisor goes straight to got_result with %o2 = 0.
 */
145ready_to_divide:
146
147	! Ready to divide.  Compute size of quotient; scale comparand.
148	orcc	%o1, %g0, %o5
149	bne	1f
150	mov	%o0, %o3
151
152	! Divide by zero trap.  If it returns, return 0 (about as
153	! wrong as possible, but that is what SunOS does...).
154	ta	0x2    		! ST_DIV0
155	retl
156	clr	%o0
157
1581:
159	cmp	%o3, %o5		! if %o1 exceeds %o0, done
160	blu	got_result		! (and algorithm fails otherwise)
161	clr	%o2
162	sethi	%hi(1 << (32 - 4 - 1)), %g1
163	cmp	%o3, %g1
164	blu	not_really_big
165	clr	%o4
166
/*
 * Large-dividend path, entered by fall-through when %o3 >= %g1 (1 << 27).
 *  - Loop at 1: shift %o5 left 4 bits per pass until it is >= %g1, adding
 *    one to %o4 per shift.  %g2 is set to 1 in the bgeu delay slot.
 *  - Loop at 2/3: double %o5 while it is below %o3, adding one to %g2 per
 *    doubling.  If a doubling carries out, %o5 is rebuilt as
 *    (%o5 >> 1) + (%g1 << 4) and the extra count is removed from %g2.
 *  - do_single_div / single_divloop: one subtract-or-add step per count in
 *    %g2, shifting one bit into %o2 each time; then control goes to
 *    end_regular_divide, which runs the 4-bit loop according to %o4.
 * The bl at single_divloop tests the condition codes set by the tst %o3
 * in the delay slot of the bge at end_single_divloop.
 */
167	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
168	! as our usual N-at-a-shot divide step will cause overflow and havoc.
169	! The number of bits in the result here is N*ITER+SC, where SC <= N.
170	! Compute ITER in an unorthodox manner: know we need to shift V into
171	! the top decade: so do not even bother to compare to R.
172	1:
173		cmp	%o5, %g1
174		bgeu	3f
175		mov	1, %g2
176		sll	%o5, 4, %o5
177		b	1b
178		add	%o4, 1, %o4
179
180	! Now compute %g2.
181	2:	addcc	%o5, %o5, %o5
182		bcc	not_too_big
183		add	%g2, 1, %g2
184
185		! We get here if the %o1 overflowed while shifting.
186		! This means that %o3 has the high-order bit set.
187		! Restore %o5 and subtract from %o3.
188		sll	%g1, 4, %g1	! high order bit
189		srl	%o5, 1, %o5	! rest of %o5
190		add	%o5, %g1, %o5
191		b	do_single_div
192		sub	%g2, 1, %g2
193
194	not_too_big:
195	3:	cmp	%o5, %o3
196		blu	2b
197		nop
198		be	do_single_div
199		nop
200	/* NB: these are commented out in the V8-SPARC manual as well */
201	/* (I do not understand this) */
202	! %o5 > %o3: went too far: back up 1 step
203	!	srl	%o5, 1, %o5
204	!	dec	%g2
205	! do single-bit divide steps
206	!
207	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
208	! first divide step without thinking.  BUT, the others are conditional,
209	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
210	! order bit set in the first step, just falling into the regular
211	! division loop will mess up the first time around.
212	! So we unroll slightly...
213	do_single_div:
214		subcc	%g2, 1, %g2
215		bl	end_regular_divide
216		nop
217		sub	%o3, %o5, %o3
218		mov	1, %o2
219		b	end_single_divloop
220		nop
221	single_divloop:
222		sll	%o2, 1, %o2
223		bl	1f
224		srl	%o5, 1, %o5
225		! %o3 >= 0
226		sub	%o3, %o5, %o3
227		b	2f
228		add	%o2, 1, %o2
229	1:	! %o3 < 0
230		add	%o3, %o5, %o3
231		sub	%o2, 1, %o2
232	2:
233	end_single_divloop:
234		subcc	%g2, 1, %g2
235		bge	single_divloop
236		tst	%o3
237		b,a	end_regular_divide
238
/*
 * not_really_big -- taken when the dividend %o3 is below 1 << 27.
 * Each pass shifts the comparand %o5 left by 4 bits and repeats while the
 * shifted %o5 is still <= %o3 (unsigned).  The addcc sits in the bleu delay
 * slot, so %o4 is incremented once per pass, including the last one; the
 * sub in the be delay slot then takes one back off.  The be itself is taken
 * only if that addcc produced zero.
 * The closing tst sets the condition codes on %o3 for the first bl in divloop.
 */
239not_really_big:
2401:
241	sll	%o5, 4, %o5
242	cmp	%o5, %o3
243	bleu	1b
244	addcc	%o4, 1, %o4
245	be	got_result
246	sub	%o4, 1, %o4
247
248	tst	%o3	! set up for initial iteration
/*
 * divloop -- one pass develops 4 quotient bits.
 * %o2 is shifted left by 4, then a 4-level branch tree is walked.  At every
 * level %o5 is halved in the branch delay slot; the halved value is
 * subtracted from %o3 (subcc) when %o3 was non-negative, or added to it
 * (addcc) when %o3 was negative, and the new condition codes pick the next
 * branch.  Each leaf adds the signed value for its path to %o2 and branches
 * to label 9, which is placed directly before end_regular_divide.
 * Label names read L<depth>.<16 + accumulated bits>, matching the
 * "depth / accumulated bits" comments below.
 * On entry the condition codes must reflect %o3 (set by a preceding tst %o3).
 */
249divloop:
250	sll	%o2, 4, %o2
251	! depth 1, accumulated bits 0
252	bl	L1.16
253	srl	%o5,1,%o5
254	! remainder is positive
255	subcc	%o3,%o5,%o3
256	! depth 2, accumulated bits 1
257	bl	L2.17
258	srl	%o5,1,%o5
259	! remainder is positive
260	subcc	%o3,%o5,%o3
261	! depth 3, accumulated bits 3
262	bl	L3.19
263	srl	%o5,1,%o5
264	! remainder is positive
265	subcc	%o3,%o5,%o3
266	! depth 4, accumulated bits 7
267	bl	L4.23
268	srl	%o5,1,%o5
269	! remainder is positive
270	subcc	%o3,%o5,%o3
271	b	9f
272	add	%o2, (7*2+1), %o2
273
274L4.23:
275	! remainder is negative
276	addcc	%o3,%o5,%o3
277	b	9f
278	add	%o2, (7*2-1), %o2
279
280
281L3.19:
282	! remainder is negative
283	addcc	%o3,%o5,%o3
284	! depth 4, accumulated bits 5
285	bl	L4.21
286	srl	%o5,1,%o5
287	! remainder is positive
288	subcc	%o3,%o5,%o3
289	b	9f
290	add	%o2, (5*2+1), %o2
291
292L4.21:
293	! remainder is negative
294	addcc	%o3,%o5,%o3
295	b	9f
296	add	%o2, (5*2-1), %o2
297
298L2.17:
299	! remainder is negative
300	addcc	%o3,%o5,%o3
301	! depth 3, accumulated bits 1
302	bl	L3.17
303	srl	%o5,1,%o5
304	! remainder is positive
305	subcc	%o3,%o5,%o3
306	! depth 4, accumulated bits 3
307	bl	L4.19
308	srl	%o5,1,%o5
309	! remainder is positive
310	subcc	%o3,%o5,%o3
311	b	9f
312	add	%o2, (3*2+1), %o2
313
314L4.19:
315	! remainder is negative
316	addcc	%o3,%o5,%o3
317	b	9f
318	add	%o2, (3*2-1), %o2
319
320L3.17:
321	! remainder is negative
322	addcc	%o3,%o5,%o3
323	! depth 4, accumulated bits 1
324	bl	L4.17
325	srl	%o5,1,%o5
326	! remainder is positive
327	subcc	%o3,%o5,%o3
328	b	9f
329	add	%o2, (1*2+1), %o2
330
331L4.17:
332	! remainder is negative
333	addcc	%o3,%o5,%o3
334	b	9f
335	add	%o2, (1*2-1), %o2
336
337L1.16:
338	! remainder is negative
339	addcc	%o3,%o5,%o3
340	! depth 2, accumulated bits -1
341	bl	L2.15
342	srl	%o5,1,%o5
343	! remainder is positive
344	subcc	%o3,%o5,%o3
345	! depth 3, accumulated bits -1
346	bl	L3.15
347	srl	%o5,1,%o5
348	! remainder is positive
349	subcc	%o3,%o5,%o3
350	! depth 4, accumulated bits -1
351	bl	L4.15
352	srl	%o5,1,%o5
353	! remainder is positive
354	subcc	%o3,%o5,%o3
355	b	9f
356	add	%o2, (-1*2+1), %o2
357
358L4.15:
359	! remainder is negative
360	addcc	%o3,%o5,%o3
361	b	9f
362	add	%o2, (-1*2-1), %o2
363
364L3.15:
365	! remainder is negative
366	addcc	%o3,%o5,%o3
367	! depth 4, accumulated bits -3
368	bl	L4.13
369	srl	%o5,1,%o5
370	! remainder is positive
371	subcc	%o3,%o5,%o3
372	b	9f
373	add	%o2, (-3*2+1), %o2
374
375L4.13:
376	! remainder is negative
377	addcc	%o3,%o5,%o3
378	b	9f
379	add	%o2, (-3*2-1), %o2
380
381L2.15:
382	! remainder is negative
383	addcc	%o3,%o5,%o3
384	! depth 3, accumulated bits -3
385	bl	L3.13
386	srl	%o5,1,%o5
387	! remainder is positive
388	subcc	%o3,%o5,%o3
389	! depth 4, accumulated bits -5
390	bl	L4.11
391	srl	%o5,1,%o5
392	! remainder is positive
393	subcc	%o3,%o5,%o3
394	b	9f
395	add	%o2, (-5*2+1), %o2
396
397L4.11:
398	! remainder is negative
399	addcc	%o3,%o5,%o3
400	b	9f
401	add	%o2, (-5*2-1), %o2
402
403L3.13:
404	! remainder is negative
405	addcc	%o3,%o5,%o3
406	! depth 4, accumulated bits -7
407	bl	L4.9
408	srl	%o5,1,%o5
409	! remainder is positive
410	subcc	%o3,%o5,%o3
411	b	9f
412	add	%o2, (-7*2+1), %o2
413
414L4.9:
415	! remainder is negative
416	addcc	%o3,%o5,%o3
417	b	9f
418	add	%o2, (-7*2-1), %o2
419
420	9:
/*
 * end_regular_divide -- loop control and result delivery for the quotient.
 * %o4 is decremented and divloop is re-entered while the result is >= 0.
 * The tst %o3 in the bge delay slot sets the condition codes that divloop
 * (or the bl,a below) tests.  If the loop ends with %o3 negative, the
 * bl,a is taken and its delay slot subtracts 1 from %o2; if it is not
 * taken the delay slot is annulled and control falls into got_result.
 * got_result: negate %o2 when %g3 is negative (again via an annulled delay
 * slot), then return with %o2 copied to %o0 in the retl delay slot.
 */
421end_regular_divide:
422	subcc	%o4, 1, %o4
423	bge	divloop
424	tst	%o3
425	bl,a	got_result
426	! non-restoring fixup here (one instruction only!)
427	sub	%o2, 1, %o2
428
429
430got_result:
431	! check to see if answer should be < 0
432	tst	%g3
433	bl,a	1f
434	sub %g0, %o2, %o2
4351:
436	retl
437	mov %o2, %o0
438#endif
439
440#ifdef L_modsi3
441/* This implementation was taken from glibc:
442 *
443 * Input: dividend and divisor in %o0 and %o1 respectively.
444 *
445 * Algorithm parameters:
446 *  N		how many bits per iteration we try to get (4)
447 *  WORDSIZE	total number of bits (32)
448 *
449 * Derived constants:
450 *  TOPBITS	number of bits in the top decade of a number
451 *
452 * Important variables:
453 *  Q		the partial quotient under development (initially 0)
454 *  R		the remainder so far, initially the dividend
455 *  ITER	number of main division loop iterations required;
456 *		equal to ceil(log2(quotient) / N).  Note that this
457 *		is the log base (2^N) of the quotient.
458 *  V		the current comparand, initially divisor*2^(ITER*N-1)
459 *
460 * Cost:
461 *  Current estimate for non-large dividend is
462 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
463 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
464 *  different path, as the upper bits of the quotient must be developed
465 *  one bit at a time.
466 */
/*
 * .urem -- unsigned entry point: %o0 % %o1, remainder returned in %o0.
 * Clears the sign word %g3 in the branch delay slot, so the shared code at
 * divide returns the remainder without negating it.
 */
467.text
468	.align 4
469	.global	.urem
470	.proc 4
471.urem:
472	b	divide
473	mov	0, %g3		! result always positive
474
/*
 * .rem -- signed entry point: %o0 % %o1, remainder returned in %o0.
 * %g3 receives the original dividend %o0, so got_result negates the
 * remainder exactly when the dividend was negative.  Each negative operand
 * is negated (0 - x) before the divide.  Label 2 sits directly before
 * divide, so every path continues there.
 * The instruction after each branch is its delay slot and always executes.
 */
475        .align 4
476	.global .rem
477	.proc 4
478.rem:
479	! compute sign of result; if neither is negative, no problem
480	orcc	%o1, %o0, %g0	! either negative?
481	bge	2f			! no, go do the divide
482	mov	%o0, %g3		! sign of remainder matches %o0
483	tst	%o1
484	bge	1f
485	tst	%o0
486	! %o1 is definitely negative; %o0 might also be negative
487	bge	2f			! if %o0 not negative...
488	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
4891:	! %o0 is negative, %o1 is nonnegative
490	sub	%g0, %o0, %o0	! make %o0 nonnegative
4912:
492
/*
 * divide -- common remainder body reached from .urem and .rem.
 * Register roles as set up in this block:
 *   %o3 = copy of the dividend %o0 (the running remainder, returned at the end)
 *   %o5 = copy of the divisor %o1 (the comparand that gets scaled)
 *   %o2 = quotient, cleared here
 *   %o4 = iteration counter, cleared here
 *   %g1 = 1 << 27, the threshold that selects the large-dividend path
 * A zero divisor raises trap 2 and, if the trap returns, yields 0.
 * A dividend smaller than the divisor goes straight to got_result, which
 * returns %o3, i.e. the dividend itself.
 */
493	! Ready to divide.  Compute size of quotient; scale comparand.
494divide:
495	orcc	%o1, %g0, %o5
496	bne	1f
497	mov	%o0, %o3
498
499		! Divide by zero trap.  If it returns, return 0 (about as
500		! wrong as possible, but that is what SunOS does...).
501		ta	0x2   !ST_DIV0
502		retl
503		clr	%o0
504
5051:
506	cmp	%o3, %o5		! if %o1 exceeds %o0, done
507	blu	got_result		! (and algorithm fails otherwise)
508	clr	%o2
509	sethi	%hi(1 << (32 - 4 - 1)), %g1
510	cmp	%o3, %g1
511	blu	not_really_big
512	clr	%o4
513
/*
 * Large-dividend path, entered by fall-through when %o3 >= %g1 (1 << 27).
 *  - Loop at 1: shift %o5 left 4 bits per pass until it is >= %g1, adding
 *    one to %o4 per shift.  %g2 is set to 1 in the bgeu delay slot.
 *  - Loop at 2/3: double %o5 while it is below %o3, adding one to %g2 per
 *    doubling.  If a doubling carries out, %o5 is rebuilt as
 *    (%o5 >> 1) + (%g1 << 4) and the extra count is removed from %g2.
 *  - do_single_div / single_divloop: one subtract-or-add step on %o3 per
 *    count in %g2 (the quotient bits are still tracked in %o2); then
 *    control goes to end_regular_divide, which runs the 4-bit loop
 *    according to %o4.
 * The bl at single_divloop tests the condition codes set by the tst %o3
 * in the delay slot of the bge at end_single_divloop.
 */
514	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
515	! as our usual N-at-a-shot divide step will cause overflow and havoc.
516	! The number of bits in the result here is N*ITER+SC, where SC <= N.
517	! Compute ITER in an unorthodox manner: know we need to shift V into
518	! the top decade: so do not even bother to compare to R.
519	1:
520		cmp	%o5, %g1
521		bgeu	3f
522		mov	1, %g2
523		sll	%o5, 4, %o5
524		b	1b
525		add	%o4, 1, %o4
526
527	! Now compute %g2.
528	2:	addcc	%o5, %o5, %o5
529		bcc	not_too_big
530		add	%g2, 1, %g2
531
532		! We get here if the %o1 overflowed while shifting.
533		! This means that %o3 has the high-order bit set.
534		! Restore %o5 and subtract from %o3.
535		sll	%g1, 4, %g1	! high order bit
536		srl	%o5, 1, %o5		! rest of %o5
537		add	%o5, %g1, %o5
538		b	do_single_div
539		sub	%g2, 1, %g2
540
541	not_too_big:
542	3:	cmp	%o5, %o3
543		blu	2b
544		nop
545		be	do_single_div
546		nop
547	/* NB: these are commented out in the V8-SPARC manual as well */
548	/* (I do not understand this) */
549	! %o5 > %o3: went too far: back up 1 step
550	!	srl	%o5, 1, %o5
551	!	dec	%g2
552	! do single-bit divide steps
553	!
554	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
555	! first divide step without thinking.  BUT, the others are conditional,
556	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
557	! order bit set in the first step, just falling into the regular
558	! division loop will mess up the first time around.
559	! So we unroll slightly...
560	do_single_div:
561		subcc	%g2, 1, %g2
562		bl	end_regular_divide
563		nop
564		sub	%o3, %o5, %o3
565		mov	1, %o2
566		b	end_single_divloop
567		nop
568	single_divloop:
569		sll	%o2, 1, %o2
570		bl	1f
571		srl	%o5, 1, %o5
572		! %o3 >= 0
573		sub	%o3, %o5, %o3
574		b	2f
575		add	%o2, 1, %o2
576	1:	! %o3 < 0
577		add	%o3, %o5, %o3
578		sub	%o2, 1, %o2
579	2:
580	end_single_divloop:
581		subcc	%g2, 1, %g2
582		bge	single_divloop
583		tst	%o3
584		b,a	end_regular_divide
585
/*
 * not_really_big -- taken when the dividend %o3 is below 1 << 27.
 * Each pass shifts the comparand %o5 left by 4 bits and repeats while the
 * shifted %o5 is still <= %o3 (unsigned).  The addcc sits in the bleu delay
 * slot, so %o4 is incremented once per pass, including the last one; the
 * sub in the be delay slot then takes one back off.  The be itself is taken
 * only if that addcc produced zero.
 * The closing tst sets the condition codes on %o3 for the first bl in divloop.
 */
586not_really_big:
5871:
588	sll	%o5, 4, %o5
589	cmp	%o5, %o3
590	bleu	1b
591	addcc	%o4, 1, %o4
592	be	got_result
593	sub	%o4, 1, %o4
594
595	tst	%o3	! set up for initial iteration
/*
 * divloop -- one pass performs 4 divide steps on the remainder %o3.
 * %o2 is shifted left by 4, then a 4-level branch tree is walked.  At every
 * level %o5 is halved in the branch delay slot; the halved value is
 * subtracted from %o3 (subcc) when %o3 was non-negative, or added to it
 * (addcc) when %o3 was negative, and the new condition codes pick the next
 * branch.  Each leaf adds the signed value for its path to %o2 and branches
 * to label 9, which is placed directly before end_regular_divide.
 * Label names read L<depth>.<16 + accumulated bits>, matching the
 * "depth / accumulated bits" comments below.
 * On entry the condition codes must reflect %o3 (set by a preceding tst %o3).
 */
596divloop:
597	sll	%o2, 4, %o2
598		! depth 1, accumulated bits 0
599	bl	L1.16
600	srl	%o5,1,%o5
601	! remainder is positive
602	subcc	%o3,%o5,%o3
603	! depth 2, accumulated bits 1
604	bl	L2.17
605	srl	%o5,1,%o5
606	! remainder is positive
607	subcc	%o3,%o5,%o3
608	! depth 3, accumulated bits 3
609	bl	L3.19
610	srl	%o5,1,%o5
611	! remainder is positive
612	subcc	%o3,%o5,%o3
613	! depth 4, accumulated bits 7
614	bl	L4.23
615	srl	%o5,1,%o5
616	! remainder is positive
617	subcc	%o3,%o5,%o3
618	b	9f
619	add	%o2, (7*2+1), %o2
620L4.23:
621	! remainder is negative
622	addcc	%o3,%o5,%o3
623	b	9f
624	add	%o2, (7*2-1), %o2
625
626L3.19:
627	! remainder is negative
628	addcc	%o3,%o5,%o3
629	! depth 4, accumulated bits 5
630	bl	L4.21
631	srl	%o5,1,%o5
632	! remainder is positive
633	subcc	%o3,%o5,%o3
634	b	9f
635	add	%o2, (5*2+1), %o2
636
637L4.21:
638	! remainder is negative
639	addcc	%o3,%o5,%o3
640	b	9f
641	add	%o2, (5*2-1), %o2
642
643L2.17:
644	! remainder is negative
645	addcc	%o3,%o5,%o3
646	! depth 3, accumulated bits 1
647	bl	L3.17
648	srl	%o5,1,%o5
649	! remainder is positive
650	subcc	%o3,%o5,%o3
651	! depth 4, accumulated bits 3
652	bl	L4.19
653	srl	%o5,1,%o5
654	! remainder is positive
655	subcc	%o3,%o5,%o3
656	b	9f
657	add	%o2, (3*2+1), %o2
658
659L4.19:
660	! remainder is negative
661	addcc	%o3,%o5,%o3
662	b	9f
663	add	%o2, (3*2-1), %o2
664
665L3.17:
666	! remainder is negative
667	addcc	%o3,%o5,%o3
668	! depth 4, accumulated bits 1
669	bl	L4.17
670	srl	%o5,1,%o5
671	! remainder is positive
672	subcc	%o3,%o5,%o3
673	b	9f
674	add	%o2, (1*2+1), %o2
675
676L4.17:
677	! remainder is negative
678	addcc	%o3,%o5,%o3
679	b	9f
680	add	%o2, (1*2-1), %o2
681
682L1.16:
683	! remainder is negative
684	addcc	%o3,%o5,%o3
685	! depth 2, accumulated bits -1
686	bl	L2.15
687	srl	%o5,1,%o5
688	! remainder is positive
689	subcc	%o3,%o5,%o3
690	! depth 3, accumulated bits -1
691	bl	L3.15
692	srl	%o5,1,%o5
693	! remainder is positive
694	subcc	%o3,%o5,%o3
695	! depth 4, accumulated bits -1
696	bl	L4.15
697	srl	%o5,1,%o5
698	! remainder is positive
699	subcc	%o3,%o5,%o3
700	b	9f
701	add	%o2, (-1*2+1), %o2
702
703L4.15:
704	! remainder is negative
705	addcc	%o3,%o5,%o3
706	b	9f
707	add	%o2, (-1*2-1), %o2
708
709L3.15:
710	! remainder is negative
711	addcc	%o3,%o5,%o3
712	! depth 4, accumulated bits -3
713	bl	L4.13
714	srl	%o5,1,%o5
715	! remainder is positive
716	subcc	%o3,%o5,%o3
717	b	9f
718	add	%o2, (-3*2+1), %o2
719
720L4.13:
721	! remainder is negative
722	addcc	%o3,%o5,%o3
723	b	9f
724	add	%o2, (-3*2-1), %o2
725
726L2.15:
727	! remainder is negative
728	addcc	%o3,%o5,%o3
729	! depth 3, accumulated bits -3
730	bl	L3.13
731	srl	%o5,1,%o5
732	! remainder is positive
733	subcc	%o3,%o5,%o3
734	! depth 4, accumulated bits -5
735	bl	L4.11
736	srl	%o5,1,%o5
737	! remainder is positive
738	subcc	%o3,%o5,%o3
739	b	9f
740	add	%o2, (-5*2+1), %o2
741
742L4.11:
743	! remainder is negative
744	addcc	%o3,%o5,%o3
745	b	9f
746	add	%o2, (-5*2-1), %o2
747
748L3.13:
749	! remainder is negative
750	addcc	%o3,%o5,%o3
751	! depth 4, accumulated bits -7
752	bl	L4.9
753	srl	%o5,1,%o5
754	! remainder is positive
755	subcc	%o3,%o5,%o3
756	b	9f
757	add	%o2, (-7*2+1), %o2
758
759L4.9:
760	! remainder is negative
761	addcc	%o3,%o5,%o3
762	b	9f
763	add	%o2, (-7*2-1), %o2
764
765	9:
/*
 * end_regular_divide -- loop control and result delivery for the remainder.
 * %o4 is decremented and divloop is re-entered while the result is >= 0.
 * The tst %o3 in the bge delay slot sets the condition codes that divloop
 * (or the bl,a below) tests.  If the loop ends with %o3 negative, the
 * bl,a is taken and its delay slot adds the divisor %o1 back into %o3; if
 * it is not taken the delay slot is annulled and control falls into
 * got_result.
 * got_result: negate %o3 when %g3 is negative (again via an annulled delay
 * slot), then return with %o3 copied to %o0 in the retl delay slot.
 */
766end_regular_divide:
767	subcc	%o4, 1, %o4
768	bge	divloop
769	tst	%o3
770	bl,a	got_result
771	! non-restoring fixup here (one instruction only!)
772	add	%o3, %o1, %o3
773
774got_result:
775	! check to see if answer should be < 0
776	tst	%g3
777	bl,a	1f
778	sub %g0, %o3, %o3
7791:
780	retl
781	mov %o3, %o0
782
783#endif
784
785