/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   for the SPARC processor.

   These routines are derived from the SPARC Architecture Manual, version 8,
   slightly edited to match the desired calling convention, and also to
   optimize them for our purposes.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
/* Emit an empty .note.GNU-stack section (no flags), then switch back to
   the section that was current before it.  */
.section .note.GNU-stack,"",%progbits
.previous
#endif

#ifdef L_mulsi3
/*
 * .umul -- 32-bit integer multiply built from SPARC mulscc steps.
 *
 * Input:    %o0 = multiplier, %o1 = multiplicand.
 * Output:   %o0 = low 32 bits of the product.
 * Clobbers: %o4, %o5, %y and the integer condition codes.
 *
 * When neither operand has a bit set above bit 11 the routine takes the
 * 12-step "short" path; otherwise it runs the full 32-step path.  No
 * save/restore is executed; both paths return with retl.
 *
 * NOTE(review): exactly three instructions sit between the write to %y
 * and the first mulscc on either path.  This presumably covers the
 * delayed-write window of %y described in the V8 manual -- confirm
 * before rescheduling the prologue.
 */
.text
	.align 4
	.global .umul
	.proc 4
.umul:
	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
	mov	%o0, %y		! multiplier to Y register
	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
	be	mul_shortway	! can do it the short way
	/* Delay slot: executed on both the short and the long path.  */
	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
	!
	! long multiply
	!
	mulscc	%o4, %o1, %o4	! first iteration of 33
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 32nd iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	! the upper 32 bits of product are wrong, but we do not care
	retl
	rd	%y, %o0		! delay slot: low word of the product
	!
	! short multiply
	!
mul_shortway:
	mulscc	%o4, %o1, %o4	! first iteration of 13
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4	! 12th iteration
	mulscc	%o4, %g0, %o4	! last iteration only shifts
	/* Result is assembled as (%o4 << 12) | (%y >> 20).  */
	rd	%y, %o5
	sll	%o4, 12, %o4	! left shift partial product by 12 bits
	srl	%o5, 20, %o5	! right shift partial product by 20 bits
	retl
	or	%o5, %o4, %o0	! merge for true product
#endif

#ifdef L_divsi3
/*
 * Division and remainder, from Appendix E of the SPARC Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * .udiv / .div -- 32-bit unsigned / signed integer division.
 *
 * Input:    dividend and divisor in %o0 and %o1 respectively.
 * Output:   quotient in %o0.
 * Clobbers: %o1 (negated by .div when it is negative), %o2-%o5,
 *	     %g1-%g3 and the integer condition codes.
 *
 * A zero divisor raises software trap 0x2 (ST_DIV0); if the trap
 * returns, the routine returns 0.
 *
 * m4 parameters (apparently already expanded in this file):
 *  .div	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  true		true=true => signed; true=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register use:
 *  %o2		Q
 *  %o3		R
 *  %o4		ITER
 *  %o5		V
 *  %g1		1 << (32 - N - 1), threshold for the large-dividend path
 *  %g2		count of single-bit steps on the large-dividend path
 *  %g3		negative iff the quotient must be negated (0 for .udiv)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */
	.text
	.align 4
	.global .udiv
	.proc 4
.udiv:
	b	ready_to_divide
	mov	0, %g3		! result is always positive

	.text
	.align 4
	.global .div
	.proc 4
.div:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	ready_to_divide	! no, go do the divide
	xor	%o1, %o0, %g3	! compute sign in any case
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	ready_to_divide	! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative


ready_to_divide:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5	! V = divisor; Z set if divisor is zero
	bne	1f
	mov	%o0, %o3	! delay slot: R = dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2    		! ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	got_result		! (and algorithm fails otherwise)
	clr	%o2			! delay slot: Q = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4			! delay slot: ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f
		mov	1, %g2		! delay slot: runs on every pass
		sll	%o5, 4, %o5
		b	1b
		add	%o4, 1, %o4

	! Now compute %g2.
	2:	addcc	%o5, %o5, %o5	! double V; carry means it overflowed
		bcc	not_too_big
		add	%g2, 1, %g2

		! We get here if the %o1 overflowed while shifting.
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5	! rest of %o5
		add	%o5, %g1, %o5
		b	do_single_div
		sub	%g2, 1, %g2

	not_too_big:
	3:	cmp	%o5, %o3
		blu	2b
		nop
		be	do_single_div
		nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	do_single_div:
		subcc	%g2, 1, %g2
		bl	end_regular_divide
		nop
		sub	%o3, %o5, %o3
		mov	1, %o2
		b	end_single_divloop
		nop
	single_divloop:
		sll	%o2, 1, %o2
		bl	1f		! tests the tst %o3 done in the loop-back delay slot
		srl	%o5, 1, %o5
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	end_single_divloop:
		subcc	%g2, 1, %g2
		bge	single_divloop
		tst	%o3
		b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4	! delay slot: count every 4-bit shift
	/* be tests the Z flag left by the addcc above; the sub in its
	   delay slot takes the final increment back off.  */
	be	got_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
	/*
	 * Main loop: one pass develops four quotient digits by walking a
	 * fully unrolled decision tree.  Label Ld.k is the negative-remainder
	 * arm taken at depth d, where k is 16 plus the accumulated bits
	 * noted in the comment in front of the branch.  Each leaf adds
	 * (accumulated bits * 2 +/- 1) to Q, which was shifted left by four
	 * at the top of the pass.
	 */
divloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2


L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

	9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	/* Delay slot: the condition codes from this tst feed either the
	   first bl of the next pass or the bl,a just below.  */
	tst	%o3
	/* Annulled branch: the sub in its delay slot executes only when
	   the branch is taken, that is when the remainder is negative.  */
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	sub	%o2, 1, %o2


got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub %g0, %o2, %o2	! annulled unless %g3 is negative
1:
	retl
	mov %o2, %o0
#endif

#ifdef L_modsi3
/* This implementation was taken from glibc:
 *
 * .urem / .rem -- 32-bit unsigned / signed integer remainder.
 *
 * Input:    dividend and divisor in %o0 and %o1 respectively.
 * Output:   remainder in %o0; for .rem it is negated when the dividend
 *	     was negative.
 * Clobbers: %o1 (negated by .rem when it is negative), %o2-%o5,
 *	     %g1-%g3 and the integer condition codes.
 *
 * A zero divisor raises software trap 0x2 (ST_DIV0); if the trap
 * returns, the routine returns 0.
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register use:
 *  %o2		Q (developed but not returned)
 *  %o3		R
 *  %o4		ITER
 *  %o5		V
 *  %g1		1 << (32 - N - 1), threshold for the large-dividend path
 *  %g2		count of single-bit steps on the large-dividend path
 *  %g3		copy of the original dividend for .rem, 0 for .urem;
 *		negative iff the remainder must be negated
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */
.text
	.align 4
	.global	.urem
	.proc 4
.urem:
	b	divide
	mov	0, %g3		! result always positive

	.align 4
	.global .rem
	.proc 4
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0	! either negative?
	bge	2f			! no, go do the divide
	mov	%o0, %g3		! sign of remainder matches %o0
	tst	%o1
	bge	1f
	tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0	! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
divide:
	orcc	%o1, %g0, %o5	! V = divisor; Z set if divisor is zero
	bne	1f
	mov	%o0, %o3	! delay slot: R = dividend

		! Divide by zero trap.  If it returns, return 0 (about as
		! wrong as possible, but that is what SunOS does...).
		ta	0x2   !ST_DIV0
		retl
		clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	got_result		! (and algorithm fails otherwise)
	clr	%o2			! delay slot: Q = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	clr	%o4			! delay slot: ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f
		mov	1, %g2		! delay slot: runs on every pass
		sll	%o5, 4, %o5
		b	1b
		add	%o4, 1, %o4

	! Now compute %g2.
	2:	addcc	%o5, %o5, %o5	! double V; carry means it overflowed
		bcc	not_too_big
		add	%g2, 1, %g2

		! We get here if the %o1 overflowed while shifting.
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5		! rest of %o5
		add	%o5, %g1, %o5
		b	do_single_div
		sub	%g2, 1, %g2

	not_too_big:
	3:	cmp	%o5, %o3
		blu	2b
		nop
		be	do_single_div
		nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	do_single_div:
		subcc	%g2, 1, %g2
		bl	end_regular_divide
		nop
		sub	%o3, %o5, %o3
		mov	1, %o2
		b	end_single_divloop
		nop
	single_divloop:
		sll	%o2, 1, %o2
		bl	1f		! tests the tst %o3 done in the loop-back delay slot
		srl	%o5, 1, %o5
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	end_single_divloop:
		subcc	%g2, 1, %g2
		bge	single_divloop
		tst	%o3
		b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	addcc	%o4, 1, %o4	! delay slot: count every 4-bit shift
	/* be tests the Z flag left by the addcc above; the sub in its
	   delay slot takes the final increment back off.  */
	be	got_result
	sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
	/*
	 * Main loop: one pass performs four non-restoring divide steps by
	 * walking a fully unrolled decision tree.  Label Ld.k is the
	 * negative-remainder arm taken at depth d, where k is 16 plus the
	 * accumulated bits noted in the comment in front of the branch.
	 * Each leaf adds (accumulated bits * 2 +/- 1) to Q, which was
	 * shifted left by four at the top of the pass.
	 */
divloop:
	sll	%o2, 4, %o2
		! depth 1, accumulated bits 0
	bl	L1.16
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2
L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

	9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	/* Delay slot: the condition codes from this tst feed either the
	   first bl of the next pass or the bl,a just below.  */
	tst	%o3
	/* Annulled branch: the add in its delay slot executes only when
	   the branch is taken, that is when the remainder is negative; it
	   adds the (non-negative) divisor back once.  */
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	add	%o3, %o1, %o3

got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	sub %g0, %o3, %o3	! annulled unless %g3 is negative
1:
	retl
	mov %o3, %o0

#endif
