1// This file is dual licensed under the MIT and the University of Illinois Open
2// Source Licenses. See LICENSE.TXT for details.
3
4#include "../assembly.h"
5
6// di_int __divdi3(di_int a, di_int b);
7
8// result = a / b.
9// both inputs and the output are 64-bit signed integers.
10// This will do whatever the underlying hardware is set to do on division by zero.
11// No other exceptions are generated, as the divide cannot overflow.
12//
13// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
15// currently possible via simulation of integer divides on the x87 unit.
16//
17// Stephen Canon, December 2008
18
19#ifdef __i386__
20
21.text
22.balign 4
23DEFINE_COMPILERRT_FUNCTION(__divdi3)
24
25/* This is currently implemented by wrapping the unsigned divide up in an absolute
26   value, then restoring the correct sign at the end of the computation.  This could
27   certainly be improved upon. */
28
29	pushl		%esi
30	movl	 20(%esp),			%edx	// high word of b
31	movl	 16(%esp),			%eax	// low word of b
32	movl		%edx,			%ecx
33	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
34	xorl		%ecx,			%eax
35	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
36	subl		%ecx,			%eax
37	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
38	movl		%edx,		 20(%esp)
39	movl		%eax,		 16(%esp)	// store abs(b) back to stack
40	movl		%ecx,			%esi	// set aside sign of b
41
42	movl	 12(%esp),			%edx	// high word of b
43	movl	  8(%esp),			%eax	// low word of b
44	movl		%edx,			%ecx
45	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
46	xorl		%ecx,			%eax
47	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
48	subl		%ecx,			%eax
49	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
50	movl		%edx,		 12(%esp)
51	movl		%eax,		  8(%esp)	// store abs(a) back to stack
52	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
53
54	pushl		%ebx
55	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
56	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
57	jz			9f						// the code to handle that special case [9].
58
59	/* High word of b is known to be non-zero on this branch */
60
61	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
62
63	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
64	shrl		%eax					//
65	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
66	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
67	orl			%eax,			%ebx	//
68	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
69	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
70	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
71	jae			1f
72
73	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
74
75	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
76
77	pushl		%edi
78	notl		%ecx
79	shrl		%eax
80	shrl		%cl,			%eax	// q = qs >> (1 + i)
81	movl		%eax,			%edi
82	mull	 24(%esp)					// q*blo
83	movl	 16(%esp),			%ebx
84	movl	 20(%esp),			%ecx	// ECX:EBX = a
85	subl		%eax,			%ebx
86	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
87	movl	 28(%esp),			%eax
88	imull		%edi,			%eax	// q*bhi
89	subl		%eax,			%ecx	// ECX:EBX = a - q*b
90	sbbl		$0,				%edi	// decrement q if remainder is negative
91	xorl		%edx,			%edx
92	movl		%edi,			%eax
93
94	addl		%esi,			%eax	// Restore correct sign to result
95	adcl		%esi,			%edx
96	xorl		%esi,			%eax
97	xorl		%esi,			%edx
98	popl		%edi					// Restore callee-save registers
99	popl		%ebx
100	popl		%esi
101	retl								// Return
102
103
1041:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
105
106	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
107	divl		%ebx					// overflow, and find q and r such that
108										//
109										//		ahi:alo = (1:q)*bhi + r
110										//
111										// Note that q is a number in (31-i).(1+i)
112										// fix point.
113
114	pushl		%edi
115	notl		%ecx
116	shrl		%eax
117	orl			$0x80000000,	%eax
118	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
119	movl		%eax,			%edi
120	mull	 24(%esp)					// q*blo
121	movl	 16(%esp),			%ebx
122	movl	 20(%esp),			%ecx	// ECX:EBX = a
123	subl		%eax,			%ebx
124	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
125	movl	 28(%esp),			%eax
126	imull		%edi,			%eax	// q*bhi
127	subl		%eax,			%ecx	// ECX:EBX = a - q*b
128	sbbl		$0,				%edi	// decrement q if remainder is negative
129	xorl		%edx,			%edx
130	movl		%edi,			%eax
131
132	addl		%esi,			%eax	// Restore correct sign to result
133	adcl		%esi,			%edx
134	xorl		%esi,			%eax
135	xorl		%esi,			%edx
136	popl		%edi					// Restore callee-save registers
137	popl		%ebx
138	popl		%esi
139	retl								// Return
140
141
1429:	/* High word of b is zero on this branch */
143
144	movl	 16(%esp),			%eax	// Find qhi and rhi such that
145	movl	 20(%esp),			%ecx	//
146	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 ≤ rhi < b
147	divl		%ecx					//
148	movl		%eax,			%ebx	//
149	movl	 12(%esp),			%eax	// Find qlo such that
150	divl		%ecx					//
151	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 ≤ rlo < b
152
153	addl		%esi,			%eax	// Restore correct sign to result
154	adcl		%esi,			%edx
155	xorl		%esi,			%eax
156	xorl		%esi,			%edx
157	popl		%ebx					// Restore callee-save registers
158	popl		%esi
159	retl								// Return
160END_COMPILERRT_FUNCTION(__divdi3)
161
162#endif // __i386__
163