10b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 20b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information. 30b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric#include "../assembly.h" 60b57cec5SDimitry Andric 70b57cec5SDimitry Andric// du_int __udivdi3(du_int a, du_int b); 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric// result = a / b. 100b57cec5SDimitry Andric// both inputs and the output are 64-bit unsigned integers. 110b57cec5SDimitry Andric// This will do whatever the underlying hardware is set to do on division by zero. 120b57cec5SDimitry Andric// No other exceptions are generated, as the divide cannot overflow. 130b57cec5SDimitry Andric// 140b57cec5SDimitry Andric// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 150b57cec5SDimitry Andric// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 160b57cec5SDimitry Andric// currently possible via simulation of integer divides on the x87 unit. 170b57cec5SDimitry Andric// 180b57cec5SDimitry Andric// Stephen Canon, December 2008 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric#ifdef __i386__ 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric.text 230b57cec5SDimitry Andric.balign 4 240b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivdi3) 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric pushl %ebx 270b57cec5SDimitry Andric movl 20(%esp), %ebx // Find the index i of the leading bit in b. 280b57cec5SDimitry Andric bsrl %ebx, %ecx // If the high word of b is zero, jump to 290b57cec5SDimitry Andric jz 9f // the code to handle that special case [9]. 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric // High word of b is known to be non-zero on this branch 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric shrl %cl, %eax // Practically, this means that bhi is given by: 360b57cec5SDimitry Andric shrl %eax // 370b57cec5SDimitry Andric notl %ecx // bhi = (high word of b) << (31 - i) | 380b57cec5SDimitry Andric shll %cl, %ebx // (low word of b) >> (1 + i) 390b57cec5SDimitry Andric orl %eax, %ebx // 400b57cec5SDimitry Andric movl 12(%esp), %edx // Load the high and low words of a, and jump 410b57cec5SDimitry Andric movl 8(%esp), %eax // to [1] if the high word is larger than bhi 420b57cec5SDimitry Andric cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 430b57cec5SDimitry Andric jae 1f 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric // High word of a is greater than or equal to (b >> (1 + i)) on this branch 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric pushl %edi 500b57cec5SDimitry Andric notl %ecx 510b57cec5SDimitry Andric shrl %eax 520b57cec5SDimitry Andric shrl %cl, %eax // q = qs >> (1 + i) 530b57cec5SDimitry Andric movl %eax, %edi 540b57cec5SDimitry Andric mull 20(%esp) // q*blo 550b57cec5SDimitry Andric movl 12(%esp), %ebx 560b57cec5SDimitry Andric movl 16(%esp), %ecx // ECX:EBX = a 570b57cec5SDimitry Andric subl %eax, %ebx 580b57cec5SDimitry Andric sbbl %edx, %ecx // ECX:EBX = a - q*blo 590b57cec5SDimitry Andric movl 24(%esp), %eax 600b57cec5SDimitry Andric imull %edi, %eax // q*bhi 610b57cec5SDimitry Andric subl %eax, %ecx // ECX:EBX = a - q*b 620b57cec5SDimitry Andric sbbl $0, %edi // decrement q if remainder is negative 630b57cec5SDimitry Andric xorl %edx, %edx 640b57cec5SDimitry Andric movl %edi, %eax 650b57cec5SDimitry Andric popl %edi 660b57cec5SDimitry Andric popl %ebx 670b57cec5SDimitry Andric retl 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric1: // High word of a is greater than or equal to (b >> (1 + i)) on this branch 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric subl %ebx, %edx // subtract bhi from ahi so that divide will not 730b57cec5SDimitry Andric divl %ebx // overflow, and find q and r such that 740b57cec5SDimitry Andric // 750b57cec5SDimitry Andric // ahi:alo = (1:q)*bhi + r 760b57cec5SDimitry Andric // 770b57cec5SDimitry Andric // Note that q is a number in (31-i).(1+i) 780b57cec5SDimitry Andric // fix point. 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric pushl %edi 810b57cec5SDimitry Andric notl %ecx 820b57cec5SDimitry Andric shrl %eax 830b57cec5SDimitry Andric orl $0x80000000, %eax 840b57cec5SDimitry Andric shrl %cl, %eax // q = (1:qs) >> (1 + i) 850b57cec5SDimitry Andric movl %eax, %edi 860b57cec5SDimitry Andric mull 20(%esp) // q*blo 870b57cec5SDimitry Andric movl 12(%esp), %ebx 880b57cec5SDimitry Andric movl 16(%esp), %ecx // ECX:EBX = a 890b57cec5SDimitry Andric subl %eax, %ebx 900b57cec5SDimitry Andric sbbl %edx, %ecx // ECX:EBX = a - q*blo 910b57cec5SDimitry Andric movl 24(%esp), %eax 920b57cec5SDimitry Andric imull %edi, %eax // q*bhi 930b57cec5SDimitry Andric subl %eax, %ecx // ECX:EBX = a - q*b 940b57cec5SDimitry Andric sbbl $0, %edi // decrement q if remainder is negative 950b57cec5SDimitry Andric xorl %edx, %edx 960b57cec5SDimitry Andric movl %edi, %eax 970b57cec5SDimitry Andric popl %edi 980b57cec5SDimitry Andric popl %ebx 990b57cec5SDimitry Andric retl 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric9: // High word of b is zero on this branch 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric movl 12(%esp), %eax // Find qhi and rhi such that 1050b57cec5SDimitry Andric movl 16(%esp), %ecx // 1060b57cec5SDimitry Andric xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 1070b57cec5SDimitry Andric divl %ecx // 1080b57cec5SDimitry Andric movl %eax, %ebx // 1090b57cec5SDimitry Andric movl 8(%esp), %eax // Find qlo such that 1100b57cec5SDimitry Andric divl %ecx // 1110b57cec5SDimitry Andric movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 1120b57cec5SDimitry Andric popl %ebx // 1130b57cec5SDimitry Andric retl // and return qhi:qlo 1140b57cec5SDimitry AndricEND_COMPILERRT_FUNCTION(__udivdi3) 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric#endif // __i386__ 1170b57cec5SDimitry Andric 1180b57cec5SDimitry AndricNO_EXEC_STACK_DIRECTIVE 1190b57cec5SDimitry Andric 120