//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __udivsi3 (32-bit unsigned integer divide)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text

DEFINE_CODE_STATE

	.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the quotient of the (unsigned) division.
//
// In:      r0 = dividend (numerator), r1 = divisor (denominator)
// Out:     r0 = quotient
// Scratch: r2, r3, ip and the condition flags are overwritten on the
//          software path; the hardware-divide path only touches the flags.
//
// Special cases handled before the general algorithm:
//   divisor == 0       -> r0 := 0, then __aeabi_idiv0 is invoked when
//                         __ARM_EABI__ is defined, otherwise 0 is returned.
//   divisor == 1       -> the dividend is returned unchanged.
//   dividend < divisor -> 0 is returned.
//
// NOTE(review): IT/ITT, WIDE, JMP, JMPc, LOCAL_LABEL, USE_THUMB_1 and
// USE_THUMB_2 are not defined in this file; presumably they come from
// ../assembly.h.

DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide is available: only the zero divisor needs special care.
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r0, r0, r1
	bx	lr

LOCAL_LABEL(divby0):
	mov	r0, #0
#  ifdef __ARM_EABI__
	b	__aeabi_idiv0
#  else
	JMP(lr)
#  endif

#else // ! __ARM_ARCH_EXT_IDIV__
	// Flags from this compare serve two tests:
	// carry clear (r1 < 1, i.e. r1 == 0) and equal (r1 == 1).
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
#if defined(USE_THUMB_1)
	// Taken when the divisor is not 1 (despite the label name).
	bne	LOCAL_LABEL(num_neq_denom)
	JMP(lr)
LOCAL_LABEL(num_neq_denom):
#else
	// Divisor is 1: r0 already holds the quotient.
	IT(eq)
	JMPc(lr, eq)
#endif
	cmp	r0, r1
#if defined(USE_THUMB_1)
	bhs	LOCAL_LABEL(num_ge_denom)
	movs	r0, #0
	JMP(lr)
LOCAL_LABEL(num_ge_denom):
#else
	// Dividend is smaller than the divisor: the quotient is 0.
	ITT(cc)
	movcc	r0, #0
	JMPc(lr, cc)
#endif

	// Implement division using binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator.
	//
	// The code before JMP computes the correct shift I, so that
	// r0 and (r1 << I) have the highest bit set in the same position.
	// At the time of JMP, ip := .Ldiv0block - 12 * I.
	// This depends on the fixed instruction size of block.
	// For ARM mode, this is 12 Bytes, for Thumb-2 14 Bytes (see the
	// lsl #1 + lsl #2 + lsl #3 sum below) and for Thumb-1 10 Bytes
	// (BLOCK_SIZE below).
	//
	// block(shift) implements the test-and-update-quotient core.
	// It assumes (r1 << shift) can be computed without overflow and
	// that r0 < 2 * (r1 << shift). The quotient is stored in r3.

#  if defined(__ARM_FEATURE_CLZ)
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip
#    if defined(USE_THUMB_2)
	// "+ 1" sets bit 0 of the target so that bx stays in Thumb state.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	// ip -= 4 * I + 8 * I (plus 2 * I above for Thumb-2).
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0
	bx	ip
#  else // No CLZ Feature
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
#    if defined(USE_THUMB_1)
#      define BLOCK_SIZE 10
#    else
#      define BLOCK_SIZE 12
#    endif

	// Without CLZ the shift I is found by a binary search over 16, 8, 4,
	// 2 and 1: r2 is a working copy of the numerator that is shifted right
	// whenever the shifted value is still >= r1, and the jump target is
	// moved back by the matching number of blocks.
	mov	r2, r0
#    if defined(USE_THUMB_1)
	// Thumb-1 variant: the jump target is built in r0 while the numerator
	// is parked in ip; both are moved back into place before the jump.
	mov	ip, r0
	adr	r0, LOCAL_LABEL(div0block)
	adds	r0, #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	lsrs	r3, r2, #16
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_16)
	movs	r2, r3
	subs	r0, r0, #(16 * BLOCK_SIZE)
LOCAL_LABEL(skip_16):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(16 * BLOCK_SIZE)
#    endif

	lsrs	r3, r2, #8
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_8)
	movs	r2, r3
	subs	r0, r0, #(8 * BLOCK_SIZE)
LOCAL_LABEL(skip_8):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(8 * BLOCK_SIZE)
#    endif

	lsrs	r3, r2, #4
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_4)
	movs	r2, r3
	subs	r0, r0, #(4 * BLOCK_SIZE)
LOCAL_LABEL(skip_4):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(4 * BLOCK_SIZE)
#    endif

	lsrs	r3, r2, #2
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_2)
	movs	r2, r3
	subs	r0, r0, #(2 * BLOCK_SIZE)
LOCAL_LABEL(skip_2):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(2 * BLOCK_SIZE)
#    endif

	// Last block, no need to update r2 or r3.
#    if defined(USE_THUMB_1)
	lsrs	r3, r2, #1
	cmp	r3, r1
	blo	LOCAL_LABEL(skip_1)
	subs	r0, r0, #(1 * BLOCK_SIZE)
LOCAL_LABEL(skip_1):
	// Move the jump target to r2, restore the numerator to r0 and clear
	// the quotient accumulator before entering the block table.
	movs	r2, r0
	mov	r0, ip
	movs	r3, #0
	JMP(r2)

#    else
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * BLOCK_SIZE)

	movs	r3, #0

	JMP(ip)
#    endif
#  endif // __ARM_FEATURE_CLZ


// IMM stands in for a literal '#' inside block(): within a function-like
// macro the C preprocessor would treat '#' as the stringizing operator.
#define	IMM	#
	// due to the range limit of branch in Thumb1, we have to place the
	// block closer
LOCAL_LABEL(divby0):
	movs	r0, #0
#      if defined(__ARM_EABI__)
	push	{r7, lr}
	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
	pop	{r7, pc}
#      else
	JMP(lr)
#      endif


#if defined(USE_THUMB_1)
#define block(shift)                                                           \
	lsls r2, r1, IMM shift;                                                \
	cmp r0, r2;                                                            \
	blo LOCAL_LABEL(block_skip_##shift);                                   \
	subs r0, r0, r2;                                                       \
	LOCAL_LABEL(block_skip_##shift) :;                                     \
	adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.

	// TODO: if current location counter is not word aligned, we don't
	// need the .p2align and nop
	// Label div0block must be word-aligned. First align block 31
	.p2align 2
	nop // Padding to align div0block as 31 blocks = 310 bytes

#else
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift
#endif

	// Unrolled long division, one block per quotient bit from bit 31 down
	// to bit 0. The computed jump above enters this table at block(I), so
	// the blocks must stay contiguous and of fixed size.
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	// r3 holds the accumulated quotient; return it in r0.
	mov	r0, r3
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

END_COMPILERRT_FUNCTION(__udivsi3)

NO_EXEC_STACK_DIRECTIVE
