/* $OpenBSD: __reml.S,v 1.1 2007/11/25 18:25:34 deraadt Exp $ */
/* $NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $ */

/*
 * Copyright (c) 1994, 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Chris G. Demetriou
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Division and remainder.
 *
 * The use of m4 is modeled after the sparc code, but the algorithm is
 * simple binary long division.
 *
 * Note that the loops could probably benefit from unrolling.
 */

/*
 * Parameters this instance was generated with:
 *	function name	__reml
 *	operation	rem	(t10 % t11 -> t12)
 *	signed		true
 *	total bits	32
 */

/*
 * __reml: signed 32-bit remainder.
 *
 * In:	t10 = dividend, t11 = divisor (only the low 32 bits are used)
 *	t9  = return address
 * Out:	t12 = t10 % t11; the sign of the result follows the dividend.
 *	On division by zero PAL_gentrap is raised with code -2 and, if
 *	execution resumes, zero is returned.
 *
 * Register roles inside the routine:
 *	t0	mask of the quotient bit currently being produced
 *	t1	bit counter / shift amount
 *	t2	comparison result
 *	t3	scratch
 *	t4	low bit set when the result must be negated
 *	t10	dividend, reduced in place to the remainder
 *	t11	divisor, shifted to line up with the dividend
 *	t12	quotient bits while dividing, then the returned remainder
 *
 * Every register used other than t12 is saved on entry and restored on
 * exit.  Frame layout (64 bytes):
 *	 0(sp) t0	 8(sp) t1	16(sp) t2	24(sp) t3
 *	32(sp) t4	40(sp) t10	48(sp) t11
 *	56(sp) a0	(written only on the division-by-zero path)
 */

#include <machine/asm.h>

LEAF(__reml, 0)					/* XXX */
	lda	sp, -64(sp)
	stq	t0, 0(sp)
	stq	t1, 8(sp)
	stq	t2, 16(sp)
	stq	t3, 24(sp)
	stq	t4, 32(sp)
	stq	t10, 40(sp)
	stq	t11, 48(sp)
	mov	zero, t12			/* Initialize result to zero */

	/* Compute sign of result.  If either is negative, this is easy. */
	or	t10, t11, t4			/* not the sign, but... */
	srl	t4, 32 - 1, t4			/* rather, or of high bits */
	blbc	t4, Ldoit			/* neither negative? do it! */
						/* (low bit of t4 is clear: no negation) */

	mov	t10, t4				/* sign follows t10. */

	srl	t4, 32 - 1, t4			/* make negation the low bit. */

	srl	t10, 32 - 1, t1			/* is t10 negative? */
	blbc	t1, LnegB			/* no. */
	/* t10 is negative; flip it. */

	/* top 32 bits may be random junk */
	zap	t10, 0xf0, t10

	subq	zero, t10, t10
	srl	t11, 32 - 1, t1			/* is t11 negative? */
	blbc	t1, Ldoit			/* no. */
LnegB:
	/* t11 is definitely negative, no matter how we got here. */

	/* top 32 bits may be random junk */
	zap	t11, 0xf0, t11

	subq	zero, t11, t11
Ldoit:

	/*
	 * Clear the top 32 bits of each operand, as they may hold
	 * sign extension (if negated above), or random junk.
	 */
	zap	t10, 0xf0, t10
	zap	t11, 0xf0, t11

	/* kill the special cases. */
	beq	t11, Ldotrap			/* division by zero! */

	cmpult	t10, t11, t2			/* t10 < t11? */
	/* t12 is already zero, from above.  t10 is untouched. */
	bne	t2, Lret_result

	cmpeq	t10, t11, t2			/* t10 == t11? */
	cmovne	t2, 1, t12
	cmovne	t2, zero, t10
	bne	t2, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
LBbits:
	ldiq	t3, 1				/* t1 = 0; t0 = 1<<32-1 */
	mov	zero, t1
	sll	t3, 32-1, t0
LBloop:
	and	t11, t0, t2			/* if bit in t11 is set, done. */
	bne	t2, LAbits
	addq	t1, 1, t1			/* increment t1, bit */
	srl	t0, 1, t0
	cmplt	t1, 32-1, t2			/* if t1 leaves one bit, done. */
	bne	t2, LBloop

	/*
	 * Take one off t1 for every leading zero bit of the dividend, so
	 * that t1 ends up as the distance the divisor has to be shifted
	 * left to line up with the dividend.
	 */
LAbits:
	beq	t1, Ldodiv			/* If t1 = 0, divide now. */
	ldiq	t3, 1				/* t0 = 1<<32-1 */
	sll	t3, 32-1, t0

LAloop:
	and	t10, t0, t2			/* if bit in t10 is set, done. */
	bne	t2, Ldodiv
	subq	t1, 1, t1			/* decrement t1, bit */
	srl	t0, 1, t0
	bne	t1, LAloop			/* If t1 != 0, loop again */

Ldodiv:
	sll	t11, t1, t11			/* t11 <<= i */
	ldiq	t3, 1
	sll	t3, t1, t0			/* t0 = 1 << i */

	/*
	 * Shift-and-subtract: whenever the shifted divisor fits into what
	 * is left of the dividend, set the quotient bit and subtract.
	 */
Ldivloop:
	cmpult	t10, t11, t2			/* t2 != 0: divisor does not fit */
	or	t12, t0, t3
	cmoveq	t2, t3, t12			/* fits: set quotient bit */
	subq	t10, t11, t3
	cmoveq	t2, t3, t10			/* fits: subtract divisor */
	srl	t0, 1, t0
	srl	t11, 1, t11
	beq	t10, Lret_result		/* nothing left: done early */
	bne	t0, Ldivloop

Lret_result:
	mov	t10, t12			/* the remainder is the result */

	/* Check to see if we should negate it. */
	subqv	zero, t12, t3
	cmovlbs	t4, t3, t12

	ldq	t0, 0(sp)
	ldq	t1, 8(sp)
	ldq	t2, 16(sp)
	ldq	t3, 24(sp)
	ldq	t4, 32(sp)
	ldq	t10, 40(sp)
	ldq	t11, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

Ldotrap:
	/*
	 * a0 carries the trap code into PAL_gentrap.  Like every other
	 * register used here it has to come back unchanged, so park the
	 * caller's value in the spare frame slot around the trap.
	 */
	stq	a0, 56(sp)
	ldiq	a0, -2				/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
	ldq	a0, 56(sp)
	mov	zero, t10			/* so that zero will be returned */

	br	zero, Lret_result

END(__reml)