/*	$OpenBSD: impys.S,v 1.12 2011/04/16 20:52:12 deraadt Exp $	*/
/*
  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  To anyone who acknowledges that this file is provided "AS IS"
  without any express or implied warranty:
      permission to use, copy, modify, and distribute this file
  for any purpose is hereby granted without fee, provided that
  the above copyright notice and this notice appears in all
  copies, and that the name of Hewlett-Packard Company not be
  used in advertising or publicity pertaining to distribution
  of the software without specific, written prior permission.
  Hewlett-Packard Company makes no representations about the
  suitability of this software for any purpose.
*/
/* @(#)impys.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:28 */

#include <machine/asm.h>
#include <machine/frame.h>

;****************************************************************************
;
;Implement an integer multiply routine for 32-bit operands and 64-bit product
;with operand values of zero (multiplicand only) and -2**31 treated specially.
;The algorithm uses the absolute value of the multiplier, four bits at a time,
;from right to left, to generate partial product.  Execution speed is more
;important than program size in this implementation.
;
;Interface, as read from the loads and stores in the routine:
;	arg0	address of the 32-bit multiplicand	(ldws 0(arg0),op1)
;	arg1	address of the 32-bit multiplier	(ldws 0(arg1),op2)
;	arg2	address of the 64-bit product; the upper word is stored
;		at offset 0 and the lower word at offset 4
;
;%r3-%r10 are pushed at entry (store, then sp += 4) and popped in reverse
;order before returning through rp.  No other general register is written.
;
;The sign of the product is tracked separately in "sign"; both operands are
;replaced by their absolute values and the 64-bit result is negated at the
;end when bit 0 (the most significant bit) of "sign" is set.
;
;****************************************************************************
;
; Definitions - General registers
;
; NOTE: pl and op2 are two names for the same register (%r4).  Every shift
; of the pu:pl pair to the right therefore also discards the multiplier bits
; that were just consumed and moves the next ones into place.
;
gr0	.reg		%r0		; General register zero
pu	.reg		%r3		; upper part of product
pl	.reg		%r4		; lower part of product
op2	.reg		%r4		; multiplier
op1	.reg		%r5		; multiplicand
cnt	.reg		%r6		; count in multiply
brindex	.reg		%r7		; index into the br. table
sign	.reg		%r8		; sign of product
pc	.reg		%r9		; carry bit of product, = 00...01
pm	.reg		%r10		; value of -1 used in shifting

;*****************************************************************************
	.text

LEAF_ENTRY(s_xmpy)
	stws,ma		pu,4(sp)		; save registers on stack
	stws,ma		pl,4(sp)		; save registers on stack
	stws,ma		op1,4(sp)		; save registers on stack
	stws,ma		cnt,4(sp)		; save registers on stack
	stws,ma		brindex,4(sp)		; save registers on stack
	stws,ma		sign,4(sp)		; save registers on stack
	stws,ma		pc,4(sp)		; save registers on stack
	stws,ma		pm,4(sp)		; save registers on stack
;
;	Start multiply process
;
;	The instruction that follows a branch sits in its delay slot; the
;	right-hand comments below frequently describe the branch and its
;	delay-slot instruction as one step.
;
	ldws		0(arg1),op2		; get multiplier
	ldws		0(arg0),op1		; get multiplicand
	addi		-1,gr0,pm		; initialize pm to 111...1
	comb,<		op2,gr0,mpyb		; br. if multiplier < 0
	xor		op2,op1,sign		; sign(0) = sign of product
mpy1	comb,<		op1,gr0,mpya		; br. if multiplicand < 0
	addi		0,gr0,pu		; clear product
	addib,=		0,op1,fini0		; op1 = 0, product = 0
mpy2	addi		1,gr0,pc		; initialize pc to 00...01
	movib,tr	8,cnt,mloop		; set count for mpy loop
						;   (8 groups of 4 bits)
	extru		op2,31,4,brindex	; 4 bits as index into table
;
	.align		8
;
;	The three labels sh4n-4, sh4n and sh4n+4 are all used as branch
;	targets by the table entries below, so these instructions must stay
;	adjacent and in this order.
;
	b		sh4c			; br. if sign overflow
sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
	extru		pu,27,28,pu		;   <= zero
;
;	Top of the loop.  brindex selects one two-instruction table entry.
;	The delay slot already extracts the index for the following pass
;	from op2 before the entry shifts pl/op2.
;
mloop	blr		brindex,gr0		; br. into table
						;   entries of 2 words
	extru		op2,27,4,brindex	; next 4 bits into index
;
;
;	branch table for the multiplication process with four multiplier bits
;
;	Every entry must be exactly two instructions, because blr scales the
;	index by the entry size.  Entries 1011, 1100, 1110 and 1111, and the
;	sh2ns continuation of entry 1101, subtract op1 instead of adding
;	several multiples of it; they add 1 to brindex so that the group of
;	bits handled next absorbs the difference.  That increment can raise
;	an index of 1111 to 10000, which is why the table has a 17th entry.
;
mtable						; two words per entry
;
; ----	bits = 0000 ---- shift product 4 bits -------------------------------
;
	b		sh4n+4			; just shift partial
	shd		pu,pl,4,pl		;   product right 4 bits
;
; ----	bits = 0001 ---- add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
	shd		pu,pl,4,pl		;   product right 4 bits
;
; ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
;
;	NOTE(review): the second addb is a branch in the delay slot of the
;	first one; on unsigned overflow control appears to reach sh4c after
;	the shd at sh4n has executed - confirm against the architecture
;	manual before touching this entry.
;
	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
	addb,uv		op1,pu,sh4c		;   product right 4 bits
;
; ----	bits = 0011 ---- add op1, add 2*op1, shift 4 bits
;
;	The target sh4n-4 is the "b sh4c" above; the ,nsv completer on
;	sh1add decides whether that branch is nullified.
;
	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
	sh1add,nsv	op1,pu,pu		;   product right 4 bits
;
; ----	bits = 0100 ---- shift 2, add op1, shift 2
;
	b		sh2sa
	shd		pu,pl,2,pl		; shift product 2 bits
;
; ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2us		; add op1 to product
	shd		pu,pl,2,pl		; shift 2 bits
;
; ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
	addb,nuv	op1,pu,sh2us		; br. if not overflow
;
; ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
;
	b		sh3s
	sub		pu,op1,pu		; subtract op1, br. to sh3s

;
; ----	bits = 1000 ---- shift 3, add op1, shift 1
;
	b		sh3sa
	shd		pu,pl,3,pl		; shift product right 3 bits
;
; ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
	shd		pu,pl,3,pl		;   and shift 1
;
; ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
	addb,nuv	op1,pu,sh3us		; br. if no overflow
;
; ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
;
	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
	sub		pu,op1,pu		;   shift 2 with minus sign
;
; ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
;
	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
	shd		pu,pl,2,pl		; shift right 2 bits signed
;
; ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
;
	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
;
; ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
;
	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
	shd		pu,pl,1,pl		; shift 1 bit
;
; ----	bits = 1111 ---- add -op1, shift 4 signed
;
	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
	sub		pu,op1,pu		;   to shift 4 signed

;
; ----	bits = 10000 ---- shift 4 signed
;
;	Reached only when an index of 1111 was incremented by the previous
;	entry; the pending increment is passed on to the next index again.
;
	addib,tr	1,brindex,sh4s+4	; add 1 to index
	shd		pu,pl,4,pl		; shift 4 signed
;
; ----	end of table ---------------------------------------------------------
;
;	Shift helpers.  pm (all ones) is shifted into pu where the partial
;	product is negative; pc (00...01) is shifted into pu where an add
;	carried out of pu.
;
sh4s	shd		pu,pl,4,pl
	addib,tr	-1,cnt,mloop		; loop (count > 0 always here)
	shd		pm,pu,4,pu		; shift 4, minus signed
;
sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pc,pu,4,pu		; shift 4 with overflow
;
;	NOTE(review): "b signs" carries the sign test in its delay slot.
;	As read here, a non-negative sign nullifies the first instruction
;	at signs and continues at fini, otherwise the negation at signs
;	runs.  The same two-instruction exit is repeated after mmax and sh1.
;
	b		signs			; end of multiply
	bb,>=,n		sign,0,fini		; test sign of product
;
;	Multiplier is negative.  add,= nullifies the branch back to mpy1
;	only when op2 + op2 is zero, which for a negative op2 means
;	op2 = -2**31.
;
mpyb	add,=		op2,op2,gr0		; if <> 0, back to main sect.
	b		mpy1
	sub		0,op2,op2		; op2 = |multiplier|
	add,>=		op1,gr0,gr0		; if op1 < 0, invert sign,
	xor		pm,sign,sign		;   for correct result
;
;	special case for multiplier = -2**31, op1 = signed multiplicand
;	or multiplicand = -2**31, op1 = signed multiplier
;
;	The product magnitude is op1 * 2**31, formed directly as a 64-bit
;	value: pl = op1 shifted left 31 bits, pu = op1 shifted right 1 bit
;	with sign extension.
;
	shd		op1,0,1,pl		; shift op1 left 31 bits
mmax	extrs		op1,30,31,pu
	b		signs			; negate product (if needed)
	bb,>=,n		sign,0,fini		; test sign of product
;
;	Multiplicand is negative.  Same zero test as in mpyb, applied to op1.
;
mpya	add,=		op1,op1,gr0		; op1 = -2**31, special case
	b		mpy2
	sub		0,op1,op1		; op1 = |multiplicand|
	add,>=		op2,gr0,gr0		; if op2 < 0, invert sign,
	xor		pm,sign,sign		;   for correct result
	movb,tr		op2,op1,mmax		; use op2 as multiplicand
	shd		op1,0,1,pl		; shift it left 31 bits
;
;	Continuations for the table entries that shift 3 bits, add op1 and
;	then shift 1 more bit (all of them finish at sh1).
;
sh3c	shd		pu,pl,3,pl		; shift product 3 bits
	shd		pc,pu,3,pu		; shift 3 signed
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3us	extru		pu,28,29,pu		; shift 3 unsigned
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3sa	extrs		pu,28,29,pu		; shift 3 signed
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
	shd		pm,pu,3,pu
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh1	addib,>		-1,cnt,mloop		; loop if count > 0
	extru		pu,30,31,pu
	b		signs			; end of multiply
	bb,>=,n		sign,0,fini		; test sign of product
;
;	Continuations for the entries that subtract op1.  Each of them ends
;	with an unconditional branch back to mloop.
;
sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
	extru		pu,29,30,pu		;   shift unsigned
;
sh2s	shd		pu,pl,2,pl		; shift with minus sign
	shd		pm,pu,2,pu		;
	sub		pu,op1,pu		; subtract op1
	shd		pu,pl,2,pl		; shift with minus sign
	addib,tr	-1,cnt,mloop		; decrement count, loop
	shd		pm,pu,2,pu		; shift with minus sign
						;   count never reaches 0 here
;
sh2sb	extrs		pu,29,30,pu		; shift 2 signed
	sub		pu,op1,pu		; subtract op1 from product
	shd		pu,pl,2,pl		; shift with minus sign
	addib,tr	-1,cnt,mloop		; decrement count, loop
	shd		pm,pu,2,pu		; shift with minus sign
						;   count never reaches 0 here
;
sh1sa	extrs		pu,30,31,pu		;   signed
	sub		pu,op1,pu		; subtract op1 from product
	shd		pu,pl,3,pl		; shift 3 with minus sign
	addib,tr	-1,cnt,mloop		; dec. count, to loop
	shd		pm,pu,3,pu		;   count never reaches 0 here
;
;	Zero multiplicand: pu was already cleared in the delay slot at mpy1,
;	so only pl needs to be zeroed before storing the result.
;
fini0	movib,tr,n	0,pl,fini		; product = 0 as op1 = 0
;
;	Continuations for the entries that shift 2 bits, add op1 and then
;	shift 2 more bits (all of them finish at sh2a).
;
sh2us	extru		pu,29,30,pu		; shift 2 unsigned
	addb,tr		op1,pu,sh2a		; add op1
	shd		pu,pl,2,pl		; shift 2 bits
;
sh2c	shd		pu,pl,2,pl
	shd		pc,pu,2,pu		; shift with carry
	addb,tr		op1,pu,sh2a		; add op1 to product
	shd		pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2sa	extrs		pu,29,30,pu		; shift with sign
	addb,tr		op1,pu,sh2a		; add op1 to product
	shd		pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
	extru		pu,29,30,pu
;
;	All groups processed: skip the negation when bit 0 of sign is
;	clear, otherwise form the two's complement of the 64-bit pu:pl
;	pair (subb propagates the borrow from the low word).
;
mulend	bb,>=,n		sign,0,fini		; test sign of product
signs	sub		0,pl,pl			; negate product if sign
	subb		0,pu,pu			;   is negative
;
;	finish
;
fini	stws		pu,0(arg2)		; save high part of result
	stws		pl,4(arg2)		; save low part of result

	ldws,mb		-4(sp),pm		; restore registers
	ldws,mb		-4(sp),pc		; restore registers
	ldws,mb		-4(sp),sign		; restore registers
	ldws,mb		-4(sp),brindex		; restore registers
	ldws,mb		-4(sp),cnt		; restore registers
	ldws,mb		-4(sp),op1		; restore registers
	ldws,mb		-4(sp),pl		; restore registers
	bv		0(rp)			; return
	ldws,mb		-4(sp),pu		; restore registers
						;   (last pop, in the delay slot)
EXIT(s_xmpy)

	.end