/*	$OpenBSD: impyu.S,v 1.12 2011/04/16 20:52:12 deraadt Exp $	*/
/*
  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  To anyone who acknowledges that this file is provided "AS IS"
  without any express or implied warranty:
      permission to use, copy, modify, and distribute this file
  for any purpose is hereby granted without fee, provided that
  the above copyright notice and this notice appears in all
  copies, and that the name of Hewlett-Packard Company not be
  used in advertising or publicity pertaining to distribution
  of the software without specific, written prior permission.
  Hewlett-Packard Company makes no representations about the
  suitability of this software for any purpose.
*/
/* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */

#include <machine/asm.h>
#include <machine/frame.h>

;****************************************************************************
;
;Implement an integer multiply routine for 32-bit operands and 64-bit product
; with operand values of zero (multiplicand only) and 2**31 treated specially.
; The algorithm uses the multiplier, four bits at a time, from right to left,
; to generate partial products.  Execution speed is more important than
; program size in this implementation.
;
; Interface, as used by the code below:
;   arg0 - address of the multiplicand word (loaded into op1)
;   arg1 - address of the multiplier word   (loaded into op2)
;   arg2 - address of the result; the high word is stored at 0(arg2)
;          and the low word at 4(arg2)
;   %r3-%r10 are pushed on the stack at entry and popped before return.
;
;******************************************************************************
;
; Definitions - General registers
;
; Note: pl and op2 are both %r4 - the multiplier is consumed from the low
; end of the same register that receives the low product bits shifted in
; from pu.  temp and cnt are both %r6 - temp is only used in the lastadd
; sequence, after the loop count is no longer needed.
;
gr0	.reg		%r0		; General register zero
pu	.reg		%r3		; upper part of product
pl	.reg		%r4		; lower part of product
op2	.reg		%r4		; multiplier
op1	.reg		%r5		; multiplicand
cnt	.reg		%r6		; count in multiply
brindex	.reg		%r7		; index into the br. table
saveop2	.reg		%r8		; save op2 if high bit of multiplicand
					;   is set
pc	.reg		%r9		; carry bit of product, = 00...01
pm	.reg		%r10		; value of -1 used in shifting
temp	.reg		%r6

;****************************************************************************
	.text
LEAF_ENTRY(u_xmpy)
	; Push the eight working registers: each stws,ma stores at 0(sp)
	; and then advances sp by 4.
	stws,ma		pu,4(sp)		; save registers on stack
	stws,ma		pl,4(sp)		; save registers on stack
	stws,ma		op1,4(sp)		; save registers on stack
	stws,ma		cnt,4(sp)		; save registers on stack
	stws,ma		brindex,4(sp)		; save registers on stack
	stws,ma		saveop2,4(sp)		; save registers on stack
	stws,ma		pc,4(sp)		; save registers on stack
	stws,ma		pm,4(sp)		; save registers on stack
;
;   Start multiply process
;
	ldws		0(arg0),op1		; get multiplicand
	ldws		0(arg1),op2		; get multiplier
	addib,=		0,op1,fini0		; op1 = 0, product = 0
	addi		0,gr0,pu		; clear product
	bb,>=		op1,0,mpy1		; test msb of multiplicand
	addi		0,gr0,saveop2		; clear saveop2
;
; msb of multiplicand is set so will save multiplier for a final
;   addition into the result
;
	extru,=		op1,31,31,op1		; clear msb of multiplicand; a zero
						;   result (op1 was 2**31) nullifies
						;   the branch that follows
	b		mpy1			; low 31 bits <> 0, start multiply
	add		op2,gr0,saveop2		; save op2 in saveop2
	; op1 was exactly 2**31: the product is op2 * 2**31, built directly.
	shd		gr0,op2,1,pu		; shift op2 left 31 for result
	b		fini			; go to finish
	shd		op2,gr0,1,pl		; low word = low bit of op2 in msb
;
mpy1	addi		-1,gr0,pm		; initialize pm to 111...1
	addi		1,gr0,pc		; initialize pc to 00...01
	movib,tr	8,cnt,mloop		; set count for mpy loop
						;   (8 groups of 4 multiplier bits)
	extru		op2,31,4,brindex	; 4 bits as index into table
;
	.align		8
;
; The branch below sits at sh4n-4; it is the target of the 0011 table entry.
;
	b		sh4c			; br. if sign overflow
sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
	extru		pu,27,28,pu		;   <= zero
;
mloop	blr		brindex,gr0		; br. into table
						;   entries of 2 words
	extru		op2,27,4,brindex	; next 4 bits into index
;
;
; branch table for the multiplication process with four multiplier bits
;
; Each entry is a branch plus the instruction in its delay slot.  Targets
; written as label+4 skip the first instruction at that label because the
; entry's delay slot has already performed the equivalent shift.
;
mtable						; two words per entry
;
; ----	bits = 0000 ---- shift product 4 bits -------------------------------
;
	b		sh4n+4			; just shift partial
	shd		pu,pl,4,pl		; product right 4 bits
;
; ----	bits = 0001 ---- add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
	shd		pu,pl,4,pl		; product right 4 bits
;
; ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
	addb,uv		op1,pu,sh4c		; product right 4 bits
;
; ----	bits = 0011 ---- add op1, add 2*op1, shift 4 bits
;
; sh4n-4 is the "b sh4c" placed just ahead of sh4n; the ,nuv completer on
; the sh1add nullifies that branch when the add does not overflow.
;
	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
	sh1add,nuv	op1,pu,pu		; product right 4 bits
;
; ----	bits = 0100 ---- shift 2, add op1, shift 2
;
	b		sh2sa
	shd		pu,pl,2,pl		; shift product 2 bits
;
; ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2us		; add op1 to product
	shd		pu,pl,2,pl		; shift 2 bits
;
; ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
	addb,nuv	op1,pu,sh2us		; br. if not overflow
;
; ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
;
	b		sh3s
	sub		pu,op1,pu		; subtract op1, br. to sh3s

;
; ----	bits = 1000 ---- shift 3, add op1, shift 1
;
	b		sh3sa
	shd		pu,pl,3,pl		; shift product right 3 bits
;
; ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
	shd		pu,pl,3,pl		;   and shift 1
;
; ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
	addb,nuv	op1,pu,sh3us		; br. if no overflow
;
; ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
;
	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
	sub		pu,op1,pu		;   shift 2 with minus sign
;
; ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
;
	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
	shd		pu,pl,2,pl		; shift right 2 bits signed
;
; ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
;
	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
;
; ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
;
	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
	shd		pu,pl,1,pl		; shift 1 bit
;
; ----	bits = 1111 ---- add -op1, shift 4 signed
;
	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
	sub		pu,op1,pu		;   to shift 4 signed

;
; ----	bits = 10000 ---- shift 4 signed
;
; Entry 16 is reached when an "add 1 to index" above is applied to a next
; index of 1111; it passes the increment on to the following index.
;
	addib,tr	1,brindex,sh4s+4	; add 1 to index
	shd		pu,pl,4,pl		; shift 4 signed
;
; ----	end of table ---------------------------------------------------------
;
sh4s	shd		pu,pl,4,pl
	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pm,pu,4,pu		; shift 4, minus signed
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pc,pu,4,pu		; shift 4 with overflow
	b		lastadd			; end of multiply
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh3c	shd		pu,pl,3,pl		; shift product 3 bits
	shd		pc,pu,3,pu		; shift 3 signed
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3us	extru		pu,28,29,pu		; shift 3 unsigned
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3sa	extrs		pu,28,29,pu		; shift 3 signed
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
	shd		pm,pu,3,pu
	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh1	addib,>		-1,cnt,mloop		; loop if count > 0
	extru		pu,30,31,pu
	b		lastadd			; end of multiply
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
; sh2sb+4 skips the signed extrs at sh2sb; the delay slot below has already
; shifted pu right 2 unsigned.
;
sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
	extru		pu,29,30,pu		; shift unsigned
;
sh2s	shd		pu,pl,2,pl		; shift with minus sign
	shd		pm,pu,2,pu		;
	sub		pu,op1,pu		; subtract op1
	shd		pu,pl,2,pl		; shift with minus sign
	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pm,pu,2,pu		; shift with minus sign
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh2sb	extrs		pu,29,30,pu		; shift 2 signed
	sub		pu,op1,pu		; subtract op1 from product
	shd		pu,pl,2,pl		; shift with minus sign
	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
	shd		pm,pu,2,pu		; shift with minus sign
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
sh1sa	extrs		pu,30,31,pu		;   signed
	sub		pu,op1,pu		; subtract op1 from product
	shd		pu,pl,3,pl		; shift 3 with minus sign
	addib,>		-1,cnt,mloop		; decrement count, loop if >0
	shd		pm,pu,3,pu
	addb,tr		op1,pu,lastadd		; do one more add, then finish
	addb,=,n	saveop2,gr0,fini	; check saveop2
;
; Zero multiplicand: pu was already cleared in the delay slot of the branch
; that came here, so only pl needs to be zeroed.
;
fini0	movib,tr	0,pl,fini		; product = 0 as op1 = 0
	stws		pu,0(arg2)		; save high part of result
;
sh2us	extru		pu,29,30,pu		; shift 2 unsigned
	addb,tr		op1,pu,sh2a		; add op1
	shd		pu,pl,2,pl		; shift 2 bits
;
sh2c	shd		pu,pl,2,pl
	shd		pc,pu,2,pu		; shift with carry
	addb,tr		op1,pu,sh2a		; add op1 to product
	shd		pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2sa	extrs		pu,29,30,pu		; shift with sign
	addb,tr		op1,pu,sh2a		; add op1 to product
	shd		pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
	extru		pu,29,30,pu
;
; saveop2 is non-zero only when the multiplicand's msb was set (and the
; multiplier was non-zero); in that case op2 * 2**31 is added to pu:pl.
;
mulend	addb,=,n	saveop2,gr0,fini	; check saveop2
lastadd	shd		saveop2,gr0,1,temp	; if saveop2 <> 0, shift it
	shd		gr0,saveop2,1,saveop2	;   left 31 and add to result
	add		pl,temp,pl
	addc		pu,saveop2,pu
;
; finish
;
fini	stws		pu,0(arg2)		; save high part of result
	stws		pl,4(arg2)		; save low part of result

	; Pop in reverse order of the pushes at entry: each ldws,mb
	; moves sp back by 4 and then loads.
	ldws,mb		-4(sp),pm		; restore registers
	ldws,mb		-4(sp),pc		; restore registers
	ldws,mb		-4(sp),saveop2		; restore registers
	ldws,mb		-4(sp),brindex		; restore registers
	ldws,mb		-4(sp),cnt		; restore registers
	ldws,mb		-4(sp),op1		; restore registers
	ldws,mb		-4(sp),pl		; restore registers
	bv		0(rp)			; return
	ldws,mb		-4(sp),pu		; restore registers (delay slot)
EXIT(u_xmpy)

	.end