/*	$OpenBSD: impyu.S,v 1.11 2005/01/23 18:01:30 mickey Exp $	*/
/*
  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  To anyone who acknowledges that this file is provided "AS IS"
  without any express or implied warranty:
      permission to use, copy, modify, and distribute this file
  for any purpose is hereby granted without fee, provided that
  the above copyright notice and this notice appears in all
  copies, and that the name of Hewlett-Packard Company not be
  used in advertising or publicity pertaining to distribution
  of the software without specific, written prior permission.
  Hewlett-Packard Company makes no representations about the
  suitability of this software for any purpose.
*/
/* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */

#include <machine/asm.h>
#define _LOCORE
#include <machine/frame.h>

;****************************************************************************
;
;Implement an integer multiply routine for 32-bit operands and 64-bit product
; with operand values of zero (multiplicand only) and 2**31 treated specially.
; The algorithm uses the multiplier, four bits at a time, from right to left,
; to generate partial product.  Execution speed is more important than program
; size in this implementation.
;
;******************************************************************************
;
; Definitions - General registers
;
; Note that two pairs of names share a register: op2 and pl are both %r4
; (the multiplier is consumed from the low product word as it is shifted
; right), and cnt and temp are both %r6 (temp is only used at lastadd).
;
gr0	.reg	%r0		; General register zero
pu	.reg	%r3		; upper part of product
pl	.reg	%r4		; lower part of product
op2	.reg	%r4		; multiplier
op1	.reg	%r5		; multiplicand
cnt	.reg	%r6		; count in multiply
brindex	.reg	%r7		; index into the br. table
saveop2	.reg	%r8		; save op2 if high bit of multiplicand
				; is set
pc	.reg	%r9		; carry bit of product, = 00...01
pm	.reg	%r10		; value of -1 used in shifting
temp	.reg	%r6

;****************************************************************************
;
; u_xmpy - unsigned 32 x 32 -> 64 bit multiply
;
; Operands, as read and written by the code below:
;	arg0	address of the 32-bit multiplicand  (ldws 0(arg0),op1)
;	arg1	address of the 32-bit multiplier    (ldws 0(arg1),op2)
;	arg2	address of the two-word result: the high word is stored
;		at 0(arg2) and the low word at 4(arg2)
;
; %r3 through %r10 are pushed with post-incrementing stores through sp on
; entry and popped in reverse order before returning with bv 0(rp).
;
; sp, rp, arg0-arg2, LEAF_ENTRY and EXIT are not defined in this file;
; presumably they come from <machine/asm.h> (NOTE(review): confirm).
;
; Reading hint: the instruction written directly after a branch is its
; delay slot.  Several branch targets below are given as label+4 or
; label-4, and the branch table relies on every entry being exactly two
; instructions, so instructions must not be inserted, removed or
; reordered anywhere between mpy1 and lastadd.
;
;****************************************************************************
	.text
LEAF_ENTRY(u_xmpy)
	stws,ma	pu,4(sp)		; save registers on stack
	stws,ma	pl,4(sp)		; save registers on stack
	stws,ma	op1,4(sp)		; save registers on stack
	stws,ma	cnt,4(sp)		; save registers on stack
	stws,ma	brindex,4(sp)		; save registers on stack
	stws,ma	saveop2,4(sp)		; save registers on stack
	stws,ma	pc,4(sp)		; save registers on stack
	stws,ma	pm,4(sp)		; save registers on stack
;
;   Start multiply process
;
	ldws	0(arg0),op1		; get multiplicand
	ldws	0(arg1),op2		; get multiplier
	addib,=	0,op1,fini0		; op1 = 0, product = 0
	addi	0,gr0,pu		; clear product
	bb,>=	op1,0,mpy1		; test msb of multiplicand
	addi	0,gr0,saveop2		; clear saveop2
;
; msb of multiplicand is set so will save multiplier for a final
;   addition into the result
;
	extru,=	op1,31,31,op1		; clear msb of multiplicand; a zero
					;   result (op1 was 2**31) nullifies
					;   the branch that follows
	b	mpy1			; remaining bits nonzero, start multiply
	add	op2,gr0,saveop2		; save op2 in saveop2
	shd	gr0,op2,1,pu		; shift op2 left 31 for result
	b	fini			; go to finish
	shd	op2,gr0,1,pl
;
mpy1	addi	-1,gr0,pm		; initialize pm to 111...1
	addi	1,gr0,pc		; initialize pc to 00...01
	movib,tr 8,cnt,mloop		; set count for mpy loop
	extru	op2,31,4,brindex	; 4 bits as index into table
;
	.align	8
;
	b	sh4c			; br. if sign overflow
sh4n	shd	pu,pl,4,pl		; shift product right 4 bits
	addib,<= -1,cnt,mulend		; reduce count by 1, exit if
	extru	pu,27,28,pu		;   <= zero
;
mloop	blr	brindex,gr0		; br. into table
					;   entries of 2 words
	extru	op2,27,4,brindex	; next 4 bits into index
;
;
; branch table for the multiplication process with four multiplier bits
;
mtable					; two words per entry
;
; ---- bits = 0000 ---- shift product 4 bits -------------------------------
;
	b	sh4n+4			; just shift partial
	shd	pu,pl,4,pl		;   product right 4 bits
;
; ---- bits = 0001 ---- add op1, then shift 4 bits
;
	addb,tr	op1,pu,sh4n+4		; add op1 to product, to shift
	shd	pu,pl,4,pl		;   product right 4 bits
;
; ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
;
	addb,tr	op1,pu,sh4n		; add 2*op1, to shift
	addb,uv	op1,pu,sh4c		;   product right 4 bits
;
; ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
;
	addb,tr	op1,pu,sh4n-4		; add op1 & 2*op1, shift
	sh1add,nuv op1,pu,pu		;   product right 4 bits
;
; ---- bits = 0100 ---- shift 2, add op1, shift 2
;
	b	sh2sa
	shd	pu,pl,2,pl		; shift product 2 bits
;
; ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
;
	addb,tr	op1,pu,sh2us		; add op1 to product
	shd	pu,pl,2,pl		; shift 2 bits
;
; ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
;
	addb,tr	op1,pu,sh2c		; add 2*op1, to shift 2 bits
	addb,nuv op1,pu,sh2us		; br. if not overflow
;
; ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
;
	b	sh3s
	sub	pu,op1,pu		; subtract op1, br. to sh3s

;
; ---- bits = 1000 ---- shift 3, add op1, shift 1
;
	b	sh3sa
	shd	pu,pl,3,pl		; shift product right 3 bits
;
; ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
;
	addb,tr	op1,pu,sh3us		; add op1, to shift 3, add op1,
	shd	pu,pl,3,pl		;   and shift 1
;
; ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
;
	addb,tr	op1,pu,sh3c		; add 2*op1, to shift 3 bits
	addb,nuv op1,pu,sh3us		; br. if no overflow
;
; ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
;
	addib,tr 1,brindex,sh2s		; add 1 to index, subtract op1,
	sub	pu,op1,pu		;   shift 2 with minus sign
;
; ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
;
	addib,tr 1,brindex,sh2sb	; add 1 to index, to shift
	shd	pu,pl,2,pl		; shift right 2 bits signed
;
; ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
;
	addb,tr	op1,pu,sh2ns		; add op1, to shift 2
	shd	pu,pl,2,pl		;   right 2 unsigned, etc.
;
; ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
;
	addib,tr 1,brindex,sh1sa	; add 1 to index, to shift
	shd	pu,pl,1,pl		; shift 1 bit
;
; ---- bits = 1111 ---- add -op1, shift 4 signed
;
	addib,tr 1,brindex,sh4s		; add 1 to index, subtract op1,
	sub	pu,op1,pu		;   to shift 4 signed

;
; ---- bits = 10000 ---- shift 4 signed
;
; Index 16 is reached when a previous entry added 1 to a 4-bit index
; that was already 1111.
;
	addib,tr 1,brindex,sh4s+4	; add 1 to index
	shd	pu,pl,4,pl		; shift 4 signed
;
; ---- end of table ---------------------------------------------------------
;
sh4s	shd	pu,pl,4,pl
	addib,>	-1,cnt,mloop		; decrement count, loop if > 0
	shd	pm,pu,4,pu		; shift 4, minus signed
	addb,tr	op1,pu,lastadd		; do one more add, then finish
	addb,=,n saveop2,gr0,fini	; check saveop2
;
sh4c	addib,>	-1,cnt,mloop		; decrement count, loop if > 0
	shd	pc,pu,4,pu		; shift 4 with overflow
	b	lastadd			; end of multiply
	addb,=,n saveop2,gr0,fini	; check saveop2
;
sh3c	shd	pu,pl,3,pl		; shift product 3 bits
	shd	pc,pu,3,pu		; shift 3 signed
	addb,tr	op1,pu,sh1		; add op1, to shift 1 bit
	shd	pu,pl,1,pl
;
sh3us	extru	pu,28,29,pu		; shift 3 unsigned
	addb,tr	op1,pu,sh1		; add op1, to shift 1 bit
	shd	pu,pl,1,pl
;
sh3sa	extrs	pu,28,29,pu		; shift 3 signed
	addb,tr	op1,pu,sh1		; add op1, to shift 1 bit
	shd	pu,pl,1,pl
;
sh3s	shd	pu,pl,3,pl		; shift 3 minus signed
	shd	pm,pu,3,pu
	addb,tr	op1,pu,sh1		; add op1, to shift 1 bit
	shd	pu,pl,1,pl
;
sh1	addib,>	-1,cnt,mloop		; loop if count > 0
	extru	pu,30,31,pu
	b	lastadd			; end of multiply
	addb,=,n saveop2,gr0,fini	; check saveop2
;
sh2ns	addib,tr 1,brindex,sh2sb+4	; increment index
	extru	pu,29,30,pu		; shift unsigned
;
sh2s	shd	pu,pl,2,pl		; shift with minus sign
	shd	pm,pu,2,pu		;
	sub	pu,op1,pu		; subtract op1
	shd	pu,pl,2,pl		; shift with minus sign
	addib,>	-1,cnt,mloop		; decrement count, loop if > 0
	shd	pm,pu,2,pu		; shift with minus sign
	addb,tr	op1,pu,lastadd		; do one more add, then finish
	addb,=,n saveop2,gr0,fini	; check saveop2
;
sh2sb	extrs	pu,29,30,pu		; shift 2 signed
	sub	pu,op1,pu		; subtract op1 from product
	shd	pu,pl,2,pl		; shift with minus sign
	addib,>	-1,cnt,mloop		; decrement count, loop if > 0
	shd	pm,pu,2,pu		; shift with minus sign
	addb,tr	op1,pu,lastadd		; do one more add, then finish
	addb,=,n saveop2,gr0,fini	; check saveop2
;
sh1sa	extrs	pu,30,31,pu		;   signed
	sub	pu,op1,pu		; subtract op1 from product
	shd	pu,pl,3,pl		; shift 3 with minus sign
	addib,>	-1,cnt,mloop		; decrement count, loop if >0
	shd	pm,pu,3,pu
	addb,tr	op1,pu,lastadd		; do one more add, then finish
	addb,=,n saveop2,gr0,fini	; check saveop2
;
fini0	movib,tr 0,pl,fini		; product = 0 as op1 = 0
	stws	pu,0(arg2)		; save high part of result
;
sh2us	extru	pu,29,30,pu		; shift 2 unsigned
	addb,tr	op1,pu,sh2a		; add op1
	shd	pu,pl,2,pl		; shift 2 bits
;
sh2c	shd	pu,pl,2,pl
	shd	pc,pu,2,pu		; shift with carry
	addb,tr	op1,pu,sh2a		; add op1 to product
	shd	pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2sa	extrs	pu,29,30,pu		; shift with sign
	addb,tr	op1,pu,sh2a		; add op1 to product
	shd	pu,pl,2,pl		; br. to sh2 to shift pu
;
sh2a	addib,>	-1,cnt,mloop		; loop if count > 0
	extru	pu,29,30,pu
;
; Final correction: when the multiplicand had its msb set, saveop2 holds
; the multiplier, and saveop2 * 2**31 is added to the 64-bit product.
;
mulend	addb,=,n saveop2,gr0,fini	; check saveop2
lastadd	shd	saveop2,gr0,1,temp	; if saveop2 <> 0, shift it
	shd	gr0,saveop2,1,saveop2	;   left 31 and add to result
	add	pl,temp,pl
	addc	pu,saveop2,pu
;
;   finish
;
fini	stws	pu,0(arg2)		; save high part of result
	stws	pl,4(arg2)		; save low part of result

	ldws,mb	-4(sp),pm		; restore registers
	ldws,mb	-4(sp),pc		; restore registers
	ldws,mb	-4(sp),saveop2		; restore registers
	ldws,mb	-4(sp),brindex		; restore registers
	ldws,mb	-4(sp),cnt		; restore registers
	ldws,mb	-4(sp),op1		; restore registers
	ldws,mb	-4(sp),pl		; restore registers
	bv	0(rp)			; return
	ldws,mb	-4(sp),pu		; restore registers
EXIT(u_xmpy)

	.end