/*	$NetBSD: impys.S,v 1.3 2005/12/11 12:17:40 christos Exp $	*/

/*	$OpenBSD: impys.S,v 1.5 2001/03/29 03:58:18 mickey Exp $	*/

/*
 * Copyright 1996 1995 by Open Software Foundation, Inc.
 *              All Rights Reserved
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted,
 * provided that the above copyright notice appears in all copies and
 * that both the copyright notice and this permission notice appear in
 * supporting documentation.
 *
 * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE.
 *
 * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */
/*
 * pmk1.1
 */
/*
 * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
 *
 * To anyone who acknowledges that this file is provided "AS IS"
 * without any express or implied warranty:
 *     permission to use, copy, modify, and distribute this file
 * for any purpose is hereby granted without fee, provided that
 * the above copyright notice and this notice appears in all
 * copies, and that the name of Hewlett-Packard Company not be
 * used in advertising or publicity pertaining to distribution
 * of the software without specific, written prior permission.
 * Hewlett-Packard Company makes no representations about the
 * suitability of this software for any purpose.
 */

#include <machine/asm.h>

/****************************************************************************
 *
 * Implement an integer multiply routine for 32-bit operands and 64-bit product
 * with operand values of zero (multiplicand only) and -2**31 treated specially.
 * The algorithm uses the absolute value of the multiplier, four bits at a time,
 * from right to left, to generate partial product.  Execution speed is more
 * important than program size in this implementation.
 *
 ***************************************************************************/
/*
 * Definitions - General registers
 *
 * Note that pl and op2 name the same register: every right shift of the
 * pu:pl pair discards multiplier bits that have already been consumed
 * while product bits move into the vacated upper positions.
 */
gr0:		.equ	0		/* General register zero */
pu:		.equ	3		/* upper part of product */
pl:		.equ	4		/* lower part of product */
op2:		.equ	4		/* multiplier */
op1:		.equ	5		/* multiplicand */
cnt:		.equ	6		/* count in multiply */
brindex:	.equ	7		/* index into the br. table */
sign:		.equ	8		/* sign of product */
pc:		.equ	9		/* carry bit of product, = 00...01 */
pm:		.equ	10		/* value of -1 used in shifting */

	.text

/*
 * impys - signed 32 x 32 -> 64 bit integer multiply.
 *
 * Inputs, as read by the code below:
 *	%arg0	address of the 32-bit multiplicand (loaded into op1)
 *	%arg1	address of the 32-bit multiplier (loaded into op2)
 *	%arg2	address of the 64-bit result; the upper word is stored
 *		at offset 0 and the lower word at offset 4
 *
 * General registers 3 through 10 are pushed on the stack at entry and
 * popped again before returning through %rp.
 *
 * The code from mloop to sh4s is a branch table indexed by blr with two
 * instructions per entry, and several branches use label+4 / label-4
 * targets (sh4n+4, sh4n-4, sh4s+4, sh2sb+4).  Most branches also carry a
 * live delay-slot instruction.  Do not insert, delete or reorder
 * instructions in this routine without re-checking every such target.
 */
ENTRY(impys,32)
	stws,ma		pu,4(%sp)	; save registers on stack
	stws,ma		pl,4(%sp)	; save registers on stack
	stws,ma		op1,4(%sp)	; save registers on stack
	stws,ma		cnt,4(%sp)	; save registers on stack
	stws,ma		brindex,4(%sp)	; save registers on stack
	stws,ma		sign,4(%sp)	; save registers on stack
	stws,ma		pc,4(%sp)	; save registers on stack
	stws,ma		pm,4(%sp)	; save registers on stack
;
;   Start multiply process
;
	ldws		0(%arg1),op2	; get multiplier
	ldws		0(%arg0),op1	; get multiplicand
	addi		-1,gr0,pm	; initialize pm to 111...1
	comb,<		op2,gr0,mpyb	; br. if multiplier < 0
	xor		op2,op1,sign	; sign(0) = sign of product
mpy1:	comb,<		op1,gr0,mpya	; br. if multiplicand < 0
	addi		0,gr0,pu	; clear product
	addib,=		0,op1,fini0	; op1 = 0, product = 0
mpy2:	addi		1,gr0,pc	; initialize pc to 00...01
	movib,tr	8,cnt,mloop	; set count for mpy loop
	extru		op2,31,4,brindex ; 4 bits as index into table
;
	.align		8
;
; The branch below is the word at sh4n-4, the target used by table
; entry 0011.
;
	b		sh4c		; br. if sign overflow
sh4n:	shd		pu,pl,4,pl	; shift product right 4 bits
	addib,<=	-1,cnt,mulend	; reduce count by 1, exit if
	extru		pu,27,28,pu	;   <= zero
;
mloop:	blr		brindex,gr0	; br. into table
					;   entries of 2 words
	extru		op2,27,4,brindex ; next 4 bits into index
;
;
;   branch table for the multiplication process with four multiplier bits
;
mtable:					; two words per entry
;
; ----	bits = 0000 ----	shift product 4 bits -------------------------------
;
	b		sh4n+4		; just shift partial
	shd		pu,pl,4,pl	;   product right 4 bits
;
; ----	bits = 0001 ----	add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n+4	; add op1 to product, to shift
	shd		pu,pl,4,pl	;   product right 4 bits
;
; ----	bits = 0010 ----	add op1, add op1, then shift 4 bits
;
	addb,tr		op1,pu,sh4n	; add 2*op1, to shift
	addb,uv		op1,pu,sh4c	;   product right 4 bits
;
; ----	bits = 0011 ----	add op1, add 2*op1, shift 4 bits
;
	addb,tr		op1,pu,sh4n-4	; add op1 & 2*op1, shift
	sh1add,nsv	op1,pu,pu	;   product right 4 bits
;
; ----	bits = 0100 ----	shift 2, add op1, shift 2
;
	b		sh2sa
	shd		pu,pl,2,pl	; shift product 2 bits
;
; ----	bits = 0101 ----	add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2us	; add op1 to product
	shd		pu,pl,2,pl	; shift 2 bits
;
; ----	bits = 0110 ----	add op1, add op1, shift 2, add op1, and shift 2 again
;
	addb,tr		op1,pu,sh2c	; add 2*op1, to shift 2 bits
	addb,nuv	op1,pu,sh2us	; br. if not overflow
;
; ----	bits = 0111 ----	subtract op1, shift 3, add op1, and shift 1
;
	b		sh3s
	sub		pu,op1,pu	; subtract op1, br. to sh3s

;
; ----	bits = 1000 ----	shift 3, add op1, shift 1
;
	b		sh3sa
	shd		pu,pl,3,pl	; shift product right 3 bits
;
; ----	bits = 1001 ----	add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3us	; add op1, to shift 3, add op1,
	shd		pu,pl,3,pl	;   and shift 1
;
; ----	bits = 1010 ----	add op1, add op1, shift 3, add op1, shift 1
;
	addb,tr		op1,pu,sh3c	; add 2*op1, to shift 3 bits
	addb,nuv	op1,pu,sh3us	; br. if no overflow
;
; ----	bits = 1011 ----	add -op1, shift 2, add -op1, shift 2, inc. next index
;
	addib,tr	1,brindex,sh2s	; add 1 to index, subtract op1,
	sub		pu,op1,pu	;   shift 2 with minus sign
;
; ----	bits = 1100 ----	shift 2, subtract op1, shift 2, increment next index
;
	addib,tr	1,brindex,sh2sb	; add 1 to index, to shift
	shd		pu,pl,2,pl	; shift right 2 bits signed
;
; ----	bits = 1101 ----	add op1, shift 2, add -op1, shift 2
;
	addb,tr		op1,pu,sh2ns	; add op1, to shift 2
	shd		pu,pl,2,pl	;   right 2 unsigned, etc.
;
; ----	bits = 1110 ----	shift 1 signed, add -op1, shift 3 signed
;
	addib,tr	1,brindex,sh1sa	; add 1 to index, to shift
	shd		pu,pl,1,pl	; shift 1 bit
;
; ----	bits = 1111 ----	add -op1, shift 4 signed
;
	addib,tr	1,brindex,sh4s	; add 1 to index, subtract op1,
	sub		pu,op1,pu	;   to shift 4 signed

;
; ----	bits = 10000 ----	shift 4 signed
;
; This seventeenth entry is selected when an earlier entry has added 1 to
; a next-nibble index that was already 1111.
;
	addib,tr	1,brindex,sh4s+4 ; add 1 to index
	shd		pu,pl,4,pl	; shift 4 signed
;
; ----	end of table ---------------------------------------------------------
;
sh4s:	shd		pu,pl,4,pl
	addib,tr	-1,cnt,mloop	; loop (count > 0 always here)
	shd		pm,pu,4,pu	; shift 4, minus signed
;
sh4c:	addib,>		-1,cnt,mloop	; decrement count, loop if > 0
	shd		pc,pu,4,pu	; shift 4 with overflow
	b		signs		; end of multiply
	bb,>=,n		sign,0,fini	; test sign of product
;
mpyb:	add,=		op2,op2,gr0	; if <> 0, back to main sect.
	b		mpy1
	sub		0,op2,op2	; op2 = |multiplier|
	add,>=		op1,gr0,gr0	; if op1 < 0, invert sign,
	xor		pm,sign,sign	;   for correct result
;
; special case for multiplier = -2**31, op1 = signed multiplicand
; or multiplicand = -2**31, op1 = signed multiplier
;
	shd		op1,0,1,pl	; shift op1 left 31 bits
mmax:	extrs		op1,30,31,pu
	b		signs		; negate product (if needed)
	bb,>=,n		sign,0,fini	; test sign of product
;
mpya:	add,=		op1,op1,gr0	; op1 = -2**31, special case
	b		mpy2
	sub		0,op1,op1	; op1 = |multiplicand|
	add,>=		op2,gr0,gr0	; if op2 < 0, invert sign,
	xor		pm,sign,sign	;   for correct result
	movb,tr		op2,op1,mmax	; use op2 as multiplicand
	shd		op1,0,1,pl	; shift it left 31 bits
;
sh3c:	shd		pu,pl,3,pl	; shift product 3 bits
	shd		pc,pu,3,pu	; shift 3 signed
	addb,tr		op1,pu,sh1	; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3us:	extru		pu,28,29,pu	; shift 3 unsigned
	addb,tr		op1,pu,sh1	; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3sa:	extrs		pu,28,29,pu	; shift 3 signed
	addb,tr		op1,pu,sh1	; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh3s:	shd		pu,pl,3,pl	; shift 3 minus signed
	shd		pm,pu,3,pu
	addb,tr		op1,pu,sh1	; add op1, to shift 1 bit
	shd		pu,pl,1,pl
;
sh1:	addib,>		-1,cnt,mloop	; loop if count > 0
	extru		pu,30,31,pu
	b		signs		; end of multiply
	bb,>=,n		sign,0,fini	; test sign of product
;
sh2ns:	addib,tr	1,brindex,sh2sb+4 ; increment index
	extru		pu,29,30,pu	;  shift unsigned
;
sh2s:	shd		pu,pl,2,pl	; shift with minus sign
	shd		pm,pu,2,pu	;
	sub		pu,op1,pu	; subtract op1
	shd		pu,pl,2,pl	; shift with minus sign
	addib,tr	-1,cnt,mloop	; decrement count, loop
	shd		pm,pu,2,pu	; shift with minus sign
					; count never reaches 0 here
;
sh2sb:	extrs		pu,29,30,pu	; shift 2 signed
	sub		pu,op1,pu	; subtract op1 from product
	shd		pu,pl,2,pl	; shift with minus sign
	addib,tr	-1,cnt,mloop	; decrement count, loop
	shd		pm,pu,2,pu	; shift with minus sign
					; count never reaches 0 here
;
sh1sa:	extrs		pu,30,31,pu	;   signed
	sub		pu,op1,pu	; subtract op1 from product
	shd		pu,pl,3,pl	; shift 3 with minus sign
	addib,tr	-1,cnt,mloop	; dec. count, to loop
	shd		pm,pu,3,pu	;   count never reaches 0 here
;
fini0:	movib,tr,n	0,pl,fini	; product = 0 as op1 = 0
;
sh2us:	extru		pu,29,30,pu	; shift 2 unsigned
	addb,tr		op1,pu,sh2a	; add op1
	shd		pu,pl,2,pl	; shift 2 bits
;
sh2c:	shd		pu,pl,2,pl
	shd		pc,pu,2,pu	; shift with carry
	addb,tr		op1,pu,sh2a	; add op1 to product
	shd		pu,pl,2,pl	; br. to sh2 to shift pu
;
sh2sa:	extrs		pu,29,30,pu	;  shift with sign
	addb,tr		op1,pu,sh2a	; add op1 to product
	shd		pu,pl,2,pl	; br. to sh2 to shift pu
;
sh2a:	addib,>		-1,cnt,mloop	; loop if count > 0
	extru		pu,29,30,pu
;
mulend:	bb,>=,n		sign,0,fini	; test sign of product
signs:	sub		0,pl,pl		; negate product if sign
	subb		0,pu,pu		;   is negative
;
;   finish
;
fini:	stws		pu,0(%arg2)	; save high part of result
	stws		pl,4(%arg2)	; save low part of result

	ldws,mb		-4(%sp),pm	; restore registers
	ldws,mb		-4(%sp),pc	; restore registers
	ldws,mb		-4(%sp),sign	; restore registers
	ldws,mb		-4(%sp),brindex	; restore registers
	ldws,mb		-4(%sp),cnt	; restore registers
	ldws,mb		-4(%sp),op1	; restore registers
	ldws,mb		-4(%sp),pl	; restore registers
	bv		0(%rp)		; return
	ldws,mb		-4(%sp),pu	; restore registers

EXIT(impys)
	.end