* Copyright (c) 1995 Colin Plumb. All rights reserved.
* For licensing and other legal details, see the file legal.c.
*
* lbn68360.c - 32-bit bignum primitives for 683xx processors.
*
* This code uses the InterTools calling convention, which is a bit odd.
* One minor note is that the default variable sizes are
* char = unsigned 8, short = 8 (in violation of ANSI!),
* int = 16, long = 32. Longs (including on the stack) are 16-bit aligned.
* Arguments are padded to 16 bits.
* A6 is used as a frame pointer, and globals are indexed off A5.
* Return values are passed in D0 or A0 (or FP0), depending on type.
* D0, D1, A0 and A4 (!) are volatile across function calls. A1
* must be preserved!
*
* This code assumes 16-bit ints. Code for 32-bit ints is commented out
* with "**".
*
* Regardless of UINT_MAX, only bignums up to 64K words (2 million bits)
* are supported. (68k hackers will recognize this as a consequence of
* using dbra.) This could be extended easily if anyone cares.
*
* These primitives use little-endian word order.
* (The order of bytes within words is irrelevant to this issue.)

* The Metrowerks C compiler (1.2.2) produces bad 68k code for the
* following input, which happens to be the inner loop of lbnSub1,
* so it has been rewritten in assembly, even though it is not terribly
* speed-critical. (Optimizer on or off does not matter.)
*
*       unsigned
*       decrement(unsigned *num, unsigned len)
*       {
*               do {
*                       if ((*num++)-- != 0)
*                               return 0;
*               } while (--len);
*               return 1;
*       }

* BNWORD32 lbnSub1_32(BNWORD32 *num, unsigned len, BNWORD32 borrow)
*
* Subtracts "borrow" from the least significant word of num, then keeps
* subtracting 1 from successive words for as long as a borrow is produced
* and words remain.  Returns the final borrow (0 or 1) in D0.
*
* Stack on entry (16-bit ints): 4(sp) = num, 8(sp) = len, 10(sp) = borrow.
        SECTION S_lbnSub1_32,,"code"
        XDEF    _lbnSub1_32
_lbnSub1_32:
        movea.l 4(sp),a0        * num
        move.l  10(sp),d0       * borrow
**      move.l  12(sp),d0       * borrow
        sub.l   d0,(a0)+        * num[0] -= borrow; X/C = borrow out
        bcc     sub_done        * No borrow out: X is clear, return 0
        move.w  8(sp),d0        * len (move leaves X untouched)
**      move.w  10(sp),d0       * len
        subq.w  #2,d0           * dbcc counter for the len-1 remaining words
        bcs     sub_done        * len < 2: no words left; subq left X set
sub_loop:
        subq.l  #1,(a0)+        * Propagate the borrow into the next word
        dbcc    d0,sub_loop     * Exit when borrow stops or counter expires
sub_done:
        moveq.l #0,d0           * Clear result (moveq does not alter X)
        addx.w  d0,d0           * d0 = 0 + 0 + X = borrow out
        rts

* BNWORD32 lbnAdd1_32(BNWORD32 *num, unsigned len, BNWORD32 carry)
*
* Adds "carry" to the least significant word of num, then keeps adding 1
* to successive words for as long as a carry is produced and words remain.
* Returns the final carry (0 or 1) in D0.
*
* Stack on entry (16-bit ints): 4(sp) = num, 8(sp) = len, 10(sp) = carry.
        SECTION S_lbnAdd1_32,,"code"
        XDEF    _lbnAdd1_32
_lbnAdd1_32:
        movea.l 4(sp),a0        * num
        move.l  10(sp),d0       * carry
**      move.l  12(sp),d0       * carry
        add.l   d0,(a0)+        * num[0] += carry; X/C = carry out
        bcc     add_done        * No carry out: X is clear, return 0
        move.w  8(sp),d0        * len (move leaves X untouched)
**      move.w  10(sp),d0       * len
        subq.w  #2,d0           * dbcc counter for the len-1 remaining words
        bcs     add_done        * len < 2: no words left; subq left X set
add_loop:
        addq.l  #1,(a0)+        * Propagate the carry into the next word
        dbcc    d0,add_loop     * Exit when carry stops or counter expires
add_done:
        moveq.l #0,d0           * Clear result (moveq does not alter X)
        addx.w  d0,d0           * d0 = 0 + 0 + X = carry out
        rts

* void lbnMulN1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
*
* Multiplies the len-word number at "in" by the single word k and stores
* the product at "out".  len low-order words are written followed by one
* final high-order word, so len+1 words of "out" are written in total.
*
* Register use: a0 = in, a4 = out, d2 = k, d3 = current word / low half of
* product, d4 = constant 0 (for addx), d5 = loop counter.  d0 and d1
* alternate as the high half of the product carried into the next word;
* the loop is unrolled twice so that no register shuffling is needed.
*
* Stack after the movem (16-bit ints): 20(sp) = out, 24(sp) = in,
* 28(sp) = len, 30(sp) = k.
        SECTION S_lbnMulN1_32,,"code"
        XDEF    _lbnMulN1_32
_lbnMulN1_32:
        movem.l d2-d5,-(sp)     * 16 bytes of extra data
        moveq.l #0,d4
        move.l  20(sp),a4       * out
        move.l  24(sp),a0       * in
        move.w  28(sp),d5       * len
        move.l  30(sp),d2       * k
**      move.w  30(sp),d5       * len
**      move.l  32(sp),d2       * k

        move.l  (a0)+,d3        * First multiply
        mulu.l  d2,d1:d3        * dc.w 0x4c02, 0x3401
        move.l  d3,(a4)+

        subq.w  #1,d5           * Setup for loop unrolling
        lsr.w   #1,d5           * d5 = remaining words / 2, C = odd word left
        bcs.s   m32_even        * Odd remainder: enter at the second half
        beq.s   m32_short       * Nothing remaining: store high word and exit

        subq.w  #1,d5           * Set up software pipeline properly
        move.l  d1,d0           * First half of the loop expects carry in d0

m32_loop:
        move.l  (a0)+,d3
        mulu.l  d2,d1:d3        * dc.w 0x4c02, 0x3401
        add.l   d0,d3           * Add carry word from previous multiply
        addx.l  d4,d1           * Fold carry bit into new high word
        move.l  d3,(a4)+
m32_even:

        move.l  (a0)+,d3
        mulu.l  d2,d0:d3        * dc.w 0x4c02, 0x3400
        add.l   d1,d3           * Add carry word from previous multiply
        addx.l  d4,d0           * Fold carry bit into new high word
        move.l  d3,(a4)+

        dbra    d5,m32_loop

        move.l  d0,(a4)         * Store final high word
        movem.l (sp)+,d2-d5
        rts
m32_short:
        move.l  d1,(a4)         * Single-word input: high word is still in d1
        movem.l (sp)+,d2-d5
        rts

* BNWORD32
* lbnMulAdd1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
*
* Multiplies the len-word number at "in" by the single word k and adds the
* product into the len words at "out".  The high-order word that does not
* fit in "out" is returned in D0.
*
* Register use is the same as in lbnMulN1_32: d0 and d1 alternate as the
* carry word, d4 holds 0 for addx, d5 is the loop counter.
        SECTION S_lbnMulAdd1_32,,"code"
        XDEF    _lbnMulAdd1_32
_lbnMulAdd1_32:
        movem.l d2-d5,-(sp)     * 16 bytes of extra data
        moveq.l #0,d4
        move.l  20(sp),a4       * out
        move.l  24(sp),a0       * in
        move.w  28(sp),d5       * len
        move.l  30(sp),d2       * k
**      move.w  30(sp),d5       * len
**      move.l  32(sp),d2       * k

        move.l  (a0)+,d3        * First multiply
        mulu.l  d2,d1:d3        * dc.w 0x4c02, 0x3401
        add.l   d3,(a4)+        * out[0] += low half
        addx.l  d4,d1           * Fold carry bit into high word

        subq.w  #1,d5           * Setup for loop unrolling
        lsr.w   #1,d5           * d5 = remaining words / 2, C = odd word left
        bcs.s   ma32_even       * Odd remainder: enter at the second half
        beq.s   ma32_short      * Nothing remaining: return high word

        subq.w  #1,d5           * Set up software pipeline properly
        move.l  d1,d0           * First half of the loop expects carry in d0

ma32_loop:
        move.l  (a0)+,d3
        mulu.l  d2,d1:d3        * dc.w 0x4c02, 0x3401
        add.l   d0,d3           * Add carry word from previous multiply
        addx.l  d4,d1
        add.l   d3,(a4)+        * Accumulate into out
        addx.l  d4,d1           * Carry out of the accumulate
ma32_even:

        move.l  (a0)+,d3
        mulu.l  d2,d0:d3        * dc.w 0x4c02, 0x3400
        add.l   d1,d3           * Add carry word from previous multiply
        addx.l  d4,d0
        add.l   d3,(a4)+        * Accumulate into out
        addx.l  d4,d0           * Carry out of the accumulate

        dbra    d5,ma32_loop

        movem.l (sp)+,d2-d5     * Final carry word is already in d0
        rts
ma32_short:
        move.l  d1,d0           * Single-word input: return high word from d1
        movem.l (sp)+,d2-d5
        rts

* BNWORD32
* lbnMulSub1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
*
* Multiplies the len-word number at "in" by the single word k and subtracts
* the product from the len words at "out".  The high-order word (product
* high half plus accumulated borrows) that was not subtracted is returned
* in D0.
*
* Register use is the same as in lbnMulN1_32: d0 and d1 alternate as the
* carry word, d4 holds 0 for addx, d5 is the loop counter.
        SECTION S_lbnMulSub1_32,,"code"
        XDEF    _lbnMulSub1_32
_lbnMulSub1_32:
        movem.l d2-d5,-(sp)     * 16 bytes of extra data
        moveq.l #0,d4
        move.l  20(sp),a4       * out
        move.l  24(sp),a0       * in
        move.w  28(sp),d5       * len
        move.l  30(sp),d2       * k
**      move.w  30(sp),d5       * len
**      move.l  32(sp),d2       * k

        move.l  (a0)+,d3        * First multiply
        mulu.l  d2,d1:d3        * dc.w 0x4c02, 0x3401
        sub.l   d3,(a4)+        * out[0] -= low half
        addx.l  d4,d1           * Fold borrow bit into high word

        subq.w  #1,d5           * Setup for loop unrolling
        lsr.w   #1,d5           * d5 = remaining words / 2, C = odd word left
        bcs.s   ms32_even       * Odd remainder: enter at the second half
        beq.s   ms32_short      * Nothing remaining: return high word

        subq.w  #1,d5           * Set up software pipeline properly
        move.l  d1,d0           * First half of the loop expects carry in d0

ms32_loop:
        move.l  (a0)+,d3
        mulu.l  d2,d1:d3        * dc.w 0x4c02, 0x3401
        add.l   d0,d3           * Add carry word from previous multiply
        addx.l  d4,d1
        sub.l   d3,(a4)+        * Subtract from out
        addx.l  d4,d1           * Borrow out of the subtract
ms32_even:

        move.l  (a0)+,d3
        mulu.l  d2,d0:d3        * dc.w 0x4c02, 0x3400
        add.l   d1,d3           * Add carry word from previous multiply
        addx.l  d4,d0
        sub.l   d3,(a4)+        * Subtract from out
        addx.l  d4,d0           * Borrow out of the subtract

        dbra    d5,ms32_loop

        movem.l (sp)+,d2-d5     * Final carry word is already in d0
        rts

ms32_short:
        move.l  d1,d0           * Single-word input: return high word from d1
        movem.l (sp)+,d2-d5
        rts


* BNWORD32 lbnDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
*
* Divides the 64-bit value nh:nl by d.  The 32-bit quotient is stored
* through q and the remainder is returned in D0.
* NOTE(review): presumably callers guarantee d != 0 and nh < d so that the
* quotient fits in 32 bits; nothing here checks for it - confirm at callers.
        SECTION S_lbnDiv21_32,,"code"
        XDEF    _lbnDiv21_32
_lbnDiv21_32:
        move.l  8(sp),d0        * nh (high half of dividend)
        move.l  12(sp),d1       * nl (low half of dividend)
        move.l  4(sp),a0        * q
        divu.l  16(sp),d0:d1    * dc.w 0x4c6f, 0x1400, 16
        move.l  d1,(a0)         * *q = quotient; remainder stays in d0
        rts

* unsigned lbnModQ_32(BNWORD32 const *n, unsigned len, unsigned d)
*
* Returns, in D0, the remainder of the len-word number at n divided by the
* small divisor d.  Words are processed from most significant (n[len-1])
* down to least significant, feeding each remainder in as the high half of
* the next dividend.
*
* d2 is saved on the stack and d3 is parked in a4 (which the calling
* convention treats as volatile).  After the push of d2 the arguments are
* at 12(sp) = len and 14(sp) = d (16-bit ints).
        SECTION S_lbnModQ_32,,"code"
        XDEF    _lbnModQ_32
_lbnModQ_32:
        move.l  4(sp),a0        * n
        move.l  d2,-(sp)
        move.l  d3,a4           * Save d3 in a4
        moveq.l #0,d1           * Clear high halves so the word loads
        moveq.l #0,d2           * below are zero-extended
        move.w  12(sp),d1       * len
        move.w  14(sp),d2       * d
**      move.l  12(sp),d1       * len
**      move.l  16(sp),d2       * d
        lea     -4(a0,d1.L*4),a0        * dc.w 0x41f0, 0x1cfc

* First time, divide 32/32 - may be faster than 64/32
        move.l  (a0),d3         * Most significant word
        divul.l d2,d0:d3        * dc.w 0x4c02, 0x3000
        subq.w  #2,d1           * dbra counter for the len-1 remaining words
        bmi     mq32_done       * Only one word: remainder is already in d0

mq32_loop:
        move.l  -(a0),d3        * Next lower word
        divu.l  d2,d0:d3        * dc.w 0x4c02,0x3400
        dbra    d1,mq32_loop

mq32_done:
        move.l  (sp)+,d2
        move.l  a4,d3           * Restore d3
        rts

        end
