dnl ******************************************************************************
dnl   Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl
dnl   This file is part of the ECM Library.
dnl
dnl   The ECM Library is free software; you can redistribute it and/or modify
dnl   it under the terms of the GNU Lesser General Public License as published by
dnl   the Free Software Foundation; either version 3 of the License, or (at your
dnl   option) any later version.
dnl
dnl   The ECM Library is distributed in the hope that it will be useful, but
dnl   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl   License for more details.
dnl
dnl   You should have received a copy of the GNU Lesser General Public License
dnl   along with the ECM Library; see the file COPYING.LIB.  If not, write to
dnl   the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl   MA 02110-1301, USA.
dnl ******************************************************************************

define(C, `
dnl')

C mp_limb_t mulredc8(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y,
C                 const mp_limb_t *m, mp_limb_t inv_m);
C
C Combined 8-limb multiplication and Montgomery reduction (REDC), fully
C unrolled over the inner (j) index.
C
C arguments:
C r3  = ptr to result z least significant limb
C r4  = ptr to input x least significant limb
C r5  = ptr to input y least significant limb
C r6  = ptr to modulus m least significant limb
C r7  = -1/m mod 2^64
C
C final carry returned in r3
C
C r3 and r4 are advanced by the update-form loads/stores (ldu/stdu) while
C the routine runs; r5, r6 and r7 are only read.



include(`config.m4')

	GLOBL GSYM_PREFIX`'mulredc8
	GLOBL .GSYM_PREFIX`'mulredc8

C Function descriptor: code entry point, TOC base, and a zero third word.
	.section ".opd", "aw"
	.align	3
GSYM_PREFIX`'mulredc8:
	.quad	.GSYM_PREFIX`'mulredc8, .TOC.@tocbase, 0
	.size	GSYM_PREFIX`'mulredc8, 24


C Implements multiplication and REDC for two input numbers of 8 words

C The algorithm:
C   (Notation: a:b:c == a * 2^128 + b * 2^64 + c)
C
C i = 0 ;                     /* first pass, tmp[] is not read */
C T1:T0 = x[i]*y[0] ;
C u = (T0*invm) % 2^64 ;
C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */
C for (j = 1; j < len; j++)
C   {
C     cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ;
C        /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */
C     tmp[j-1] = T0;
C   }
C tmp[len-1] = T1 ;
C tmp[len] = cy ; /* cy <= 1 (see note 2) */
C for (i = 1; i < len; i++)
C   {
C     cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ;
C     u = (T0*invm) % 2^64 ;
C     cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */
C     for (j = 1; j < len; j++)
C       {
C         cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ;
C         /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3
C            for j = (len-1), result cy:T1 <= 2*2^64 - 1  (see note 4) */
C         tmp[j-1] = T0;
C       }
C     tmp[len-1] = T1 ;
C     tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */
C   }
C z[0 ... len-1] = tmp[0 ... len-1] ;
C return (tmp[len]) ;
C
C notes:
C
C 1:  m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2,
C     so cy:T1 <= 2*2^64 - 4.
C 2:  For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4
C                  <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2),
C     so cy:T1 <= 2*2^64 - 3. For j > 1,
C     x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1),
C     so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j.
C 3:  m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1,
C     so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4)
C 4:  For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1
C                  <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64
C                  <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2),
C     so cy:T1 <= 3*2^64 - 3. For j > 1,
C     x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1),
C     so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1.
C     For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0.
C     Assume this is true for index i-1, Then
C                x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1
C                  <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64
C                  <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1),
C     so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction.
C
C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11
C                YP = r5, MP = r6, TP = r1 (stack ptr)
C
C Other registers used by the code below:
C   r0      current y[j] or m[j] limb (multiplier operand)
C   r8, r9  low / high half of the current 64x64 product
C   r15     tmp[len] (the top carry word, kept in a register)
C   r16     constant 0, so that "addze rX, r16" extracts the carry bit
C   CTR     outer loop counter (7 iterations for i = 1 ... 7)

C local variables: tmp[0 ... 8] array, having 8+1 8-byte words
C The tmp array needs 8+1 entries, but tmp[8] is stored in
C r15, so only 8 entries are used in the stack.
C
C Stack layout after the prologue (offsets from r1):
C    0 ... 56   tmp[0] ... tmp[7]
C   64          saved r16
C   72          saved r15
C   80          saved r14
C   88          saved r13
C
C NOTE(review): r1 is moved with plain stdu/subi and tmp[0] lives at 0(r1),
C so no back chain word is kept at 0(r1) while the routine runs. The routine
C makes no calls, but confirm this is acceptable for the target ABI and for
C unwinders/debuggers.
C NOTE(review): r13 is used as a scratch register between the save and the
C restore; on ABIs that reserve r13 (e.g. as a thread pointer) anything
C running asynchronously in this window would see a clobbered value. Confirm.


	TEXT
	.align	5	C powerPC 32 byte alignment
.GSYM_PREFIX`'mulredc8:

C ########################################################################
C # i = 0 pass
C #########################################################################

C Pass for j = 0. We need to fetch x[i] from memory and compute the new u
C The saves of r13-r16 are interleaved with the first multiplications.

	ld	r12, 0(r4)		C XI = x[0]
	ld	r0, 0(r5)		C y[0]
	stdu	r13, -8(r1)		C save r13
	mulld	r8, r0, r12		C x[0]*y[0] low half
	stdu	r14, -8(r1)		C save r14
	mulhdu	r9, r0, r12		C x[0]*y[0] high half
	ld	r0, 0(r6)		C m[0]
	mulld	r11, r7, r8		C U = T0*invm mod 2^64
	stdu	r15, -8(r1)		C save r15
	mulld	r13, r0, r11		C T0 = U*m[0] low
	stdu	r16, -8(r1)		C save r16
	li	r16, 0			C set r16 to zero for carry propagation
	subi	r1, r1, 64		C set tmp stack space
	mulhdu	r14, r0, r11		C T1 = U*m[0] high
	ld	r0, 8(r5)		C y[1]
	addc	r8, r8, r13		C low word cancels by choice of U; only the carry is used
	adde	r13, r9, r14		C T0 = initial tmp(0)
	addze	r10, r16		C carry to CY
	C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence
	C CY:T1 <= 2*2^64 - 4

C Pass for j = 1

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 8(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 16(r5)		C y[j+1]
	adde	r13, r9, r14		C add high word with carry to T1
	addze	r10, r16		C carry to CY
	std	r8, 0(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
	C             2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 2

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 16(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 24(r5)		C y[j+1]
	adde	r13, r9, r14		C add high word with carry to T1
	addze	r10, r16		C carry to CY
	std	r8, 8(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
	C             2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 3

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 24(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 32(r5)		C y[j+1]
	adde	r13, r9, r14		C add high word with carry to T1
	addze	r10, r16		C carry to CY
	std	r8, 16(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
	C             2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 4

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 32(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 40(r5)		C y[j+1]
	adde	r13, r9, r14		C add high word with carry to T1
	addze	r10, r16		C carry to CY
	std	r8, 24(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
	C             2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 5

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 40(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 48(r5)		C y[j+1]
	adde	r13, r9, r14		C add high word with carry to T1
	addze	r10, r16		C carry to CY
	std	r8, 32(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
	C             2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 6

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 48(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 56(r5)		C y[j+1]
	adde	r13, r9, r14		C add high word with carry to T1
	addze	r10, r16		C carry to CY
	std	r8, 40(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
	C             2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 7. Don't fetch new data from y[j+1].

	mulld	r8, r0, r12		C x[i]*y[j] low half
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 56(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	adde	r14, r9, r10		C add high word with carry + CY to T1
	C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

	mulld	r8, r0, r11		C U*m[j] low
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	adde	r13, r9, r14		C add high word with carry to T1
	std	r8, 48(r1)		C store tmp[len-2]
	addze	r15, r16		C put carry in r15 (tmp[len] <= 1)
	std	r13, 56(r1)		C store tmp[len-1]


C #########################################################################
C # i > 0 passes
C #########################################################################


	li	r9, 7			C outer loop count
	mtctr	r9

1:

C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory
C and compute the new u

	ldu	r12, 8(r4)		C x[i] (advances the x pointer in r4)
	ld	r0, 0(r5)		C y[0]
	ld	r13, 0(r1)		C tmp[0]
	mulld	r8, r0, r12		C x[i]*y[0] low half
	ld	r14, 8(r1)		C tmp[1]
	mulhdu	r9, r0, r12		C x[i]*y[0] high half
	addc	r13, r8, r13		C T0
	ld	r0, 0(r6)		C m[0]
	mulld	r11, r7, r13		C U = T0*invm mod 2^64
	adde	r14, r9, r14		C T1
	mulld	r8, r0, r11		C U*m[0] low
	addze	r10, r16		C CY
	mulhdu	r9, r0, r11		C U*m[0] high
	ld	r0, 8(r5)		C y[1]
	addc	r8, r8, r13		C result = 0
	adde	r13, r9, r14		C T0, carry pending
	C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1,
	C so cy:T1 <= 3*2^64 - 4

C Pass for j = 1
C The carry left pending by the last adde of the previous pass is consumed
C by the first adde of this pass; no instruction in between alters it.

	ld	r14, 16(r1)		C tmp[j+1]
	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r14, r10		C tmp[j+1] + CY + pending carry
	addze	r10, r16		C carry to CY
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 8(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r10		C add carry to CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 16(r5)		C y[j+1]
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 0(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
	C          <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 2

	ld	r14, 24(r1)		C tmp[j+1]
	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r14, r10		C tmp[j+1] + CY + pending carry
	addze	r10, r16		C carry to CY
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 16(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r10		C add carry to CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 24(r5)		C y[j+1]
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 8(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
	C          <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 3

	ld	r14, 32(r1)		C tmp[j+1]
	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r14, r10		C tmp[j+1] + CY + pending carry
	addze	r10, r16		C carry to CY
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 24(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r10		C add carry to CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 32(r5)		C y[j+1]
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 16(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
	C          <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 4

	ld	r14, 40(r1)		C tmp[j+1]
	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r14, r10		C tmp[j+1] + CY + pending carry
	addze	r10, r16		C carry to CY
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 32(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r10		C add carry to CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 40(r5)		C y[j+1]
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 24(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
	C          <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 5

	ld	r14, 48(r1)		C tmp[j+1]
	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r14, r10		C tmp[j+1] + CY + pending carry
	addze	r10, r16		C carry to CY
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 40(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r10		C add carry to CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 48(r5)		C y[j+1]
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 32(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
	C          <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 6

	ld	r14, 56(r1)		C tmp[j+1]
	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r14, r10		C tmp[j+1] + CY + pending carry
	addze	r10, r16		C carry to CY
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 48(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r10		C add carry to CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	ld	r0, 56(r5)		C y[j+1]
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 40(r1)		C store tmp[j-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
	C          <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 7. Don't fetch new data from y[j+1].
C tmp[len] is taken from r15 instead of being loaded from the stack.

	mulld	r8, r0, r12		C x[i]*y[j] low half
	adde	r14, r15, r10		C T1 = tmp[len] + CY + pending carry
	C since tmp[len] <= 1, T1 <= 3 and carry is zero
	mulhdu	r9, r0, r12		C x[i]*y[j] high half
	ld	r0, 56(r6)		C m[j]
	addc	r13, r8, r13		C add low word to T0
	mulld	r8, r0, r11		C U*m[j] low
	adde	r14, r9, r14		C add high to T1
	addze	r10, r16		C CY
	mulhdu	r9, r0, r11		C U*m[j] high
	addc	r8, r8, r13		C add T0 and low word
	adde	r13, r9, r14		C T1, carry pending
	std	r8, 48(r1)		C store tmp[len-2]
	addze	r15, r10		C store tmp[len] <= 1
	std	r13, 56(r1)		C store tmp[len-1]
	C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64
	C          <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1)

	bdnz	1b

C Copy result from tmp memory to z
C The update-form loads walk r1 up through tmp[1] ... tmp[7], so that after
C the copy r1 points at tmp[7] and the next four ldu instructions pop the
C saved registers r16, r15, r14, r13 in that order.

	ld	r8, 0(r1)
	ldu	r9, 8(r1)
	std	r8, 0(r3)
	stdu	r9, 8(r3)
	ldu	r8, 8(r1)
	ldu	r9, 8(r1)
	stdu	r8, 8(r3)
	stdu	r9, 8(r3)
	ldu	r8, 8(r1)
	ldu	r9, 8(r1)
	stdu	r8, 8(r3)
	stdu	r9, 8(r3)
	ldu	r8, 8(r1)
	ldu	r9, 8(r1)
	stdu	r8, 8(r3)
	stdu	r9, 8(r3)

	mr	r3, r15			C return tmp(len)
	ldu	r16, 8(r1)		C restore r16
	ldu	r15, 8(r1)		C restore r15
	ldu	r14, 8(r1)		C restore r14
	ldu	r13, 8(r1)		C restore r13
	addi	r1, r1, 8		C r1 is back at its value on entry
	blr

	.size	.GSYM_PREFIX`'mulredc8, .-.GSYM_PREFIX`'mulredc8
