/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 *
 *  This PA-RISC 2.0 function computes the product of two unsigned integers,
 *  and adds the result to a previously computed integer.  The multiplicand
 *  is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
 *  memory in little-double-wordian order.  The multiplier is an unsigned
 *  64-bit integer.  The previously computed integer to which the product is
 *  added is located in the result ("res") area, and is assumed to be a
 *  576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
 *  in little-double-wordian order.  This value normally will be the result
 *  of a previously computed nine doubleword result.  It is not necessary
 *  to pad the multiplicand with an additional 64-bit zero doubleword.
 *
 *  Multiplicand, multiplier, and addend ideally should be aligned at
 *  16-byte boundaries for best performance.  The code will function
 *  correctly for alignment at eight-byte boundaries which are not 16-byte
 *  boundaries, but the execution may be slightly slower due to even/odd
 *  bank conflicts on PA-RISC 8000 processors.
 *
 *  This function is designed to accept the same calling sequence as Bill
 *  Ackerman's "maxpy_little" function.  The carry from the ninth doubleword
 *  of the result is written to the tenth word of the result, as is done by
 *  Bill Ackerman's function.  The final carry also is returned as an
 *  integer, which may be ignored.  The function prototype may be either
 *  of the following:
 *
 *      void multacc512( int l, chunk* m, const chunk* a, chunk* res );
 *  or
 *      int multacc512( int l, chunk* m, const chunk* a, chunk* res );
 *
 *  where:  "l" originally denoted vector lengths.  This parameter is
 *      ignored.  This function always assumes a multiplicand length of
 *      512 bits (eight doublewords), and addend and result lengths of
 *      576 bits (nine doublewords).
 *
 *      "m" is a pointer to the doubleword multiplier, ideally aligned
 *      on a 16-byte boundary.
 *
 *      "a" is a pointer to the eight-doubleword multiplicand, stored
 *      in little-double-wordian order, and ideally aligned on a 16-byte
 *      boundary.
 *
 *      "res" is a pointer to the nine doubleword addend, and to the
 *      nine-doubleword product computed by this function.  The result
 *      also is stored in little-double-wordian order, and ideally is
 *      aligned on a 16-byte boundary.  It is expected that the alignment
 *      of the "res" area may alternate between even/odd doubleword
 *      boundaries for successive calls for 512-bit x 512-bit
 *      multiplications.
 *
 *  The code for this function has been scheduled to use the parallelism
 *  of the PA-RISC 8000 series microprocessors as well as the author was
 *  able.  Comments and/or suggestions for improvement are welcomed.
 *
 *  The code is "64-bit safe".  This means it may be called in either
 *  the 32ILP context or the 64LP context.  All 64-bits of registers are
 *  saved and restored.
 *
 *  This code is self-contained.  It requires no other header files in order
 *  to compile and to be linkable on a PA-RISC 2.0 machine.  Symbolic
 *  definitions for registers and stack offsets are included within this
 *  one source file.
 *
 *  This is a leaf routine.  As such, minimal use is made of the stack area.
 *  Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
 *  general registers, and 128 bytes are used to move intermediate products
 *  from the floating-point registers to the general registers.  Stack
 *  protocols assure proper alignment of these areas.
 *
 */


/* ====================================================================*/
/*  symbolic definitions for PA-RISC registers                         */
/*  in the MIPS style, avoids lots of case shifts                      */
/*  assignments (except t4) preserve register number parity            */
/* ====================================================================*/

#define zero    %r0         /* permanent zero */
#define t5      %r1         /* temp register, altered by addil */

#define rp      %r2         /* return pointer */

#define s1      %r3         /* callee saves register*/
#define s0      %r4         /* callee saves register*/
#define s3      %r5         /* callee saves register*/
#define s2      %r6         /* callee saves register*/
#define s5      %r7         /* callee saves register*/
#define s4      %r8         /* callee saves register*/
#define s7      %r9         /* callee saves register*/
#define s6      %r10        /* callee saves register*/

#define t1      %r19        /* caller saves register*/
#define t0      %r20        /* caller saves register*/
#define t3      %r21        /* caller saves register*/
#define t2      %r22        /* caller saves register*/

#define a3      %r23        /* fourth argument register, high word */
#define a2      %r24        /* third argument register, low word*/
#define a1      %r25        /* second argument register, high word*/
#define a0      %r26        /* first argument register, low word*/

#define v0      %r28        /* high order return value*/
#define v1      %r29        /* low order return value*/

#define sp      %r30        /* stack pointer*/
#define t4      %r31        /* temporary register   */

#define fa0     %fr4        /* first argument register*/
#define fa1     %fr5        /* second argument register*/
#define fa2     %fr6        /* third argument register*/
#define fa3     %fr7        /* fourth argument register*/

#define fa0r    %fr4R       /* first argument register*/
#define fa1r    %fr5R       /* second argument register*/
#define fa2r    %fr6R       /* third argument register*/
#define fa3r    %fr7R       /* fourth argument register*/

#define ft0     %fr8        /* caller saves register*/
#define ft1     %fr9        /* caller saves register*/
#define ft2     %fr10       /* caller saves register*/
#define ft3     %fr11       /* caller saves register*/

#define ft0r    %fr8R       /* caller saves register*/
#define ft1r    %fr9R       /* caller saves register*/
#define ft2r    %fr10R      /* caller saves register*/
#define ft3r    %fr11R      /* caller saves register*/

#define ft4     %fr22       /* caller saves register*/
#define ft5     %fr23       /* caller saves register*/
#define ft6     %fr24       /* caller saves register*/
#define ft7     %fr25       /* caller saves register*/
#define ft8     %fr26       /* caller saves register*/
#define ft9     %fr27       /* caller saves register*/
#define ft10    %fr28       /* caller saves register*/
#define ft11    %fr29       /* caller saves register*/
#define ft12    %fr30       /* caller saves register*/
#define ft13    %fr31       /* caller saves register*/

#define ft4r    %fr22R      /* caller saves register*/
#define ft5r    %fr23R      /* caller saves register*/
#define ft6r    %fr24R      /* caller saves register*/
#define ft7r    %fr25R      /* caller saves register*/
#define ft8r    %fr26R      /* caller saves register*/
#define ft9r    %fr27R      /* caller saves register*/
#define ft10r   %fr28R      /* caller saves register*/
#define ft11r   %fr29R      /* caller saves register*/
#define ft12r   %fr30R      /* caller saves register*/
#define ft13r   %fr31R      /* caller saves register*/



/* ================================================================== */
/*  functional definitions for PA-RISC registers                      */
/* ================================================================== */

/* general registers */

#define T1      a0          /* temp, (length parameter ignored) */

#define pM      a1          /* -> 64-bit multiplier */
#define T2      a1          /* temp, (after fetching multiplier) */

#define pA      a2          /* -> multiplicand vector (8 64-bit words) */
#define T3      a2          /* temp, (after fetching multiplicand) */

#define pR      a3          /* -> addend vector (9 64-bit doublewords),
                                  result vector (9 64-bit doublewords);
                                  a carry out of doubleword 8 is added
                                  into the doublewords that follow */

#define S0      s0          /* callee saves summand registers */
#define S1      s1
#define S2      s2
#define S3      s3
#define S4      s4
#define S5      s5
#define S6      s6
#define S7      s7

#define S8      v0          /* caller saves summand registers */
#define S9      v1
#define S10     t0
#define S11     t1
#define S12     t2
#define S13     t3
#define S14     t4
#define S15     t5



/* floating-point registers */

#define M       fa0         /* multiplier double word */
#define MR      fa0r        /* low order half of multiplier double word */
#define ML      fa0         /* high order half of multiplier double word */

#define A0      fa2         /* multiplicand double word 0 */
#define A0R     fa2r        /* low order half of multiplicand double word */
#define A0L     fa2         /* high order half of multiplicand double word */

#define A1      fa3         /* multiplicand double word 1 */
#define A1R     fa3r        /* low order half of multiplicand double word */
#define A1L     fa3         /* high order half of multiplicand double word */

#define A2      ft0         /* multiplicand double word 2 */
#define A2R     ft0r        /* low order half of multiplicand double word */
#define A2L     ft0         /* high order half of multiplicand double word */

#define A3      ft1         /* multiplicand double word 3 */
#define A3R     ft1r        /* low order half of multiplicand double word */
#define A3L     ft1         /* high order half of multiplicand double word */

#define A4      ft2         /* multiplicand double word 4 */
#define A4R     ft2r        /* low order half of multiplicand double word */
#define A4L     ft2         /* high order half of multiplicand double word */

#define A5      ft3         /* multiplicand double word 5 */
#define A5R     ft3r        /* low order half of multiplicand double word */
#define A5L     ft3         /* high order half of multiplicand double word */

#define A6      ft4         /* multiplicand double word 6 */
#define A6R     ft4r        /* low order half of multiplicand double word */
#define A6L     ft4         /* high order half of multiplicand double word */

#define A7      ft5         /* multiplicand double word 7 */
#define A7R     ft5r        /* low order half of multiplicand double word */
#define A7L     ft5         /* high order half of multiplicand double word */

/* P0..P7 receive the 64-bit results of xmpyu (32-bit x 32-bit products). */
/* Each register is reused for several different partial products.       */

#define P0      ft6         /* product register 0 */
#define P1      ft7         /* product register 1 */
#define P2      ft8         /* product register 2 */
#define P3      ft9         /* product register 3 */
#define P4      ft10        /* product register 4 */
#define P5      ft11        /* product register 5 */
#define P6      ft12        /* product register 6 */
#define P7      ft13        /* product register 7 */




/* ====================================================================== */
/*  symbolic definitions for HP-UX stack offsets                          */
/*  symbolic definitions for memory NOPs                                  */
/* ====================================================================== */

#define ST_SZ   192         /* stack area total size */

#define SV0     -192(sp)    /* general register save area */
#define SV1     -184(sp)
#define SV2     -176(sp)
#define SV3     -168(sp)
#define SV4     -160(sp)
#define SV5     -152(sp)
#define SV6     -144(sp)
#define SV7     -136(sp)

#define XF0     -128(sp)    /* data transfer area */
#define XF1     -120(sp)    /* for floating-pt to integer regs */
#define XF2     -112(sp)
#define XF3     -104(sp)
#define XF4     -96(sp)
#define XF5     -88(sp)
#define XF6     -80(sp)
#define XF7     -72(sp)
#define XF8     -64(sp)
#define XF9     -56(sp)
#define XF10    -48(sp)
#define XF11    -40(sp)
#define XF12    -32(sp)
#define XF13    -24(sp)
#define XF14    -16(sp)
#define XF15    -8(sp)

#define mnop    proberi (sp),3,zero     /* memory NOP */




/* ====================================================================== */
/*  assembler formalities                                                 */
/* ====================================================================== */

#ifdef __LP64__
        .level  2.0W
#else
        .level  2.0
#endif
        .space  $TEXT$
        .subspa $CODE$
        .align  16

/* ====================================================================== */
/*  here to compute 64-bit x 512-bit product + 576-bit addend             */
/*                                                                        */
/*  Outline of the body below:                                            */
/*    - each multiplicand doubleword Ai is multiplied by M as four        */
/*      32 x 32 xmpyu products (MR*AiR, ML*AiL and the two cross terms);  */
/*    - products are moved from floating-point to general registers       */
/*      through the XFn stack slots (fstd followed by ldd);               */
/*    - the two cross terms are summed, the sum is split at bit 32 with   */
/*      depd,z / shrpd and folded into the right and left doublewords;    */
/*    - the partial products are chained with add / add,dc and added to   */
/*      the nine addend doublewords at pR, which are stored back.         */
/*  The instruction order is hand scheduled and must not be rearranged.   */
/* ====================================================================== */

multacc512
        .PROC
        .CALLINFO
        .ENTRY
        fldd    0(pM),M             ; multiplier double word
        ldo     ST_SZ(sp),sp        ; push stack

        fldd    0(pA),A0            ; multiplicand double word 0
        std     S1,SV1              ; save s1

        fldd    16(pA),A2           ; multiplicand double word 2
        std     S3,SV3              ; save s3

        fldd    32(pA),A4           ; multiplicand double word 4
        std     S5,SV5              ; save s5

        fldd    48(pA),A6           ; multiplicand double word 6
        std     S7,SV7              ; save s7


        std     S0,SV0              ; save s0
        fldd    8(pA),A1            ; multiplicand double word 1
        xmpyu   MR,A0L,P0           ; A0 cross 32-bit word products
        xmpyu   ML,A0R,P2

        std     S2,SV2              ; save s2
        fldd    24(pA),A3           ; multiplicand double word 3
        xmpyu   MR,A2L,P4           ; A2 cross 32-bit word products
        xmpyu   ML,A2R,P6

        std     S4,SV4              ; save s4
        fldd    40(pA),A5           ; multiplicand double word 5

        std     S6,SV6              ; save s6
        fldd    56(pA),A7           ; multiplicand double word 7


        fstd    P0,XF0              ; MR * A0L
        xmpyu   MR,A0R,P0           ; A0 right 32-bit word product
        xmpyu   MR,A1L,P1           ; A1 cross 32-bit word product

        fstd    P2,XF2              ; ML * A0R
        xmpyu   ML,A0L,P2           ; A0 left 32-bit word product
        xmpyu   ML,A1R,P3           ; A1 cross 32-bit word product

        fstd    P4,XF4              ; MR * A2L
        xmpyu   MR,A2R,P4           ; A2 right 32-bit word product
        xmpyu   MR,A3L,P5           ; A3 cross 32-bit word product

        fstd    P6,XF6              ; ML * A2R
        xmpyu   ML,A2L,P6           ; A2 parallel 32-bit word product
        xmpyu   ML,A3R,P7           ; A3 cross 32-bit word product


        ldd     XF0,S0              ; MR * A0L
        fstd    P1,XF1              ; MR * A1L

        ldd     XF2,S2              ; ML * A0R
        fstd    P3,XF3              ; ML * A1R

        ldd     XF4,S4              ; MR * A2L
        fstd    P5,XF5              ; MR * A3L
        xmpyu   MR,A1R,P1           ; A1 parallel 32-bit word products
        xmpyu   ML,A1L,P3

        ldd     XF6,S6              ; ML * A2R
        fstd    P7,XF7              ; ML * A3R
        xmpyu   MR,A3R,P5           ; A3 parallel 32-bit word products
        xmpyu   ML,A3L,P7


        fstd    P0,XF0              ; MR * A0R
        ldd     XF1,S1              ; MR * A1L
        nop
        add     S0,S2,T1            ; A0 cross product sum

        fstd    P2,XF2              ; ML * A0L
        ldd     XF3,S3              ; ML * A1R
        add,dc  zero,zero,S0        ; A0 cross product sum carry
        depd,z  T1,31,32,S2         ; A0 cross product sum << 32

        fstd    P4,XF4              ; MR * A2R
        ldd     XF5,S5              ; MR * A3L
        shrpd   S0,T1,32,S0         ; A0 carry | cross product sum >> 32
        add     S4,S6,T3            ; A2 cross product sum

        fstd    P6,XF6              ; ML * A2L
        ldd     XF7,S7              ; ML * A3R
        add,dc  zero,zero,S4        ; A2 cross product sum carry
        depd,z  T3,31,32,S6         ; A2 cross product sum << 32


        ldd     XF0,S8              ; MR * A0R
        fstd    P1,XF1              ; MR * A1R
        xmpyu   MR,A4L,P0           ; A4 cross 32-bit word product
        xmpyu   MR,A5L,P1           ; A5 cross 32-bit word product

        ldd     XF2,S10             ; ML * A0L
        fstd    P3,XF3              ; ML * A1L
        xmpyu   ML,A4R,P2           ; A4 cross 32-bit word product
        xmpyu   ML,A5R,P3           ; A5 cross 32-bit word product

        ldd     XF4,S12             ; MR * A2R
        fstd    P5,XF5              ; MR * A3R
        xmpyu   MR,A6L,P4           ; A6 cross 32-bit word product
        xmpyu   MR,A7L,P5           ; A7 cross 32-bit word product

        ldd     XF6,S14             ; ML * A2L
        fstd    P7,XF7              ; ML * A3L
        xmpyu   ML,A6R,P6           ; A6 cross 32-bit word product
        xmpyu   ML,A7R,P7           ; A7 cross 32-bit word product


        fstd    P0,XF0              ; MR * A4L
        ldd     XF1,S9              ; MR * A1R
        shrpd   S4,T3,32,S4         ; A2 carry | cross product sum >> 32
        add     S1,S3,T1            ; A1 cross product sum

        fstd    P2,XF2              ; ML * A4R
        ldd     XF3,S11             ; ML * A1L
        add,dc  zero,zero,S1        ; A1 cross product sum carry
        depd,z  T1,31,32,S3         ; A1 cross product sum << 32

        fstd    P4,XF4              ; MR * A6L
        ldd     XF5,S13             ; MR * A3R
        shrpd   S1,T1,32,S1         ; A1 carry | cross product sum >> 32
        add     S5,S7,T3            ; A3 cross product sum

        fstd    P6,XF6              ; ML * A6R
        ldd     XF7,S15             ; ML * A3L
        add,dc  zero,zero,S5        ; A3 cross product sum carry
        depd,z  T3,31,32,S7         ; A3 cross product sum << 32


        shrpd   S5,T3,32,S5         ; A3 carry | cross product sum >> 32
        add     S2,S8,S8            ; M * A0 right doubleword, P0 doubleword

        add,dc  S0,S10,S10          ; M * A0 left doubleword
        add     S3,S9,S9            ; M * A1 right doubleword

        add,dc  S1,S11,S11          ; M * A1 left doubleword
        add     S6,S12,S12          ; M * A2 right doubleword


        ldd     24(pR),S3           ; Addend word 3
        fstd    P1,XF1              ; MR * A5L
        add,dc  S4,S14,S14          ; M * A2 left doubleword
        xmpyu   MR,A5R,P1           ; A5 right 32-bit word product

        ldd     8(pR),S1            ; Addend word 1
        fstd    P3,XF3              ; ML * A5R
        add     S7,S13,S13          ; M * A3 right doubleword
        xmpyu   ML,A5L,P3           ; A5 left 32-bit word product

        ldd     0(pR),S7            ; Addend word 0
        fstd    P5,XF5              ; MR * A7L
        add,dc  S5,S15,S15          ; M * A3 left doubleword
        xmpyu   MR,A7R,P5           ; A7 right 32-bit word product

        ldd     16(pR),S5           ; Addend word 2
        fstd    P7,XF7              ; ML * A7R
        add     S10,S9,S9           ; P1 doubleword
        xmpyu   ML,A7L,P7           ; A7 left 32-bit word products


        ldd     XF0,S0              ; MR * A4L
        fstd    P1,XF9              ; MR * A5R
        add,dc  S11,S12,S12         ; P2 doubleword
        xmpyu   MR,A4R,P0           ; A4 right 32-bit word product

        ldd     XF2,S2              ; ML * A4R
        fstd    P3,XF11             ; ML * A5L
        add,dc  S14,S13,S13         ; P3 doubleword
        xmpyu   ML,A4L,P2           ; A4 left 32-bit word product

        ldd     XF6,S6              ; ML * A6R
        fstd    P5,XF13             ; MR * A7R
        add,dc  zero,S15,T2         ; P4 partial doubleword
        xmpyu   MR,A6R,P4           ; A6 right 32-bit word product

        ldd     XF4,S4              ; MR * A6L
        fstd    P7,XF15             ; ML * A7L
        add     S7,S8,S8            ; R0 + P0, new R0 doubleword
        xmpyu   ML,A6L,P6           ; A6 left 32-bit word product


        fstd    P0,XF0              ; MR * A4R
        ldd     XF7,S7              ; ML * A7R
        add,dc  S1,S9,S9            ; c + R1 + P1, new R1 doubleword

        fstd    P2,XF2              ; ML * A4L
        ldd     XF1,S1              ; MR * A5L
        add,dc  S5,S12,S12          ; c + R2 + P2, new R2 doubleword

        fstd    P4,XF4              ; MR * A6R
        ldd     XF5,S5              ; MR * A7L
        add,dc  S3,S13,S13          ; c + R3 + P3, new R3 doubleword

        fstd    P6,XF6              ; ML * A6L
        ldd     XF3,S3              ; ML * A5R
        add,dc  zero,T2,T2          ; c + partial P4
        add     S0,S2,T1            ; A4 cross product sum


        std     S8,0(pR)            ; save R0
        add,dc  zero,zero,S0        ; A4 cross product sum carry
        depd,z  T1,31,32,S2         ; A4 cross product sum << 32

        std     S9,8(pR)            ; save R1
        shrpd   S0,T1,32,S0         ; A4 carry | cross product sum >> 32
        add     S4,S6,T3            ; A6 cross product sum

        std     S12,16(pR)          ; save R2
        add,dc  zero,zero,S4        ; A6 cross product sum carry
        depd,z  T3,31,32,S6         ; A6 cross product sum << 32


        std     S13,24(pR)          ; save R3
        shrpd   S4,T3,32,S4         ; A6 carry | cross product sum >> 32
        add     S1,S3,T1            ; A5 cross product sum

        ldd     XF0,S8              ; MR * A4R
        add,dc  zero,zero,S1        ; A5 cross product sum carry
        depd,z  T1,31,32,S3         ; A5 cross product sum << 32

        ldd     XF2,S10             ; ML * A4L
        ldd     XF9,S9              ; MR * A5R
        shrpd   S1,T1,32,S1         ; A5 carry | cross product sum >> 32
        add     S5,S7,T3            ; A7 cross product sum

        ldd     XF4,S12             ; MR * A6R
        ldd     XF11,S11            ; ML * A5L
        add,dc  zero,zero,S5        ; A7 cross product sum carry
        depd,z  T3,31,32,S7         ; A7 cross product sum << 32

        ldd     XF6,S14             ; ML * A6L
        ldd     XF13,S13            ; MR * A7R
        shrpd   S5,T3,32,S5         ; A7 carry | cross product sum >> 32
        add     S2,S8,S8            ; M * A4 right doubleword


        ldd     XF15,S15            ; ML * A7L
        add,dc  S0,S10,S10          ; M * A4 left doubleword
        add     S3,S9,S9            ; M * A5 right doubleword

        add,dc  S1,S11,S11          ; M * A5 left doubleword
        add     S6,S12,S12          ; M * A6 right doubleword

        ldd     32(pR),S0           ; Addend word 4
        ldd     40(pR),S1           ; Addend word 5
        add,dc  S4,S14,S14          ; M * A6 left doubleword
        add     S7,S13,S13          ; M * A7 right doubleword

        ldd     48(pR),S2           ; Addend word 6
        ldd     56(pR),S3           ; Addend word 7
        add,dc  S5,S15,S15          ; M * A7 left doubleword
        add     S8,T2,S8            ; P4 doubleword

        ldd     64(pR),S4           ; Addend word 8
        ldd     SV5,s5              ; restore s5
        add,dc  S10,S9,S9           ; P5 doubleword
        add,dc  S11,S12,S12         ; P6 doubleword


        ldd     SV6,s6              ; restore s6
        ldd     SV7,s7              ; restore s7
        add,dc  S14,S13,S13         ; P7 doubleword
        add,dc  zero,S15,S15        ; P8 doubleword

        add     S0,S8,S8            ; new R4 doubleword

        ldd     SV0,s0              ; restore s0
        std     S8,32(pR)           ; save R4
        add,dc  S1,S9,S9            ; new R5 doubleword

        ldd     SV1,s1              ; restore s1
        std     S9,40(pR)           ; save R5
        add,dc  S2,S12,S12          ; new R6 doubleword

        ldd     SV2,s2              ; restore s2
        std     S12,48(pR)          ; save R6
        add,dc  S3,S13,S13          ; new R7 doubleword

        ldd     SV3,s3              ; restore s3
        std     S13,56(pR)          ; save R7
        add,dc  S4,S15,S15          ; new R8 doubleword

        ldd     SV4,s4              ; restore s4
        std     S15,64(pR)          ; save result[8]
        add,dc  zero,zero,v0        ; return carry from R8

/* Carry propagation past result[8].  With no carry, v0 is zero and the   */
/* routine returns without touching result[9].  Otherwise each pass adds  */
/* 1 to the next doubleword and repeats while that sum wraps to zero.     */
/* pR is advanced by 8 in the branch delay slot and again inside the      */
/* loop, so on the first pass 64(pR) (load) and 56(pR) (store) both       */
/* address result[9].  On the carry path v0 ends up holding the last      */
/* doubleword stored (nonzero), not the value 1.                          */

        CMPIB,*= 0,v0,$L0           ; if no overflow, exit
        LDO     8(pR),pR            ; (delay slot) pR += 8

$FINAL1                             ; Final carry propagation
        LDD     64(pR),v0           ; next higher result doubleword
        LDO     8(pR),pR            ; pR += 8
        ADDI    1,v0,v0             ; add the carry
        CMPIB,*= 0,v0,$FINAL1       ; Keep looping if there is a carry.
        STD     v0,56(pR)           ; (delay slot) store incremented doubleword
$L0
        bv      zero(rp)            ; -> caller
        ldo     -ST_SZ(sp),sp       ; pop stack

/* ====================================================================== */
/*  end of module                                                         */
/* ====================================================================== */

/* The bv above has already returned; the bve below is never reached. */

        bve     (rp)
        .EXIT
        nop
        .PROCEND
        .SPACE         $TEXT$
        .SUBSPA        $CODE$
        .EXPORT        multacc512,ENTRY

        .end