/* 32 and 64-bit millicode, original author Hewlett-Packard
   adapted for gcc by Paul Bame <bame@debian.org>
   and Alan Modra <alan@linuxcare.com.au>.

   Copyright (C) 2001-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

#ifdef pa64
	.level	2.0w
#endif

/* Hardware General Registers.  */
r0:	.reg	%r0
r1:	.reg	%r1
r2:	.reg	%r2
r3:	.reg	%r3
r4:	.reg	%r4
r5:	.reg	%r5
r6:	.reg	%r6
r7:	.reg	%r7
r8:	.reg	%r8
r9:	.reg	%r9
r10:	.reg	%r10
r11:	.reg	%r11
r12:	.reg	%r12
r13:	.reg	%r13
r14:	.reg	%r14
r15:	.reg	%r15
r16:	.reg	%r16
r17:	.reg	%r17
r18:	.reg	%r18
r19:	.reg	%r19
r20:	.reg	%r20
r21:	.reg	%r21
r22:	.reg	%r22
r23:	.reg	%r23
r24:	.reg	%r24
r25:	.reg	%r25
r26:	.reg	%r26
r27:	.reg	%r27
r28:	.reg	%r28
r29:	.reg	%r29
r30:	.reg	%r30
r31:	.reg	%r31

/* Hardware Space Registers.  */
sr0:	.reg	%sr0
sr1:	.reg	%sr1
sr2:	.reg	%sr2
sr3:	.reg	%sr3
sr4:	.reg	%sr4
sr5:	.reg	%sr5
sr6:	.reg	%sr6
sr7:	.reg	%sr7

/* Hardware Floating Point Registers.  */
fr0:	.reg	%fr0
fr1:	.reg	%fr1
fr2:	.reg	%fr2
fr3:	.reg	%fr3
fr4:	.reg	%fr4
fr5:	.reg	%fr5
fr6:	.reg	%fr6
fr7:	.reg	%fr7
fr8:	.reg	%fr8
fr9:	.reg	%fr9
fr10:	.reg	%fr10
fr11:	.reg	%fr11
fr12:	.reg	%fr12
fr13:	.reg	%fr13
fr14:	.reg	%fr14
fr15:	.reg	%fr15

/* Hardware Control Registers.  */
cr11:	.reg	%cr11
sar:	.reg	%cr11	/* Shift Amount Register */

/* Software Architecture General Registers.  */
rp:	.reg	r2	/* return pointer */
#ifdef pa64
mrp:	.reg	r2	/* millicode return pointer */
#else
mrp:	.reg	r31	/* millicode return pointer */
#endif
ret0:	.reg	r28	/* return value */
ret1:	.reg	r29	/* return value (high part of double) */
sp:	.reg	r30	/* stack pointer */
dp:	.reg	r27	/* data pointer */
arg0:	.reg	r26	/* argument */
arg1:	.reg	r25	/* argument or high part of double argument */
arg2:	.reg	r24	/* argument */
arg3:	.reg	r23	/* argument or high part of double argument */

/* Software Architecture Space Registers.  */
/*		sr0	; return link from BLE */
sret:	.reg	sr1	/* return value */
sarg:	.reg	sr1	/* argument */
/*		sr4	; PC SPACE tracker */
/*		sr5	; process private data */

/* Frame Offsets (millicode convention!)  Used when calling other
   millicode routines.  Stack unwinding is dependent upon these
   definitions.  */
r31_slot:	.equ	-20	/* "current RP" slot */
sr0_slot:	.equ	-16	/* "static link" slot */
#if defined(pa64)
mrp_slot:	.equ	-16	/* "current RP" slot */
psp_slot:	.equ	-8	/* "previous SP" slot */
#else
mrp_slot:	.equ	-20	/* "current RP" slot (replacing "r31_slot") */
#endif


#define DEFINE(name,value)name:	.EQU	value
#define RDEFINE(name,value)name:	.REG	value
/* When built as "external" millicode (milliext), inter-millicode
   branches and millicode returns use inter-space BE/BLE through sr7/sr0;
   otherwise plain intra-space B/BL/BV forms are used.  */
#ifdef milliext
#define MILLI_BE(lbl)	BE lbl(sr7,r0)
#define MILLI_BEN(lbl)	BE,n lbl(sr7,r0)
#define MILLI_BLE(lbl)	BLE lbl(sr7,r0)
#define MILLI_BLEN(lbl)	BLE,n lbl(sr7,r0)
#define MILLIRETN	BE,n 0(sr0,mrp)
#define MILLIRET	BE 0(sr0,mrp)
#define MILLI_RETN	BE,n 0(sr0,mrp)
#define MILLI_RET	BE 0(sr0,mrp)
#else
#define MILLI_BE(lbl)	B lbl
#define MILLI_BEN(lbl)	B,n lbl
#define MILLI_BLE(lbl)	BL lbl,mrp
#define MILLI_BLEN(lbl)	BL,n lbl,mrp
#define MILLIRETN	BV,n 0(mrp)
#define MILLIRET	BV 0(mrp)
#define MILLI_RETN	BV,n 0(mrp)
#define MILLI_RET	BV 0(mrp)
#endif

#ifdef __STDC__
#define CAT(a,b)	a##b
#else
#define CAT(a,b)	a/**/b
#endif

#ifdef ELF
#define SUBSPA_MILLI	 .section .text
#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits!  .align 16
#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits!  .align 16
#define ATTR_MILLI
#define SUBSPA_DATA	 .section .data
#define ATTR_DATA
#define GLOBAL		 $global$
#define GSYM(sym)	 !sym:
#define LSYM(sym)	 !CAT(.L,sym:)
#define LREF(sym)	 CAT(.L,sym)

#else

#ifdef coff
/* This used to be .milli but since link32 places different named
   sections in different segments millicode ends up a long ways away
   from .text (1meg?).  This way they will be a lot closer.

   The SUBSPA_MILLI_* specify locality sets for certain millicode
   modules in order to ensure that modules that call one another are
   placed close together.
Without locality sets this is unlikely to
   happen because of the Dynamite linker library search algorithm.  We
   want these modules close together so that short calls always reach
   (we don't want to require long calls or use long call stubs).  */

#define SUBSPA_MILLI	 .subspa .text
#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
#define ATTR_MILLI	 .attr code,read,execute
#define SUBSPA_DATA	 .subspa .data
#define ATTR_DATA	 .attr init_data,read,write
#define GLOBAL		 _gp
#else
#define SUBSPA_MILLI	 .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
#define SUBSPA_MILLI_DIV SUBSPA_MILLI
#define SUBSPA_MILLI_MUL SUBSPA_MILLI
#define ATTR_MILLI
#define SUBSPA_DATA	 .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
#define ATTR_DATA
#define GLOBAL		 $global$
#endif
#define SPACE_DATA	 .space $PRIVATE$,spnum=1,sort=16

#define GSYM(sym)	 !sym
#define LSYM(sym)	 !CAT(L$,sym)
#define LREF(sym)	 CAT(L$,sym)
#endif

#ifdef L_dyncall
/* $$dyncall: indirect-call helper.  r22 holds either a code address or
   a function-pointer "plabel" (distinguished by bit 30); for a plabel
   the real target address is loaded from -2(r22) and the new linkage
   table pointer (r19) from 2(r22).  The caller's return address (r2)
   is saved in the frame marker on the inter-space paths.  */
	SUBSPA_MILLI
	ATTR_DATA
GSYM($$dyncall)
	.export $$dyncall,millicode
	.proc
	.callinfo	millicode
	.entry
#ifdef LINUX
	extru,<> %r22,30,1,%r0	; nullify if plabel bit set
	bv,n	%r0(%r22)	; branch to target
	ldw	-2(%r22),%r21	; load address of target
	bv	%r0(%r21)	; branch to the real target
	ldw	2(%r22),%r19	; load new LTP value
#else
	bb,>=,n	%r22,30,LREF(1)	; branch if not plabel address
	ldw	-2(%r22),%r21	; load address of target to r21
	ldsid	(%sr0,%r21),%r1	; get the "space ident" selected by r21
	ldw	2(%r22),%r19	; load new LTP value
	mtsp	%r1,%sr0	; move that space identifier into sr0
	be	0(%sr0,%r21)	; branch to the real target
	stw	%r2,-24(%r30)	; save return address into frame marker
LSYM(1)
	ldsid	(%sr0,%r22),%r1	; get the "space ident" selected by r22
	mtsp	%r1,%sr0	; move that space identifier into sr0
	be	0(%sr0,%r22)	; branch to the target
	stw	%r2,-24(%r30)	; save return address into frame marker
#endif
	.exit
	.procend
#endif

#ifdef L_divI
/* ROUTINES:	$$divI, $$divoI

   Single precision divide for signed binary integers.

   The quotient is truncated towards zero.
   The sign of the quotient is the XOR of the signs of the dividend and
   divisor.
   Divide by zero is trapped.
   Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1   =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:
   .		divisor is zero  (traps with ADDIT,= 0,25,0)
   .		dividend==-2**31 and divisor==-1 and routine is $$divoI
   .		 (traps with ADDO 26,25,0)
   .	Changes memory at the following places:
   .		NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Branches to other millicode routines using BE
   .		$$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
   .
   .	For selected divisors, calls a divide by constant routine written by
   .	Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
   .
   .	The only overflow case is -2**31 divided by -1.
   .	Both routines return -2**31 but only $$divoI traps.
*/

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)	/* r29 */
RDEFINE(temp1,arg0)
	SUBSPA_MILLI_DIV
	ATTR_MILLI
	.import $$divI_2,millicode
	.import $$divI_3,millicode
	.import $$divI_4,millicode
	.import $$divI_5,millicode
	.import $$divI_6,millicode
	.import $$divI_7,millicode
	.import $$divI_8,millicode
	.import $$divI_9,millicode
	.import $$divI_10,millicode
	.import $$divI_12,millicode
	.import $$divI_14,millicode
	.import $$divI_15,millicode
	.export $$divI,millicode
	.export $$divoI,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$divoI)
	comib,=,n  -1,arg1,LREF(negative1)	/* when divisor == -1 */
GSYM($$divI)
	ldo	-1(arg1),temp		/* is there at most one bit set ? */
	and,<>	arg1,temp,r0		/* if not, don't use power of 2 divide */
	addi,>	0,arg1,r0		/* if divisor > 0, use power of 2 divide */
	b,n	LREF(neg_denom)
LSYM(pow2)
	addi,>=	0,arg0,retreg		/* if numerator is negative, add the */
	add	arg0,temp,retreg	/* (denominator -1) to correct for shifts */
	extru,=	arg1,15,16,temp		/* test denominator with 0xffff0000 */
	extrs	retreg,15,16,retreg	/* retreg = retreg >> 16 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 16) */
	ldi	0xcc,temp1		/* setup 0xcc in temp1 */
	extru,=	arg1,23,8,temp		/* test denominator with 0xff00 */
	extrs	retreg,23,24,retreg	/* retreg = retreg >> 8 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 8) */
	ldi	0xaa,temp		/* setup 0xaa in temp */
	extru,=	arg1,27,4,r0		/* test denominator with 0xf0 */
	extrs	retreg,27,28,retreg	/* retreg = retreg >> 4 */
	and,=	arg1,temp1,r0		/* test denominator with 0xcc */
	extrs	retreg,29,30,retreg	/* retreg = retreg >> 2 */
	and,=	arg1,temp,r0		/* test denominator with 0xaa */
	extrs	retreg,30,31,retreg	/* retreg = retreg >> 1 */
	MILLIRETN
LSYM(neg_denom)
	addi,<	0,arg1,r0		/* if arg1 >= 0, it's not power of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg1,temp		/* make denominator positive */
	comb,=,n  arg1,temp,LREF(regular_seq)	/* test against 0x80000000 and 0 */
	ldo	-1(temp),retreg		/* is there at most one bit set ? */
	and,=	temp,retreg,r0		/* if so, the denominator is power of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg0,retreg		/* negate numerator */
	comb,=,n arg0,retreg,LREF(regular_seq)	/* test against 0x80000000 */
	copy	retreg,arg0		/* set up arg0, arg1 and temp */
	copy	temp,arg1		/* before branching to pow2 */
	b	LREF(pow2)
	ldo	-1(arg1),temp
LSYM(regular_seq)
	comib,>>=,n 15,arg1,LREF(small_divisor)
	add,>=	0,arg0,retreg		/* move dividend, if retreg < 0, */
LSYM(normal)
	subi	0,retreg,retreg		/* make it positive */
	sub	0,arg1,temp		/* clear carry, */
					/* negate the divisor */
	ds	0,temp,0		/* set V-bit to the comple- */
					/* ment of the divisor sign */
	add	retreg,retreg,retreg	/* shift msb bit into carry */
	ds	r0,arg1,temp		/* 1st divide step, if no carry */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 2nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 3rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 4th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 5th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 6th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 7th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 8th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 9th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 10th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 11th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 12th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 13th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 14th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 15th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 16th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 17th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 18th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 19th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 20th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 21st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 22nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 23rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 24th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 25th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 26th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 27th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 28th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 29th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 30th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 31st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 32nd divide step, */
	addc	retreg,retreg,retreg	/* shift last retreg bit into retreg */
	xor,>=	arg0,arg1,0		/* get correct sign of quotient */
	sub	0,retreg,retreg		/* based on operand signs */
	MILLIRETN
	nop

LSYM(small_divisor)

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32-bit integers)   We must not be misled
   by "1" bits left in the upper 32 bits.  */
	depd	%r0,31,32,%r25
#endif
	blr,n	arg1,r0
	nop
/* table for divisor == 0,1, ... ,15 */
	addit,=	0,arg1,r0		/* trap if divisor == 0 */
	nop
	MILLIRET			/* divisor == 1 */
	copy	arg0,retreg
	MILLI_BEN($$divI_2)		/* divisor == 2 */
	nop
	MILLI_BEN($$divI_3)		/* divisor == 3 */
	nop
	MILLI_BEN($$divI_4)		/* divisor == 4 */
	nop
	MILLI_BEN($$divI_5)		/* divisor == 5 */
	nop
	MILLI_BEN($$divI_6)		/* divisor == 6 */
	nop
	MILLI_BEN($$divI_7)		/* divisor == 7 */
	nop
	MILLI_BEN($$divI_8)		/* divisor == 8 */
	nop
	MILLI_BEN($$divI_9)		/* divisor == 9 */
	nop
	MILLI_BEN($$divI_10)		/* divisor == 10 */
	nop
	b	LREF(normal)		/* divisor == 11 */
	add,>=	0,arg0,retreg
	MILLI_BEN($$divI_12)		/* divisor == 12 */
	nop
	b	LREF(normal)		/* divisor == 13 */
	add,>=	0,arg0,retreg
	MILLI_BEN($$divI_14)		/* divisor == 14 */
	nop
	MILLI_BEN($$divI_15)		/* divisor == 15 */
	nop

LSYM(negative1)
	sub	0,arg0,retreg		/* result is negation of dividend */
	MILLIRET
	addo	arg0,arg1,r0		/* trap iff dividend==0x80000000 && divisor==-1 */
	.exit
	.procend
	.end
#endif

#ifdef L_divU
/* ROUTINE:	$$divU
   .
   .	Single precision divide for unsigned integers.
   .
   .	Quotient is truncated towards zero.
   .	Traps on divide by zero.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1   =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:
   .		divisor is zero
   .	Changes memory at the following places:
   .		NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Branches to other millicode routines using BE:
   .		$$divU_# for 3,5,6,7,9,10,12,14,15
   .
   .	For selected small divisors calls the special divide by constant
   .	routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)	/* r29 */
RDEFINE(temp1,arg0)
	SUBSPA_MILLI_DIV
	ATTR_MILLI
	.export $$divU,millicode
	.import $$divU_3,millicode
	.import $$divU_5,millicode
	.import $$divU_6,millicode
	.import $$divU_7,millicode
	.import $$divU_9,millicode
	.import $$divU_10,millicode
	.import $$divU_12,millicode
	.import $$divU_14,millicode
	.import $$divU_15,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$divU)
/* The subtract is not nullified since it does no harm and can be used
   by the two cases that branch back to "normal".  */
	ldo	-1(arg1),temp		/* is there at most one bit set ? */
	and,=	arg1,temp,r0		/* if so, denominator is power of 2 */
	b	LREF(regular_seq)
	addit,=	0,arg1,0		/* trap for zero dvr */
	copy	arg0,retreg
	extru,=	arg1,15,16,temp		/* test denominator with 0xffff0000 */
	extru	retreg,15,16,retreg	/* retreg = retreg >> 16 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 16) */
	ldi	0xcc,temp1		/* setup 0xcc in temp1 */
	extru,=	arg1,23,8,temp		/* test denominator with 0xff00 */
	extru	retreg,23,24,retreg	/* retreg = retreg >> 8 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 8) */
	ldi	0xaa,temp		/* setup 0xaa in temp */
	extru,=	arg1,27,4,r0		/* test denominator with 0xf0 */
	extru	retreg,27,28,retreg	/* retreg = retreg >> 4 */
	and,=	arg1,temp1,r0		/* test denominator with 0xcc */
	extru	retreg,29,30,retreg	/* retreg = retreg >> 2 */
	and,=	arg1,temp,r0		/* test denominator with 0xaa */
	extru	retreg,30,31,retreg	/* retreg = retreg >> 1 */
	MILLIRETN
	nop
LSYM(regular_seq)
	comib,>=  15,arg1,LREF(special_divisor)
	subi	0,arg1,temp		/* clear carry, negate the divisor */
	ds	r0,temp,r0		/* set V-bit to 1 */
LSYM(normal)
	add	arg0,arg0,retreg	/* shift msb bit into carry */
	ds	r0,arg1,temp		/* 1st divide step, if no carry */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 2nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 3rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 4th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 5th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 6th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 7th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 8th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 9th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 10th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 11th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 12th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 13th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 14th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 15th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 16th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 17th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 18th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 19th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 20th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 21st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 22nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 23rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 24th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 25th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 26th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 27th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 28th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 29th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 30th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 31st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 32nd divide step, */
	MILLIRET
	addc	retreg,retreg,retreg	/* shift last retreg bit into retreg */

/* Handle the cases where divisor is a small constant or has high bit on.  */
LSYM(special_divisor)
/*	blr	arg1,r0 */
/*	comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */

/* Pratap 8/13/90.  The 815 Stirling chip set has a bug that prevents us from
   generating such a blr, comib sequence.  A problem in nullification.  So I
   rewrote this code.  */

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32-bit unsigned integers)   We must not be misled
   by "1" bits left in the upper 32 bits.  */
	depd	%r0,31,32,%r25
#endif
	comib,>	0,arg1,LREF(big_divisor)
	nop
	blr	arg1,r0
	nop

LSYM(zero_divisor)	/* this label is here to provide external visibility */
	addit,=	0,arg1,0		/* trap for zero dvr */
	nop
	MILLIRET			/* divisor == 1 */
	copy	arg0,retreg
	MILLIRET			/* divisor == 2 */
	extru	arg0,30,31,retreg
	MILLI_BEN($$divU_3)		/* divisor == 3 */
	nop
	MILLIRET			/* divisor == 4 */
	extru	arg0,29,30,retreg
	MILLI_BEN($$divU_5)		/* divisor == 5 */
	nop
	MILLI_BEN($$divU_6)		/* divisor == 6 */
	nop
	MILLI_BEN($$divU_7)		/* divisor == 7 */
	nop
	MILLIRET			/* divisor == 8 */
	extru	arg0,28,29,retreg
	MILLI_BEN($$divU_9)		/* divisor == 9 */
	nop
	MILLI_BEN($$divU_10)		/* divisor == 10 */
	nop
	b	LREF(normal)		/* divisor == 11 */
	ds	r0,temp,r0		/* set V-bit to 1 */
	MILLI_BEN($$divU_12)		/* divisor == 12 */
	nop
	b	LREF(normal)		/* divisor == 13 */
	ds	r0,temp,r0		/* set V-bit to 1 */
	MILLI_BEN($$divU_14)		/* divisor == 14 */
	nop
	MILLI_BEN($$divU_15)		/* divisor == 15 */
	nop

/* Handle the case where the high bit is on in the divisor.
   Compute:	if( dividend>=divisor) quotient=1; else quotient=0;
   Note: dividend>==divisor iff dividend-divisor does not borrow
   and	not borrow iff carry.  */
LSYM(big_divisor)
	sub	arg0,arg1,r0
	MILLIRET
	addc	r0,r0,retreg
	.exit
	.procend
	.end
#endif

#ifdef L_remI
/* ROUTINE:	$$remI

   DESCRIPTION:
   .	$$remI returns the remainder of the division of two signed 32-bit
   .	integers.  The sign of the remainder is the same as the sign of
   .	the dividend.


   INPUT REGISTERS:
   .	arg0 == dividend
   .	arg1 == divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 = destroyed
   .	arg1 = destroyed
   .	ret1 = remainder

   OTHER REGISTERS AFFECTED:
   .	r1   = undefined

   SIDE EFFECTS:
   .
Causes a trap under the following conditions:  DIVIDE BY ZERO
   .	Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable
   .	Does not create a stack frame
   .	Is usable for internal or external millicode

   DISCUSSION:
   .	Calls other millicode routines via mrp:  NONE
   .	Calls other millicode routines:  NONE  */

RDEFINE(tmp,r1)
RDEFINE(retreg,ret1)

	SUBSPA_MILLI
	ATTR_MILLI
	.proc
	.callinfo millicode
	.entry
GSYM($$remI)
GSYM($$remoI)
	.export $$remI,MILLICODE
	.export $$remoI,MILLICODE
	ldo	-1(arg1),tmp		/* is there at most one bit set ? */
	and,<>	arg1,tmp,r0		/* if not, don't use power of 2 */
	addi,>	0,arg1,r0		/* if denominator > 0, use power */
					/* of 2 */
	b,n	LREF(neg_denom)
LSYM(pow2)
	comb,>,n  0,arg0,LREF(neg_num)	/* is numerator < 0 ? */
	and	arg0,tmp,retreg		/* get the result */
	MILLIRETN
LSYM(neg_num)
	subi	0,arg0,arg0		/* negate numerator */
	and	arg0,tmp,retreg		/* get the result */
	subi	0,retreg,retreg		/* negate result */
	MILLIRETN
LSYM(neg_denom)
	addi,<	0,arg1,r0		/* if arg1 >= 0, it's not power */
					/* of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg1,tmp		/* make denominator positive */
	comb,=,n  arg1,tmp,LREF(regular_seq)	/* test against 0x80000000 and 0 */
	ldo	-1(tmp),retreg		/* is there at most one bit set ? */
	and,=	tmp,retreg,r0		/* if not, go to regular_seq */
	b,n	LREF(regular_seq)
	comb,>,n  0,arg0,LREF(neg_num_2)	/* if arg0 < 0, negate it */
	and	arg0,retreg,retreg
	MILLIRETN
LSYM(neg_num_2)
	subi	0,arg0,tmp		/* test against 0x80000000 */
	and	tmp,retreg,retreg
	subi	0,retreg,retreg
	MILLIRETN
LSYM(regular_seq)
	addit,=	0,arg1,0		/* trap if div by zero */
	add,>=	0,arg0,retreg		/* move dividend, if retreg < 0, */
	sub	0,retreg,retreg		/* make it positive */
	sub	0,arg1,tmp		/* clear carry, */
					/* negate the divisor */
	ds	0,tmp,0			/* set V-bit to the comple- */
					/* ment of the divisor sign */
	or	0,0,tmp			/* clear tmp */
	add	retreg,retreg,retreg	/* shift msb bit into carry */
	ds	tmp,arg1,tmp		/* 1st divide step, if no carry */
					/* out, msb of quotient = 0 */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
LSYM(t1)
	ds	tmp,arg1,tmp		/* 2nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 3rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 4th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 5th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 6th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 7th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 8th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 9th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 10th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 11th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 12th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 13th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 14th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 15th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 16th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 17th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 18th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 19th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 20th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 21st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 22nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 23rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 24th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 25th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 26th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 27th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 28th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 29th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 30th divide step */
/* Tail of the signed-remainder routine whose head precedes this chunk:
   the last two divide steps, remainder correction, and sign fixup.  */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 31st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	tmp,arg1,tmp		/* 32nd divide step, */
	addc	retreg,retreg,retreg	/* shift last bit into retreg */
	movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
	add,<	arg1,0,0		/* if arg1 > 0, add arg1 */
	add,tr	tmp,arg1,retreg		/* for correcting remainder tmp */
	sub	tmp,arg1,retreg		/* else add absolute value arg1 */
LSYM(finish)
	add,>=	arg0,0,0		/* set sign of remainder */
	sub	0,retreg,retreg		/* to sign of dividend */
	MILLIRET
	nop
	.exit
	.procend
#ifdef milliext
	.origin 0x00000200
#endif
	.end
#endif

#ifdef L_remU
/* ROUTINE:	$$remU
   .	Single precision divide for remainder with unsigned binary integers.
   .
   .	The remainder must be dividend-(dividend/divisor)*divisor.
   .	Divide by zero is trapped.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  ==	return pc
   .	sr0  ==	return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	remainder

   OTHER REGISTERS AFFECTED:
   .	r1   =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions: DIVIDE BY ZERO
   .	Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Calls other millicode routines using mrp: NONE
   .	Calls other millicode routines: NONE  */


RDEFINE(temp,r1)
RDEFINE(rmndr,ret1)	/* r29 */
	SUBSPA_MILLI
	ATTR_MILLI
	.export $$remU,millicode
	.proc
	.callinfo millicode
	.entry
GSYM($$remU)
	ldo	-1(arg1),temp		/* is there at most one bit set ? */
	and,=	arg1,temp,r0		/* if not, don't use power of 2 */
	b	LREF(regular_seq)
	addit,=	0,arg1,r0		/* trap on div by zero */
	and	arg0,temp,rmndr		/* get the result for power of 2 */
	MILLIRETN
LSYM(regular_seq)
	comib,>=,n 0,arg1,LREF(special_case)
	subi	0,arg1,rmndr		/* clear carry, negate the divisor */
	ds	r0,rmndr,r0		/* set V-bit to 1 */
	add	arg0,arg0,temp		/* shift msb bit into carry */
	ds	r0,arg1,rmndr		/* 1st divide step, if no carry */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 2nd divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 3rd divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 4th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 5th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 6th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 7th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 8th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 9th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 10th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 11th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 12th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 13th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 14th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 15th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 16th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 17th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 18th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 19th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 20th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 21st divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 22nd divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 23rd divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 24th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 25th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 26th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 27th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 28th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 29th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 30th divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 31st divide step */
	addc	temp,temp,temp		/* shift temp with/into carry */
	ds	rmndr,arg1,rmndr	/* 32nd divide step, */
	comiclr,<= 0,rmndr,r0		/* if remainder went negative, */
	add	rmndr,arg1,rmndr	/* correction: add divisor back */
	MILLIRETN
	nop

/* Putting >= on the last DS and deleting COMICLR does not work!  */
LSYM(special_case)
	/* Divisor has its MSB set: remainder is arg0, or arg0-arg1
	   when arg0 >= arg1 (at most one subtraction is needed).  */
	sub,>>=	arg0,arg1,rmndr
	copy	arg0,rmndr
	MILLIRETN
	nop
	.exit
	.procend
	.end
#endif

#ifdef L_div_const
/* ROUTINE:	$$divI_2
   .	$$divI_3	$$divU_3
   .	$$divI_4
   .	$$divI_5	$$divU_5
   .	$$divI_6	$$divU_6
   .	$$divI_7	$$divU_7
   .	$$divI_8
   .	$$divI_9	$$divU_9
   .	$$divI_10	$$divU_10
   .
   .	$$divI_12	$$divU_12
   .
   .	$$divI_14	$$divU_14
   .	$$divI_15	$$divU_15
   .	$$divI_16
   .	$$divI_17	$$divU_17
   .
   .	Divide by selected constants for single precision binary integers.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	mrp  ==	return pc
   .	sr0  ==	return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1   =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions: NONE
   .	Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Calls other millicode routines using mrp: NONE
   .	Calls other millicode routines: NONE  */


/* TRUNCATED DIVISION BY SMALL INTEGERS

   We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
   (with y fixed).

   Let a = floor(z/y), for some choice of z.  Note that z will be
   chosen so that division by z is cheap.

   Let r be the remainder(z/y).  In other words, r = z - ay.

   Now, our method is to choose a value for b such that

   q'(x) = floor((ax+b)/z)

   is equal to q(x) over as large a range of x as possible.
If the 1082 two are equal over a sufficiently large range, and if it is easy to 1083 form the product (ax), and it is easy to divide by z, then we can 1084 perform the division much faster than the general division algorithm. 1085 1086 So, we want the following to be true: 1087 1088 . For x in the following range: 1089 . 1090 . ky <= x < (k+1)y 1091 . 1092 . implies that 1093 . 1094 . k <= (ax+b)/z < (k+1) 1095 1096 We want to determine b such that this is true for all k in the 1097 range {0..K} for some maximum K. 1098 1099 Since (ax+b) is an increasing function of x, we can take each 1100 bound separately to determine the "best" value for b. 1101 1102 (ax+b)/z < (k+1) implies 1103 1104 (a((k+1)y-1)+b < (k+1)z implies 1105 1106 b < a + (k+1)(z-ay) implies 1107 1108 b < a + (k+1)r 1109 1110 This needs to be true for all k in the range {0..K}. In 1111 particular, it is true for k = 0 and this leads to a maximum 1112 acceptable value for b. 1113 1114 b < a+r or b <= a+r-1 1115 1116 Taking the other bound, we have 1117 1118 k <= (ax+b)/z implies 1119 1120 k <= (aky+b)/z implies 1121 1122 k(z-ay) <= b implies 1123 1124 kr <= b 1125 1126 Clearly, the largest range for k will be achieved by maximizing b, 1127 when r is not zero. When r is zero, then the simplest choice for b 1128 is 0. When r is not 0, set 1129 1130 . b = a+r-1 1131 1132 Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) 1133 for all x in the range: 1134 1135 . 0 <= x < (K+1)y 1136 1137 We need to determine what K is. Of our two bounds, 1138 1139 . b < a+(k+1)r is satisfied for all k >= 0, by construction. 1140 1141 The other bound is 1142 1143 . kr <= b 1144 1145 This is always true if r = 0. If r is not 0 (the usual case), then 1146 K = floor((a+r-1)/r), is the maximum value for k. 
1147 1148 Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct 1149 answer for q(x) = floor(x/y) when x is in the range 1150 1151 (0,(K+1)y-1) K = floor((a+r-1)/r) 1152 1153 To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that 1154 the formula for q'(x) yields the correct value of q(x) for all x 1155 representable by a single word in HPPA. 1156 1157 We are also constrained in that computing the product (ax), adding 1158 b, and dividing by z must all be done quickly, otherwise we will be 1159 better off going through the general algorithm using the DS 1160 instruction, which uses approximately 70 cycles. 1161 1162 For each y, there is a choice of z which satisfies the constraints 1163 for (K+1)y >= 2**32. We may not, however, be able to satisfy the 1164 timing constraints for arbitrary y. It seems that z being equal to 1165 a power of 2 or a power of 2 minus 1 is as good as we can do, since 1166 it minimizes the time to do division by z. We want the choice of z 1167 to also result in a value for (a) that minimizes the computation of 1168 the product (ax). This is best achieved if (a) has a regular bit 1169 pattern (so the multiplication can be done with shifts and adds). 1170 The value of (a) also needs to be less than 2**32 so the product is 1171 always guaranteed to fit in 2 words. 1172 1173 In actual practice, the following should be done: 1174 1175 1) For negative x, you should take the absolute value and remember 1176 . the fact so that the result can be negated. This obviously does 1177 . not apply in the unsigned case. 1178 2) For even y, you should factor out the power of 2 that divides y 1179 . and divide x by it. You can then proceed by dividing by the 1180 . odd factor of y. 1181 1182 Here is a table of some odd values of y, and corresponding choices 1183 for z which are "good". 
1184 1185 y z r a (hex) max x (hex) 1186 1187 3 2**32 1 55555555 100000001 1188 5 2**32 1 33333333 100000003 1189 7 2**24-1 0 249249 (infinite) 1190 9 2**24-1 0 1c71c7 (infinite) 1191 11 2**20-1 0 1745d (infinite) 1192 13 2**24-1 0 13b13b (infinite) 1193 15 2**32 1 11111111 10000000d 1194 17 2**32 1 f0f0f0f 10000000f 1195 1196 If r is 1, then b = a+r-1 = a. This simplifies the computation 1197 of (ax+b), since you can compute (x+1)(a) instead. If r is 0, 1198 then b = 0 is ok to use which simplifies (ax+b). 1199 1200 The bit patterns for 55555555, 33333333, and 11111111 are obviously 1201 very regular. The bit patterns for the other values of a above are: 1202 1203 y (hex) (binary) 1204 1205 7 249249 001001001001001001001001 << regular >> 1206 9 1c71c7 000111000111000111000111 << regular >> 1207 11 1745d 000000010111010001011101 << irregular >> 1208 13 13b13b 000100111011000100111011 << irregular >> 1209 1210 The bit patterns for (a) corresponding to (y) of 11 and 13 may be 1211 too irregular to warrant using this method. 1212 1213 When z is a power of 2 minus 1, then the division by z is slightly 1214 more complicated, involving an iterative solution. 1215 1216 The code presented here solves division by 1 through 17, except for 1217 11 and 13. There are algorithms for both signed and unsigned 1218 quantities given. 1219 1220 TIMINGS (cycles) 1221 1222 divisor positive negative unsigned 1223 1224 . 1 2 2 2 1225 . 2 4 4 2 1226 . 3 19 21 19 1227 . 4 4 4 2 1228 . 5 18 22 19 1229 . 6 19 22 19 1230 . 8 4 4 2 1231 . 10 18 19 17 1232 . 12 18 20 18 1233 . 15 16 18 16 1234 . 16 4 4 2 1235 . 17 16 18 16 1236 1237 Now, the algorithm for 7, 9, and 14 is an iterative one. That is, 1238 a loop body is executed until the tentative quotient is 0. The 1239 number of times the loop body is executed varies depending on the 1240 dividend, but is never more than two times. If the dividend is 1241 less than the divisor, then the loop body is not executed at all. 
   Each iteration adds 4 cycles to the timings.

   divisor  positive  negative  unsigned

   .	 7	19+4n	  20+4n	    20+4n	n = number of iterations
   .	 9	21+4n	  22+4n	    21+4n
   .	14	21+4n	  22+4n	    20+4n

   To give an idea of how the number of iterations varies, here is a
   table of dividend versus number of iterations when dividing by 7.

   smallest	largest		required
   dividend	dividend	iterations

   .	 0	      6		     0
   .	 7	0x6ffffff	     1
   0x1000006	0xffffffff	     2

   There is some overlap in the range of numbers requiring 1 and 2
   iterations.  */

RDEFINE(t2,r1)
RDEFINE(x2,arg0)	/* r26 */
RDEFINE(t1,arg1)	/* r25 */
RDEFINE(x1,ret1)	/* r29 */

	SUBSPA_MILLI_DIV
	ATTR_MILLI

	.proc
	.callinfo millicode
	.entry
/* NONE of these routines require a stack frame
   ALL of these routines are unwindable from millicode */

GSYM($$divide_by_constant)
	.export $$divide_by_constant,millicode
/* Provides a "nice" label for the code covered by the unwind descriptor
   for things like gprof.  */

/* DIVISION BY 2 (shift by 1) */
GSYM($$divI_2)
	.export		$$divI_2,millicode
	comclr,>=	arg0,0,0	/* negative dividend? */
	addi		1,arg0,arg0	/* bias so the shift rounds toward 0 */
	MILLIRET
	extrs		arg0,30,31,ret1	/* arithmetic shift right by 1 */


/* DIVISION BY 4 (shift by 2) */
GSYM($$divI_4)
	.export		$$divI_4,millicode
	comclr,>=	arg0,0,0	/* negative dividend? */
	addi		3,arg0,arg0	/* bias so the shift rounds toward 0 */
	MILLIRET
	extrs		arg0,29,30,ret1	/* arithmetic shift right by 2 */


/* DIVISION BY 8 (shift by 3) */
GSYM($$divI_8)
	.export		$$divI_8,millicode
	comclr,>=	arg0,0,0	/* negative dividend? */
	addi		7,arg0,arg0	/* bias so the shift rounds toward 0 */
	MILLIRET
	extrs		arg0,28,29,ret1	/* arithmetic shift right by 3 */

/* DIVISION BY 16 (shift by 4) */
GSYM($$divI_16)
	.export		$$divI_16,millicode
	comclr,>=	arg0,0,0	/* negative dividend? */
	addi		15,arg0,arg0	/* bias so the shift rounds toward 0 */
	MILLIRET
	extrs		arg0,27,28,ret1	/* arithmetic shift right by 4 */

/****************************************************************************
*
*	DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
*
*	includes 3,5,15,17 and also 6,10,12
*
****************************************************************************/

/* DIVISION BY 3 (use z = 2**32; a = 55555555) */

GSYM($$divI_3)
	.export		$$divI_3,millicode
	comb,<,N	x2,0,LREF(neg3)

	addi		1,x2,x2		/* this cannot overflow	*/
	extru		x2,1,2,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,0,x1

LSYM(neg3)
	subi		1,x2,x2		/* this cannot overflow	*/
	extru		x2,1,2,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_3)
	.export		$$divU_3,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,30,t1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,t1,x1

/* DIVISION BY 5 (use z = 2**32; a = 33333333) */

GSYM($$divI_5)
	.export		$$divI_5,millicode
	comb,<,N	x2,0,LREF(neg5)

	addi		3,x2,t1		/* this cannot overflow	*/
	sh1add		x2,t1,x2	/* multiply by 3 to get started */
	b		LREF(pos)
	addc		0,0,x1

LSYM(neg5)
	sub		0,x2,x2		/* negate x2 */
	addi		1,x2,x2		/* this cannot overflow	*/
	shd		0,x2,31,x1	/* get top bit (can be 1) */
	sh1add		x2,x2,x2	/* multiply by 3 to get started */
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_5)
	.export		$$divU_5,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,31,t1	/* multiply by 3 to get started */
	sh1add		x2,x2,x2
	b		LREF(pos)
	addc		t1,x1,x1

/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
GSYM($$divI_6)
	.export		$$divI_6,millicode
	comb,<,N	x2,0,LREF(neg6)
	extru		x2,30,31,x2	/* divide by 2 */
	addi		5,x2,t1		/* compute 5*(x2+1) = 5*x2+5 */
	sh2add		x2,t1,x2	/* multiply by 5 to get started */
	b		LREF(pos)
	addc		0,0,x1

LSYM(neg6)
	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI */
	extru		x2,30,31,x2
	shd		0,x2,30,x1
	sh2add		x2,x2,x2	/* multiply by 5 to get started */
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_6)
	.export		$$divU_6,millicode
	extru		x2,30,31,x2	/* divide by 2 */
	addi		1,x2,x2		/* cannot carry */
	shd		0,x2,30,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,0,x1

/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
GSYM($$divU_10)
	.export		$$divU_10,millicode
	extru		x2,30,31,x2	/* divide by 2 */
	addi		3,x2,t1		/* compute 3*(x2+1) = (3*x2)+3 */
	sh1add		x2,t1,x2	/* multiply by 3 to get started */
	addc		0,0,x1
/* Shared tail: multiply the double-word <x1,x2> by 0x11 * 0x101 * 0x10001
   = 0x11111111; the quotient ends up in the high word x1 (ret1).  */
LSYM(pos)
	shd		x1,x2,28,t1	/* multiply by 0x11 */
	shd		x2,0,28,t2
	add		x2,t2,x2
	addc		x1,t1,x1
LSYM(pos_for_17)
	shd		x1,x2,24,t1	/* multiply by 0x101 */
	shd		x2,0,24,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,16,t1	/* multiply by 0x10001 */
	shd		x2,0,16,t2
	add		x2,t2,x2
	MILLIRET
	addc		x1,t1,x1

GSYM($$divI_10)
	.export		$$divI_10,millicode
	comb,<		x2,0,LREF(neg10)
	copy		0,x1
	extru		x2,30,31,x2	/* divide by 2 */
	addib,TR	1,x2,LREF(pos)	/* add 1 (cannot overflow) */
	sh1add		x2,x2,x2	/* multiply by 3 to get started */

LSYM(neg10)
	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI */
	extru		x2,30,31,x2
	sh1add		x2,x2,x2	/* multiply by 3 to get started */
/* Shared tail for negative dividends: same 0x11111111 multiply as
   LREF(pos), but the result is negated before returning.  */
LSYM(neg)
	shd		x1,x2,28,t1	/* multiply by 0x11 */
	shd		x2,0,28,t2
	add		x2,t2,x2
	addc		x1,t1,x1
LSYM(neg_for_17)
	shd		x1,x2,24,t1	/* multiply by 0x101 */
	shd		x2,0,24,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,16,t1	/* multiply by 0x10001 */
	shd		x2,0,16,t2
	add		x2,t2,x2
	addc		x1,t1,x1
	MILLIRET
	sub		0,x1,x1

/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
GSYM($$divI_12)
	.export		$$divI_12,millicode
	comb,<		x2,0,LREF(neg12)
	copy		0,x1
	extru		x2,29,30,x2	/* divide by 4 */
	addib,tr	1,x2,LREF(pos)	/* compute 5*(x2+1) = 5*x2+5 */
	sh2add		x2,x2,x2	/* multiply by 5 to get started */

LSYM(neg12)
	subi		4,x2,x2		/* negate, divide by 4, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI */
	extru		x2,29,30,x2
	b		LREF(neg)
	sh2add		x2,x2,x2	/* multiply by 5 to get started */

GSYM($$divU_12)
	.export		$$divU_12,millicode
	extru		x2,29,30,x2	/* divide by 4 */
	addi		5,x2,t1		/* cannot carry */
	sh2add		x2,t1,x2	/* multiply by 5 to get started */
	b		LREF(pos)
	addc		0,0,x1

/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
GSYM($$divI_15)
	.export		$$divI_15,millicode
	comb,<		x2,0,LREF(neg15)
	copy		0,x1
	addib,tr	1,x2,LREF(pos)+4 /* join shared tail past 1st insn */
	shd		x1,x2,28,t1

LSYM(neg15)
	b		LREF(neg)
	subi		1,x2,x2

GSYM($$divU_15)
	.export		$$divU_15,millicode
	addi		1,x2,x2		/* this CAN overflow */
	b		LREF(pos)
	addc		0,0,x1

/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
GSYM($$divI_17)
	.export		$$divI_17,millicode
	comb,<,n	x2,0,LREF(neg17)
	addi		1,x2,x2		/* this cannot overflow */
	shd		0,x2,28,t1	/* multiply by 0xf to get started */
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(pos_for_17)
	subb		t1,0,x1

LSYM(neg17)
	subi		1,x2,x2		/* this cannot overflow */
	shd		0,x2,28,t1	/* multiply by 0xf to get started */
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(neg_for_17)
	subb		t1,0,x1

GSYM($$divU_17)
	.export		$$divU_17,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,28,t1	/* multiply by 0xf to get started */
LSYM(u17)
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(pos_for_17)
	subb		t1,x1,x1


/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
   includes 7,9 and also 14


   z = 2**24-1
   r = z mod x = 0

   so choose b = 0

   Also, in order to divide by z = 2**24-1, we approximate by dividing
   by (z+1) = 2**24 (which is easy), and then correcting.

   (ax) = (z+1)q' + r
   .	= zq' + (q'+r)

   So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
   Then the true remainder of (ax)/z is (q'+r).  Repeat the process
   with this new remainder, adding the tentative quotients together,
   until a tentative quotient is 0 (and then we are done).  There is
   one last correction to be done.  It is possible that (q'+r) = z.
   If so, then (q'+r)/(z+1) = 0 and it looks like we are done.  But,
   in fact, we need to add 1 more to the quotient.  Now, it turns
   out that this happens if and only if the original value x is
   an exact multiple of y.  So, to avoid a three instruction test at
   the end, instead use 1 instruction to add 1 to x at the beginning.  */

/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
GSYM($$divI_7)
	.export		$$divI_7,millicode
	comb,<,n	x2,0,LREF(neg7)
LSYM(7)
	addi		1,x2,x2		/* cannot overflow */
	shd		0,x2,29,x1
	sh3add		x2,x2,x2
	addc		x1,0,x1
LSYM(pos7)
	shd		x1,x2,26,t1
	shd		x2,0,26,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,20,t1
	shd		x2,0,20,t2
	add		x2,t2,x2
	addc		x1,t1,t1

	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/

	copy		0,x1
	shd,=		t1,x2,24,t1	/* tentative quotient */
LSYM(1)
	addb,tr		t1,x1,LREF(2)	/* add to previous quotient */
	extru		x2,31,24,x2	/* new remainder (unadjusted) */

	MILLIRETN

LSYM(2)
	addb,tr		t1,x2,LREF(1)	/* adjust remainder */
	extru,=		x2,7,8,t1	/* new quotient */

LSYM(neg7)
	subi		1,x2,x2		/* negate x2 and add 1 */
LSYM(8)
	shd		0,x2,29,x1
	sh3add		x2,x2,x2
	addc		x1,0,x1

LSYM(neg7_shift)
	shd		x1,x2,26,t1
	shd		x2,0,26,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,20,t1
	shd		x2,0,20,t2
	add		x2,t2,x2
	addc		x1,t1,t1

	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/

	copy		0,x1
	shd,=		t1,x2,24,t1	/* tentative quotient */
LSYM(3)
	addb,tr		t1,x1,LREF(4)	/* add to previous quotient */
	extru		x2,31,24,x2	/* new remainder (unadjusted) */

	MILLIRET
	sub		0,x1,x1		/* negate result */

LSYM(4)
	addb,tr		t1,x2,LREF(3)	/* adjust remainder */
	extru,=		x2,7,8,t1	/* new quotient */

GSYM($$divU_7)
	.export		$$divU_7,millicode
	addi		1,x2,x2		/* can carry */
	addc		0,0,x1
	shd		x1,x2,29,t1
	sh3add		x2,x2,x2
	b		LREF(pos7)
	addc		t1,x1,x1

/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
GSYM($$divI_9)
	.export		$$divI_9,millicode
	comb,<,n	x2,0,LREF(neg9)
	addi		1,x2,x2		/* cannot overflow */
	shd		0,x2,29,t1
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(pos7)
	subb		t1,0,x1

LSYM(neg9)
	subi		1,x2,x2		/* negate and add 1 */
	shd		0,x2,29,t1
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(neg7_shift)
	subb		t1,0,x1

GSYM($$divU_9)
	.export		$$divU_9,millicode
	addi		1,x2,x2		/* can carry */
	addc		0,0,x1
	shd		x1,x2,29,t1
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(pos7)
	subb		t1,x1,x1

/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
GSYM($$divI_14)
	.export		$$divI_14,millicode
	comb,<,n	x2,0,LREF(neg14)
GSYM($$divU_14)
	.export		$$divU_14,millicode
	b		LREF(7)		/* go to 7 case */
	extru		x2,30,31,x2	/* divide by 2 */

LSYM(neg14)
	subi		2,x2,x2		/* negate (and add 2) */
	b		LREF(8)
	extru		x2,30,31,x2	/* divide by 2 */
	.exit
	.procend
	.end
#endif

#ifdef L_mulI
/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
/******************************************************************************
This routine is used on PA2.0 processors when gcc -mno-fpregs is used

ROUTINE:	$$mulI


DESCRIPTION:

	$$mulI
multiplies two single word integers, giving a single 1699 word result. 1700 1701 1702INPUT REGISTERS: 1703 1704 arg0 = Operand 1 1705 arg1 = Operand 2 1706 r31 == return pc 1707 sr0 == return space when called externally 1708 1709 1710OUTPUT REGISTERS: 1711 1712 arg0 = undefined 1713 arg1 = undefined 1714 ret1 = result 1715 1716OTHER REGISTERS AFFECTED: 1717 1718 r1 = undefined 1719 1720SIDE EFFECTS: 1721 1722 Causes a trap under the following conditions: NONE 1723 Changes memory at the following places: NONE 1724 1725PERMISSIBLE CONTEXT: 1726 1727 Unwindable 1728 Does not create a stack frame 1729 Is usable for internal or external microcode 1730 1731DISCUSSION: 1732 1733 Calls other millicode routines via mrp: NONE 1734 Calls other millicode routines: NONE 1735 1736***************************************************************************/ 1737 1738 1739#define a0 %arg0 1740#define a1 %arg1 1741#define t0 %r1 1742#define r %ret1 1743 1744#define a0__128a0 zdep a0,24,25,a0 1745#define a0__256a0 zdep a0,23,24,a0 1746#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) 1747#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) 1748#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) 1749#define b_n_ret_t0 b,n LREF(ret_t0) 1750#define b_e_shift b LREF(e_shift) 1751#define b_e_t0ma0 b LREF(e_t0ma0) 1752#define b_e_t0 b LREF(e_t0) 1753#define b_e_t0a0 b LREF(e_t0a0) 1754#define b_e_t02a0 b LREF(e_t02a0) 1755#define b_e_t04a0 b LREF(e_t04a0) 1756#define b_e_2t0 b LREF(e_2t0) 1757#define b_e_2t0a0 b LREF(e_2t0a0) 1758#define b_e_2t04a0 b LREF(e2t04a0) 1759#define b_e_3t0 b LREF(e_3t0) 1760#define b_e_4t0 b LREF(e_4t0) 1761#define b_e_4t0a0 b LREF(e_4t0a0) 1762#define b_e_4t08a0 b LREF(e4t08a0) 1763#define b_e_5t0 b LREF(e_5t0) 1764#define b_e_8t0 b LREF(e_8t0) 1765#define b_e_8t0a0 b LREF(e_8t0a0) 1766#define r__r_a0 add r,a0,r 1767#define r__r_2a0 sh1add a0,r,r 1768#define r__r_4a0 sh2add a0,r,r 1769#define r__r_8a0 sh3add a0,r,r 1770#define r__r_t0 add r,t0,r 1771#define r__r_2t0 sh1add t0,r,r 
1772#define r__r_4t0 sh2add t0,r,r 1773#define r__r_8t0 sh3add t0,r,r 1774#define t0__3a0 sh1add a0,a0,t0 1775#define t0__4a0 sh2add a0,0,t0 1776#define t0__5a0 sh2add a0,a0,t0 1777#define t0__8a0 sh3add a0,0,t0 1778#define t0__9a0 sh3add a0,a0,t0 1779#define t0__16a0 zdep a0,27,28,t0 1780#define t0__32a0 zdep a0,26,27,t0 1781#define t0__64a0 zdep a0,25,26,t0 1782#define t0__128a0 zdep a0,24,25,t0 1783#define t0__t0ma0 sub t0,a0,t0 1784#define t0__t0_a0 add t0,a0,t0 1785#define t0__t0_2a0 sh1add a0,t0,t0 1786#define t0__t0_4a0 sh2add a0,t0,t0 1787#define t0__t0_8a0 sh3add a0,t0,t0 1788#define t0__2t0_a0 sh1add t0,a0,t0 1789#define t0__3t0 sh1add t0,t0,t0 1790#define t0__4t0 sh2add t0,0,t0 1791#define t0__4t0_a0 sh2add t0,a0,t0 1792#define t0__5t0 sh2add t0,t0,t0 1793#define t0__8t0 sh3add t0,0,t0 1794#define t0__8t0_a0 sh3add t0,a0,t0 1795#define t0__9t0 sh3add t0,t0,t0 1796#define t0__16t0 zdep t0,27,28,t0 1797#define t0__32t0 zdep t0,26,27,t0 1798#define t0__256a0 zdep a0,23,24,t0 1799 1800 1801 SUBSPA_MILLI 1802 ATTR_MILLI 1803 .align 16 1804 .proc 1805 .callinfo millicode 1806 .export $$mulI,millicode 1807GSYM($$mulI) 1808 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ 1809 copy 0,r /* zero out the result */ 1810 xor a0,a1,a0 /* swap a0 & a1 using the */ 1811 xor a0,a1,a1 /* old xor trick */ 1812 xor a0,a1,a0 1813LSYM(l4) 1814 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ 1815 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ 1816 sub,> 0,a1,t0 /* otherwise negate both and */ 1817 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ 1818 sub 0,a0,a1 1819 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ 1820 1821LSYM(l0) r__r_t0 /* add in this partial product */ 1822LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ 1823LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ 1824LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ 1825 extru a1,23,24,a1 /* a1 >>= 8 ****************** */ 1826 1827/*16 insts before this. 
*/ 1828/* a0 <<= 8 ************************** */ 1829LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop 1830LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop 1831LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop 1832LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 1833LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop 1834LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 1835LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN 1836LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 1837LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop 1838LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 1839LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN 1840LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 1841LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN 1842LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 1843LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 1844LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 1845LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN 1846LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 1847LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN 1848LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 1849LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN 1850LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 1851LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 1852LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 1853LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN 1854LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 1855LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 1856LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 1857LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 1858LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 1859LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 1860LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 1861LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! 
	MILLIRETN			/* final slot of the preceding table entry (entry begins before this chunk) */

/* Multiply-by-constant dispatch table: LSYM(xN) computes the product for
   multiplier N (33 <= N <= 255) in at most four statement slots.  `!' is
   the HP-PA assembler statement separator, so each table line is really
   up to four instructions.

   The operand macros used here are defined earlier in this file and follow
   a DST__EXPR naming scheme, e.g.:
       t0__8a0      t0 = 8*a0            (shift-and-add)
       t0__4t0_a0   t0 = 4*t0 + a0
       t0__t0ma0    t0 = t0 - a0
       r__r_2t0     r  = r + 2*t0
   (Inferred from the names and from the explicit `a0 <<= 8' comment at
   e_shift below -- confirm against the macro definitions earlier in the
   file.)  Control-flow macros:
       b_e_shift, b_e_t0, b_e_2t0, ...   branch to the matching LSYM(e_*)
                                         exit stub below, with the 4th slot
                                         executed in the branch delay slot
                                         (presumably -- verify)
       a1_ne_0_b_l0/l1/l2                branch (to labels l0/l1/l2 defined
                                         earlier) when a1 is non-zero, i.e.
                                         more multiplier bits remain -- TODO
                                         confirm against macro definitions
       b_n_ret_t0, MILLIRET, MILLIRETN   millicode return paths
   Entries are hand-packed and order-sensitive; do not reorder or "clean up". */
LSYM(x33)	t0__8a0	! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
LSYM(x34)	t0__16a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
LSYM(x35)	t0__9a0	! t0__3t0	! b_e_t0	! t0__t0_8a0
LSYM(x36)	t0__9a0	! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
LSYM(x37)	t0__9a0	! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
LSYM(x38)	t0__9a0	! t0__2t0_a0	! b_e_shift	! r__r_2t0
LSYM(x39)	t0__9a0	! t0__2t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x40)	t0__5a0	! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
LSYM(x41)	t0__5a0	! a1_ne_0_b_l0	! t0__8t0_a0	! b_n_ret_t0
LSYM(x42)	t0__5a0	! t0__4t0_a0	! b_e_shift	! r__r_2t0
LSYM(x43)	t0__5a0	! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x44)	t0__5a0	! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x45)	t0__9a0	! a1_ne_0_b_l0	! t0__5t0	! b_n_ret_t0
LSYM(x46)	t0__9a0	! t0__5t0	! b_e_t0	! t0__t0_a0
LSYM(x47)	t0__9a0	! t0__5t0	! b_e_t0	! t0__t0_2a0
LSYM(x48)	t0__3a0	! a1_ne_0_b_l0	! t0__16t0	! b_n_ret_t0
LSYM(x49)	t0__9a0	! t0__5t0	! b_e_t0	! t0__t0_4a0
LSYM(x50)	t0__5a0	! t0__5t0	! b_e_shift	! r__r_2t0
LSYM(x51)	t0__9a0	! t0__t0_8a0	! b_e_t0	! t0__3t0
LSYM(x52)	t0__3a0	! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x53)	t0__3a0	! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x54)	t0__9a0	! t0__3t0	! b_e_shift	! r__r_2t0
LSYM(x55)	t0__9a0	! t0__3t0	! b_e_t0	! t0__2t0_a0
LSYM(x56)	t0__3a0	! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x57)	t0__9a0	! t0__2t0_a0	! b_e_t0	! t0__3t0
LSYM(x58)	t0__3a0	! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x59)	t0__9a0	! t0__2t0_a0	! b_e_t02a0	! t0__3t0
LSYM(x60)	t0__5a0	! t0__3t0	! b_e_shift	! r__r_4t0
LSYM(x61)	t0__5a0	! t0__3t0	! b_e_t0	! t0__4t0_a0
LSYM(x62)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
LSYM(x63)	t0__64a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
LSYM(x64)	t0__64a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
LSYM(x65)	t0__8a0	! a1_ne_0_b_l0	! t0__8t0_a0	! b_n_ret_t0
LSYM(x66)	t0__32a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
LSYM(x67)	t0__8a0	! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x68)	t0__8a0	! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x69)	t0__8a0	! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x70)	t0__64a0	! t0__t0_4a0	! b_e_t0	! t0__t0_2a0
LSYM(x71)	t0__9a0	! t0__8t0	! b_e_t0	! t0__t0ma0
LSYM(x72)	t0__9a0	! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
LSYM(x73)	t0__9a0	! t0__8t0_a0	! b_e_shift	! r__r_t0
LSYM(x74)	t0__9a0	! t0__4t0_a0	! b_e_shift	! r__r_2t0
LSYM(x75)	t0__9a0	! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x76)	t0__9a0	! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x77)	t0__9a0	! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x78)	t0__9a0	! t0__2t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x79)	t0__16a0	! t0__5t0	! b_e_t0	! t0__t0ma0
LSYM(x80)	t0__16a0	! t0__5t0	! b_e_shift	! r__r_t0
LSYM(x81)	t0__9a0	! t0__9t0	! b_e_shift	! r__r_t0
LSYM(x82)	t0__5a0	! t0__8t0_a0	! b_e_shift	! r__r_2t0
LSYM(x83)	t0__5a0	! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x84)	t0__5a0	! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x85)	t0__8a0	! t0__2t0_a0	! b_e_t0	! t0__5t0
LSYM(x86)	t0__5a0	! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x87)	t0__9a0	! t0__9t0	! b_e_t02a0	! t0__t0_4a0
LSYM(x88)	t0__5a0	! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x89)	t0__5a0	! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x90)	t0__9a0	! t0__5t0	! b_e_shift	! r__r_2t0
LSYM(x91)	t0__9a0	! t0__5t0	! b_e_t0	! t0__2t0_a0
LSYM(x92)	t0__5a0	! t0__2t0_a0	! b_e_4t0	! t0__2t0_a0
LSYM(x93)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__3t0
LSYM(x94)	t0__9a0	! t0__5t0	! b_e_2t0	! t0__t0_2a0
LSYM(x95)	t0__9a0	! t0__2t0_a0	! b_e_t0	! t0__5t0
LSYM(x96)	t0__8a0	! t0__3t0	! b_e_shift	! r__r_4t0
LSYM(x97)	t0__8a0	! t0__3t0	! b_e_t0	! t0__4t0_a0
LSYM(x98)	t0__32a0	! t0__3t0	! b_e_t0	! t0__t0_2a0
LSYM(x99)	t0__8a0	! t0__4t0_a0	! b_e_t0	! t0__3t0
LSYM(x100)	t0__5a0	! t0__5t0	! b_e_shift	! r__r_4t0
LSYM(x101)	t0__5a0	! t0__5t0	! b_e_t0	! t0__4t0_a0
LSYM(x102)	t0__32a0	! t0__t0_2a0	! b_e_t0	! t0__3t0
LSYM(x103)	t0__5a0	! t0__5t0	! b_e_t02a0	! t0__4t0_a0
LSYM(x104)	t0__3a0	! t0__4t0_a0	! b_e_shift	! r__r_8t0
LSYM(x105)	t0__5a0	! t0__4t0_a0	! b_e_t0	! t0__5t0
LSYM(x106)	t0__3a0	! t0__4t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x107)	t0__9a0	! t0__t0_4a0	! b_e_t02a0	! t0__8t0_a0
LSYM(x108)	t0__9a0	! t0__3t0	! b_e_shift	! r__r_4t0
LSYM(x109)	t0__9a0	! t0__3t0	! b_e_t0	! t0__4t0_a0
LSYM(x110)	t0__9a0	! t0__3t0	! b_e_2t0	! t0__2t0_a0
LSYM(x111)	t0__9a0	! t0__4t0_a0	! b_e_t0	! t0__3t0
LSYM(x112)	t0__3a0	! t0__2t0_a0	! b_e_t0	! t0__16t0
LSYM(x113)	t0__9a0	! t0__4t0_a0	! b_e_t02a0	! t0__3t0
LSYM(x114)	t0__9a0	! t0__2t0_a0	! b_e_2t0	! t0__3t0
LSYM(x115)	t0__9a0	! t0__2t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x116)	t0__3a0	! t0__2t0_a0	! b_e_4t0	! t0__4t0_a0
LSYM(x117)	t0__3a0	! t0__4t0_a0	! b_e_t0	! t0__9t0
LSYM(x118)	t0__3a0	! t0__4t0_a0	! b_e_t0a0	! t0__9t0
LSYM(x119)	t0__3a0	! t0__4t0_a0	! b_e_t02a0	! t0__9t0
LSYM(x120)	t0__5a0	! t0__3t0	! b_e_shift	! r__r_8t0
LSYM(x121)	t0__5a0	! t0__3t0	! b_e_t0	! t0__8t0_a0
LSYM(x122)	t0__5a0	! t0__3t0	! b_e_2t0	! t0__4t0_a0
LSYM(x123)	t0__5a0	! t0__8t0_a0	! b_e_t0	! t0__3t0
LSYM(x124)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_4t0
LSYM(x125)	t0__5a0	! t0__5t0	! b_e_t0	! t0__5t0
LSYM(x126)	t0__64a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
LSYM(x127)	t0__128a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
LSYM(x128)	t0__128a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
LSYM(x129)	t0__128a0	! a1_ne_0_b_l0	! t0__t0_a0	! b_n_ret_t0
LSYM(x130)	t0__64a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
LSYM(x131)	t0__8a0	! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x132)	t0__8a0	! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x133)	t0__8a0	! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x134)	t0__8a0	! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x135)	t0__9a0	! t0__5t0	! b_e_t0	! t0__3t0
LSYM(x136)	t0__8a0	! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x137)	t0__8a0	! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x138)	t0__8a0	! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x139)	t0__8a0	! t0__2t0_a0	! b_e_2t0a0	! t0__4t0_a0
LSYM(x140)	t0__3a0	! t0__2t0_a0	! b_e_4t0	! t0__5t0
LSYM(x141)	t0__8a0	! t0__2t0_a0	! b_e_4t0a0	! t0__2t0_a0
LSYM(x142)	t0__9a0	! t0__8t0	! b_e_2t0	! t0__t0ma0
LSYM(x143)	t0__16a0	! t0__9t0	! b_e_t0	! t0__t0ma0
LSYM(x144)	t0__9a0	! t0__8t0	! b_e_shift	! r__r_2t0
LSYM(x145)	t0__9a0	! t0__8t0	! b_e_t0	! t0__2t0_a0
LSYM(x146)	t0__9a0	! t0__8t0_a0	! b_e_shift	! r__r_2t0
LSYM(x147)	t0__9a0	! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x148)	t0__9a0	! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x149)	t0__9a0	! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x150)	t0__9a0	! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x151)	t0__9a0	! t0__4t0_a0	! b_e_2t0a0	! t0__2t0_a0
LSYM(x152)	t0__9a0	! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x153)	t0__9a0	! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x154)	t0__9a0	! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x155)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__5t0
LSYM(x156)	t0__9a0	! t0__2t0_a0	! b_e_4t0	! t0__2t0_a0
LSYM(x157)	t0__32a0	! t0__t0ma0	! b_e_t02a0	! t0__5t0
LSYM(x158)	t0__16a0	! t0__5t0	! b_e_2t0	! t0__t0ma0
LSYM(x159)	t0__32a0	! t0__5t0	! b_e_t0	! t0__t0ma0
LSYM(x160)	t0__5a0	! t0__4t0	! b_e_shift	! r__r_8t0
LSYM(x161)	t0__8a0	! t0__5t0	! b_e_t0	! t0__4t0_a0
LSYM(x162)	t0__9a0	! t0__9t0	! b_e_shift	! r__r_2t0
LSYM(x163)	t0__9a0	! t0__9t0	! b_e_t0	! t0__2t0_a0
LSYM(x164)	t0__5a0	! t0__8t0_a0	! b_e_shift	! r__r_4t0
LSYM(x165)	t0__8a0	! t0__4t0_a0	! b_e_t0	! t0__5t0
LSYM(x166)	t0__5a0	! t0__8t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x167)	t0__5a0	! t0__8t0_a0	! b_e_2t0a0	! t0__2t0_a0
LSYM(x168)	t0__5a0	! t0__4t0_a0	! b_e_shift	! r__r_8t0
LSYM(x169)	t0__5a0	! t0__4t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x170)	t0__32a0	! t0__t0_2a0	! b_e_t0	! t0__5t0
LSYM(x171)	t0__9a0	! t0__2t0_a0	! b_e_t0	! t0__9t0
LSYM(x172)	t0__5a0	! t0__4t0_a0	! b_e_4t0	! t0__2t0_a0
LSYM(x173)	t0__9a0	! t0__2t0_a0	! b_e_t02a0	! t0__9t0
LSYM(x174)	t0__32a0	! t0__t0_2a0	! b_e_t04a0	! t0__5t0
LSYM(x175)	t0__8a0	! t0__2t0_a0	! b_e_5t0	! t0__2t0_a0
LSYM(x176)	t0__5a0	! t0__4t0_a0	! b_e_8t0	! t0__t0_a0
LSYM(x177)	t0__5a0	! t0__4t0_a0	! b_e_8t0a0	! t0__t0_a0
LSYM(x178)	t0__5a0	! t0__2t0_a0	! b_e_2t0	! t0__8t0_a0
LSYM(x179)	t0__5a0	! t0__2t0_a0	! b_e_2t0a0	! t0__8t0_a0
LSYM(x180)	t0__9a0	! t0__5t0	! b_e_shift	! r__r_4t0
LSYM(x181)	t0__9a0	! t0__5t0	! b_e_t0	! t0__4t0_a0
LSYM(x182)	t0__9a0	! t0__5t0	! b_e_2t0	! t0__2t0_a0
LSYM(x183)	t0__9a0	! t0__5t0	! b_e_2t0a0	! t0__2t0_a0
LSYM(x184)	t0__5a0	! t0__9t0	! b_e_4t0	! t0__t0_a0
LSYM(x185)	t0__9a0	! t0__4t0_a0	! b_e_t0	! t0__5t0
LSYM(x186)	t0__32a0	! t0__t0ma0	! b_e_2t0	! t0__3t0
LSYM(x187)	t0__9a0	! t0__4t0_a0	! b_e_t02a0	! t0__5t0
LSYM(x188)	t0__9a0	! t0__5t0	! b_e_4t0	! t0__t0_2a0
LSYM(x189)	t0__5a0	! t0__4t0_a0	! b_e_t0	! t0__9t0
LSYM(x190)	t0__9a0	! t0__2t0_a0	! b_e_2t0	! t0__5t0
LSYM(x191)	t0__64a0	! t0__3t0	! b_e_t0	! t0__t0ma0
LSYM(x192)	t0__8a0	! t0__3t0	! b_e_shift	! r__r_8t0
LSYM(x193)	t0__8a0	! t0__3t0	! b_e_t0	! t0__8t0_a0
LSYM(x194)	t0__8a0	! t0__3t0	! b_e_2t0	! t0__4t0_a0
LSYM(x195)	t0__8a0	! t0__8t0_a0	! b_e_t0	! t0__3t0
LSYM(x196)	t0__8a0	! t0__3t0	! b_e_4t0	! t0__2t0_a0
LSYM(x197)	t0__8a0	! t0__3t0	! b_e_4t0a0	! t0__2t0_a0
LSYM(x198)	t0__64a0	! t0__t0_2a0	! b_e_t0	! t0__3t0
LSYM(x199)	t0__8a0	! t0__4t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x200)	t0__5a0	! t0__5t0	! b_e_shift	! r__r_8t0
LSYM(x201)	t0__5a0	! t0__5t0	! b_e_t0	! t0__8t0_a0
LSYM(x202)	t0__5a0	! t0__5t0	! b_e_2t0	! t0__4t0_a0
LSYM(x203)	t0__5a0	! t0__5t0	! b_e_2t0a0	! t0__4t0_a0
LSYM(x204)	t0__8a0	! t0__2t0_a0	! b_e_4t0	! t0__3t0
LSYM(x205)	t0__5a0	! t0__8t0_a0	! b_e_t0	! t0__5t0
LSYM(x206)	t0__64a0	! t0__t0_4a0	! b_e_t02a0	! t0__3t0
LSYM(x207)	t0__8a0	! t0__2t0_a0	! b_e_3t0	! t0__4t0_a0
LSYM(x208)	t0__5a0	! t0__5t0	! b_e_8t0	! t0__t0_a0
LSYM(x209)	t0__5a0	! t0__5t0	! b_e_8t0a0	! t0__t0_a0
LSYM(x210)	t0__5a0	! t0__4t0_a0	! b_e_2t0	! t0__5t0
LSYM(x211)	t0__5a0	! t0__4t0_a0	! b_e_2t0a0	! t0__5t0
LSYM(x212)	t0__3a0	! t0__4t0_a0	! b_e_4t0	! t0__4t0_a0
LSYM(x213)	t0__3a0	! t0__4t0_a0	! b_e_4t0a0	! t0__4t0_a0
LSYM(x214)	t0__9a0	! t0__t0_4a0	! b_e_2t04a0	! t0__8t0_a0
LSYM(x215)	t0__5a0	! t0__4t0_a0	! b_e_5t0	! t0__2t0_a0
LSYM(x216)	t0__9a0	! t0__3t0	! b_e_shift	! r__r_8t0
LSYM(x217)	t0__9a0	! t0__3t0	! b_e_t0	! t0__8t0_a0
LSYM(x218)	t0__9a0	! t0__3t0	! b_e_2t0	! t0__4t0_a0
LSYM(x219)	t0__9a0	! t0__8t0_a0	! b_e_t0	! t0__3t0
LSYM(x220)	t0__3a0	! t0__9t0	! b_e_4t0	! t0__2t0_a0
LSYM(x221)	t0__3a0	! t0__9t0	! b_e_4t0a0	! t0__2t0_a0
LSYM(x222)	t0__9a0	! t0__4t0_a0	! b_e_2t0	! t0__3t0
LSYM(x223)	t0__9a0	! t0__4t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x224)	t0__9a0	! t0__3t0	! b_e_8t0	! t0__t0_a0
LSYM(x225)	t0__9a0	! t0__5t0	! b_e_t0	! t0__5t0
LSYM(x226)	t0__3a0	! t0__2t0_a0	! b_e_t02a0	! t0__32t0
LSYM(x227)	t0__9a0	! t0__5t0	! b_e_t02a0	! t0__5t0
LSYM(x228)	t0__9a0	! t0__2t0_a0	! b_e_4t0	! t0__3t0
LSYM(x229)	t0__9a0	! t0__2t0_a0	! b_e_4t0a0	! t0__3t0
LSYM(x230)	t0__9a0	! t0__5t0	! b_e_5t0	! t0__t0_a0
LSYM(x231)	t0__9a0	! t0__2t0_a0	! b_e_3t0	! t0__4t0_a0
LSYM(x232)	t0__3a0	! t0__2t0_a0	! b_e_8t0	! t0__4t0_a0
LSYM(x233)	t0__3a0	! t0__2t0_a0	! b_e_8t0a0	! t0__4t0_a0
LSYM(x234)	t0__3a0	! t0__4t0_a0	! b_e_2t0	! t0__9t0
LSYM(x235)	t0__3a0	! t0__4t0_a0	! b_e_2t0a0	! t0__9t0
LSYM(x236)	t0__9a0	! t0__2t0_a0	! b_e_4t08a0	! t0__3t0
LSYM(x237)	t0__16a0	! t0__5t0	! b_e_3t0	! t0__t0ma0
LSYM(x238)	t0__3a0	! t0__4t0_a0	! b_e_2t04a0	! t0__9t0
LSYM(x239)	t0__16a0	! t0__5t0	! b_e_t0ma0	! t0__3t0
LSYM(x240)	t0__9a0	! t0__t0_a0	! b_e_8t0	! t0__3t0
LSYM(x241)	t0__9a0	! t0__t0_a0	! b_e_8t0a0	! t0__3t0
LSYM(x242)	t0__5a0	! t0__3t0	! b_e_2t0	! t0__8t0_a0
LSYM(x243)	t0__9a0	! t0__9t0	! b_e_t0	! t0__3t0
LSYM(x244)	t0__5a0	! t0__3t0	! b_e_4t0	! t0__4t0_a0
LSYM(x245)	t0__8a0	! t0__3t0	! b_e_5t0	! t0__2t0_a0
LSYM(x246)	t0__5a0	! t0__8t0_a0	! b_e_2t0	! t0__3t0
LSYM(x247)	t0__5a0	! t0__8t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x248)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_8t0
LSYM(x249)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__8t0_a0
LSYM(x250)	t0__5a0	! t0__5t0	! b_e_2t0	! t0__5t0
LSYM(x251)	t0__5a0	! t0__5t0	! b_e_2t0a0	! t0__5t0
LSYM(x252)	t0__64a0	! t0__t0ma0	! b_e_shift	! r__r_4t0
LSYM(x253)	t0__64a0	! t0__t0ma0	! b_e_t0	! t0__4t0_a0
LSYM(x254)	t0__128a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
LSYM(x255)	t0__256a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
/*1040 insts before this. */
/* Shared exit stubs.  Each applies one final step to t0 (or folds t0 into
   the running result r) and returns, or -- via the a1_ne_0_b_l* macros --
   branches back to labels l0/l1/l2 (defined earlier in this file) when a1
   is still non-zero.  Step names follow the same DST__EXPR convention as
   the table above.  */
LSYM(ret_t0)	MILLIRET		/* return with result already in place */
LSYM(e_t0)	r__r_t0			/* r += t0 (executed as ret_t0's delay/fall-through -- see table) */
LSYM(e_shift)	a1_ne_0_b_l2		/* more multiplier bytes: continue at l2 */
	a0__256a0	/* a0 <<= 8 *********** */
	MILLIRETN
LSYM(e_t0ma0)	a1_ne_0_b_l0		/* final step: t0 = t0 - a0 */
	t0__t0ma0
	MILLIRET
	r__r_t0
LSYM(e_t0a0)	a1_ne_0_b_l0		/* final step: t0 = t0 + a0 */
	t0__t0_a0
	MILLIRET
	r__r_t0
LSYM(e_t02a0)	a1_ne_0_b_l0		/* final step: t0 = t0 + 2*a0 */
	t0__t0_2a0
	MILLIRET
	r__r_t0
LSYM(e_t04a0)	a1_ne_0_b_l0		/* final step: t0 = t0 + 4*a0 */
	t0__t0_4a0
	MILLIRET
	r__r_t0
LSYM(e_2t0)	a1_ne_0_b_l1		/* fold: r += 2*t0 */
	r__r_2t0
	MILLIRETN
LSYM(e_2t0a0)	a1_ne_0_b_l0		/* final step: t0 = 2*t0 + a0 */
	t0__2t0_a0
	MILLIRET
	r__r_t0
LSYM(e2t04a0)	t0__t0_2a0		/* t0 += 2*a0, then fold r += 2*t0 */
	a1_ne_0_b_l1
	r__r_2t0
	MILLIRETN
LSYM(e_3t0)	a1_ne_0_b_l0		/* final step: t0 = 3*t0 */
	t0__3t0
	MILLIRET
	r__r_t0
LSYM(e_4t0)	a1_ne_0_b_l1		/* fold: r += 4*t0 */
	r__r_4t0
	MILLIRETN
LSYM(e_4t0a0)	a1_ne_0_b_l0		/* final step: t0 = 4*t0 + a0 */
	t0__4t0_a0
	MILLIRET
	r__r_t0
LSYM(e4t08a0)	t0__t0_2a0		/* t0 += 2*a0, then fold r += 4*t0 */
	a1_ne_0_b_l1
	r__r_4t0
	MILLIRETN
LSYM(e_5t0)	a1_ne_0_b_l0		/* final step: t0 = 5*t0 */
	t0__5t0
	MILLIRET
	r__r_t0
LSYM(e_8t0)	a1_ne_0_b_l1		/* fold: r += 8*t0 */
	r__r_8t0
	MILLIRETN
LSYM(e_8t0a0)	a1_ne_0_b_l0		/* final step: t0 = 8*t0 + a0 */
	t0__8t0_a0
	MILLIRET
	r__r_t0

	.procend
	.end
#endif