// IA-64 assembler (GAS) test input.  NOTE(review): keep the line count stable when editing; expected diagnostics for a test like this are presumably matched by line number -- confirm.
// Detect WAW (write-after-write) violations.  Cases taken from DV tables.
// Layout: a header comment names the resource, then one instruction group (closed by ";;") writes it twice; "no DV here" tags pairs that are not expected to be reported.
.text
	.explicit	// NOTE(review): presumably selects explicit mode (hand-placed stops, violations reported) -- confirm in the GAS IA-64 documentation
// AR[BSP]
	mov ar.bsp = r0
	mov ar.bsp = r1
	;;
// AR[BSPSTORE]
	mov ar.bspstore = r2
	mov ar.bspstore = r3
	;;

// AR[CCV]
	mov ar.ccv = r4
	mov ar.ccv = r4
	;;

// AR[EC]
	br.wtop.sptk L	// the branch is the implicit AR[EC] writer of this pair
	mov ar.ec = r0
	;;

// AR[FPSR].sf0.controls
	mov ar.fpsr = r0
	fsetc.s0 0x7f, 0x0f	// second writer of the sf0 controls
	;;

// AR[FPSR].sf1.controls
	mov ar.fpsr = r0
	fsetc.s1 0x7f, 0x0f	// second writer of the sf1 controls
	;;

// AR[FPSR].sf2.controls
	mov ar.fpsr = r0
	fsetc.s2 0x7f, 0x0f	// second writer of the sf2 controls
	;;

// AR[FPSR].sf3.controls
	mov ar.fpsr = r0
	fsetc.s3 0x7f, 0x0f	// second writer of the sf3 controls
	;;

// AR[FPSR].sf0.flags
	fcmp.eq.s0 p1, p2 = f3, f4
	fcmp.eq.s0 p3, p4 = f3, f4	// no DV here
	;;
	fcmp.eq.s0 p1, p2 = f3, f4
	fclrf.s0	// paired with the fcmp above as the second sf0.flags writer
	;;

// AR[FPSR].sf1.flags
	fcmp.eq.s1 p1, p2 = f3, f4
	fcmp.eq.s1 p3, p4 = f3, f4	// no DV here
	;;
	fcmp.eq.s1 p1, p2 = f3, f4
	fclrf.s1	// paired with the fcmp above as the second sf1.flags writer
	;;

// AR[FPSR].sf2.flags
	fcmp.eq.s2 p1, p2 = f3, f4
	fcmp.eq.s2 p3, p4 = f3, f4	// no DV here
	;;
	fcmp.eq.s2 p1, p2 = f3, f4
	fclrf.s2	// paired with the fcmp above as the second sf2.flags writer
	;;

// AR[FPSR].sf3.flags
	fcmp.eq.s3 p1, p2 = f3, f4
	fcmp.eq.s3 p3, p4 = f3, f4	// no DV here
	;;
	fcmp.eq.s3 p1, p2 = f3, f4
	fclrf.s3	// paired with the fcmp above as the second sf3.flags writer
	;;

// AR[FPSR].traps/rv plus all controls/flags
	mov ar.fpsr = r0
	mov ar.fpsr = r0
	;;

// AR[ITC]
	mov ar.itc = r1
	mov ar.itc = r1
	;;

// AR[RUC]
	mov ar.ruc = r1
	mov ar.ruc = r1
	;;

// AR[K]
	mov ar.k2 = r3
	mov ar.k2 = r3
	;;

// AR[LC]
	br.cloop.sptk L	// the branch is the implicit AR[LC] writer of this pair
	mov ar.lc = r0
	;;

// AR[PFS]
	mov ar.pfs = r0
	br.call.sptk b0 = L	// the call is the implicit AR[PFS] writer of this pair
	;;

// AR[RNAT] (see also AR[BSPSTORE])
	mov ar.rnat = r8
	mov ar.rnat = r8
	;;

// AR[RSC]
	mov ar.rsc = r10
	mov ar.rsc = r10
	;;

// AR[UNAT]
	mov ar.unat = r12
	st8.spill [r0] = r1	// the spill is the implicit AR[UNAT] writer of this pair
	;;

// AR% (an application register addressed by number)
	mov ar48 = r0
	mov ar48 = r0
	;;

// BR%
	mov b1 = r0
	mov b1 = r1
;;	// stop closing the BR% pair that immediately precedes this point

// CFM (and others)
	br.wtop.sptk L
	br.wtop.sptk L
	;;

// CR[CMCV]
	mov cr.cmcv = r1
	mov cr.cmcv = r2
	;;

// CR[DCR]
	mov cr.dcr = r3
	mov cr.dcr = r3
	;;

// CR[EOI] (and InService)
	mov cr.eoi = r0
	mov cr.eoi = r0
	;;
	srlz.d	// NOTE(review): presumably serializes so the writes above do not carry into the next case -- confirm

// CR[GPTA]
	mov cr.gpta = r6
	mov cr.gpta = r7
	;;

// CR[IFA]
	mov cr.ifa = r9
	mov cr.ifa = r10
	;;

// CR[IFS]
	mov cr.ifs = r11
	cover	// paired with the mov above as the implicit CR[IFS] writer
	;;

// CR[IHA]
	mov cr.iha = r13
	mov cr.iha = r14
	;;

// CR[IIB%] (one pair per register: iib0, then iib1)
	mov cr.iib0 = r15
	mov cr.iib0 = r16
	;;

	mov cr.iib1 = r15
	mov cr.iib1 = r16
	;;

// CR[IIM]
	mov cr.iim = r15
	mov cr.iim = r16
	;;

// CR[IIP]
	mov cr.iip = r17
	mov cr.iip = r17
	;;

// CR[IIPA]
	mov cr.iipa = r19
	mov cr.iipa = r20
	;;

// CR[IPSR]
	mov cr.ipsr = r21
	mov cr.ipsr = r22
	;;

// CR[IRR%] (and others)
	mov r2 = cr.ivr	// syntactically a read of cr.ivr; listed here as a CR[IRR%] writer
	mov r3 = cr.ivr
	;;

// CR[ISR]
	mov cr.isr = r24
	mov cr.isr = r25
	;;

// CR[ITIR]
	mov cr.itir = r26
	mov cr.itir = r27
	;;

// CR[ITM]
	mov cr.itm = r28
	mov cr.itm = r29
	;;

// CR[ITV]
	mov cr.itv = r0
	mov cr.itv = r1
	;;

// CR[IVA]
	mov cr.iva = r0
	mov cr.iva = r1
	;;

// CR[IVR] (no explicit writers)

// CR[LID]
	mov cr.lid = r0
	mov cr.lid = r1
	;;

// CR[LRR%]
	mov cr.lrr0 = r0
	mov cr.lrr1 = r0	// no DV here (lrr0 and lrr1 are different registers)
	;;
	mov cr.lrr0 = r0
	mov cr.lrr0 = r0
	;;

// CR[PMV]
	mov cr.pmv = r0
	mov cr.pmv = r1
	;;

// CR[PTA]
	mov cr.pta = r0
	mov cr.pta = r1
	;;

// CR[TPR]
	mov cr.tpr = r0
	mov cr.tpr = r1
	;;

// DBR#
	mov dbr[r1] = r1
	mov dbr[r1] = r2
	;;
	srlz.d

// DTC
	ptc.e r0
	ptc.e r1	// no DVs here
	;;
	ptc.e r0	// (and others)
	itc.i r0
	;;
	srlz.d
//
// DTC_LIMIT
	ptc.g r0, r1	// NOTE: GAS automatically emits stops after
	ptc.ga r2, r3	// ptc.g/ptc.ga, so this conflict is no
	;;		// longer possible in GAS-generated assembly
	srlz.d

// DTR
	itr.d dtr[r0] = r1	// (and others)
	ptr.d r2, r3
	;;
	srlz.d

// FR%
	mov f3 = f2
	ldfs.c.clr f3 = [r1]	// check load naming the same target, f3
	;;

// GR%
	mov r2 = r0
	ld8.c.clr r2 = [r1]	// check load naming the same target, r2
	;;

// IBR#
	mov ibr[r0] = r2
	mov ibr[r1] = r2
	;;

// InService
	mov cr.eoi = r0
	mov r1 = cr.ivr	// syntactically a read of cr.ivr; paired with the cr.eoi write under InService
	;;
	srlz.d

// ITC
	ptc.e r0
	itc.i r1
	;;
	srlz.i
	;;

// ITR
	itr.i itr[r0] = r1
	ptr.i r2, r3
	;;
	srlz.i
	;;

// PKR#
	.reg.val r1, 0x1	// NOTE(review): presumably tells the DV checker the values held in r1/r2,
	.reg.val r2, ~0x1	// so pkr[r1] and pkr[r2] are seen as different registers -- confirm in the GAS IA-64 docs
	mov pkr[r1] = r1
	mov pkr[r2] = r1	// no DV here
	;;
	mov pkr[r1] = r1
	mov pkr[r1] = r1
	;;

// PMC#
	mov pmc[r3] = r1
	mov pmc[r4] = r1
	;;

// PMD#
	mov pmd[r3] = r1
	mov pmd[r4] = r1
	;;

// PR%, 1 - 15 (p1 is the predicate written twice in every pair)
	cmp.eq p1, p0 = r0, r1
	cmp.eq p1, p0 = r2, r3
	;;
	fcmp.eq p1, p2 = f2, f3
	fcmp.eq p1, p3 = f2, f3
	;;
	cmp.eq.and p1, p2 = r0, r1	// mixed .and/.or pair: not tagged "no DV here"
	cmp.eq.or p1, p3 = r2, r3
	;;
	cmp.eq.or p1, p3 = r2, r3	// same mix in the opposite order
	cmp.eq.and p1, p2 = r0, r1
	;;
	cmp.eq.and p1, p2 = r0, r1
	cmp.eq.and p1, p3 = r2, r3	// no DV here
	;;
	cmp.eq.or p1, p2 = r0, r1
	cmp.eq.or p1, p3 = r2, r3	// no DV here
	;;

// PR63 (same pairings as above with p63 as the shared target)
	br.wtop.sptk L	// the branch pair is listed here as PR63 writers
	br.wtop.sptk L
	;;
	cmp.eq p63, p0 = r0, r1
	cmp.eq p63, p0 = r2, r3
	;;
	fcmp.eq p63, p2 = f2, f3
	fcmp.eq p63, p3 = f2, f3
	;;
	cmp.eq.and p63, p2 = r0, r1
	cmp.eq.or p63, p3 = r2, r3
	;;
	cmp.eq.or p63, p3 = r2, r3
	cmp.eq.and p63, p2 = r0, r1
	;;
	cmp.eq.and p63, p2 = r0, r1
	cmp.eq.and p63, p3 = r2, r3	// no DV here
	;;
	cmp.eq.or p63, p2 = r0, r1
	cmp.eq.or p63, p3 = r2, r3	// no DV here
	;;

// PSR.ac (mask bit 3)
	rum (1<<3)
	rum (1<<3)
	;;
//
// PSR.be (mask bit 1)
	rum (1<<1)
	rum (1<<1)
	;;

// PSR.bn
	bsw.0	// GAS automatically emits a stop after bsw.n
	bsw.0	// so this conflict is avoided
	;;

// PSR.cpl
	epc	// epc and br.ret are the two PSR.cpl writers paired here
	br.ret.sptk b0
	;;

// PSR.da (rfi is the only writer)
// PSR.db (and others)
	mov psr.l = r0
	mov psr.l = r1
	;;
	srlz.d	// NOTE(review): presumably serializes so the PSR writes above do not carry into the next case -- confirm

// PSR.dd (rfi is the only writer)

// PSR.dfh (mask bit 19)
	ssm (1<<19)
	ssm (1<<19)
	;;
	srlz.d

// PSR.dfl (mask bit 18)
	ssm (1<<18)
	ssm (1<<18)
	;;
	srlz.d

// PSR.di (mask bit 22)
	rsm (1<<22)
	rsm (1<<22)
	;;

// PSR.dt (mask bit 17)
	rsm (1<<17)
	rsm (1<<17)
	;;

// PSR.ed (rfi is the only writer)
// PSR.i (mask bit 14)
	ssm (1<<14)
	ssm (1<<14)
	;;

// PSR.ia (no DV semantics)
// PSR.ic (mask bit 13)
	ssm (1<<13)
	ssm (1<<13)
	;;

// PSR.id (rfi is the only writer)
// PSR.is (br.ia and rfi are the only writers)
// PSR.it (rfi is the only writer)
// PSR.lp (see PSR.db)

// PSR.mc (rfi is the only writer)
// PSR.mfh (mask bit 5)
	mov f32 = f33
	mov r10 = psr	// r10 keeps a PSR copy for the psr.um write below
	;;
	ssm (1<<5)
	ssm (1<<5)
	;;
	ssm (1<<5)
	mov psr.um = r10	// second writer: user-mask move using the saved copy
	;;
	rum (1<<5)
	rum (1<<5)
	;;
	mov f32 = f33
	mov f34 = f35	// no DV here
	;;

// PSR.mfl (mask bit 4)
	mov f2 = f3
	mov r10 = psr	// r10 keeps a PSR copy for the psr.um write below
	;;
	ssm (1<<4)
	ssm (1<<4)
	;;
	ssm (1<<4)
	mov psr.um = r10	// second writer: user-mask move using the saved copy
	;;
	rum (1<<4)
	rum (1<<4)
	;;
	mov f2 = f3
	mov f4 = f5	// no DV here
	;;

// PSR.pk (mask bit 15)
	rsm (1<<15)
	rsm (1<<15)
	;;

// PSR.pp (mask bit 21)
	rsm (1<<21)
	rsm (1<<21)
	;;

// PSR.ri (no DV semantics)
// PSR.rt (see PSR.db)

// PSR.si (mask bit 23; reset followed by set of the same bit)
	rsm (1<<23)
	ssm (1<<23)
	;;

// PSR.sp (mask bit 20; set followed by reset of the same bit)
	ssm (1<<20)
	rsm (1<<20)
	;;
	srlz.d

// PSR.ss (rfi is the only writer)
// PSR.tb (see PSR.db)

// PSR.up (mask bit 2)
	rsm (1<<2)
	rsm (1<<2)
	;;
	rum (1<<2)
	mov psr.um = r0	// second writer: user-mask move
	;;

// RR#
	mov rr[r2] = r1
	mov rr[r2] = r3
	;;
//
// PR, additional cases (or.andcm and and.orcm interaction)
	cmp.eq.or.andcm p6, p7 = 1, r32
	cmp.eq.or.andcm p6, p7 = 5, r36	// no DV here
	;;
	cmp.eq.and.orcm p6, p7 = 1, r32
	cmp.eq.and.orcm p6, p7 = 5, r36	// no DV here
	;;
	cmp.eq.or.andcm p63, p7 = 1, r32
	cmp.eq.or.andcm p63, p7 = 5, r36	// no DV here
	;;
	cmp.eq.or.andcm p6, p63 = 1, r32
	cmp.eq.or.andcm p6, p63 = 5, r36	// no DV here
	;;
	cmp.eq.and.orcm p63, p7 = 1, r32
	cmp.eq.and.orcm p63, p7 = 5, r36	// no DV here
	;;
	cmp.eq.and.orcm p6, p63 = 1, r32
	cmp.eq.and.orcm p6, p63 = 5, r36	// no DV here
	;;
	cmp.eq.or.andcm p6, p7 = 1, r32	// from here on the two types are mixed on the same predicates; these pairs are not tagged "no DV here"
	cmp.eq.and.orcm p6, p7 = 5, r36
	;;
	cmp.eq.or.andcm p63, p7 = 1, r32
	cmp.eq.and.orcm p63, p7 = 5, r36
	;;
	cmp.eq.or.andcm p6, p63 = 1, r32
	cmp.eq.and.orcm p6, p63 = 5, r36
	;;

// PR%, 16 - 62 (p21 is the predicate written twice in every pair)
	cmp.eq p21, p0 = r0, r1
	cmp.eq p21, p0 = r2, r3
	;;
	fcmp.eq p21, p22 = f2, f3
	fcmp.eq p21, p23 = f2, f3
	;;
	cmp.eq.and p21, p22 = r0, r1	// mixed .and/.or pair: not tagged "no DV here"
	cmp.eq.or p21, p23 = r2, r3
	;;
	cmp.eq.or p21, p23 = r2, r3	// same mix in the opposite order
	cmp.eq.and p21, p22 = r0, r1
	;;
	cmp.eq.and p21, p22 = r0, r1
	cmp.eq.and p21, p23 = r2, r3	// no DV here
	;;
	cmp.eq.or p21, p22 = r0, r1
	cmp.eq.or p21, p23 = r2, r3	// no DV here
	;;

// RSE (no cases follow this header)

L:	// target label of the br.wtop/br.cloop/br.call instructions earlier in the file