// Detect WAW (write-after-write) dependency violations.
// Cases taken from DV tables; pairs tagged "no DV here" should not be flagged.
// NOTE(review): expected diagnostics presumably key on line numbers; avoid adding/removing lines -- confirm.
.text
        .explicit                               // presumably GAS explicit-stop mode -- confirm
// AR[BSP]
        mov             ar.bsp = r0
        mov             ar.bsp = r1
        ;;
// AR[BSPSTORE]
        mov             ar.bspstore = r2
        mov             ar.bspstore = r3
        ;;

// AR[CCV]
        mov             ar.ccv = r4
        mov             ar.ccv = r4
        ;;

// AR[EC] (br.wtop paired with an explicit mov to ar.ec)
        br.wtop.sptk    L
        mov             ar.ec = r0
        ;;

// AR[FPSR].sf0.controls (mov to ar.fpsr paired with fsetc.s0)
        mov             ar.fpsr = r0
        fsetc.s0        0x7f, 0x0f
        ;;

// AR[FPSR].sf1.controls (mov to ar.fpsr paired with fsetc.s1)
        mov             ar.fpsr = r0
        fsetc.s1        0x7f, 0x0f
        ;;

// AR[FPSR].sf2.controls (mov to ar.fpsr paired with fsetc.s2)
        mov             ar.fpsr = r0
        fsetc.s2        0x7f, 0x0f
        ;;

// AR[FPSR].sf3.controls (mov to ar.fpsr paired with fsetc.s3)
        mov             ar.fpsr = r0
        fsetc.s3        0x7f, 0x0f
        ;;

// AR[FPSR].sf0.flags (fcmp pair first, then fcmp paired with fclrf)
        fcmp.eq.s0      p1, p2 = f3, f4
        fcmp.eq.s0      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s0      p1, p2 = f3, f4
        fclrf.s0
        ;;

// AR[FPSR].sf1.flags (fcmp pair first, then fcmp paired with fclrf)
        fcmp.eq.s1      p1, p2 = f3, f4
        fcmp.eq.s1      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s1      p1, p2 = f3, f4
        fclrf.s1
        ;;

// AR[FPSR].sf2.flags (fcmp pair first, then fcmp paired with fclrf)
        fcmp.eq.s2      p1, p2 = f3, f4
        fcmp.eq.s2      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s2      p1, p2 = f3, f4
        fclrf.s2
        ;;

// AR[FPSR].sf3.flags (fcmp pair first, then fcmp paired with fclrf)
        fcmp.eq.s3      p1, p2 = f3, f4
        fcmp.eq.s3      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s3      p1, p2 = f3, f4
        fclrf.s3
        ;;

// AR[FPSR].traps/rv plus all controls/flags
        mov             ar.fpsr = r0
        mov             ar.fpsr = r0
        ;;

// AR[ITC]
        mov             ar.itc = r1
        mov             ar.itc = r1
        ;;

// AR[K] (exercised with ar.k2)
        mov             ar.k2 = r3
        mov             ar.k2 = r3
        ;;

// AR[LC] (br.cloop paired with an explicit mov to ar.lc)
        br.cloop.sptk   L
        mov             ar.lc = r0
        ;;

// AR[PFS] (explicit mov to ar.pfs paired with br.call)
        mov             ar.pfs = r0
        br.call.sptk    b0 = L
        ;;

// AR[RNAT] (see also AR[BSPSTORE])
        mov             ar.rnat = r8
        mov             ar.rnat = r8
        ;;

// AR[RSC]
        mov             ar.rsc = r10
        mov             ar.rsc = r10
        ;;

// AR[UNAT] (explicit mov to ar.unat paired with st8.spill)
        mov             ar.unat = r12
        st8.spill       [r0] = r1
        ;;

// AR% (application register named by number: ar48)
        mov             ar48 = r0
        mov             ar48 = r0
        ;;

// BR%
        mov             b1 = r0
        mov             b1 = r1
        ;;

// CFM (and others)
        br.wtop.sptk    L
        br.wtop.sptk    L
        ;;

// CR[CMCV]
        mov             cr.cmcv = r1
        mov             cr.cmcv = r2
        ;;

// CR[DCR]
        mov             cr.dcr = r3
        mov             cr.dcr = r3
        ;;

// CR[EOI] (and InService)
        mov             cr.eoi = r0
        mov             cr.eoi = r0
        ;;
        srlz.d

// CR[GPTA]
        mov             cr.gpta = r6
        mov             cr.gpta = r7
        ;;

// CR[IFA]
        mov             cr.ifa = r9
        mov             cr.ifa = r10
        ;;

// CR[IFS] (explicit mov to cr.ifs paired with cover)
        mov             cr.ifs = r11
        cover
        ;;

// CR[IHA]
        mov             cr.iha = r13
        mov             cr.iha = r14
        ;;

// CR[IIM]
        mov             cr.iim = r15
        mov             cr.iim = r16
        ;;

// CR[IIP]
        mov             cr.iip = r17
        mov             cr.iip = r17
        ;;

// CR[IIPA]
        mov             cr.iipa = r19
        mov             cr.iipa = r20
        ;;

// CR[IPSR]
        mov             cr.ipsr = r21
        mov             cr.ipsr = r22
        ;;

// CR[IRR%] (and others) -- exercised via two reads of cr.ivr
        mov             r2 = cr.ivr
        mov             r3 = cr.ivr
        ;;

// CR[ISR]
        mov             cr.isr = r24
        mov             cr.isr = r25
        ;;

// CR[ITIR]
        mov             cr.itir = r26
        mov             cr.itir = r27
        ;;

// CR[ITM]
        mov             cr.itm = r28
        mov             cr.itm = r29
        ;;

// CR[ITV]
        mov             cr.itv = r0
        mov             cr.itv = r1
        ;;

// CR[IVA]
        mov             cr.iva = r0
        mov             cr.iva = r1
        ;;

// CR[IVR] (no explicit writers, so there is no case to list)

// CR[LID]
        mov             cr.lid = r0
        mov             cr.lid = r1
        ;;

// CR[LRR%] (lrr0 then lrr1 first; afterwards lrr0 written twice)
        mov             cr.lrr0 = r0
        mov             cr.lrr1 = r0            // no DV here
        ;;
        mov             cr.lrr0 = r0
        mov             cr.lrr0 = r0
        ;;

// CR[PMV]
        mov             cr.pmv = r0
        mov             cr.pmv = r1
        ;;

// CR[PTA]
        mov             cr.pta = r0
        mov             cr.pta = r1
        ;;

// CR[TPR]
        mov             cr.tpr = r0
        mov             cr.tpr = r1
        ;;

// DBR# (same index register r1 in both writes)
        mov             dbr[r1] = r1
        mov             dbr[r1] = r2
        ;;
        srlz.d

// DTC (ptc.e pair first, then ptc.e paired with itc.i)
        ptc.e           r0
        ptc.e           r1                      // no DVs here
        ;;
        ptc.e           r0                      // (and others)
        itc.i           r0
        ;;
        srlz.d

// DTC_LIMIT
        ptc.g           r0, r1                  // NOTE: GAS automatically emits stops after
        ptc.ga          r2, r3                  // ptc.g/ptc.ga, so this conflict is no
        ;;                                      // longer possible in GAS-generated assembly
        srlz.d

// DTR (itr.d paired with ptr.d)
        itr.d           dtr[r0] = r1            // (and others)
        ptr.d           r2, r3
        ;;
        srlz.d

// FR% (register move paired with a check load into the same f3)
        mov             f3 = f2
        ldfs.c.clr      f3 = [r1]
        ;;

// GR% (register move paired with a check load into the same r2)
        mov             r2 = r0
        ld8.c.clr       r2 = [r1]
        ;;

// IBR# (index registers r0 and r1)
        mov             ibr[r0] = r2
        mov             ibr[r1] = r2
        ;;

// InService (write of cr.eoi paired with a read of cr.ivr)
        mov             cr.eoi = r0
        mov             r1 = cr.ivr
        ;;
        srlz.d

// ITC (ptc.e paired with itc.i)
        ptc.e           r0
        itc.i           r1
        ;;
        srlz.i
        ;;

// ITR (itr.i paired with ptr.i)
        itr.i           itr[r0] = r1
        ptr.i           r2, r3
        ;;
        srlz.i
        ;;

// PKR# (.reg.val presumably supplies known r1/r2 values to the DV checker -- confirm)
        .reg.val        r1, 0x1
        .reg.val        r2, ~0x1
        mov             pkr[r1] = r1
        mov             pkr[r2] = r1            // no DV here
        ;;
        mov             pkr[r1] = r1
        mov             pkr[r1] = r1
        ;;

// PMC# (index registers r3 and r4)
        mov             pmc[r3] = r1
        mov             pmc[r4] = r1
        ;;

// PMD# (index registers r3 and r4)
        mov             pmd[r3] = r1
        mov             pmd[r4] = r1
        ;;

// PR%, 1 - 15 (exercised with p1 as the shared target)
        cmp.eq          p1, p0 = r0, r1
        cmp.eq          p1, p0 = r2, r3
        ;;
        fcmp.eq         p1, p2 = f2, f3
        fcmp.eq         p1, p3 = f2, f3
        ;;
        cmp.eq.and      p1, p2 = r0, r1
        cmp.eq.or       p1, p3 = r2, r3
        ;;
        cmp.eq.or       p1, p3 = r2, r3
        cmp.eq.and      p1, p2 = r0, r1
        ;;
        cmp.eq.and      p1, p2 = r0, r1
        cmp.eq.and      p1, p3 = r2, r3         // no DV here
        ;;
        cmp.eq.or       p1, p2 = r0, r1
        cmp.eq.or       p1, p3 = r2, r3         // no DV here
        ;;

// PR63 (br.wtop pair, then the same compare patterns with p63)
        br.wtop.sptk    L
        br.wtop.sptk    L
        ;;
        cmp.eq          p63, p0 = r0, r1
        cmp.eq          p63, p0 = r2, r3
        ;;
        fcmp.eq         p63, p2 = f2, f3
        fcmp.eq         p63, p3 = f2, f3
        ;;
        cmp.eq.and      p63, p2 = r0, r1
        cmp.eq.or       p63, p3 = r2, r3
        ;;
        cmp.eq.or       p63, p3 = r2, r3
        cmp.eq.and      p63, p2 = r0, r1
        ;;
        cmp.eq.and      p63, p2 = r0, r1
        cmp.eq.and      p63, p3 = r2, r3        // no DV here
        ;;
        cmp.eq.or       p63, p2 = r0, r1
        cmp.eq.or       p63, p3 = r2, r3        // no DV here
        ;;

// PSR.ac (rum with mask bit 3)
        rum             (1<<3)
        rum             (1<<3)
        ;;

// PSR.be (rum with mask bit 1)
        rum             (1<<1)
        rum             (1<<1)
        ;;

// PSR.bn
        bsw.0                                   // GAS automatically emits a stop after bsw.n
        bsw.0                                   // so this conflict is avoided
        ;;

// PSR.cpl (epc paired with br.ret)
        epc
        br.ret.sptk     b0
        ;;

// PSR.da (rfi is the only writer)
// PSR.db (and others) -- exercised via two movs to psr.l
        mov             psr.l = r0
        mov             psr.l = r1
        ;;
        srlz.d

// PSR.dd (rfi is the only writer)

// PSR.dfh (ssm with mask bit 19)
        ssm             (1<<19)
        ssm             (1<<19)
        ;;
        srlz.d

// PSR.dfl (ssm with mask bit 18)
        ssm             (1<<18)
        ssm             (1<<18)
        ;;
        srlz.d

// PSR.di (rsm with mask bit 22)
        rsm             (1<<22)
        rsm             (1<<22)
        ;;

// PSR.dt (rsm with mask bit 17)
        rsm             (1<<17)
        rsm             (1<<17)
        ;;

// PSR.ed (rfi is the only writer)
// PSR.i (ssm with mask bit 14)
        ssm             (1<<14)
        ssm             (1<<14)
        ;;

// PSR.ia (no DV semantics)
// PSR.ic (ssm with mask bit 13)
        ssm             (1<<13)
        ssm             (1<<13)
        ;;

// PSR.id (rfi is the only writer)
// PSR.is (br.ia and rfi are the only writers)
// PSR.it (rfi is the only writer)
// PSR.lp (see PSR.db)

// PSR.mc (rfi is the only writer)
// PSR.mfh (mask bit 5; FP moves here use f32 and above)
        mov             f32 = f33
        mov             r10 = psr
        ;;
        ssm             (1<<5)
        ssm             (1<<5)
        ;;
        ssm             (1<<5)
        mov             psr.um = r10
        ;;
        rum             (1<<5)
        rum             (1<<5)
        ;;
        mov             f32 = f33
        mov             f34 = f35               // no DV here
        ;;

// PSR.mfl (mask bit 4; FP moves here use f2 - f5)
        mov             f2 = f3
        mov             r10 = psr
        ;;
        ssm             (1<<4)
        ssm             (1<<4)
        ;;
        ssm             (1<<4)
        mov             psr.um = r10
        ;;
        rum             (1<<4)
        rum             (1<<4)
        ;;
        mov             f2 = f3
        mov             f4 = f5                 // no DV here
        ;;

// PSR.pk (rsm with mask bit 15)
        rsm             (1<<15)
        rsm             (1<<15)
        ;;

// PSR.pp (rsm with mask bit 21)
        rsm             (1<<21)
        rsm             (1<<21)
        ;;

// PSR.ri (no DV semantics)
// PSR.rt (see PSR.db)

// PSR.si (rsm then ssm, both with mask bit 23)
        rsm             (1<<23)
        ssm             (1<<23)
        ;;

// PSR.sp (ssm then rsm, both with mask bit 20)
        ssm             (1<<20)
        rsm             (1<<20)
        ;;
        srlz.d

// PSR.ss (rfi is the only writer)
// PSR.tb (see PSR.db)

// PSR.up (mask bit 2: rsm pair, then rum paired with mov to psr.um)
        rsm             (1<<2)
        rsm             (1<<2)
        ;;
        rum             (1<<2)
        mov             psr.um = r0
        ;;

// RR# (same index register r2 in both writes)
        mov             rr[r2] = r1
        mov             rr[r2] = r3
        ;;

// PR, additional cases (or.andcm and and.orcm interaction)
        cmp.eq.or.andcm p6, p7 = 1, r32
        cmp.eq.or.andcm p6, p7 = 5, r36         // no DV here
        ;;
        cmp.eq.and.orcm p6, p7 = 1, r32
        cmp.eq.and.orcm p6, p7 = 5, r36         // no DV here
        ;;
        cmp.eq.or.andcm p63, p7 = 1, r32
        cmp.eq.or.andcm p63, p7 = 5, r36        // no DV here
        ;;
        cmp.eq.or.andcm p6, p63 = 1, r32
        cmp.eq.or.andcm p6, p63 = 5, r36        // no DV here
        ;;
        cmp.eq.and.orcm p63, p7 = 1, r32
        cmp.eq.and.orcm p63, p7 = 5, r36        // no DV here
        ;;
        cmp.eq.and.orcm p6, p63 = 1, r32
        cmp.eq.and.orcm p6, p63 = 5, r36        // no DV here
        ;;
        cmp.eq.or.andcm p6, p7 = 1, r32
        cmp.eq.and.orcm p6, p7 = 5, r36
        ;;
        cmp.eq.or.andcm p63, p7 = 1, r32
        cmp.eq.and.orcm p63, p7 = 5, r36
        ;;
        cmp.eq.or.andcm p6, p63 = 1, r32
        cmp.eq.and.orcm p6, p63 = 5, r36
        ;;

// PR%, 16 - 62 (exercised with p21 as the shared target)
        cmp.eq          p21, p0 = r0, r1
        cmp.eq          p21, p0 = r2, r3
        ;;
        fcmp.eq         p21, p22 = f2, f3
        fcmp.eq         p21, p23 = f2, f3
        ;;
        cmp.eq.and      p21, p22 = r0, r1
        cmp.eq.or       p21, p23 = r2, r3
        ;;
        cmp.eq.or       p21, p23 = r2, r3
        cmp.eq.and      p21, p22 = r0, r1
        ;;
        cmp.eq.and      p21, p22 = r0, r1
        cmp.eq.and      p21, p23 = r2, r3       // no DV here
        ;;
        cmp.eq.or       p21, p22 = r0, r1
        cmp.eq.or       p21, p23 = r2, r3       // no DV here
        ;;

// RSE (no cases listed)

L: