1//
2// Detect WAW violations.  Cases taken from DV tables.
3//
4.text
5	.explicit	// NOTE(review): explicit mode -- stops are the hand-written ";;" lines, so each pair below shares a group; confirm against the GAS IA-64 docs
6// AR[BSP]: two moves to ar.bsp with no stop between them
7	mov	ar.bsp = r0
8	mov	ar.bsp = r1
9	;;
10// AR[BSPSTORE]: two moves to ar.bspstore with no stop between them
11	mov	ar.bspstore = r2
12	mov	ar.bspstore = r3
13	;;
14
15// AR[CCV]: same source register (r4) both times; only the destination is duplicated
16	mov	ar.ccv = r4
17	mov	ar.ccv = r4
18	;;
19
20// AR[EC]: a branch paired with an explicit ar.ec write
21	br.wtop.sptk	L	// listed under AR[EC], so presumably an implicit AR[EC] writer -- confirm against the DV tables
22	mov	ar.ec = r0
23	;;
24
25// AR[FPSR].sf0.controls: whole-register ar.fpsr write paired with fsetc on status field 0
26	mov		ar.fpsr = r0
27	fsetc.s0	0x7f, 0x0f
28	;;
29
30// AR[FPSR].sf1.controls: same pairing, status field 1
31	mov		ar.fpsr = r0
32	fsetc.s1	0x7f, 0x0f
33	;;
34
35// AR[FPSR].sf2.controls: same pairing, status field 2
36	mov		ar.fpsr = r0
37	fsetc.s2	0x7f, 0x0f
38	;;
39
40// AR[FPSR].sf3.controls: same pairing, status field 3
41	mov		ar.fpsr = r0
42	fsetc.s3	0x7f, 0x0f
43	;;
44
45// AR[FPSR].sf0.flags: first an fcmp/fcmp pair (marked no DV), then fcmp/fclrf on the same status field
46	fcmp.eq.s0	p1, p2 = f3, f4
47	fcmp.eq.s0	p3, p4 = f3, f4	// no DV here
48	;;
49	fcmp.eq.s0	p1, p2 = f3, f4
50	fclrf.s0	// unlike the pair above, this one carries no "no DV" marker
51	;;
52
53// AR[FPSR].sf1.flags: same two pairs, status field 1
54	fcmp.eq.s1	p1, p2 = f3, f4
55	fcmp.eq.s1	p3, p4 = f3, f4	// no DV here
56	;;
57	fcmp.eq.s1	p1, p2 = f3, f4
58	fclrf.s1
59	;;
60
61// AR[FPSR].sf2.flags: same two pairs, status field 2
62	fcmp.eq.s2	p1, p2 = f3, f4
63	fcmp.eq.s2	p3, p4 = f3, f4	// no DV here
64	;;
65	fcmp.eq.s2	p1, p2 = f3, f4
66	fclrf.s2
67	;;
68
69// AR[FPSR].sf3.flags: same two pairs, status field 3
70	fcmp.eq.s3	p1, p2 = f3, f4
71	fcmp.eq.s3	p3, p4 = f3, f4	// no DV here
72	;;
73	fcmp.eq.s3	p1, p2 = f3, f4
74	fclrf.s3
75	;;
76
77// AR[FPSR].traps/rv plus all controls/flags: two whole-register writes in one group
78	mov	ar.fpsr = r0
79	mov	ar.fpsr = r0
80	;;
81
82// AR[ITC]: two moves to ar.itc with no stop between them
83	mov	ar.itc = r1
84	mov	ar.itc = r1
85	;;
86
87// AR[K]: exercised with ar.k2
88	mov	ar.k2 = r3
89	mov	ar.k2 = r3
90	;;
91
92// AR[LC]: a branch paired with an explicit ar.lc write
93	br.cloop.sptk	L	// listed under AR[LC], so presumably an implicit AR[LC] writer -- confirm against the DV tables
94	mov	ar.lc = r0
95	;;
96
97// AR[PFS]: an explicit ar.pfs write paired with a call
98	mov	ar.pfs = r0
99	br.call.sptk	b0 = L	// listed under AR[PFS], so presumably an implicit AR[PFS] writer -- confirm against the DV tables
100	;;
101
102// AR[RNAT] (see also AR[BSPSTORE])
103	mov	ar.rnat = r8
104	mov	ar.rnat = r8
105	;;
106
107// AR[RSC]: two moves to ar.rsc with no stop between them
108	mov	ar.rsc = r10
109	mov	ar.rsc = r10
110	;;
111
112// AR[UNAT]: an explicit ar.unat write paired with a spill store
113	mov	ar.unat = r12
114	st8.spill	[r0] = r1	// listed under AR[UNAT], so presumably an implicit AR[UNAT] writer -- confirm against the DV tables
115	;;
116
117// AR%: an application register addressed by number (ar48), written twice
118	mov	ar48 = r0
119	mov	ar48 = r0
120	;;
121
122// BR%: two moves to the same branch register (b1) with no stop between them
123	mov	b1 = r0
124	mov	b1 = r1
125	;;
126
127// CFM (and others): two identical br.wtop instructions in one group
128	br.wtop.sptk	L
129	br.wtop.sptk	L
130	;;
131
132// CR[CMCV] -- control-register cases: unless noted, each pair writes the named CR twice with no stop between
133	mov	cr.cmcv = r1
134	mov	cr.cmcv = r2
135	;;
136
137// CR[DCR]
138	mov	cr.dcr = r3
139	mov	cr.dcr = r3
140	;;
141
142// CR[EOI] (and InService)
143	mov	cr.eoi = r0
144	mov	cr.eoi = r0
145	;;
146	srlz.d	// placed after the stop, i.e. outside the pair under test; presumably keeps this case from interacting with the next -- confirm
147
148// CR[GPTA]
149	mov	cr.gpta = r6
150	mov	cr.gpta = r7
151	;;
152
153// CR[IFA]
154	mov	cr.ifa = r9
155	mov	cr.ifa = r10
156	;;
157
158// CR[IFS]: an explicit cr.ifs write paired with cover
159	mov	cr.ifs = r11
160	cover	// listed under CR[IFS], so presumably an implicit CR[IFS] writer -- confirm against the DV tables
161	;;
162
163// CR[IHA]
164	mov	cr.iha = r13
165	mov	cr.iha = r14
166	;;
167
168// CR[IIM]
169	mov	cr.iim = r15
170	mov	cr.iim = r16
171	;;
172
173// CR[IIP]
174	mov	cr.iip = r17
175	mov	cr.iip = r17
176	;;
177
178// CR[IIPA]
179	mov	cr.iipa = r19
180	mov	cr.iipa = r20
181	;;
182
183// CR[IPSR]
184	mov	cr.ipsr = r21
185	mov	cr.ipsr = r22
186	;;
187
188// CR[IRR%] (and others): the pair is two *reads* of cr.ivr into different GRs
189	mov	r2 = cr.ivr	// NOTE(review): presumably reading cr.ivr updates IRR state as a side effect, which is why reads appear in a WAW test -- confirm
190	mov	r3 = cr.ivr
191	;;
192
193// CR[ISR]
194	mov	cr.isr = r24
195	mov	cr.isr = r25
196	;;
197
198// CR[ITIR]
199	mov	cr.itir = r26
200	mov	cr.itir = r27
201	;;
202
203// CR[ITM]
204	mov	cr.itm = r28
205	mov	cr.itm = r29
206	;;
207
208// CR[ITV]
209	mov	cr.itv = r0
210	mov	cr.itv = r1
211	;;
212
213// CR[IVA]
214	mov	cr.iva = r0
215	mov	cr.iva = r1
216	;;
217
218// CR[IVR] (no explicit writers)
219
220// CR[LID]
221	mov	cr.lid = r0
222	mov	cr.lid = r1
223	;;
224
225// CR[LRR%]: writes to two different LRRs (marked no DV), then two writes to the same LRR
226	mov	cr.lrr0 = r0
227	mov	cr.lrr1 = r0 // no DV here
228	;;
229	mov	cr.lrr0 = r0
230	mov	cr.lrr0 = r0
231	;;
232
233// CR[PMV]
234	mov	cr.pmv = r0
235	mov	cr.pmv = r1
236	;;
237
238// CR[PTA]
239	mov	cr.pta = r0
240	mov	cr.pta = r1
241	;;
242
243// CR[TPR]
244	mov	cr.tpr = r0
245	mov	cr.tpr = r1
246	;;
247
248// DBR#: two writes through the same index register (r1)
249	mov	dbr[r1] = r1
250	mov	dbr[r1] = r2
251	;;
252	srlz.d	// after the stop, outside the pair under test
253
254// DTC: ptc.e/ptc.e first (marked no DV), then ptc.e paired with itc.i
255	ptc.e	r0
256	ptc.e	r1	// no DVs here
257	;;
258	ptc.e	r0	// (and others)
259	itc.i	r0
260	;;
261	srlz.d
262
263// DTC_LIMIT
264	ptc.g	r0, r1		// NOTE: GAS automatically emits stops after
265	ptc.ga	r2, r3		//  ptc.g/ptc.ga, so this conflict is no
266	;;			//  longer possible in GAS-generated assembly
267	srlz.d
268
269// DTR: a translation-register insert paired with a purge
270	itr.d	dtr[r0] = r1	// (and others)
271	ptr.d	r2, r3
272	;;
273	srlz.d
274
275// FR%: a plain move into f3 paired with a check load that also targets f3
276	mov		f3 = f2
277	ldfs.c.clr	f3 = [r1]
278	;;
279
280// GR%: a plain move into r2 paired with a check load that also targets r2
281	mov		r2 = r0
282	ld8.c.clr	r2 = [r1]
283	;;
284
285// IBR#: index registers differ (r0 vs r1); NOTE(review): no .reg.val is given for them, so presumably the assembler cannot prove the indices differ -- confirm
286	mov	ibr[r0] = r2
287	mov	ibr[r1] = r2
288	;;
289
290// InService: a cr.eoi write paired with a cr.ivr read
291	mov	cr.eoi = r0
292	mov	r1 = cr.ivr
293	;;
294	srlz.d
295
296// ITC: ptc.e paired with itc.i, followed by an instruction serialize in its own group
297	ptc.e	r0
298	itc.i	r1
299	;;
300	srlz.i
301	;;
302
303// ITR: a translation-register insert paired with a purge, then srlz.i in its own group
304	itr.i	itr[r0] = r1
305	ptr.i	r2, r3
306	;;
307	srlz.i
308	;;
309
310// PKR#: .reg.val declares values for r1 and r2; presumably this lets the assembler tell pkr[r1] and pkr[r2] apart (first pair is marked no DV) -- confirm
311	.reg.val r1, 0x1
312	.reg.val r2, ~0x1
313	mov	pkr[r1] = r1
314	mov	pkr[r2] = r1	// no DV here
315	;;
316	mov	pkr[r1] = r1	// same index register in both writes of this pair
317	mov	pkr[r1] = r1
318	;;
319
320// PMC#: index registers differ (r3 vs r4) and no .reg.val is given for them
321	mov	pmc[r3] = r1
322	mov	pmc[r4] = r1
323	;;
324
325// PMD#: index registers differ (r3 vs r4) and no .reg.val is given for them
326	mov	pmd[r3] = r1
327	mov	pmd[r4] = r1
328	;;
329
330// PR%, 1 - 15: every pair below names p1 as a target in both instructions
331	cmp.eq	p1, p0 = r0, r1	// plain compare, p1 targeted twice
332	cmp.eq	p1, p0 = r2, r3
333	;;
334	fcmp.eq p1, p2 = f2, f3	// floating-point compare, p1 targeted twice
335	fcmp.eq p1, p3 = f2, f3
336	;;
337	cmp.eq.and p1, p2 = r0, r1	// and-type followed by or-type on p1
338	cmp.eq.or  p1, p3 = r2, r3
339	;;
340	cmp.eq.or  p1, p3 = r2, r3	// or-type followed by and-type on p1
341	cmp.eq.and p1, p2 = r0, r1
342	;;
343	cmp.eq.and p1, p2 = r0, r1	// and-type with and-type
344	cmp.eq.and p1, p3 = r2, r3 // no DV here
345	;;
346	cmp.eq.or p1, p2 = r0, r1	// or-type with or-type
347	cmp.eq.or p1, p3 = r2, r3 // no DV here
348	;;
349
350// PR63: the same compare patterns as for p1, applied to p63, preceded by a br.wtop pair
351	br.wtop.sptk	L	// listed under PR63, so presumably an implicit PR63 writer -- confirm against the DV tables
352	br.wtop.sptk	L
353	;;
354	cmp.eq	p63, p0 = r0, r1
355	cmp.eq	p63, p0 = r2, r3
356	;;
357	fcmp.eq p63, p2 = f2, f3
358	fcmp.eq p63, p3 = f2, f3
359	;;
360	cmp.eq.and p63, p2 = r0, r1
361	cmp.eq.or  p63, p3 = r2, r3
362	;;
363	cmp.eq.or  p63, p3 = r2, r3
364	cmp.eq.and p63, p2 = r0, r1
365	;;
366	cmp.eq.and p63, p2 = r0, r1
367	cmp.eq.and p63, p3 = r2, r3 // no DV here
368	;;
369	cmp.eq.or p63, p2 = r0, r1
370	cmp.eq.or p63, p3 = r2, r3 // no DV here
371	;;
372
373// PSR.ac -- PSR cases: each pair uses the mask bit shown for the PSR field named in its heading
374	rum	(1<<3)
375	rum	(1<<3)
376	;;
377
378// PSR.be
379	rum	(1<<1)
380	rum	(1<<1)
381	;;
382
383// PSR.bn
384	bsw.0			// GAS automatically emits a stop after bsw.n
385	bsw.0			// so this conflict is avoided
386	;;
387
388// PSR.cpl: epc paired with a return branch
389	epc
390	br.ret.sptk	b0
391	;;
392
393// PSR.da (rfi is the only writer)
394// PSR.db (and others)
395	mov	psr.l = r0
396	mov	psr.l = r1
397	;;
398	srlz.d	// after the stop, outside the pair under test
399
400// PSR.dd (rfi is the only writer)
401
402// PSR.dfh
403	ssm	(1<<19)
404	ssm	(1<<19)
405	;;
406	srlz.d
407
408// PSR.dfl
409	ssm	(1<<18)
410	ssm	(1<<18)
411	;;
412	srlz.d
413
414// PSR.di
415	rsm	(1<<22)
416	rsm	(1<<22)
417	;;
418
419// PSR.dt
420	rsm	(1<<17)
421	rsm	(1<<17)
422	;;
423
424// PSR.ed (rfi is the only writer)
425// PSR.i
426	ssm	(1<<14)
427	ssm	(1<<14)
428	;;
429
430// PSR.ia (no DV semantics)
431// PSR.ic
432	ssm	(1<<13)
433	ssm	(1<<13)
434	;;
435
436// PSR.id (rfi is the only writer)
437// PSR.is (br.ia and rfi are the only writers)
438// PSR.it (rfi is the only writer)
439// PSR.lp (see PSR.db)
440
441// PSR.mc (rfi is the only writer)
442// PSR.mfh: uses f32 and above; NOTE(review): presumably a write to one of these FRs implicitly sets PSR.mfh -- confirm
443	mov	f32 = f33
444	mov	r10 = psr	// r10 is the source of the "mov psr.um = r10" a few lines below
445	;;
446	ssm	(1<<5)
447	ssm	(1<<5)
448	;;
449	ssm	(1<<5)
450	mov	psr.um = r10	// user-mask write paired with the ssm above
451	;;
452	rum	(1<<5)
453	rum	(1<<5)
454	;;
455	mov	f32 = f33
456	mov	f34 = f35	// no DV here
457	;;
458
459// PSR.mfl: same sequence as PSR.mfh but with low FRs (f2..f5) and mask bit 4
460	mov	f2 = f3
461	mov	r10 = psr	// r10 is the source of the "mov psr.um = r10" a few lines below
462	;;
463	ssm	(1<<4)
464	ssm	(1<<4)
465	;;
466	ssm	(1<<4)
467	mov	psr.um = r10
468	;;
469	rum	(1<<4)
470	rum	(1<<4)
471	;;
472	mov	f2 = f3
473	mov	f4 = f5	// no DV here
474	;;
475
476// PSR.pk
477	rsm	(1<<15)
478	rsm	(1<<15)
479	;;
480
481// PSR.pp
482	rsm	(1<<21)
483	rsm	(1<<21)
484	;;
485
486// PSR.ri (no DV semantics)
487// PSR.rt (see PSR.db)
488
489// PSR.si: reset then set of the same mask bit in one group
490	rsm	(1<<23)
491	ssm	(1<<23)
492	;;
493
494// PSR.sp: set then reset of the same mask bit in one group
495	ssm	(1<<20)
496	rsm	(1<<20)
497	;;
498	srlz.d
499
500// PSR.ss (rfi is the only writer)
501// PSR.tb (see PSR.db)
502
503// PSR.up: an rsm/rsm pair, then rum paired with a whole user-mask write
504	rsm	(1<<2)
505	rsm	(1<<2)
506	;;
507	rum	(1<<2)
508	mov	psr.um = r0
509	;;
510
511// RR#: two writes through the same index register (r2)
512	mov	rr[r2] = r1
513	mov	rr[r2] = r3
514	;;
515
516// PR, additional cases (or.andcm and and.orcm interaction): same-type pairs first (all marked no DV), mixed-type pairs last
517	cmp.eq.or.andcm	p6, p7 = 1, r32
518	cmp.eq.or.andcm p6, p7 = 5, r36	// no DV here
519	;;
520	cmp.eq.and.orcm	p6, p7 = 1, r32
521	cmp.eq.and.orcm p6, p7 = 5, r36	// no DV here
522	;;
523	cmp.eq.or.andcm	p63, p7 = 1, r32
524	cmp.eq.or.andcm p63, p7 = 5, r36 // no DV here
525	;;
526	cmp.eq.or.andcm	p6, p63 = 1, r32
527	cmp.eq.or.andcm p6, p63 = 5, r36 // no DV here
528	;;
529	cmp.eq.and.orcm	p63, p7 = 1, r32
530	cmp.eq.and.orcm p63, p7 = 5, r36 // no DV here
531	;;
532	cmp.eq.and.orcm	p6, p63 = 1, r32
533	cmp.eq.and.orcm p6, p63 = 5, r36 // no DV here
534	;;
535	cmp.eq.or.andcm	p6, p7 = 1, r32	// or.andcm mixed with and.orcm on the same predicate pair; no "no DV" marker from here on
536	cmp.eq.and.orcm p6, p7 = 5, r36
537	;;
538	cmp.eq.or.andcm	p63, p7 = 1, r32
539	cmp.eq.and.orcm p63, p7 = 5, r36
540	;;
541	cmp.eq.or.andcm	p6, p63 = 1, r32
542	cmp.eq.and.orcm p6, p63 = 5, r36
543	;;
544
545// PR%, 16 - 62: the same compare patterns as the "PR%, 1 - 15" section, using p21 as the shared target
546	cmp.eq	p21, p0 = r0, r1
547	cmp.eq	p21, p0 = r2, r3
548	;;
549	fcmp.eq p21, p22 = f2, f3
550	fcmp.eq p21, p23 = f2, f3
551	;;
552	cmp.eq.and p21, p22 = r0, r1
553	cmp.eq.or  p21, p23 = r2, r3
554	;;
555	cmp.eq.or  p21, p23 = r2, r3
556	cmp.eq.and p21, p22 = r0, r1
557	;;
558	cmp.eq.and p21, p22 = r0, r1
559	cmp.eq.and p21, p23 = r2, r3 // no DV here
560	;;
561	cmp.eq.or p21, p22 = r0, r1
562	cmp.eq.or p21, p23 = r2, r3 // no DV here
563	;;
564
565// RSE: no instruction pairs are listed for this resource
566
567L:	// target of the br.wtop / br.cloop / br.call instructions above
568