1//
2// Detect WAW (write-after-write) dependency violations.  Cases taken from the DV (dependency violation) tables.
3//
4.text
5	.explicit	// NOTE(review): selects GAS explicit mode (stops are written by hand) - confirm against the GAS IA-64 docs
6// AR[BSP]
7	mov	ar.bsp = r0	// first write in the instruction group
8	mov	ar.bsp = r1	// second write with no stop in between: the WAW case under test
9	;;			// stop: ends the instruction group
10// AR[BSPSTORE]
11	mov	ar.bspstore = r2
12	mov	ar.bspstore = r3
13	;;
14
15// AR[CCV]
16	mov	ar.ccv = r4
17	mov	ar.ccv = r4	// same source register; the destination is still written twice
18	;;
19
20// AR[EC]
21	br.wtop.sptk	L	// branch listed under this heading as a writer of AR[EC]
22	mov	ar.ec = r0	// explicit write of AR[EC] in the same group
23	;;
24
25// AR[FPSR].sf0.controls
26	mov		ar.fpsr = r0	// whole-register write of AR[FPSR]
27	fsetc.s0	0x7f, 0x0f	// second writer: targets status field 0 (.s0)
28	;;
29
30// AR[FPSR].sf1.controls
31	mov		ar.fpsr = r0
32	fsetc.s1	0x7f, 0x0f
33	;;
34
35// AR[FPSR].sf2.controls
36	mov		ar.fpsr = r0
37	fsetc.s2	0x7f, 0x0f
38	;;
39
40// AR[FPSR].sf3.controls
41	mov		ar.fpsr = r0
42	fsetc.s3	0x7f, 0x0f
43	;;
44
45// AR[FPSR].sf0.flags
46	fcmp.eq.s0	p1, p2 = f3, f4
47	fcmp.eq.s0	p3, p4 = f3, f4	// no DV here
48	;;
49	fcmp.eq.s0	p1, p2 = f3, f4	// fcmp using status field 0
50	fclrf.s0			// paired with the fcmp above as the sf0.flags conflict
51	;;
52
53// AR[FPSR].sf1.flags
54	fcmp.eq.s1	p1, p2 = f3, f4
55	fcmp.eq.s1	p3, p4 = f3, f4	// no DV here
56	;;
57	fcmp.eq.s1	p1, p2 = f3, f4
58	fclrf.s1
59	;;
60
61// AR[FPSR].sf2.flags
62	fcmp.eq.s2	p1, p2 = f3, f4
63	fcmp.eq.s2	p3, p4 = f3, f4	// no DV here
64	;;
65	fcmp.eq.s2	p1, p2 = f3, f4
66	fclrf.s2
67	;;
68
69// AR[FPSR].sf3.flags
70	fcmp.eq.s3	p1, p2 = f3, f4
71	fcmp.eq.s3	p3, p4 = f3, f4	// no DV here
72	;;
73	fcmp.eq.s3	p1, p2 = f3, f4
74	fclrf.s3
75	;;
76
77// AR[FPSR].traps/rv plus all controls/flags
78	mov	ar.fpsr = r0	// two whole-register writes of AR[FPSR] in one group
79	mov	ar.fpsr = r0
80	;;
81
82// AR[ITC]
83	mov	ar.itc = r1
84	mov	ar.itc = r1
85	;;
86
87// AR[RUC]
88	mov	ar.ruc = r1
89	mov	ar.ruc = r1
90	;;
91
92// AR[K]
93	mov	ar.k2 = r3	// ar.k2 stands in for the AR[K] registers named in the heading
94	mov	ar.k2 = r3
95	;;
96
97// AR[LC]
98	br.cloop.sptk	L	// branch listed under this heading as a writer of AR[LC]
99	mov	ar.lc = r0	// explicit write of AR[LC] in the same group
100	;;
101
102// AR[PFS]
103	mov	ar.pfs = r0	// explicit write of AR[PFS]
104	br.call.sptk	b0 = L	// call listed under this heading as a second writer of AR[PFS]
105	;;
106
107// AR[RNAT] (see also AR[BSPSTORE])
108	mov	ar.rnat = r8
109	mov	ar.rnat = r8
110	;;
111
112// AR[RSC]
113	mov	ar.rsc = r10
114	mov	ar.rsc = r10
115	;;
116
117// AR[UNAT]
118	mov	ar.unat = r12	// explicit write of AR[UNAT]
119	st8.spill	[r0] = r1	// spill store listed under this heading as a second writer of AR[UNAT]
120	;;
121
122// AR%
123	mov	ar48 = r0	// application register addressed by number rather than by name
124	mov	ar48 = r0
125	;;
126
127// BR%
128	mov	b1 = r0		// first write of branch register b1
129	mov	b1 = r1		// second write of b1 in the same group
130	;;
131
132// CFM (and others)
133	br.wtop.sptk	L	// each br.wtop is listed under this heading as a CFM writer
134	br.wtop.sptk	L	// second br.wtop with no stop in between
135	;;
136
137// CR[CMCV]
138	mov	cr.cmcv = r1	// control-register cases: two writes of the same CR in one group
139	mov	cr.cmcv = r2
140	;;
141
142// CR[DCR]
143	mov	cr.dcr = r3
144	mov	cr.dcr = r3
145	;;
146
147// CR[EOI] (and InService)
148	mov	cr.eoi = r0
149	mov	cr.eoi = r0
150	;;
151	srlz.d			// data serialization placed after the stop, outside the tested group
152
153// CR[GPTA]
154	mov	cr.gpta = r6
155	mov	cr.gpta = r7
156	;;
157
158// CR[IFA]
159	mov	cr.ifa = r9
160	mov	cr.ifa = r10
161	;;
162
163// CR[IFS]
164	mov	cr.ifs = r11	// explicit write of CR[IFS]
165	cover			// listed under this heading as a second writer of CR[IFS]
166	;;
167
168// CR[IHA]
169	mov	cr.iha = r13
170	mov	cr.iha = r14
171	;;
172
173// CR[IIB%]
174	mov	cr.iib0 = r15	// iib0 written twice
175	mov	cr.iib0 = r16
176	;;
177
178	mov	cr.iib1 = r15	// iib1 written twice (still part of the CR[IIB%] case)
179	mov	cr.iib1 = r16
180	;;
181
182// CR[IIM]
183	mov	cr.iim = r15
184	mov	cr.iim = r16
185	;;
186
187// CR[IIP]
188	mov	cr.iip = r17
189	mov	cr.iip = r17
190	;;
191
192// CR[IIPA]
193	mov	cr.iipa = r19
194	mov	cr.iipa = r20
195	;;
196
197// CR[IPSR]
198	mov	cr.ipsr = r21
199	mov	cr.ipsr = r22
200	;;
201
202// CR[IRR%] (and others)
203	mov	r2 = cr.ivr	// a read of cr.ivr is what this heading lists as affecting CR[IRR%]
204	mov	r3 = cr.ivr	// second read of cr.ivr in the same group
205	;;
206
207// CR[ISR]
208	mov	cr.isr = r24
209	mov	cr.isr = r25
210	;;
211
212// CR[ITIR]
213	mov	cr.itir = r26
214	mov	cr.itir = r27
215	;;
216
217// CR[ITM]
218	mov	cr.itm = r28
219	mov	cr.itm = r29
220	;;
221
222// CR[ITV]
223	mov	cr.itv = r0
224	mov	cr.itv = r1
225	;;
226
227// CR[IVA]
228	mov	cr.iva = r0
229	mov	cr.iva = r1
230	;;
231
232// CR[IVR] (no explicit writers)
233
234// CR[LID]
235	mov	cr.lid = r0
236	mov	cr.lid = r1
237	;;
238
239// CR[LRR%]
240	mov	cr.lrr0 = r0
241	mov	cr.lrr1 = r0 // no DV here: lrr0 and lrr1 are different registers
242	;;
243	mov	cr.lrr0 = r0	// same register (lrr0) written twice
244	mov	cr.lrr0 = r0
245	;;
246
247// CR[PMV]
248	mov	cr.pmv = r0
249	mov	cr.pmv = r1
250	;;
251
252// CR[PTA]
253	mov	cr.pta = r0
254	mov	cr.pta = r1
255	;;
256
257// CR[TPR]
258	mov	cr.tpr = r0
259	mov	cr.tpr = r1
260	;;
261
262// DBR#
263	mov	dbr[r1] = r1	// same index register (r1) used for both writes
264	mov	dbr[r1] = r2
265	;;
266	srlz.d			// data serialization after the stop, outside the tested group
267
268// DTC
269	ptc.e	r0
270	ptc.e	r1	// no DVs here
271	;;
272	ptc.e	r0	// (and others)
273	itc.i	r0	// paired with the ptc.e above as the DTC conflict
274	;;
275	srlz.d
276
277// DTC_LIMIT
278	ptc.g	r0, r1		// NOTE: GAS automatically emits stops after
279	ptc.ga	r2, r3		//  ptc.g/ptc.ga, so this conflict is no
280	;;			//  longer possible in GAS-generated assembly
281	srlz.d
282
283// DTR
284	itr.d	dtr[r0] = r1	// (and others)
285	ptr.d	r2, r3		// paired with the itr.d above as the DTR conflict
286	;;
287	srlz.d
288
289// FR%
290	mov		f3 = f2		// register move into f3
291	ldfs.c.clr	f3 = [r1]	// check load into the same f3
292	;;
293
294// GR%
295	mov		r2 = r0		// register move into r2
296	ld8.c.clr	r2 = [r1]	// check load into the same r2
297	;;
298
299// IBR#
300	mov	ibr[r0] = r2	// indexed by r0
301	mov	ibr[r1] = r2	// indexed by r1; no .reg.val declares r1 before this point
302	;;
303
304// InService
305	mov	cr.eoi = r0	// EOI write
306	mov	r1 = cr.ivr	// IVR read, paired with the EOI write as the InService conflict
307	;;
308	srlz.d
309
310// ITC
311	ptc.e	r0
312	itc.i	r1
313	;;
314	srlz.i			// instruction serialization, in its own group
315	;;
316
317// ITR
318	itr.i	itr[r0] = r1
319	ptr.i	r2, r3		// paired with the itr.i above as the ITR conflict
320	;;
321	srlz.i
322	;;
323
324// PKR#
325	.reg.val r1, 0x1	// presumably tells the DV checker the value held in r1 - confirm in the GAS docs
326	.reg.val r2, ~0x1	// r2 is given a value different from r1
327	mov	pkr[r1] = r1
328	mov	pkr[r2] = r1	// no DV here: r2 is declared with a different value than r1 above
329	;;
330	mov	pkr[r1] = r1	// same index register (r1) used twice
331	mov	pkr[r1] = r1
332	;;
333
334// PMC#
335	mov	pmc[r3] = r1	// indexed by r3 and r4; neither has a .reg.val declaration in view
336	mov	pmc[r4] = r1
337	;;
338
339// PMD#
340	mov	pmd[r3] = r1
341	mov	pmd[r4] = r1
342	;;
343
344// PR%, 1 - 15
345	cmp.eq	p1, p0 = r0, r1		// p1 targeted by two plain compares
346	cmp.eq	p1, p0 = r2, r3
347	;;
348	fcmp.eq p1, p2 = f2, f3		// p1 targeted by two floating-point compares
349	fcmp.eq p1, p3 = f2, f3
350	;;
351	cmp.eq.and p1, p2 = r0, r1	// .and followed by .or on the same p1 (mixed types)
352	cmp.eq.or  p1, p3 = r2, r3
353	;;
354	cmp.eq.or  p1, p3 = r2, r3	// same mix in the opposite order
355	cmp.eq.and p1, p2 = r0, r1
356	;;
357	cmp.eq.and p1, p2 = r0, r1
358	cmp.eq.and p1, p3 = r2, r3 // no DV here
359	;;
360	cmp.eq.or p1, p2 = r0, r1
361	cmp.eq.or p1, p3 = r2, r3 // no DV here
362	;;
363
364// PR63
365	br.wtop.sptk	L	// br.wtop is listed under this heading as a writer of PR63
366	br.wtop.sptk	L
367	;;
368	cmp.eq	p63, p0 = r0, r1	// the cases below repeat the PR% patterns with p63 as target
369	cmp.eq	p63, p0 = r2, r3
370	;;
371	fcmp.eq p63, p2 = f2, f3
372	fcmp.eq p63, p3 = f2, f3
373	;;
374	cmp.eq.and p63, p2 = r0, r1
375	cmp.eq.or  p63, p3 = r2, r3
376	;;
377	cmp.eq.or  p63, p3 = r2, r3
378	cmp.eq.and p63, p2 = r0, r1
379	;;
380	cmp.eq.and p63, p2 = r0, r1
381	cmp.eq.and p63, p3 = r2, r3 // no DV here
382	;;
383	cmp.eq.or p63, p2 = r0, r1
384	cmp.eq.or p63, p3 = r2, r3 // no DV here
385	;;
386
387// PSR.ac
388	rum	(1<<3)		// mask bit 3: the PSR.ac bit named in the heading
389	rum	(1<<3)
390	;;
391
392// PSR.be
393	rum	(1<<1)		// mask bit 1: PSR.be
394	rum	(1<<1)
395	;;
396
397// PSR.bn
398	bsw.0			// GAS automatically emits a stop after bsw.n
399	bsw.0			// so this conflict is avoided
400	;;
401
402// PSR.cpl
403	epc			// epc and br.ret are paired under this heading as PSR.cpl writers
404	br.ret.sptk	b0
405	;;
406
407// PSR.da (rfi is the only writer)
408// PSR.db (and others)
409	mov	psr.l = r0	// two writes of psr.l in one group
410	mov	psr.l = r1
411	;;
412	srlz.d			// data serialization after the stop, outside the tested group
413
414// PSR.dd (rfi is the only writer)
415
416// PSR.dfh
417	ssm	(1<<19)		// mask bit 19: PSR.dfh
418	ssm	(1<<19)
419	;;
420	srlz.d
421
422// PSR.dfl
423	ssm	(1<<18)		// mask bit 18: PSR.dfl
424	ssm	(1<<18)
425	;;
426	srlz.d
427
428// PSR.di
429	rsm	(1<<22)		// mask bit 22: PSR.di
430	rsm	(1<<22)
431	;;
432
433// PSR.dt
434	rsm	(1<<17)		// mask bit 17: PSR.dt
435	rsm	(1<<17)
436	;;
437
438// PSR.ed (rfi is the only writer)
439// PSR.i
440	ssm	(1<<14)		// mask bit 14: PSR.i
441	ssm	(1<<14)
442	;;
443
444// PSR.ia (no DV semantics)
445// PSR.ic
446	ssm	(1<<13)		// mask bit 13: PSR.ic
447	ssm	(1<<13)
448	;;
449
450// PSR.id (rfi is the only writer)
451// PSR.is (br.ia and rfi are the only writers)
452// PSR.it (rfi is the only writer)
453// PSR.lp (see PSR.db)
454
455// PSR.mc (rfi is the only writer)
456// PSR.mfh
457	mov	f32 = f33
458	mov	r10 = psr	// r10 is written back with mov psr.um = r10 further down
459	;;
460	ssm	(1<<5)		// mask bit 5: PSR.mfh
461	ssm	(1<<5)
462	;;
463	ssm	(1<<5)
464	mov	psr.um = r10	// user-mask write paired with the ssm above
465	;;
466	rum	(1<<5)
467	rum	(1<<5)
468	;;
469	mov	f32 = f33
470	mov	f34 = f35	// no DV here
471	;;
472
473// PSR.mfl
474	mov	f2 = f3
475	mov	r10 = psr	// r10 is written back with mov psr.um = r10 further down
476	;;
477	ssm	(1<<4)		// mask bit 4: PSR.mfl
478	ssm	(1<<4)
479	;;
480	ssm	(1<<4)
481	mov	psr.um = r10	// user-mask write paired with the ssm above
482	;;
483	rum	(1<<4)
484	rum	(1<<4)
485	;;
486	mov	f2 = f3
487	mov	f4 = f5	// no DV here
488	;;
489
490// PSR.pk
491	rsm	(1<<15)		// mask bit 15: PSR.pk
492	rsm	(1<<15)
493	;;
494
495// PSR.pp
496	rsm	(1<<21)		// mask bit 21: PSR.pp
497	rsm	(1<<21)
498	;;
499
500// PSR.ri (no DV semantics)
501// PSR.rt (see PSR.db)
502
503// PSR.si
504	rsm	(1<<23)		// mask bit 23: PSR.si, reset ...
505	ssm	(1<<23)		// ... then set in the same group
506	;;
507
508// PSR.sp
509	ssm	(1<<20)		// mask bit 20: PSR.sp, set ...
510	rsm	(1<<20)		// ... then reset in the same group
511	;;
512	srlz.d
513
514// PSR.ss (rfi is the only writer)
515// PSR.tb (see PSR.db)
516
517// PSR.up
518	rsm	(1<<2)		// mask bit 2: PSR.up
519	rsm	(1<<2)
520	;;
521	rum	(1<<2)
522	mov	psr.um = r0	// user-mask write listed here as a second writer of PSR.up
523	;;
524
525// RR#
526	mov	rr[r2] = r1	// same index register (r2) used for both writes
527	mov	rr[r2] = r3
528	;;
529
530// PR, additional cases (or.andcm and and.orcm interaction)
531	cmp.eq.or.andcm	p6, p7 = 1, r32
532	cmp.eq.or.andcm p6, p7 = 5, r36	// no DV here
533	;;
534	cmp.eq.and.orcm	p6, p7 = 1, r32
535	cmp.eq.and.orcm p6, p7 = 5, r36	// no DV here
536	;;
537	cmp.eq.or.andcm	p63, p7 = 1, r32
538	cmp.eq.or.andcm p63, p7 = 5, r36 // no DV here
539	;;
540	cmp.eq.or.andcm	p6, p63 = 1, r32
541	cmp.eq.or.andcm p6, p63 = 5, r36 // no DV here
542	;;
543	cmp.eq.and.orcm	p63, p7 = 1, r32
544	cmp.eq.and.orcm p63, p7 = 5, r36 // no DV here
545	;;
546	cmp.eq.and.orcm	p6, p63 = 1, r32
547	cmp.eq.and.orcm p6, p63 = 5, r36 // no DV here
548	;;
549	cmp.eq.or.andcm	p6, p7 = 1, r32	// or.andcm followed by and.orcm on the same targets (mixed types)
550	cmp.eq.and.orcm p6, p7 = 5, r36
551	;;
552	cmp.eq.or.andcm	p63, p7 = 1, r32	// same mix with p63 as the first target
553	cmp.eq.and.orcm p63, p7 = 5, r36
554	;;
555	cmp.eq.or.andcm	p6, p63 = 1, r32	// same mix with p63 as the second target
556	cmp.eq.and.orcm p6, p63 = 5, r36
557	;;
558
559// PR%, 16 - 62
560	cmp.eq	p21, p0 = r0, r1	// repeats the PR% 1 - 15 patterns using p21/p22/p23
561	cmp.eq	p21, p0 = r2, r3
562	;;
563	fcmp.eq p21, p22 = f2, f3
564	fcmp.eq p21, p23 = f2, f3
565	;;
566	cmp.eq.and p21, p22 = r0, r1
567	cmp.eq.or  p21, p23 = r2, r3
568	;;
569	cmp.eq.or  p21, p23 = r2, r3
570	cmp.eq.and p21, p22 = r0, r1
571	;;
572	cmp.eq.and p21, p22 = r0, r1
573	cmp.eq.and p21, p23 = r2, r3 // no DV here
574	;;
575	cmp.eq.or p21, p22 = r0, r1
576	cmp.eq.or p21, p23 = r2, r3 // no DV here
577	;;
578
579// RSE
580
581L:	// branch target named by the br.wtop/br.cloop/br.call instructions above
582