1 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
2 // RUN:            -fcuda-is-device -target-feature +ptx60 \
3 // RUN:            -S -emit-llvm -o - -x cuda %s \
4 // RUN:   | FileCheck -check-prefix=CHECK_M16 %s
5 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
6 // RUN:            -fcuda-is-device -target-feature +ptx61 -DPTX61 \
7 // RUN:            -S -emit-llvm -o - -x cuda %s \
8 // RUN:   | FileCheck -check-prefixes=CHECK_M16,CHECK_M32_M8 %s
9 // RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
10 // RUN:   -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-sm_70 %s
11 // RUN: %clang_cc1 -triple nvptx-unknown-unknown \
12 // RUN:   -target-cpu sm_70 -target-feature +ptx60 \
13 // RUN:   -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-ptx61 %s
14 
15 #if !defined(CUDA_VERSION)
16 #define __device__ __attribute__((device))
17 #define __global__ __attribute__((global))
18 #define __shared__ __attribute__((shared))
19 #define __constant__ __attribute__((constant))
20 
21 typedef unsigned long long uint64_t;
22 #endif
23 // We have to keep all builtins that depend on particular target feature in the
24 // same function, because the codegen will stop after the very first function
25 // that encounters an error, so -verify will not be able to find errors in
26 // subsequent functions.
27 
28 // CHECK-LABEL: nvvm_wmma_m16n16k16
nvvm_wmma_m16n16k16(int * src,int * dst,float * fsrc,float * fdst,int ldm)29 __device__ void nvvm_wmma_m16n16k16(int *src, int *dst,
30                                     float *fsrc, float *fdst,
31                                     int ldm) {
32   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
33   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
34   __hmma_m16n16k16_ld_a(dst, src, ldm, 0);
35   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
36   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
37   __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
38 
39   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
40   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
41   __hmma_m16n16k16_ld_b(dst, src, ldm, 0);
42   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
43   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
44   __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
45 
46   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
47   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
48   __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
49   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
50   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
51   __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
52 
53   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
54   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
55   __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
56   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
57   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
58   __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
59 
60   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
61   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
62   __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
63   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
64   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
65   __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
66 
67   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
68   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
69   __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
70   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
71   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
72   __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
73 
74   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
75   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
76   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
77   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
78   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
79   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
80   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
81   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
82   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
83   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
84   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
85   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
86   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
87   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
88   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
89   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
90   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
91   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
92   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
93   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
94   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
95   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
96   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
97   __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
98 
99   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
100   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
101   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
102   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
103   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
104   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
105   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
106   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
107   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
108   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
109   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
110   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
111   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
112   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
113   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
114   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
115   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
116   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
117   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
118   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
119   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
120   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
121   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
122   __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
123 
124   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
125   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
126   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
127   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
128   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
129   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
130   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
131   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
132   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
133   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
134   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
135   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
136   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
137   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
138   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
139   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
140   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
141   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
142   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
143   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
144   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
145   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
146   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
147   __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
148 
149   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
150   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
151   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
152   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
153   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
154   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
155   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
156   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
157   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
158   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
159   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
160   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
161   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
162   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
163   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
164   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
165   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
166   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
167   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
168   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
169   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
170   // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
171   // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
172   __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
173 }
174 
175 #ifdef PTX61
176 // CHECK-LABEL: nvvm_wmma_m32n8k16
nvvm_wmma_m32n8k16(int * src,int * dst,float * fsrc,float * fdst,int ldm)177 __device__ void nvvm_wmma_m32n8k16(int *src, int *dst,
178                                     float *fsrc, float *fdst,
179                                     int ldm) {
180   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.f16
181   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
182   __hmma_m32n8k16_ld_a(dst, src, ldm, 0);
183   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.f16
184   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
185   __hmma_m32n8k16_ld_a(dst, src+1, ldm, 1);
186 
187   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.f16
188   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
189   __hmma_m32n8k16_ld_b(dst, src, ldm, 0);
190   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.f16
191   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
192   __hmma_m32n8k16_ld_b(dst, src+2, ldm, 1);
193 
194   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f16
195   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
196   __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0);
197   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f16
198   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
199   __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1);
200 
201   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f32
202   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
203   __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 0);
204   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f32
205   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
206   __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 1);
207 
208   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f16
209   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
210   __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0);
211   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f16
212   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
213   __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1);
214 
215   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f32
216   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
217   __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 0);
218   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f32
219   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
220   __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 1);
221 
222   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16
223   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
224   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0);
225   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16.satfinite
226   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
227   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1);
228   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16
229   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
230   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0);
231   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16.satfinite
232   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
233   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1);
234   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16
235   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
236   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0);
237   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16.satfinite
238   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
239   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1);
240   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16
241   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
242   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0);
243   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16.satfinite
244   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
245   __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1);
246 
247   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32
248   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
249   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
250   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32.satfinite
251   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
252   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
253   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32
254   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
255   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
256   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32.satfinite
257   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
258   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
259   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32
260   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
261   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
262   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32.satfinite
263   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
264   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
265   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32
266   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
267   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
268   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32.satfinite
269   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
270   __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
271 
272   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16
273   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
274   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0);
275   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16.satfinite
276   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
277   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1);
278   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16
279   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
280   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0);
281   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16.satfinite
282   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
283   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1);
284   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16
285   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
286   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0);
287   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16.satfinite
288   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
289   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1);
290   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16
291   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
292   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0);
293   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16.satfinite
294   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
295   __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1);
296 
297   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32
298   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
299   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
300   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32.satfinite
301   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
302   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
303   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32
304   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
305   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
306   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32.satfinite
307   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
308   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
309   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32
310   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
311   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
312   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32.satfinite
313   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
314   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
315   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32
316   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
317   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
318   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32.satfinite
319   // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
320   __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
321 
322 
323   // m8n32k16 variants.
324 
325   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.f16
326   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
327   __hmma_m8n32k16_ld_a(dst, src, ldm, 0);
328   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.f16
329   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
330   __hmma_m8n32k16_ld_a(dst, src+1, ldm, 1);
331 
332   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.f16
333   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
334   __hmma_m8n32k16_ld_b(dst, src, ldm, 0);
335   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.f16
336   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
337   __hmma_m8n32k16_ld_b(dst, src+2, ldm, 1);
338 
339   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f16
340   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
341   __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0);
342   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f16
343   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
344   __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1);
345 
346   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f32
347   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
348   __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 0);
349   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f32
350   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
351   __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 1);
352 
353   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f16
354   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
355   __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0);
356   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f16
357   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
358   __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1);
359 
360   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f32
361   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
362   __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 0);
363   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f32
364   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
365   __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 1);
366 
367   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16
368   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
369   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0);
370   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16.satfinite
371   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
372   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1);
373   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16
374   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
375   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0);
376   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16.satfinite
377   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
378   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1);
379   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16
380   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
381   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0);
382   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16.satfinite
383   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
384   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1);
385   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16
386   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
387   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0);
388   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16.satfinite
389   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
390   __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1);
391 
392   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32
393   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
394   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
395   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32.satfinite
396   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
397   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
398   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32
399   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
400   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
401   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32.satfinite
402   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
403   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
404   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32
405   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
406   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
407   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32.satfinite
408   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
409   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
410   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32
411   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
412   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
413   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32.satfinite
414   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
415   __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
416 
417   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16
418   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
419   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0);
420   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16.satfinite
421   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
422   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1);
423   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16
424   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
425   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0);
426   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16.satfinite
427   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
428   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1);
429   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16
430   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
431   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0);
432   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16.satfinite
433   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
434   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1);
435   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16
436   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
437   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0);
438   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite
439   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
440   __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 1);
441 
442   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32
443   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
444   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
445   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite
446   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
447   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
448   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32
449   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
450   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
451   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32.satfinite
452   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
453   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
454   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32
455   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
456   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
457   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32.satfinite
458   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
459   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
460   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32
461   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
462   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
463   // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32.satfinite
464   // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
465   __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
466 }
467 #endif
468