// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

// Also check we do not crash when running some middle-end passes. Most
// importantly this includes the IR verifier, to ensure we emit valid IR.
// RUN: %clang_cc1 -fenable-matrix -emit-llvm -triple x86_64-apple-darwin %s -o %t

// Tests for the matrix type builtins.

// Matrix typedefs used by the tests below. matrix_type(R, C) declares an
// R-row by C-column matrix; per the CHECK lines it is lowered to a flat
// <R*C x elt> vector in LLVM IR.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef float fx3x2_t __attribute__((matrix_type(3, 2)));
typedef int ix20x4_t __attribute__((matrix_type(20, 4)));
typedef int ix4x20_t __attribute__((matrix_type(4, 20)));
typedef unsigned ux1x6_t __attribute__((matrix_type(1, 6)));
typedef unsigned ux6x1_t __attribute__((matrix_type(6, 1)));
16
// Transpose a 5x5 double matrix: expect one llvm.matrix.transpose call on the
// flattened <25 x double> value with dimensions 5 x 5.
void transpose_double_5x5(dx5x5_t *a) {
  // CHECK-LABEL: define void @transpose_double_5x5(
  // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[TRANS:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[A]], i32 5, i32 5)
  // CHECK-NEXT:    [[AT_ADDR:%.*]] = bitcast [25 x double]* %a_t to <25 x double>*
  // CHECK-NEXT:    store <25 x double> [[TRANS]], <25 x double>* [[AT_ADDR]], align 8
  dx5x5_t a_t = __builtin_matrix_transpose(*a);
}
25
// Transposing a 3x2 float matrix yields a 2x3 result; the intrinsic receives
// the source dimensions (3, 2).
void transpose_float_3x2(fx3x2_t *a) {
  // CHECK-LABEL: define void @transpose_float_3x2(
  // CHECK:         [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:    [[TRANS:%.*]] = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> [[A]], i32 3, i32 2)
  // CHECK-NEXT:    [[AT_ADDR:%.*]] = bitcast [6 x float]* %a_t to <6 x float>*
  // CHECK-NEXT:    store <6 x float> [[TRANS]], <6 x float>* [[AT_ADDR]], align 4

  fx2x3_t a_t = __builtin_matrix_transpose(*a);
}
35
// Integer-element transpose: 20x4 int matrix becomes 4x20, lowered as a
// <80 x i32> vector.
void transpose_int_20x4(ix20x4_t *a) {
  // CHECK-LABEL: define void @transpose_int_20x4(
  // CHECK:         [[A:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
  // CHECK-NEXT:    [[TRANS:%.*]] = call <80 x i32> @llvm.matrix.transpose.v80i32(<80 x i32> [[A]], i32 20, i32 4)
  // CHECK-NEXT:    [[AT_ADDR:%.*]] = bitcast [80 x i32]* %a_t to <80 x i32>*
  // CHECK-NEXT:    store <80 x i32> [[TRANS]], <80 x i32>* [[AT_ADDR]], align 4

  ix4x20_t a_t = __builtin_matrix_transpose(*a);
}
45
46 struct Foo {
47 ux1x6_t in;
48 ux6x1_t out;
49 };
50
transpose_struct_member(struct Foo * F)51 void transpose_struct_member(struct Foo *F) {
52 // CHECK-LABEL: define void @transpose_struct_member(
53 // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
54 // CHECK-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6)
55 // CHECK-NEXT: [[F_ADDR:%.*]] = load %struct.Foo*, %struct.Foo** %F.addr, align 8
56 // CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr inbounds %struct.Foo, %struct.Foo* [[F_ADDR]], i32 0, i32 1
57 // CHECK-NEXT: [[OUT_PTR_C:%.*]] = bitcast [6 x i32]* [[OUT_PTR]] to <6 x i32>*
58 // CHECK-NEXT: store <6 x i32> [[M_T]], <6 x i32>* [[OUT_PTR_C]], align 4
59
60 F->out = __builtin_matrix_transpose(F->in);
61 }
62
transpose_transpose_struct_member(struct Foo * F)63 void transpose_transpose_struct_member(struct Foo *F) {
64 // CHECK-LABEL: define void @transpose_transpose_struct_member(
65 // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
66 // CHECK-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6)
67 // CHECK-NEXT: [[M_T2:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M_T]], i32 6, i32 1)
68 // CHECK-NEXT: [[F_ADDR:%.*]] = load %struct.Foo*, %struct.Foo** %F.addr, align 8
69 // CHECK-NEXT: [[IN_PTR:%.*]] = getelementptr inbounds %struct.Foo, %struct.Foo* [[F_ADDR]], i32 0, i32 0
70 // CHECK-NEXT: [[IN_PTR_C:%.*]] = bitcast [6 x i32]* [[IN_PTR]] to <6 x i32>*
71 // CHECK-NEXT: store <6 x i32> [[M_T2]], <6 x i32>* [[IN_PTR_C]], align 4
72
73 F->in = __builtin_matrix_transpose(__builtin_matrix_transpose(F->in));
74 }
75
76 dx5x5_t get_matrix();
77
transpose_rvalue()78 void transpose_rvalue() {
79 // CHECK-LABEL: define void @transpose_rvalue()
80 // CHECK-NEXT: entry:
81 // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
82 // CHECK-NEXT: [[CALL:%.*]] = call <25 x double> (...) @get_matrix()
83 // CHECK-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[CALL]], i32 5, i32 5)
84 // CHECK-NEXT: [[M_T_ADDR_C:%.*]] = bitcast [25 x double]* [[M_T_ADDR]] to <25 x double>*
85 // CHECK-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 8
86
87 dx5x5_t m_t = __builtin_matrix_transpose(get_matrix());
88 }
89
90 const dx5x5_t global_matrix;
91
transpose_global()92 void transpose_global() {
93 // CHECK-LABEL: define void @transpose_global()
94 // CHECK-NEXT: entry:
95 // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
96 // CHECK-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, <25 x double>* bitcast ([25 x double]* @global_matrix to <25 x double>*), align 8
97 // CHECK-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[GLOBAL_MATRIX]], i32 5, i32 5)
98 // CHECK-NEXT: [[M_T_ADDR_C:%.*]] = bitcast [25 x double]* [[M_T_ADDR]] to <25 x double>*
99 // CHECK-NEXT: store <25 x double> [[M_T]], <25 x double>* [[M_T_ADDR_C]], align 8
100
101 dx5x5_t m_t = __builtin_matrix_transpose(global_matrix);
102 }
103
column_major_load_with_const_stride_double(double * Ptr)104 void column_major_load_with_const_stride_double(double *Ptr) {
105 // CHECK-LABEL: define void @column_major_load_with_const_stride_double(double* %Ptr)
106 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
107 // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
108
109 dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
110 }
111
column_major_load_with_const_stride2_double(double * Ptr)112 void column_major_load_with_const_stride2_double(double *Ptr) {
113 // CHECK-LABEL: define void @column_major_load_with_const_stride2_double(double* %Ptr)
114 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
115 // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
116
117 dx5x5_t m_a2 = __builtin_matrix_column_major_load(Ptr, 5, 5, 2 * 3 + 9);
118 }
119
column_major_load_with_variable_stride_ull_float(float * Ptr,unsigned long long S)120 void column_major_load_with_variable_stride_ull_float(float *Ptr, unsigned long long S) {
121 // CHECK-LABEL: define void @column_major_load_with_variable_stride_ull_float(float* %Ptr, i64 %S)
122 // CHECK: [[S:%.*]] = load i64, i64* %S.addr, align 8
123 // CHECK-NEXT: [[PTR:%.*]] = load float*, float** %Ptr.addr, align 8
124 // CHECK-NEXT: call <6 x float> @llvm.matrix.column.major.load.v6f32(float* align 4 [[PTR]], i64 [[S]], i1 false, i32 2, i32 3)
125
126 fx2x3_t m_b = __builtin_matrix_column_major_load(Ptr, 2, 3, S);
127 }
128
column_major_load_with_stride_math_int(int * Ptr,int S)129 void column_major_load_with_stride_math_int(int *Ptr, int S) {
130 // CHECK-LABEL: define void @column_major_load_with_stride_math_int(i32* %Ptr, i32 %S)
131 // CHECK: [[S:%.*]] = load i32, i32* %S.addr, align 4
132 // CHECK-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S]], 32
133 // CHECK-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
134 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
135 // CHECK-NEXT: call <80 x i32> @llvm.matrix.column.major.load.v80i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
136
137 ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
138 }
139
column_major_load_with_stride_math_s_int(int * Ptr,short S)140 void column_major_load_with_stride_math_s_int(int *Ptr, short S) {
141 // CHECK-LABEL: define void @column_major_load_with_stride_math_s_int(i32* %Ptr, i16 signext %S)
142 // CHECK: [[S:%.*]] = load i16, i16* %S.addr, align 2
143 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
144 // CHECK-NEXT: [[STRIDE:%.*]] = add nsw i32 [[S_EXT]], 32
145 // CHECK-NEXT: [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
146 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
147 // CHECK-NEXT: %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
148
149 ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
150 }
151
column_major_load_array1(double Ptr[25])152 void column_major_load_array1(double Ptr[25]) {
153 // CHECK-LABEL: define void @column_major_load_array1(double* %Ptr)
154 // CHECK: [[ADDR:%.*]] = load double*, double** %Ptr.addr, align 8
155 // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[ADDR]], i64 5, i1 false, i32 5, i32 5)
156
157 dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
158 }
159
column_major_load_array2()160 void column_major_load_array2() {
161 // CHECK-LABEL: define void @column_major_load_array2() #0 {
162 // CHECK-NEXT: entry:
163 // CHECK-NEXT: [[PTR:%.*]] = alloca [25 x double], align 16
164 // CHECK: [[ARRAY_DEC:%.*]] = getelementptr inbounds [25 x double], [25 x double]* [[PTR]], i64 0, i64 0
165 // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 16 [[ARRAY_DEC]], i64 5, i1 false, i32 5, i32 5)
166
167 double Ptr[25];
168 dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
169 }
170
column_major_load_const(const double * Ptr)171 void column_major_load_const(const double *Ptr) {
172 // CHECK-LABEL: define void @column_major_load_const(double* %Ptr)
173 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
174 // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
175
176 dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
177 }
178
column_major_load_volatile(volatile double * Ptr)179 void column_major_load_volatile(volatile double *Ptr) {
180 // CHECK-LABEL: define void @column_major_load_volatile(double* %Ptr)
181 // CHECK: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
182 // CHECK-NEXT: call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
183
184 dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
185 }
186
column_major_store_with_const_stride_double(double * Ptr)187 void column_major_store_with_const_stride_double(double *Ptr) {
188 // CHECK-LABEL: define void @column_major_store_with_const_stride_double(double* %Ptr)
189 // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
190 // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
191 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
192
193 dx5x5_t m;
194 __builtin_matrix_column_major_store(m, Ptr, 5);
195 }
196
column_major_store_with_const_stride2_double(double * Ptr)197 void column_major_store_with_const_stride2_double(double *Ptr) {
198 // CHECK-LABEL: define void @column_major_store_with_const_stride2_double(double* %Ptr)
199 // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
200 // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
201 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
202 //
203 dx5x5_t m;
204 __builtin_matrix_column_major_store(m, Ptr, 2 * 3 + 9);
205 }
206
column_major_store_with_stride_math_int(int * Ptr,int S)207 void column_major_store_with_stride_math_int(int *Ptr, int S) {
208 // CHECK-LABEL: define void @column_major_store_with_stride_math_int(i32* %Ptr, i32 %S)
209 // CHECK: [[M:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
210 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
211 // CHECK-NEXT: [[S:%.*]] = load i32, i32* %S.addr, align 4
212 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[S]], 32
213 // CHECK-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
214 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v80i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
215
216 ix4x20_t m;
217 __builtin_matrix_column_major_store(m, Ptr, S + 32);
218 }
219
column_major_store_with_stride_math_s_int(int * Ptr,short S)220 void column_major_store_with_stride_math_s_int(int *Ptr, short S) {
221 // CHECK-LABEL: define void @column_major_store_with_stride_math_s_int(i32* %Ptr, i16 signext %S)
222 // CHECK: [[M:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4
223 // CHECK-NEXT: [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
224 // CHECK-NEXT: [[S:%.*]] = load i16, i16* %S.addr, align 2
225 // CHECK-NEXT: [[EXT:%.*]] = sext i16 [[S]] to i32
226 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[EXT]], 2
227 // CHECK-NEXT: [[IDX:%.*]] = sext i32 [[ADD]] to i64
228 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v80i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
229
230 ix4x20_t m;
231 __builtin_matrix_column_major_store(m, Ptr, S + 2);
232 }
233
column_major_store_array1(double Ptr[25])234 void column_major_store_array1(double Ptr[25]) {
235 // CHECK-LABEL: define void @column_major_store_array1(double* %Ptr)
236 // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
237 // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
238 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
239
240 dx5x5_t m;
241 __builtin_matrix_column_major_store(m, Ptr, 5);
242 }
243
column_major_store_array2()244 void column_major_store_array2() {
245 // CHECK-LABEL: define void @column_major_store_array2()
246 // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
247 // CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [25 x double], [25 x double]* %Ptr, i64 0, i64 0
248 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 16 [[PTR]], i64 5, i1 false, i32 5, i32 5)
249
250 double Ptr[25];
251 dx5x5_t m;
252 __builtin_matrix_column_major_store(m, Ptr, 5);
253 }
254
column_major_store_volatile(volatile double * Ptr)255 void column_major_store_volatile(volatile double *Ptr) {
256 // CHECK-LABEL: define void @column_major_store_volatile(double* %Ptr) #0 {
257 // CHECK: [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
258 // CHECK-NEXT: [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
259 // CHECK-NEXT: call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
260
261 dx5x5_t m;
262 __builtin_matrix_column_major_store(m, Ptr, 5);
263 }
264