1 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
2
// Matrix type aliases: a 5x5 matrix of double (typedef form) and a 2x3 matrix
// of float (alias-declaration form). Neither alias is referenced in the
// visible portion of this file.
3 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
4 using fx2x3_t = float __attribute__((matrix_type(2, 3)));
5
// Wrapper struct holding a single Rows x Columns matrix of EltTy elements.
// The tests below pass matrices around through this wrapper so that the
// element type and dimensions are carried by template parameters.
6 template <typename EltTy, unsigned Rows, unsigned Columns>
7 struct MyMatrix {
// Matrix type built from the wrapper's own template parameters.
8 using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
9
// The wrapped matrix value.
10 matrix_t value;
11 };
12
// Returns the sum of the matrices wrapped by A and B. Both operands share the
// same element type and dimensions, and the result has that same matrix type.
13 template <typename EltTy0, unsigned R0, unsigned C0>
add(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
15 return A.value + B.value;
16 }
17
// Instantiates add() for two MyMatrix<float, 2, 5> locals. The directives
// below expect both the call and the instantiated function to operate on
// <10 x float>, with the sum emitted as a single fadd whose result is returned.
test_add_template()18 void test_add_template() {
19 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
20 // CHECK: %call = call <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat2)
21
22 // CHECK-LABEL: define linkonce_odr <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
23 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
24 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
25 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
26 // CHECK-NEXT: ret <10 x float> [[RES]]
27
// The local names Mat1/Mat2 appear verbatim (%Mat1, %Mat2) in the expected call above.
28 MyMatrix<float, 2, 5> Mat1;
29 MyMatrix<float, 2, 5> Mat2;
30 Mat1.value = add(Mat1, Mat2);
31 }
32
// Returns the difference A - B of the matrices wrapped by A and B. Both
// operands share the same element type and dimensions.
33 template <typename EltTy0, unsigned R0, unsigned C0>
subtract(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)34 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
35 return A.value - B.value;
36 }
37
// Instantiates subtract() for two MyMatrix<float, 2, 5> locals. The directives
// below expect the call and the instantiated function to operate on
// <10 x float>, with the difference emitted as a single fsub (first operand
// minus second operand) whose result is returned.
test_subtract_template()38 void test_subtract_template() {
39 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
40 // CHECK: %call = call <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat2)
41
42 // CHECK-LABEL: define linkonce_odr <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
43 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
44 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
45 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
46 // CHECK-NEXT: ret <10 x float> [[RES]]
47
// The local names Mat1/Mat2 appear verbatim (%Mat1, %Mat2) in the expected call above.
48 MyMatrix<float, 2, 5> Mat1;
49 MyMatrix<float, 2, 5> Mat2;
50 Mat1.value = subtract(Mat1, Mat2);
51 }
52
// Class type with a user-defined conversion to double. The subtraction tests
// below use it as the scalar operand of a matrix/scalar expression.
53 struct DoubleWrapper1 {
54 int x;
// Implicit conversion: yields x converted from int to double.
operator doubleDoubleWrapper155 operator double() {
56 return x;
57 }
58 };
59
// Computes matrix - scalar, where the scalar is a DoubleWrapper1 converted
// through its operator double. Expected IR order: load the <90 x double>
// matrix, call the conversion operator, broadcast the scalar into a
// <90 x double> (insertelement + shufflevector), then fsub matrix - broadcast
// and store the result.
test_DoubleWrapper1_Sub1(MyMatrix<double,10,9> & m)60 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
61 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
62 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
63 // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
64 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
65 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
66 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
67 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
68
// The local name w1 appears verbatim (%w1) in the expected conversion call.
69 DoubleWrapper1 w1;
70 w1.x = 10;
71 m.value = m.value - w1;
72 }
73
// Computes scalar - matrix, the mirror image of test_DoubleWrapper1_Sub1.
// Here the expected IR calls the conversion operator before loading the
// matrix, and the fsub takes the broadcast scalar as its first operand.
test_DoubleWrapper1_Sub2(MyMatrix<double,10,9> & m)74 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
75 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
76 // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
77 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
78 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
79 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
80 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
81 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
82
// The local name w1 appears verbatim (%w1) in the expected conversion call.
83 DoubleWrapper1 w1;
84 w1.x = 10;
85 m.value = w1 - m.value;
86 }
87
// Class type with a user-defined conversion to double, with the same shape as
// DoubleWrapper1. The addition tests below use it as the scalar operand and
// expect calls to its own conversion-operator symbol.
88 struct DoubleWrapper2 {
89 int x;
// Implicit conversion: yields x converted from int to double.
operator doubleDoubleWrapper290 operator double() {
91 return x;
92 }
93 };
94
// Computes matrix + scalar, where the scalar is a DoubleWrapper2 converted
// through its operator double. Expected IR order: load the <90 x double>
// matrix, call the conversion operator, broadcast the scalar into a
// <90 x double> (insertelement + shufflevector), then fadd matrix + broadcast
// and store the result.
// The matrix load address is matched with a wildcard, like the sibling tests,
// rather than a hard-coded unnamed value number.
test_DoubleWrapper2_Add1(MyMatrix<double,10,9> & m)95 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
96 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
97 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
98 // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
99 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
100 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
101 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
102 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
103
// The local name w2 appears verbatim (%w2) in the expected conversion call.
104 DoubleWrapper2 w2;
105 w2.x = 20;
106 m.value = m.value + w2;
107 }
108
// Computes scalar + matrix, the mirror image of test_DoubleWrapper2_Add1.
// Here the expected IR calls the conversion operator before loading the
// matrix, and the fadd takes the broadcast scalar as its first operand.
// The matrix load address is matched with a wildcard, like the sibling tests,
// rather than a hard-coded unnamed value number.
test_DoubleWrapper2_Add2(MyMatrix<double,10,9> & m)109 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
110 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
111 // CHECK: [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
112 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
113 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
114 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
115 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
116 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
117
// The local name w2 appears verbatim (%w2) in the expected conversion call.
118 DoubleWrapper2 w2;
119 w2.x = 20;
120 m.value = w2 + m.value;
121 }
122
// Class type with a user-defined conversion to int. The tests below use it
// both as a scalar operand of double-matrix arithmetic (where the int result
// is further converted to double) and as a matrix subscript.
123 struct IntWrapper {
124 char x;
// Implicit conversion: yields x converted from char to int.
operator intIntWrapper125 operator int() {
126 return x;
127 }
128 };
129
// Computes matrix + scalar, where the scalar is an IntWrapper. Expected IR
// order: load the <90 x double> matrix, call operator int, convert the i32
// result to double with sitofp, broadcast it into a <90 x double>, then fadd
// matrix + broadcast and store the result.
// The sitofp operand is matched through the captured [[SCALAR]] variable so
// the directive ties the conversion to the call result instead of relying on
// the literal value name.
test_IntWrapper_Add(MyMatrix<double,10,9> & m)130 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
131 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
132 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
133 // CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
134 // CHECK: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
135 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
136 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
137 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
139
// The local name w3 appears verbatim (%w3) in the expected conversion call.
140 IntWrapper w3;
141 w3.x = 'c';
142 m.value = m.value + w3;
143 }
144
// Computes scalar - matrix, where the scalar is an IntWrapper. Expected IR
// order: call operator int, convert the i32 result to double with sitofp,
// load the <90 x double> matrix, broadcast the scalar, then fsub with the
// broadcast scalar as the first operand and store the result.
// The sitofp operand is matched through the captured [[SCALAR]] variable so
// the directive ties the conversion to the call result instead of relying on
// the literal value name.
test_IntWrapper_Sub(MyMatrix<double,10,9> & m)145 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
146 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
147 // CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
148 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
149 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
150 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
151 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
152 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
153 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
154
// The local name w3 appears verbatim (%w3) in the expected conversion call.
155 IntWrapper w3;
156 w3.x = 'c';
157 m.value = w3 - m.value;
158 }
159
// Returns the matrix product A * B. A is R0 x C0 and B is C0 x C1, so the
// inner dimensions agree by construction and the result is R0 x C1.
160 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
multiply(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,C0,C1> & B)161 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
162 return A.value * B.value;
163 }
164
// Instantiates multiply() for a 2x5 and a 5x2 float matrix and returns the
// 2x2 product wrapped in MyMatrix. The expected IR shows the wrapper being
// returned through %agg.result (the function itself is defined as void), and
// the instantiated multiply() lowering to llvm.matrix.multiply with the
// dimensions 2, 5, 2 on two <10 x float> operands.
test_multiply_template(MyMatrix<float,2,5> Mat1,MyMatrix<float,5,2> Mat2)165 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
166 MyMatrix<float, 5, 2> Mat2) {
167 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
168 // CHECK-NEXT: entry:
169 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(%struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix.2* nonnull align 4 dereferenceable(40) %Mat2)
170 // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
171 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = bitcast [4 x float]* %value to <4 x float>*
172 // CHECK-NEXT: store <4 x float> [[RES]], <4 x float>* [[VALUE_ADDR]], align 4
173 // CHECK-NEXT: ret void
174 //
175 // CHECK-LABEL: define linkonce_odr <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
176 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
177 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
178 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
179 // CHECK-NEXT: ret <4 x float> [[RES]]
180
// The expected IR above stores the product directly into %agg.result, with no
// separate local for Res.
181 MyMatrix<float, 2, 2> Res;
182 Res.value = multiply(Mat1, Mat2);
183 return Res;
184 }
185
// Computes scalar * matrix, where the scalar is an IntWrapper passed by
// reference. Expected IR order: call operator int, convert the i32 result to
// double with sitofp, load the <90 x double> matrix, broadcast the scalar,
// then fmul with the broadcast scalar as the first operand and store.
// The sitofp operand is matched through the captured [[SCALAR]] variable so
// the directive ties the conversion to the call result instead of relying on
// the literal value name.
test_IntWrapper_Multiply(MyMatrix<double,10,9> & m,IntWrapper & w3)186 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
187 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
188 // CHECK: [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{.*}})
189 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
190 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
191 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
192 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
193 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
194 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
195 // CHECK: ret void
196 m.value = w3 * m.value;
197 }
198
// Stores e into the element of Mat's matrix selected by the subscripts
// [i][j]. The tests that instantiate this expect the flat vector index to be
// computed as j * Rows + i.
199 template <typename EltTy, unsigned Rows, unsigned Columns>
insert(MyMatrix<EltTy,Rows,Columns> & Mat,EltTy e,unsigned i,unsigned j)200 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
201 Mat.value[i][j] = e;
202 }
203
// Instantiates insert() for a 2x2 unsigned matrix with runtime indices. The
// expected IR for the instantiation zero-extends both indices to i64,
// computes the flat index as j * 2 + i, and performs a load / insertelement /
// store sequence on the <4 x i32> matrix value.
test_insert_template1(MyMatrix<unsigned,2,2> & Mat,unsigned e,unsigned i,unsigned j)204 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
205 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
206 // CHECK: [[MAT_ADDR:%.*]] = load %struct.MyMatrix.3*, %struct.MyMatrix.3** %Mat.addr, align 8
207 // CHECK-NEXT: [[E:%.*]] = load i32, i32* %e.addr, align 4
208 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i.addr, align 4
209 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
210 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.3* nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 [[E]], i32 [[I]], i32 [[J]])
211 // CHECK-NEXT: ret void
212 //
213 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
214 // CHECK: [[E:%.*]] = load i32, i32* %e.addr, align 4
215 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
216 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
217 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
218 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
219 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
220 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
221 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
222 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
223 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
224 // CHECK-NEXT: store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
225 // CHECK-NEXT: ret void
226
227 insert(Mat, e, i, j);
228 }
229
// Instantiates insert() for a 3x8 float matrix with the constant indices
// (2, 5). The call site is expected to pass the constants as i32 2 and i32 5;
// the instantiation is expected to compute the flat index as j * 3 + i and to
// perform a load / insertelement / store sequence on the <24 x float> value.
test_insert_template2(MyMatrix<float,3,8> & Mat,float e)230 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
231 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
232 // CHECK: [[MAT_ADDR:%.*]] = load %struct.MyMatrix.4*, %struct.MyMatrix.4** %Mat.addr, align 8
233 // CHECK-NEXT: [[E:%.*]] = load float, float* %e.addr, align 4
234 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.4* nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float [[E]], i32 2, i32 5)
235 // CHECK-NEXT: ret void
236 //
237 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
238 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
239 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
240 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
241 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
242 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
243 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
244 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
245 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
246 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
247 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
248 // CHECK-NEXT: store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
249 // CHECK-NEXT: ret void
250
251 insert(Mat, e, 2, 5);
252 }
253
// Returns the element of Mat's matrix selected by the constant unsigned
// subscripts [1][0].
254 template <typename EltTy, unsigned Rows, unsigned Columns>
255 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
256 return Mat.value[1u][0u];
257 }
258
// Instantiates extract() for a 2x2 int matrix. The expected IR for the
// instantiation loads the <4 x i32> value and extracts the element at the
// constant flat index 1, which is returned unchanged by this function.
test_extract_template(MyMatrix<int,2,2> Mat1)259 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
260 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
261 // CHECK-NEXT: entry:
262 // CHECK-NEXT: [[CALL:%.*]] = call i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix.5* nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
263 // CHECK-NEXT: ret i32 [[CALL]]
264 //
265 // CHECK-LABEL: define linkonce_odr i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
266 // CHECK: [[MAT:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
267 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
268 // CHECK-NEXT: ret i32 [[MATEXT]]
269
270 return extract(Mat1);
271 }
272
// 4x4 matrix of double, used by the subscript tests that follow.
273 using double4x4 = double __attribute__((matrix_type(4, 4)));
274
// Function template whose return type is spelled as decltype of a matrix
// subscript expression on its own parameters, so the subscript expression
// becomes part of the mangled name checked in test_matrix_subscript.
// NOTE: the body is empty and has no return statement; the test only inspects
// the emitted IR for the caller, so this function is not meant to be executed.
275 template <class R, class C>
matrix_subscript(double4x4 m,R r,C c)276 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
277
// Calls matrix_subscript() with int subscripts 1 and 2. The expected IR
// passes the <16 x double> matrix by value, receives a double* from the call
// (the decltype of the subscript expression is a reference), and loads the
// returned double from it.
test_matrix_subscript(double4x4 m)278 double test_matrix_subscript(double4x4 m) {
279 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
280 // CHECK: [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}}, align 8
281 // CHECK-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> [[MAT]], i32 1, i32 2)
282 // CHECK-NEXT: [[RES:%.*]] = load double, double* [[CALL]], align 8
283 // CHECK-NEXT: ret double [[RES]]
284
285 return matrix_subscript(m, 1, 2);
286 }
287
// Binds a const double reference to the matrix element m[0][1]. The expected
// IR extracts the element at flat index 4 from the <16 x double> value, stores
// it into a separate double temporary, and returns the address of that
// temporary rather than an address inside the matrix.
// NOTE: the returned reference therefore refers to function-local storage;
// the test only inspects the emitted IR.
test_matrix_subscript_reference(const double4x4 m)288 const double &test_matrix_subscript_reference(const double4x4 m) {
289 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
290 // CHECK-NEXT: entry:
291 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8
292 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
293 // CHECK-NEXT: [[NAMELESS0:%.*]] = bitcast [16 x double]* [[M_ADDR]] to <16 x double>*
294 // CHECK-NEXT: store <16 x double> [[M:%.*]], <16 x double>* [[NAMELESS0]], align 8
295 // CHECK-NEXT: [[NAMELESS1:%.*]] = load <16 x double>, <16 x double>* [[NAMELESS0]], align 8
296 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
297 // CHECK-NEXT: store double [[MATEXT]], double* [[REF_TMP]], align 8
298 // CHECK-NEXT: ret double* [[REF_TMP]]
299
300 return m[0][1];
301 }
302
// Class type with a user-defined conversion to unsigned. Used below as a
// matrix subscript operand.
303 struct UnsignedWrapper {
304 char x;
// Implicit conversion: yields x converted from char to unsigned.
operator unsignedUnsignedWrapper305 operator unsigned() {
306 return x;
307 }
308 };
309
// Reads m[i + 1][j - 1], where both subscripts come from class types with
// conversion operators. Expected IR: the first subscript is converted through
// operator int, incremented with add nsw and sign-extended to i64; the second
// is converted through operator unsigned, decremented with a plain sub and
// zero-extended to i64. The flat index is then (j - 1) * 4 + (i + 1), and the
// element is read with extractelement from the loaded <16 x double> value.
extract_IntWrapper_idx(double4x4 & m,IntWrapper i,UnsignedWrapper j)310 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
311 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
312 // CHECK: [[I:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %i)
313 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1
314 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
315 // CHECK-NEXT: [[J:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
316 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1
317 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
318 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
319 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
320 // CHECK-NEXT: [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
321 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
322 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
323 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
324 // CHECK-NEXT: ret double [[MATEXT]]
325 return m[i + 1][j - 1];
326 }
327
// Alias template naming an R x C matrix of T, so matrix types can be spelled
// with template arguments (and deduced, as in identmatrix_t below).
328 template <class T, unsigned R, unsigned C>
329 using matrix_type = T __attribute__((matrix_type(R, C)));
// Tag type that converts implicitly to any square N x N matrix of T through a
// templated conversion operator.
330 struct identmatrix_t {
// Builds the converted matrix by writing 1 to every diagonal element
// result[i][i]. The off-diagonal elements of `result` are never assigned in
// this function.
331 template <class T, unsigned N>
operator matrix_type<T,N,N>identmatrix_t332 operator matrix_type<T, N, N>() const {
333 matrix_type<T, N, N> result;
334 for (unsigned i = 0; i != N; ++i)
335 result[i][i] = 1;
336 return result;
337 }
338 };
339
// Single constexpr instance of the tag type; the tests below reference it by
// its internal-linkage symbol @_ZL11identmatrix.
340 constexpr identmatrix_t identmatrix;
341
// Computes m + identmatrix for a 4x4 float matrix. Expected IR: load the
// <16 x float> matrix, call the identmatrix_t conversion operator instantiated
// for <float, 4>, fadd matrix + converted value, and store the sum back
// through m. The second group of directives covers the loop body of the
// instantiated conversion operator: the diagonal index is computed as
// i * 4 + i and the constant 1.0 is inserted at that position.
// The final directive of the first group spells out the full `ret void`
// instruction; it was previously truncated and only matched as a substring.
test_constexpr1(matrix_type<float,4,4> & m)342 void test_constexpr1(matrix_type<float, 4, 4> &m) {
343 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
344 // CHECK: [[MAT:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 4
345 // CHECK-NEXT: [[IM:%.*]] = call <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
346 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
347 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [16 x float]*, [16 x float]** %m.addr, align 8
348 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [16 x float]* [[MAT_ADDR]] to <16 x float>*
349 // CHECK-NEXT: store <16 x float> [[ADD]], <16 x float>* [[MAT_ADDR2]], align 4
350 // CHECK-NEXT: ret void
351
352 // CHECK-LABEL: define linkonce_odr <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
353 // CHECK-LABEL: for.body: ; preds = %for.cond
354 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i, align 4
355 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
356 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i, align 4
357 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
358 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
359 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
360 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
361 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
362 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
363 // CHECK-NEXT: store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
364 // CHECK-NEXT: br label %for.inc
365 m = m + identmatrix;
366 }
367
// Computes identmatrix - m + 1 for a 5x5 int matrix. Expected IR: call the
// identmatrix_t conversion operator instantiated for <int, 5>, load the
// <25 x i32> matrix, sub converted - matrix, add a constant vector of 25 ones,
// and store the result back through m. The second group of directives covers
// the loop body of the instantiated conversion operator: the diagonal index
// is computed as i * 5 + i and the constant 1 is inserted at that position.
test_constexpr2(matrix_type<int,5,5> & m)368 void test_constexpr2(matrix_type<int, 5, 5> &m) {
369 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
370 // CHECK: [[IM:%.*]] = call <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
371 // CHECK: [[MAT:%.*]] = load <25 x i32>, <25 x i32>* {{.*}}, align 4
372 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
373 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
374 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [25 x i32]*, [25 x i32]** %m.addr, align 8
375 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [25 x i32]* [[MAT_ADDR]] to <25 x i32>*
376 // CHECK-NEXT: store <25 x i32> [[SUB2]], <25 x i32>* [[MAT_ADDR2]], align 4
377 // CHECK-NEXT: ret void
378 //
379
380 // CHECK-LABEL: define linkonce_odr <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
381 // CHECK-LABEL: for.body: ; preds = %for.cond
382 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i, align 4
383 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
384 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i, align 4
385 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
386 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
387 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
388 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
389 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
390 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
391 // CHECK-NEXT: store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
392 // CHECK-NEXT: br label %for.inc
393
394 m = identmatrix - m + 1;
395 }
396