// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s

// Matrix type aliases, one spelled with typedef and one with an
// alias-declaration.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
using fx2x3_t = float __attribute__((matrix_type(2, 3)));

// Aggregate holding a single matrix whose element type and dimensions are
// taken from the template parameters.
template <typename EltTy, unsigned Rows, unsigned Columns>
struct MyMatrix {
  using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));

  matrix_t value;
};

13 template <typename EltTy0, unsigned R0, unsigned C0>
add(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
15   return A.value + B.value;
16 }
17 
test_add_template()18 void test_add_template() {
19   // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
20   // CHECK:       %call = call <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat2)
21 
22   // CHECK-LABEL: define linkonce_odr <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
23   // CHECK:       [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
24   // CHECK:       [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
25   // CHECK-NEXT:  [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
26   // CHECK-NEXT:  ret <10 x float> [[RES]]
27 
28   MyMatrix<float, 2, 5> Mat1;
29   MyMatrix<float, 2, 5> Mat2;
30   Mat1.value = add(Mat1, Mat2);
31 }
32 
33 template <typename EltTy0, unsigned R0, unsigned C0>
subtract(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,R0,C0> & B)34 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
35   return A.value - B.value;
36 }
37 
test_subtract_template()38 void test_subtract_template() {
39   // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
40   // CHECK:       %call = call <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat2)
41 
42   // CHECK-LABEL: define linkonce_odr <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
43   // CHECK:       [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
44   // CHECK:       [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
45   // CHECK-NEXT:  [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
46   // CHECK-NEXT:  ret <10 x float> [[RES]]
47 
48   MyMatrix<float, 2, 5> Mat1;
49   MyMatrix<float, 2, 5> Mat2;
50   Mat1.value = subtract(Mat1, Mat2);
51 }
52 
// Holds an int and converts implicitly to double. The conversion is left
// non-explicit on purpose: the tests below use it as the scalar operand of a
// matrix expression.
struct DoubleWrapper1 {
  int x;
  operator double() {
    return x;
  }
};

test_DoubleWrapper1_Sub1(MyMatrix<double,10,9> & m)60 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
61   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
62   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
63   // CHECK:       [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
64   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
65   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
66   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
67   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
68 
69   DoubleWrapper1 w1;
70   w1.x = 10;
71   m.value = m.value - w1;
72 }
73 
test_DoubleWrapper1_Sub2(MyMatrix<double,10,9> & m)74 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
75   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
76   // CHECK:       [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
77   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
78   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
79   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
80   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
81   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
82 
83   DoubleWrapper1 w1;
84   w1.x = 10;
85   m.value = w1 - m.value;
86 }
87 
// Holds an int and converts implicitly to double. The conversion is left
// non-explicit on purpose: the addition tests below use it as the scalar
// operand of a matrix expression.
struct DoubleWrapper2 {
  int x;
  operator double() {
    return x;
  }
};

test_DoubleWrapper2_Add1(MyMatrix<double,10,9> & m)95 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
96   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
97   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* %1, align 8
98   // CHECK:       [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
99   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
100   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
101   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
102   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
103 
104   DoubleWrapper2 w2;
105   w2.x = 20;
106   m.value = m.value + w2;
107 }
108 
test_DoubleWrapper2_Add2(MyMatrix<double,10,9> & m)109 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
110   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
111   // CHECK:       [[SCALAR:%.*]] = call double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
112   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* %1, align 8
113   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
114   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
115   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
116   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
117 
118   DoubleWrapper2 w2;
119   w2.x = 20;
120   m.value = w2 + m.value;
121 }
122 
// Holds a char and converts implicitly to int. The conversion is left
// non-explicit on purpose: the tests below use it both as a scalar operand of
// matrix arithmetic and as a matrix subscript.
struct IntWrapper {
  char x;
  operator int() {
    return x;
  }
};

test_IntWrapper_Add(MyMatrix<double,10,9> & m)130 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
131   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
132   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
133   // CHECK:       [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
134   // CHECK:       [[SCALAR_FP:%.*]] = sitofp i32 %call to double
135   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
136   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
137   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
139 
140   IntWrapper w3;
141   w3.x = 'c';
142   m.value = m.value + w3;
143 }
144 
test_IntWrapper_Sub(MyMatrix<double,10,9> & m)145 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
146   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
147   // CHECK:       [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
148   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
149   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
150   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
151   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
152   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
153   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
154 
155   IntWrapper w3;
156   w3.x = 'c';
157   m.value = w3 - m.value;
158 }
159 
160 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
multiply(MyMatrix<EltTy0,R0,C0> & A,MyMatrix<EltTy0,C0,C1> & B)161 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
162   return A.value * B.value;
163 }
164 
test_multiply_template(MyMatrix<float,2,5> Mat1,MyMatrix<float,5,2> Mat2)165 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
166                                              MyMatrix<float, 5, 2> Mat2) {
167   // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
168   // CHECK-NEXT:  entry:
169   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(%struct.MyMatrix* nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix.2* nonnull align 4 dereferenceable(40) %Mat2)
170   // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
171   // CHECK-NEXT:    [[VALUE_ADDR:%.*]] = bitcast [4 x float]* %value to <4 x float>*
172   // CHECK-NEXT:    store <4 x float> [[RES]], <4 x float>* [[VALUE_ADDR]], align 4
173   // CHECK-NEXT:    ret void
174   //
175   // CHECK-LABEL:  define linkonce_odr <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
176   // CHECK:         [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
177   // CHECK:         [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
178   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
179   // CHECK-NEXT:    ret <4 x float> [[RES]]
180 
181   MyMatrix<float, 2, 2> Res;
182   Res.value = multiply(Mat1, Mat2);
183   return Res;
184 }
185 
test_IntWrapper_Multiply(MyMatrix<double,10,9> & m,IntWrapper & w3)186 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
187   // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
188   // CHECK:       [[SCALAR:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{.*}})
189   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
190   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
191   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
192   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
193   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
194   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
195   // CHECK:       ret void
196   m.value = w3 * m.value;
197 }
198 
199 template <typename EltTy, unsigned Rows, unsigned Columns>
insert(MyMatrix<EltTy,Rows,Columns> & Mat,EltTy e,unsigned i,unsigned j)200 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
201   Mat.value[i][j] = e;
202 }
203 
test_insert_template1(MyMatrix<unsigned,2,2> & Mat,unsigned e,unsigned i,unsigned j)204 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
205   // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
206   // CHECK:         [[MAT_ADDR:%.*]] = load %struct.MyMatrix.3*, %struct.MyMatrix.3** %Mat.addr, align 8
207   // CHECK-NEXT:    [[E:%.*]] = load i32, i32* %e.addr, align 4
208   // CHECK-NEXT:    [[I:%.*]] = load i32, i32* %i.addr, align 4
209   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
210   // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.3* nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 [[E]], i32 [[I]], i32 [[J]])
211   // CHECK-NEXT:    ret void
212   //
213   // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
214   // CHECK:         [[E:%.*]] = load i32, i32* %e.addr, align 4
215   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
216   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
217   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
218   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
219   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
220   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
221   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
222   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
223   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
224   // CHECK-NEXT:    store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
225   // CHECK-NEXT:    ret void
226 
227   insert(Mat, e, i, j);
228 }
229 
test_insert_template2(MyMatrix<float,3,8> & Mat,float e)230 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
231   // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
232   // CHECK:         [[MAT_ADDR:%.*]] = load %struct.MyMatrix.4*, %struct.MyMatrix.4** %Mat.addr, align 8
233   // CHECK-NEXT:    [[E:%.*]] = load float, float* %e.addr, align 4
234   // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.4* nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float [[E]], i32 2, i32 5)
235   // CHECK-NEXT:    ret void
236   //
237   // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
238   // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
239   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
240   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
241   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
242   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
243   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
244   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
245   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
246   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
247   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
248   // CHECK-NEXT:    store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
249   // CHECK-NEXT:    ret void
250 
251   insert(Mat, e, 2, 5);
252 }
253 
254 template <typename EltTy, unsigned Rows, unsigned Columns>
255 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
256   return Mat.value[1u][0u];
257 }
258 
test_extract_template(MyMatrix<int,2,2> Mat1)259 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
260   // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
261   // CHECK-NEXT:  entry:
262   // CHECK-NEXT:    [[CALL:%.*]] = call i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix.5* nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
263   // CHECK-NEXT:    ret i32 [[CALL]]
264   //
265   // CHECK-LABEL: define linkonce_odr i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
266   // CHECK:         [[MAT:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
267   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
268   // CHECK-NEXT:    ret i32 [[MATEXT]]
269 
270   return extract(Mat1);
271 }
272 
// 4x4 matrix of double used by the subscript tests below.
using double4x4 = double __attribute__((matrix_type(4, 4)));

275 template <class R, class C>
matrix_subscript(double4x4 m,R r,C c)276 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
277 
test_matrix_subscript(double4x4 m)278 double test_matrix_subscript(double4x4 m) {
279   // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
280   // CHECK:         [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}}, align 8
281   // CHECK-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> [[MAT]], i32 1, i32 2)
282   // CHECK-NEXT:    [[RES:%.*]] = load double, double* [[CALL]], align 8
283   // CHECK-NEXT:    ret double [[RES]]
284 
285   return matrix_subscript(m, 1, 2);
286 }
287 
test_matrix_subscript_reference(const double4x4 m)288 const double &test_matrix_subscript_reference(const double4x4 m) {
289   // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
290   // CHECK-NEXT:  entry:
291   // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x double], align 8
292   // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca double, align 8
293   // CHECK-NEXT:    [[NAMELESS0:%.*]] = bitcast [16 x double]* [[M_ADDR]] to <16 x double>*
294   // CHECK-NEXT:    store <16 x double> [[M:%.*]], <16 x double>* [[NAMELESS0]], align 8
295   // CHECK-NEXT:    [[NAMELESS1:%.*]] = load <16 x double>, <16 x double>* [[NAMELESS0]], align 8
296   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
297   // CHECK-NEXT:    store double [[MATEXT]], double* [[REF_TMP]], align 8
298   // CHECK-NEXT:    ret double* [[REF_TMP]]
299 
300   return m[0][1];
301 }
302 
// Holds a char and converts implicitly to unsigned. The conversion is left
// non-explicit on purpose: extract_IntWrapper_idx uses it as a matrix
// subscript.
struct UnsignedWrapper {
  char x;
  operator unsigned() {
    return x;
  }
};

extract_IntWrapper_idx(double4x4 & m,IntWrapper i,UnsignedWrapper j)310 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
311   // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
312   // CHECK:         [[I:%.*]] = call i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %i)
313   // CHECK-NEXT:    [[I_ADD:%.*]] = add nsw i32 [[I]], 1
314   // CHECK-NEXT:    [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
315   // CHECK-NEXT:    [[J:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
316   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
317   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
318   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
319   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
320   // CHECK-NEXT:    [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
321   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
322   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
323   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
324   // CHECK-NEXT:    ret double [[MATEXT]]
325   return m[i + 1][j - 1];
326 }
327 
// Alias template naming the R x C matrix type with element type T.
template <class T, unsigned R, unsigned C>
using matrix_type = T __attribute__((matrix_type(R, C)));
330 struct identmatrix_t {
331   template <class T, unsigned N>
operator matrix_type<T,N,N>identmatrix_t332   operator matrix_type<T, N, N>() const {
333     matrix_type<T, N, N> result;
334     for (unsigned i = 0; i != N; ++i)
335       result[i][i] = 1;
336     return result;
337   }
338 };
339 
340 constexpr identmatrix_t identmatrix;
341 
test_constexpr1(matrix_type<float,4,4> & m)342 void test_constexpr1(matrix_type<float, 4, 4> &m) {
343   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
344   // CHECK:         [[MAT:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 4
345   // CHECK-NEXT:    [[IM:%.*]] = call <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
346   // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
347   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x float]*, [16 x float]** %m.addr, align 8
348   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x float]* [[MAT_ADDR]] to <16 x float>*
349   // CHECK-NEXT:    store <16 x float> [[ADD]], <16 x float>* [[MAT_ADDR2]], align 4
350   // CHECK-NEXT:    ret voi
351 
352   // CHECK-LABEL: define linkonce_odr <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
353   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
354   // CHECK-NEXT:   [[I:%.*]] = load i32, i32* %i, align 4
355   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
356   // CHECK-NEXT:   [[I2:%.*]] = load i32, i32* %i, align 4
357   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
358   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
359   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
360   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
361   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
362   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
363   // CHECK-NEXT:   store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
364   // CHECK-NEXT:   br label %for.inc
365   m = m + identmatrix;
366 }
367 
test_constexpr2(matrix_type<int,5,5> & m)368 void test_constexpr2(matrix_type<int, 5, 5> &m) {
369   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
370   // CHECK:         [[IM:%.*]] = call <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
371   // CHECK:         [[MAT:%.*]] = load <25 x i32>, <25 x i32>* {{.*}}, align 4
372   // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
373   // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
374   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [25 x i32]*, [25 x i32]** %m.addr, align 8
375   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [25 x i32]* [[MAT_ADDR]] to <25 x i32>*
376   // CHECK-NEXT:    store <25 x i32> [[SUB2]], <25 x i32>* [[MAT_ADDR2]], align 4
377   // CHECK-NEXT:    ret void
378   //
379 
380   // CHECK-LABEL: define linkonce_odr <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
381   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
382   // CHECK-NEXT:   [[I:%.*]] = load i32, i32* %i, align 4
383   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
384   // CHECK-NEXT:   [[I2:%.*]] = load i32, i32* %i, align 4
385   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
386   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
387   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
388   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
389   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
390   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
391   // CHECK-NEXT:   store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
392   // CHECK-NEXT:   br label %for.inc
393 
394   m = identmatrix - m + 1;
395 }
396