1 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++17 | FileCheck %s
2 
3 // Tests for the matrix type builtins.
4 
// Alias template for a Clang matrix type: element type EltTy with the given
// number of Rows and Columns, spelled via the matrix_type attribute.
5 template <typename EltTy, unsigned Rows, unsigned Columns>
6 using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
7 
// Aggregate with a single matrix_t member. It serves as the parameter and
// return type of the transpose() function template in this file.
8 template <typename EltTy, unsigned Rows, unsigned Columns>
9 struct MyMatrix {
10   matrix_t<EltTy, Rows, Columns> value;
11 };
12 
// Returns a MyMatrix whose value is __builtin_matrix_transpose(M.value).
// The input is R x C; the result type swaps the dimensions to C x R.
13 template <typename T, unsigned R, unsigned C>
transpose(const MyMatrix<T,R,C> & M)14 MyMatrix<T, C, R> transpose(const MyMatrix<T, R, C> &M) {
15   MyMatrix<T, C, R> Res;
16   Res.value = __builtin_matrix_transpose(M.value);
17   return Res;
18 }
19 
// Instantiates transpose() for a 4 x 10 int matrix.
// Expected IR: the caller passes %M1 by reference and receives %M1_t through
// an sret argument; inside the instantiation the <40 x i32> value is loaded
// and fed to llvm.matrix.transpose.v40i32 with dimensions (4, 10).
test_transpose_template1()20 void test_transpose_template1() {
21   // CHECK-LABEL: define void @_Z24test_transpose_template1v()
22   // CHECK:         call void @_Z9transposeIiLj4ELj10EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.0* sret align 4 %M1_t, %struct.MyMatrix* nonnull align 4 dereferenceable(160) %M1)
23 
24   // CHECK-LABEL: define linkonce_odr void @_Z9transposeIiLj4ELj10EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(
25   // CHECK:         [[M:%.*]] = load <40 x i32>, <40 x i32>* {{.*}}, align 4
26   // CHECK-NEXT:    [[M_T:%.*]] = call <40 x i32> @llvm.matrix.transpose.v40i32(<40 x i32> [[M]], i32 4, i32 10)
27 
28   MyMatrix<int, 4, 10> M1;
29   MyMatrix<int, 10, 4> M1_t = transpose(M1);
30 }
31 
// Chains three transpose() calls on a 7 x 6 double matrix, so the shapes go
// 7x6 -> 6x7 -> 7x6 -> 6x7.
// Expected IR: three consecutive calls in the caller that alternate between
// the <7, 6> and <6, 7> instantiations, with the temporaries %ref.tmp1 and
// %ref.tmp threaded between them. Each instantiation body is then checked for
// load -> llvm.matrix.transpose.v42f64 -> store into %agg.result.
test_transpose_template2(MyMatrix<double,7,6> & M)32 void test_transpose_template2(MyMatrix<double, 7, 6> &M) {
33   // CHECK-LABEL: define void @_Z24test_transpose_template2R8MyMatrixIdLj7ELj6EE(
34   // CHECK:         call void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.2* sret align 8 %ref.tmp1, %struct.MyMatrix.1* nonnull align 8 dereferenceable(336) %0)
35   // CHECK-NEXT:    call void @_Z9transposeIdLj6ELj7EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.1* sret align 8 %ref.tmp, %struct.MyMatrix.2* nonnull align 8 dereferenceable(336) %ref.tmp1)
36   // CHECK-NEXT:    call void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.2* sret align 8 %M2_t, %struct.MyMatrix.1* nonnull align 8 dereferenceable(336) %ref.tmp)
37 
38   // CHECK-LABEL: define linkonce_odr void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(
39   // CHECK:         [[M:%.*]] = load <42 x double>, <42 x double>* {{.*}}, align 8
40   // CHECK-NEXT:    [[M_T:%.*]] = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> [[M]], i32 7, i32 6)
41   // CHECK-NEXT:    [[RES_ADDR:%.*]] = getelementptr inbounds %struct.MyMatrix.2, %struct.MyMatrix.2* %agg.result, i32 0, i32 0
42   // CHECK-NEXT:    [[RES_ADDR_C:%.*]] = bitcast [42 x double]* [[RES_ADDR]] to <42 x double>*
43   // CHECK-NEXT:    store <42 x double> [[M_T]], <42 x double>* [[RES_ADDR_C]], align 8
44 
45   // CHECK-LABEL: define linkonce_odr void @_Z9transposeIdLj6ELj7EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(
46   // CHECK:         [[M:%.*]] = load <42 x double>, <42 x double>* {{.*}}, align 8
47   // CHECK-NEXT:    [[M_T:%.*]] = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> [[M]], i32 6, i32 7)
48   // CHECK-NEXT:    [[RES_ADDR:%.*]] = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
49   // CHECK-NEXT:    [[RES_ADDR_C:%.*]] = bitcast [42 x double]* [[RES_ADDR]] to <42 x double>*
50   // CHECK-NEXT:    store <42 x double> [[M_T]], <42 x double>* [[RES_ADDR_C]], align 8
51 
52   MyMatrix<double, 6, 7> M2_t = transpose(transpose(transpose(M)));
53 }
54 
// Declaration only (no body is visible here): returns a 3 x 3 float matrix by
// value. test_transpose_rvalue() calls it to obtain a matrix rvalue.
55 matrix_t<float, 3, 3> get_matrix();
56 
// Transposes an rvalue expression: the result of get_matrix() plus the scalar
// 2.0.
// Expected IR, checked from the entry block onwards: alloca for m_t, the call
// to get_matrix, an fadd against a <9 x float> vector of 2.0 values,
// llvm.matrix.transpose.v9f32 with dimensions (3, 3), then the store to m_t.
test_transpose_rvalue()57 void test_transpose_rvalue() {
58   // CHECK-LABEL: define void @_Z21test_transpose_rvaluev()
59   // CHECK-NEXT:  entry:
60   // CHECK-NEXT:    [[M_T_ADDR:%.*]] = alloca [9 x float], align 4
61   // CHECK-NEXT:    [[CALL_RES:%.*]] = call <9 x float> @_Z10get_matrixv()
62   // CHECK-NEXT:    [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
63   // CHECK-NEXT:    [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[ADD]], i32 3, i32 3)
64   // CHECK-NEXT:    [[M_T_ADDR_CAST:%.*]] = bitcast [9 x float]* [[M_T_ADDR]] to <9 x float>*
65   // CHECK-NEXT:    store <9 x float> [[M_T]], <9 x float>* [[M_T_ADDR_CAST]], align 4
66   matrix_t<float, 3, 3> m_t = __builtin_matrix_transpose(get_matrix() + 2.0);
67 }
68 
// Transposes a matrix received through a const reference.
// Expected IR: a <9 x float> load, llvm.matrix.transpose.v9f32 with dimensions
// (3, 3), and a store of the result into the local %m_t.
test_transpose_const(const matrix_t<float,3,3> & m)69 void test_transpose_const(const matrix_t<float, 3, 3> &m) {
70   // CHECK-LABEL:  define void @_Z20test_transpose_constRKU11matrix_typeLm3ELm3Ef(
71   // CHECK:         [[MATRIX:%.*]] = load <9 x float>, <9 x float>* {{.*}}, align 4
72   // CHECK-NEXT:    [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[MATRIX]], i32 3, i32 3)
73   // CHECK-NEXT:    [[M_T_ADDR:%.*]] = bitcast [9 x float]* %m_t to <9 x float>*
74   // CHECK-NEXT:    store <9 x float> [[M_T]], <9 x float>* [[M_T_ADDR]], align 4
75   matrix_t<float, 3, 3> m_t = __builtin_matrix_transpose(m);
76 }
77 
78 // TODO: Enable once initialization support is defined and implemented for
79 //       matrix types.
80 // void test_lvalue_conversion() {
81 //  constexpr double4x4 m = {};
82 //  [] { return __builtin_matrix_transpose(m); }
83 //}
84 
// Wraps __builtin_matrix_column_major_load: loads an R x C matrix of T from
// Ptr, forwarding the template arguments R, C and S as the builtin's second,
// third and fourth arguments. The test expectations show S as the i64 stride
// operand of the resulting intrinsic call.
85 template <typename T, unsigned R, unsigned C, unsigned S>
column_major_load_with_stride(T * Ptr)86 matrix_t<T, R, C> column_major_load_with_stride(T *Ptr) {
87   return __builtin_matrix_column_major_load(Ptr, R, C, S);
88 }
89 
// Instantiates column_major_load_with_stride<double, 10, 4, 15>.
// Expected IR: the caller loads Ptr and calls the instantiation; inside it,
// llvm.matrix.column.major.load.v40f64 is called with an align 8 pointer,
// i64 15 in the stride position, and i32 10 / i32 4 as the last two operands.
test_column_major_load_with_stride_template_double(double * Ptr)90 void test_column_major_load_with_stride_template_double(double *Ptr) {
91   // CHECK-LABEL: define void @_Z50test_column_major_load_with_stride_template_doublePd(double* %Ptr)
92   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
93   // CHECK-NEXT:    call <40 x double> @_Z29column_major_load_with_strideIdLj10ELj4ELj15EEU11matrix_typeXT0_EXT1_ET_PS0_(double* [[PTR]])
94 
95   // CHECK-LABEL:  define linkonce_odr <40 x double> @_Z29column_major_load_with_strideIdLj10ELj4ELj15EEU11matrix_typeXT0_EXT1_ET_PS0_(double* %Ptr)
96   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
97   // CHECK-NEXT:    call <40 x double> @llvm.matrix.column.major.load.v40f64(double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
98 
99   matrix_t<double, 10, 4> M1 = column_major_load_with_stride<double, 10, 4, 15>(Ptr);
100 }
101 
// Instantiates column_major_load_with_stride<int, 3, 2, 12>.
// Expected IR: the caller loads Ptr and calls the instantiation; inside it,
// llvm.matrix.column.major.load.v6i32 is called with an align 4 pointer,
// i64 12 in the stride position, and i32 3 / i32 2 as the last two operands.
// The label expectation deliberately does not pin the function's attribute
// group number, which is unrelated to what this test verifies and changes
// whenever the set of emitted attribute groups changes.
test_column_major_load_with_stride_template_int(int * Ptr)102 void test_column_major_load_with_stride_template_int(int *Ptr) {
103   // CHECK-LABEL: define void @_Z47test_column_major_load_with_stride_template_intPi(i32* %Ptr)
104   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
105   // CHECK-NEXT:    call <6 x i32> @_Z29column_major_load_with_strideIiLj3ELj2ELj12EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* [[PTR]])
106 
107   // CHECK-LABEL: define linkonce_odr <6 x i32> @_Z29column_major_load_with_strideIiLj3ELj2ELj12EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* %Ptr)
108   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
109   // CHECK-NEXT:    call <6 x i32> @llvm.matrix.column.major.load.v6i32(i32* align 4 [[PTR]], i64 12, i1 false, i32 3, i32 2)
110 
111   matrix_t<int, 3, 2> M1 = column_major_load_with_stride<int, 3, 2, 12>(Ptr);
112 }
113 
// Struct with a single char member and a non-explicit conversion operator to
// unsigned that returns it. The stride-wrapper tests pass an object of this
// type as the stride argument, so the builtins must apply the user-defined
// conversion.
114 struct UnsignedWrapper {
115   char x;
116› operator unsignedUnsignedWrapper116   operator unsigned() {
117     return x;
118   }
119 };
120 
// Passes an UnsignedWrapper as the stride argument of
// __builtin_matrix_column_major_load.
// Expected IR: W is loaded, its conversion operator to unsigned is called,
// the i32 result is zero-extended to i64, Ptr is loaded, and the extended
// value is the stride operand of llvm.matrix.column.major.load.v4i32.
test_column_major_load_stride_wrapper(int * Ptr,UnsignedWrapper & W)121 void test_column_major_load_stride_wrapper(int *Ptr, UnsignedWrapper &W) {
122   // CHECK-LABEL:  define void @_Z37test_column_major_load_stride_wrapperPiR15UnsignedWrapper(i32* %Ptr, %struct.UnsignedWrapper* nonnull align 1 dereferenceable(1) %W)
123   // CHECK:         [[W:%.*]] = load %struct.UnsignedWrapper*, %struct.UnsignedWrapper** %W.addr, align 8
124   // CHECK-NEXT:    [[STRIDE:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* [[W]])
125   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = zext i32 [[STRIDE]] to i64
126   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
127   // CHECK-NEXT:    call <4 x i32> @llvm.matrix.column.major.load.v4i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
128   matrix_t<int, 2, 2> M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, W);
129 }
130 
// constexpr helper returning 3. The tests use it both as a row-count argument
// and as a stride argument.
constexpr3()131 constexpr int constexpr3() { return 3; }
132 
// Supplies the row count of __builtin_matrix_column_major_load through a call
// to the constexpr function constexpr3().
// Expected IR: immediately after the load of Ptr, the intrinsic call carries
// the row count as the immediate i32 3 (result type <6 x i32>, columns i32 2,
// stride i64 3).
test_column_major_load_constexpr_num_rows(int * Ptr)133 void test_column_major_load_constexpr_num_rows(int *Ptr) {
134   // CHECK-LABEL: define void @_Z41test_column_major_load_constexpr_num_rowsPi(i32* %Ptr)
135   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
136   // CHECK-NEXT:    call <6 x i32> @llvm.matrix.column.major.load.v6i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
137 
138   matrix_t<int, 3, 2> M1 = __builtin_matrix_column_major_load(Ptr, constexpr3(), 2, 3);
139 }
140 
// constexpr helper returning 1; used as a column-count argument.
constexpr1()141 constexpr int constexpr1() { return 1; }
142 
// Supplies the column count of __builtin_matrix_column_major_load through a
// call to the constexpr function constexpr1().
// Expected IR: immediately after the load of Ptr, the intrinsic call carries
// the column count as the immediate i32 1 (result type <2 x i32>, rows i32 2,
// stride i64 3).
test_column_major_load_constexpr_num_columns(int * Ptr)143 void test_column_major_load_constexpr_num_columns(int *Ptr) {
144   // CHECK-LABEL: define void @_Z44test_column_major_load_constexpr_num_columnsPi(i32* %Ptr)
145   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
146   // CHECK-NEXT:    call <2 x i32> @llvm.matrix.column.major.load.v2i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 1)
147   matrix_t<int, 2, 1> M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr1(), 3);
148 }
149 
// constexpr function template returning N + 1, so a column count can be
// computed from a template argument.
150 template <unsigned N>
constexpr_plus1()151 constexpr int constexpr_plus1() { return N + 1; }
152 
// Supplies the column count through the constexpr function template
// instantiation constexpr_plus1<4>(), i.e. 5.
// Expected IR: immediately after the load of Ptr, the intrinsic call carries
// the column count as the immediate i32 5 (result type <10 x i32>, rows
// i32 2, stride i64 3).
test_column_major_load_constexpr_num_columns_temp(int * Ptr)153 void test_column_major_load_constexpr_num_columns_temp(int *Ptr) {
154   // CHECK-LABEL:  define void @_Z49test_column_major_load_constexpr_num_columns_tempPi(i32* %Ptr)
155   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
156   // CHECK-NEXT:    call <10 x i32> @llvm.matrix.column.major.load.v10i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 5)
157   matrix_t<int, 2, 5> M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr_plus1<4>(), 3);
158 }
159 
// Supplies the stride of __builtin_matrix_column_major_load through a call to
// constexpr3().
// Expected IR: unlike the row/column tests, the expectations here require an
// actual call to constexpr3 whose i32 result is sign-extended to i64 (the
// function returns int) and used as the stride operand; rows and columns are
// the immediates i32 2, i32 2.
test_column_major_load_constexpr_stride_constexpr(int * Ptr)160 void test_column_major_load_constexpr_stride_constexpr(int *Ptr) {
161   // CHECK-LABEL: define void @_Z49test_column_major_load_constexpr_stride_constexprPi(i32* %Ptr)
162   // CHECK:         [[STRIDE:%.*]] = call i32 @_Z10constexpr3v()
163   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
164   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
165   // CHECK-NEXT:    call <4 x i32> @llvm.matrix.column.major.load.v4i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
166 
167   matrix_t<int, 2, 2> M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, constexpr3());
168 }
169 
// Primary template of a local remove_pointer trait: for a non-pointer T the
// nested type is T itself. This is the base case of the recursion performed
// by the pointer partial specialization.
170 template <typename T>
171 struct remove_pointer {
172   typedef T type;
173 };
174 
// Partial specialization for pointer types: recurses on the pointee, so every
// level of pointer is stripped (e.g. float ** yields float).
175 template <typename T>
176 struct remove_pointer<T *> {
177   typedef typename remove_pointer<T>::type type;
178 };
179 
180 // Same as column_major_load_with_stride, but with the PtrT argument itself being a pointer type.
// Loads an R x C matrix through __builtin_matrix_column_major_load, passing
// R, C and S from the template arguments. The template parameter PtrT is the
// pointer type itself; the element type of the returned matrix is derived
// from it with remove_pointer.
181 template <typename PtrT, unsigned R, unsigned C, unsigned S>
column_major_load_with_stride2(PtrT Ptr)182 matrix_t<typename remove_pointer<PtrT>::type, R, C> column_major_load_with_stride2(PtrT Ptr) {
183   return __builtin_matrix_column_major_load(Ptr, R, C, S);
184 }
185 
// Instantiates column_major_load_with_stride2<float *, 2, 2, 2>. This
// function carries no IR expectations of its own; it only has to compile and
// emit code.
call_column_major_load_with_stride2(float * Ptr)186 void call_column_major_load_with_stride2(float *Ptr) {
187   matrix_t<float, 2, 2> m = column_major_load_with_stride2<float *, 2, 2, 2>(Ptr);
188 }
189 
// Wraps __builtin_matrix_column_major_store: stores the R x C matrix m to Ptr,
// passing the template argument S as the builtin's third argument. The test
// expectations show S as the i64 stride operand of the resulting intrinsic
// call.
190 template <typename T, unsigned R, unsigned C, unsigned S>
column_major_store_with_stride(matrix_t<T,R,C> & m,T * Ptr)191 void column_major_store_with_stride(matrix_t<T, R, C> &m, T *Ptr) {
192   __builtin_matrix_column_major_store(m, Ptr, S);
193 }
194 
// Instantiates column_major_store_with_stride<double, 10, 4, 15>.
// Expected IR: the caller loads Ptr and calls the instantiation with %M1;
// inside it, the <40 x double> value and Ptr are loaded and passed to
// llvm.matrix.column.major.store.v40f64 with an align 8 pointer, i64 15 in
// the stride position, and i32 10 / i32 4 as the last two operands.
test_column_major_store_with_stride_template_double(double * Ptr)195 void test_column_major_store_with_stride_template_double(double *Ptr) {
196   // CHECK-LABEL: define void @_Z51test_column_major_store_with_stride_template_doublePd(double* %Ptr)
197   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
198   // CHECK-NEXT:    call void @_Z30column_major_store_with_strideIdLj10ELj4ELj15EEvRU11matrix_typeXT0_EXT1_ET_PS0_([40 x double]* nonnull align 8 dereferenceable(320) %M1, double* [[PTR]])
199 
200   // CHECK-LABEL:  define linkonce_odr void @_Z30column_major_store_with_strideIdLj10ELj4ELj15EEvRU11matrix_typeXT0_EXT1_ET_PS0_([40 x double]* nonnull align 8 dereferenceable(320) %m, double* %Ptr)
201   // CHECK:         [[M:%.*]] = load <40 x double>, <40 x double>* {{.*}}, align 8
202   // CHECK-NEXT:    [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
203   // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v40f64(<40 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
204 
205   matrix_t<double, 10, 4> M1;
206   column_major_store_with_stride<double, 10, 4, 15>(M1, Ptr);
207 }
208 
// Instantiates column_major_store_with_stride<int, 3, 2, 3>.
// Expected IR: the caller loads Ptr and calls the instantiation with %M1;
// inside it, the <6 x i32> value and Ptr are loaded and passed to
// llvm.matrix.column.major.store.v6i32 with an align 4 pointer, i64 3 in the
// stride position, and i32 3 / i32 2 as the last two operands.
test_column_major_store_with_stride_template_int(int * Ptr)209 void test_column_major_store_with_stride_template_int(int *Ptr) {
210   // CHECK-LABEL: define void @_Z48test_column_major_store_with_stride_template_intPi(i32* %Ptr)
211   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
212   // CHECK-NEXT:    call void @_Z30column_major_store_with_strideIiLj3ELj2ELj3EEvRU11matrix_typeXT0_EXT1_ET_PS0_([6 x i32]* nonnull align 4 dereferenceable(24) %M1, i32* [[PTR]])
213 
214   // CHECK-LABEL:  define linkonce_odr void @_Z30column_major_store_with_strideIiLj3ELj2ELj3EEvRU11matrix_typeXT0_EXT1_ET_PS0_([6 x i32]* nonnull align 4 dereferenceable(24) %m, i32* %Ptr)
215   // CHECK:         [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
216   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
217   // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v6i32(<6 x i32> [[M]], i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
218 
219   matrix_t<int, 3, 2> M1;
220   column_major_store_with_stride<int, 3, 2, 3>(M1, Ptr);
221 }
222 
// Passes an UnsignedWrapper as the stride argument of
// __builtin_matrix_column_major_store.
// Expected IR order: the matrix value is loaded, then Ptr, then W; the
// conversion operator to unsigned is called, its i32 result is zero-extended
// to i64, and that value is the stride operand of
// llvm.matrix.column.major.store.v4i32.
test_column_major_store_stride_wrapper(int * Ptr,UnsignedWrapper & W)223 void test_column_major_store_stride_wrapper(int *Ptr, UnsignedWrapper &W) {
224   // CHECK-LABEL: define void @_Z38test_column_major_store_stride_wrapperPiR15UnsignedWrapper(i32* %Ptr, %struct.UnsignedWrapper* nonnull align 1 dereferenceable(1) %W)
225   // CHECK:         [[M:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
226   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
227   // CHECK-NEXT:    [[W:%.*]] = load %struct.UnsignedWrapper*, %struct.UnsignedWrapper** %W.addr, align 8
228   // CHECK-NEXT:    [[IDX:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* [[W]])
229   // CHECK-NEXT:    [[IDX_EXT:%.*]] = zext i32 [[IDX]] to i64
230   // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v4i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
231 
232   matrix_t<int, 2, 2> M1;
233   __builtin_matrix_column_major_store(M1, Ptr, W);
234 }
235 
// Supplies the stride of __builtin_matrix_column_major_store through a call
// to constexpr3().
// Expected IR order: the matrix value is loaded, then Ptr; constexpr3 is
// actually called, its i32 result is sign-extended to i64 (the function
// returns int), and that value is the stride operand of
// llvm.matrix.column.major.store.v4i32.
// The address operand of the matrix load is matched with a wildcard, as in
// test_column_major_store_stride_wrapper, rather than by an unnamed value
// number that depends on how many temporaries precede it.
test_column_major_store_constexpr_stride_constexpr(int * Ptr)236 void test_column_major_store_constexpr_stride_constexpr(int *Ptr) {
237   // CHECK-LABEL: define void @_Z50test_column_major_store_constexpr_stride_constexprPi(i32* %Ptr)
238   // CHECK:         [[M:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
239   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
240   // CHECK-NEXT:    [[IDX:%.*]] = call i32 @_Z10constexpr3v()
241   // CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[IDX]] to i64
242   // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v4i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
243 
244   matrix_t<int, 2, 2> M;
245   __builtin_matrix_column_major_store(M, Ptr, constexpr3());
246 }
247