// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

// Matrix typedefs exercised below: element type x rows x columns.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));

// Floating point matrix/scalar additions.
9
void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> %a, <25 x double> %b, <25 x double> %c)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = b + c;
}
19
void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> %a, <25 x double> %b)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += b;
}
29
void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> %a, <25 x double> %b)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= b;
}
39
void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> %a, <6 x float> %b, <6 x float> %c)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[C:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a = b + c;
}
49
void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> %a, <6 x float> %b)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a += b;
}
59
void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> %a, <6 x float> %b)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a -= b;
}
69
void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vf;
}
82
void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += vf;
}
95
void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= vf;
}
108
void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vd;
}
120
void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a += vd;
}
131
void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a -= vd;
}
142
void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vf;
}
154
void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vf;
}
165
void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vf;
}
176
void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vd;
}
189
void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vd;
}
201
void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vd;
}
213
// Integer matrix/scalar additions
215
void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b, <27 x i32> %c)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a = b + c;
}
224
void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a += b;
}
233
void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a -= b;
}
242
void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[C:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a = b + c;
}
252
void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a += b;
}
262
void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a -= b;
}
272
void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK:       [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vs;
}
285
void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vs;
}
298
void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vs;
}
311
void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK:       [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vli;
}
324
void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vli;
}
337
void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vli;
}
350
void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK:       [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vulli;
}
363
void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a += vulli;
}
376
void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a -= vulli;
}
389
void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vs + b;
}
402
void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vs;
}
415
void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vs;
}
428
void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vli + b;
}
440
void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vli;
}
452
void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vli;
}
464
void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vulli + b;
}
475
void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vulli;
}
487
void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vulli;
}
499
500 // Tests for matrix multiplication.
501
// Matrix * matrix lowers to the @llvm.matrix.multiply intrinsic; the trailing
// i32 arguments encode the dimensions (rows-LHS, cols-LHS/rows-RHS, cols-RHS).
multiply_matrix_matrix_double(dx5x5_t b,dx5x5_t c)502 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
503 // CHECK-LABEL: @multiply_matrix_matrix_double(
504 // CHECK: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
505 // CHECK-NEXT: [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
506 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
507 // CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [25 x double]* %a to <25 x double>*
508 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* [[A_ADDR]], align 8
509 // CHECK-NEXT: ret void
510 //
511
512 dx5x5_t a;
513 a = b * c;
514 }
515
// Compound *= between two matrices; note the RHS (c) is emitted/loaded before
// the LHS (b), the reverse of the plain `b * c` case above.
multiply_compound_matrix_matrix_double(dx5x5_t b,dx5x5_t c)516 void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
517 // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
518 // CHECK: [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
519 // CHECK-NEXT: [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
520 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
521 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
522 // CHECK-NEXT: ret void
523 b *= c;
524 }
525
526 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
527 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
// Non-square integer multiply: (9x3) * (3x9) -> (9x9), i.e. dims 9, 3, 9.
528 // CHECK-LABEL: @multiply_matrix_matrix_int(
529 // CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
530 // CHECK-NEXT: [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
531 // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
532 // CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [81 x i32]* %a to <81 x i32>*
533 // CHECK-NEXT: store <81 x i32> [[RES]], <81 x i32>* [[A_ADDR]], align 4
534 // CHECK-NEXT: ret void
535 //
multiply_matrix_matrix_int(ix9x3_t b,ix3x9_t c)536 void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
537 ix9x9_t a;
538 a = b * c;
539 }
540
// double matrix * float scalar: the scalar is fpext'ed to the element type
// (double) before being splatted and multiplied.
541 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
542 // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
543 // CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
544 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
545 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
546 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
547 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
548 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
549 // CHECK-NEXT: ret void
550 //
multiply_double_matrix_scalar_float(dx5x5_t a,float s)551 void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
552 a = a * s;
553 }
554
// Compound *= with a float scalar: scalar is loaded/extended first, then the
// matrix is loaded (opposite operand order from the non-compound form).
555 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
556 // CHECK: [[S:%.*]] = load float, float* %s.addr, align 4
557 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
558 // CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
559 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
560 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
561 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
562 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
563 // CHECK-NEXT: ret void
564 //
multiply_compound_double_matrix_scalar_float(dx5x5_t a,float s)565 void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
566 a *= s;
567 }
568
// Same element type (double): no conversion step, just splat + fmul.
569 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
570 // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
571 // CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
572 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
573 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
574 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
575 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
576 // CHECK-NEXT: ret void
577 //
multiply_double_matrix_scalar_double(dx5x5_t a,double s)578 void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
579 a = a * s;
580 }
581
// Compound *= with a matching double scalar: scalar loaded before the matrix.
582 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
583 // CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
584 // CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
585 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
586 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
587 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
588 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
589 // CHECK-NEXT: ret void
multiply_compound_double_matrix_scalar_double(dx5x5_t a,double s)590 void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
591 a *= s;
592 }
593
// float matrix with a double scalar on the LEFT: the scalar is fptrunc'ed down
// to float, splatted, and becomes the first fmul operand.
594 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
595 // CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
596 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
597 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
598 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
599 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
600 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
601 // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
602 // CHECK-NEXT: ret void
603 //
multiply_float_matrix_scalar_double(fx2x3_t b,double s)604 void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
605 b = s * b;
606 }
607
// Compound *= on a float matrix with a double scalar: scalar is truncated to
// the element type before the splat and multiply.
608 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
609 // CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
610 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
611 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
612 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
613 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
614 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
615 // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
616 // CHECK-NEXT: ret void
multiply_compound_float_matrix_scalar_double(fx2x3_t b,double s)617 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
618 b *= s;
619 }
620
// int matrix * short scalar (scalar on the left): the short is sign-extended
// to i32 before the splat and integer mul.
621 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
622 // CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
623 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
624 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
625 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
626 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
627 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
628 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
629 // CHECK-NEXT: ret void
630 //
multiply_int_matrix_scalar_short(ix9x3_t b,short s)631 void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
632 b = s * b;
633 }
634
// Compound *= with a short scalar: same sext + splat, matrix is the first
// mul operand for the compound form.
635 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
636 // CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
637 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
638 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
639 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
640 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
641 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
642 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
643 // CHECK-NEXT: ret void
644 //
multiply_compound_int_matrix_scalar_short(ix9x3_t b,short s)645 void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
646 b *= s;
647 }
648
// int matrix * unsigned long long scalar: the wider scalar is truncated to the
// i32 element type before the splat.
649 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
650 // CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
651 // CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
652 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
653 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
654 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
655 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
656 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
657 // CHECK-NEXT: ret void
658 //
multiply_int_matrix_scalar_ull(ix9x3_t b,unsigned long long s)659 void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
660 b = b * s;
661 }
662
// Compound *= with an ull scalar: trunc to i32, splat, then mul.
multiply_compound_int_matrix_scalar_ull(ix9x3_t b,unsigned long long s)663 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
664 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
665 // CHECK: [[S:%.*]] = load i64, i64* %s.addr, align 8
666 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
667 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
668 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
669 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
670 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
671 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
672 // CHECK-NEXT: ret void
673
674 b *= s;
675 }
676
// Matrix * floating-point literal: the splat is folded into a constant vector
// operand of the fmul (no insertelement/shufflevector at -O0).
677 // CHECK-LABEL: @multiply_float_matrix_constant(
678 // CHECK-NEXT: entry:
679 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
680 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
681 // CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
682 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
683 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
684 // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
685 // CHECK-NEXT: ret void
686 //
multiply_float_matrix_constant(fx2x3_t a)687 void multiply_float_matrix_constant(fx2x3_t a) {
688 a = a * 2.5;
689 }
690
// Compound *= with a literal: identical lowering to the non-compound form.
691 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
692 // CHECK-NEXT: entry:
693 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
694 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
695 // CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
696 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
697 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
698 // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
699 // CHECK-NEXT: ret void
multiply_compound_float_matrix_constant(fx2x3_t a)700 void multiply_compound_float_matrix_constant(fx2x3_t a) {
701 a *= 2.5;
702 }
703
// Integer literal * matrix (literal on the LEFT): constant splat vector is the
// first mul operand.
704 // CHECK-LABEL: @multiply_int_matrix_constant(
705 // CHECK-NEXT: entry:
706 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
707 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
708 // CHECK-NEXT: store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
709 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
710 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
711 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
712 // CHECK-NEXT: ret void
713 //
multiply_int_matrix_constant(ix9x3_t a)714 void multiply_int_matrix_constant(ix9x3_t a) {
715 a = 5 * a;
716 }
717
// Compound *= with an integer literal: constant splat is the second operand.
718 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
719 // CHECK-NEXT: entry:
720 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
721 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
722 // CHECK-NEXT: store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
723 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
724 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
725 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
726 // CHECK-NEXT: ret void
727 //
multiply_compound_int_matrix_constant(ix9x3_t a)728 void multiply_compound_int_matrix_constant(ix9x3_t a) {
729 a *= 5;
730 }
731
// double matrix / float scalar: scalar is fpext'ed, splatted, and used as the
// fdiv divisor.
732 // CHECK-LABEL: @divide_double_matrix_scalar_float(
733 // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
734 // CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
735 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
736 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
737 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
738 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
739 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
740 // CHECK-NEXT: ret void
741 //
divide_double_matrix_scalar_float(dx5x5_t a,float s)742 void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
743 a = a / s;
744 }
745
// double matrix / double scalar: no conversion, just splat + fdiv.
746 // CHECK-LABEL: @divide_double_matrix_scalar_double(
747 // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
748 // CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
749 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
750 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
751 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
752 // CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
753 // CHECK-NEXT: ret void
754 //
divide_double_matrix_scalar_double(dx5x5_t a,double s)755 void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
756 a = a / s;
757 }
758
// float matrix / double scalar: scalar is fptrunc'ed to float before the splat.
759 // CHECK-LABEL: @divide_float_matrix_scalar_double(
760 // CHECK: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
761 // CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
762 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
763 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
764 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
765 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
766 // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
767 // CHECK-NEXT: ret void
768 //
divide_float_matrix_scalar_double(fx2x3_t b,double s)769 void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
770 b = b / s;
771 }
772
// int matrix / short scalar: signed elements, so division lowers to sdiv.
773 // CHECK-LABEL: @divide_int_matrix_scalar_short(
774 // CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
775 // CHECK-NEXT: [[S:%.*]] = load i16, i16* %s.addr, align 2
776 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
777 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
778 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
779 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
780 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
781 // CHECK-NEXT: ret void
782 //
divide_int_matrix_scalar_short(ix9x3_t b,short s)783 void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
784 b = b / s;
785 }
786
// int matrix / ull scalar: the result type is the (signed) matrix element
// type, so the scalar is truncated to i32 and the division is sdiv.
787 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
788 // CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
789 // CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
790 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
791 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
792 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
793 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
794 // CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
795 // CHECK-NEXT: ret void
796 //
divide_int_matrix_scalar_ull(ix9x3_t b,unsigned long long s)797 void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
798 b = b / s;
799 }
800
// Unsigned matrix / unsigned scalar: unsigned elements lower to udiv.
801 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
802 // CHECK: [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8
803 // CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
804 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0
805 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
806 // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
807 // CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8
808 // CHECK-NEXT: ret void
809 //
divide_ull_matrix_scalar_ull(ullx4x2_t b,unsigned long long s)810 void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
811 b = b / s;
812 }
813
// Matrix / literal: the splat of 2.5 folds to a constant divisor vector.
814 // CHECK-LABEL: @divide_float_matrix_constant(
815 // CHECK-NEXT: entry:
816 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
817 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
818 // CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
819 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
820 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
821 // CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
822 // CHECK-NEXT: ret void
823 //
divide_float_matrix_constant(fx2x3_t a)824 void divide_float_matrix_constant(fx2x3_t a) {
825 a = a / 2.5;
826 }
827
828 // Tests for the matrix type operators.
829
830 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
831 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
832
833 // Check that we can use matrix index expression on different floating point
834 // matrixes and indices.
// Constant indices (long long row, unsigned column): a[0][1] in a 5x5
// column-major matrix is flat element 1*5 + 0 = 5, folded into the insert.
insert_double_matrix_const_idx_ll_u_double(dx5x5_t a,double d,fx2x3_t b,float e,int j,unsigned k)835 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
836 // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
837 // CHECK: [[D:%.*]] = load double, double* %d.addr, align 8
838 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
839 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
840 // CHECK-NEXT: store <25 x double> [[MATINS]], <25 x double>* {{.*}}, align 8
841 // CHECK-NEXT: ret void
842
843 a[0ll][1u] = d;
844 }
845
// Constant indices: a[1][4] -> flat element 4*5 + 1 = 21.
insert_double_matrix_const_idx_i_u_double(dx5x5_t a,double d)846 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
847 // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
848 // CHECK: [[D:%.*]] = load double, double* %d.addr, align 8
849 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, <25 x double>* [[MAT_ADDR:%.*]], align 8
850 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
851 // CHECK-NEXT: store <25 x double> [[MATINS]], <25 x double>* [[MAT_ADDR]], align 8
852 // CHECK-NEXT: ret void
853
854 a[1][4u] = d;
855 }
856
// Constant indices on a 2x3 float matrix: b[1][1] -> flat element 1*2 + 1 = 3.
insert_float_matrix_const_idx_ull_i_float(fx2x3_t b,float e)857 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
858 // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
859 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
860 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
861 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
862 // CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
863 // CHECK-NEXT: ret void
864
865 b[1ull][1] = e;
866 }
867
// Runtime indices: both are widened to i64 (sext for the signed int, zext for
// the unsigned) and the flat index is computed as col * rows + row.
insert_float_matrix_idx_i_u_float(fx2x3_t b,float e,int j,unsigned k)868 void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
869 // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
870 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
871 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
872 // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64
873 // CHECK-NEXT: [[K:%.*]] = load i32, i32* %k.addr, align 4
874 // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
875 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
876 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
877 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
878 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
879 // CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
880 // CHECK-NEXT: ret void
881
882 b[j][k] = e;
883 }
884
// Runtime indices of mixed widths: short row index is sext'ed to i64; the ull
// column index is already i64. Parenthesized base expression is also covered.
insert_float_matrix_idx_s_ull_float(fx2x3_t b,float e,short j,unsigned long long k)885 void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
886 // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
887 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
888 // CHECK-NEXT: [[J:%.*]] = load i16, i16* %j.addr, align 2
889 // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64
890 // CHECK-NEXT: [[K:%.*]] = load i64, i64* %k.addr, align 8
891 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
892 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
893 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
894 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
895 // CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
896 // CHECK-NEXT: ret void
897
898 (b)[j][k] = e;
899 }
900
901 // Check that we can use matrix index expressions on integer matrixes.
902 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index expressions: the row index (4 + i) is evaluated in i32 and sext'ed;
// the constant column (1 + 1u = 2) folds into the base offset 2*9 = 18.
insert_int_idx_expr(ix9x3_t a,int i)903 void insert_int_idx_expr(ix9x3_t a, int i) {
904 // CHECK-LABEL: @insert_int_idx_expr(
905 // CHECK: [[I1:%.*]] = load i32, i32* %i.addr, align 4
906 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i.addr, align 4
907 // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
908 // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
909 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
910 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
911 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
912 // CHECK-NEXT: store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR]], align 4
913 // CHECK-NEXT: ret void
914
915 a[4 + i][1 + 1u] = i;
916 }
917
918 // Check that we can use matrix index expressions on FP and integer
919 // matrixes.
920 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index expression on a matrix reached through a pointer: the pointee is
// loaded/stored through a bitcast of the [27 x i32] storage to <27 x i32>.
insert_float_into_int_matrix(ix9x3_t * a,int i)921 void insert_float_into_int_matrix(ix9x3_t *a, int i) {
922 // CHECK-LABEL: @insert_float_into_int_matrix(
923 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
924 // CHECK-NEXT: [[MAT_ADDR1:%.*]] = load [27 x i32]*, [27 x i32]** %a.addr, align 8
925 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [27 x i32]* [[MAT_ADDR1]] to <27 x i32>*
926 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR2]], align 4
927 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
928 // CHECK-NEXT: store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR2]], align 4
929 // CHECK-NEXT: ret void
930
931 (*a)[4][1] = i;
932 }
933
934 // Check that we can use overloaded matrix index expressions on matrixes with
935 // matching dimensions, but different element types.
936 typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
937 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// 3x3 double matrix: a[2][1] -> flat element 1*3 + 2 = 5.
insert_matching_dimensions1(dx3x3_t a,double i)938 void insert_matching_dimensions1(dx3x3_t a, double i) {
939 // CHECK-LABEL: @insert_matching_dimensions1(
940 // CHECK: [[I:%.*]] = load double, double* %i.addr, align 8
941 // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, <9 x double>* [[MAT_ADDR:%.*]], align 8
942 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
943 // CHECK-NEXT: store <9 x double> [[MATINS]], <9 x double>* [[MAT_ADDR]], align 8
944 // CHECK-NEXT: ret void
945
946 a[2u][1u] = i;
947 }
948
// Same 3x3 shape, float elements: b[1][2] -> flat element 2*3 + 1 = 7.
insert_matching_dimensions(fx3x3_t b,float e)949 void insert_matching_dimensions(fx3x3_t b, float e) {
950 // CHECK-LABEL: @insert_matching_dimensions(
951 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
952 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
953 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
954 // CHECK-NEXT: store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
955 // CHECK-NEXT: ret void
956
957 b[1u][2u] = e;
958 }
959
// Element read with constant-folded indices: a[2][2] -> flat element 2*5+2 = 12.
extract_double(dx5x5_t a)960 double extract_double(dx5x5_t a) {
961 // CHECK-LABEL: @extract_double(
962 // CHECK: [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
963 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
964 // CHECK-NEXT: ret double [[MATEXT]]
965
966 return a[2][3 - 1u];
967 }
968
// Element read returning a wider type: the extracted float is fpext'ed to
// double for the return value.
extract_float(fx3x3_t b)969 double extract_float(fx3x3_t b) {
970 // CHECK-LABEL: @extract_float(
971 // CHECK: [[MAT:%.*]] = load <9 x float>, <9 x float>* {{.*}}, align 4
972 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
973 // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
974 // CHECK-NEXT: ret double [[TO_DOUBLE]]
975
976 return b[2][1];
977 }
978
extract_int(ix9x3_t c,unsigned long j)979 int extract_int(ix9x3_t c, unsigned long j) {
980 // CHECK-LABEL: @extract_int(
981 // CHECK: [[J1:%.*]] = load i64, i64* %j.addr, align 8
982 // CHECK-NEXT: [[J2:%.*]] = load i64, i64* %j.addr, align 8
983 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
984 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
985 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
986 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
987 // CHECK-NEXT: ret i32 [[MATEXT]]
988
989 return c[j][j];
990 }
991
992 typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
993
// Matrix index applied after two levels of pointer arithmetic: ptr[1][2]
// selects the matrix, then [j][1] extracts flat element 1*3 + j.
test_extract_matrix_pointer1(dx3x2_t ** ptr,unsigned j)994 double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
995 // CHECK-LABEL: @test_extract_matrix_pointer1(
996 // CHECK: [[J:%.*]] = load i32, i32* %j.addr, align 4
997 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
998 // CHECK-NEXT: [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
999 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 1
1000 // CHECK-NEXT: [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
1001 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 2
1002 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
1003 // CHECK-NEXT: [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
1004 // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1005 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1006 // CHECK-NEXT: ret double [[MATEXT]]
1007
1008 return ptr[1][2][j][1];
1009 }
1010
// Explicit pointer-arithmetic spelling of the same access pattern; constant
// indices fold: [2][1*3-2] -> [2][1] -> flat element 1*3 + 2 = 5.
test_extract_matrix_pointer2(dx3x2_t ** ptr)1011 double test_extract_matrix_pointer2(dx3x2_t **ptr) {
1012 // CHECK-LABEL: @test_extract_matrix_pointer2(
1013 // CHECK-NEXT: entry:
1014 // CHECK: [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
1015 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 4
1016 // CHECK-NEXT: [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
1017 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 6
1018 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
1019 // CHECK-NEXT: [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
1020 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
1021 // CHECK-NEXT: ret double [[MATEXT]]
1022
1023 return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
1024 }
1025
// Extract with a runtime column index, then insert at a runtime row index:
// b[2][j] = b[0][k]. The extract index is the [[IDX2]] computed on the line
// above (k*3 + 0); referencing it by the correct variable name keeps FileCheck
// from failing on an undefined pattern variable.
insert_extract(dx5x5_t a,fx3x3_t b,unsigned long j,short k)1026 void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
1027 // CHECK-LABEL: @insert_extract(
1028 // CHECK: [[K:%.*]] = load i16, i16* %k.addr, align 2
1029 // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
1030 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
1031 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1032 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
1033 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1034 // CHECK-NEXT: [[J:%.*]] = load i64, i64* %j.addr, align 8
1035 // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
1036 // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
1037 // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR]], align 4
1038 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1039 // CHECK-NEXT: store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
1040 // CHECK-NEXT: ret void
1041
1042 b[2][j] = b[0][k];
1043 }
1044
// Compound assignment to one element: read-modify-write via extractelement,
// fsub, and insertelement at the same flat index (3*5 + 2 = 17).
insert_compound_stmt(dx5x5_t a)1045 void insert_compound_stmt(dx5x5_t a) {
1046 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> %a)
1047 // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8
1048 // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
1049 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
1050 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8
1051 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
1052 // CHECK-NEXT: store <25 x double> [[INS]], <25 x double>* [[A_PTR]], align 8
1053 // CHECK-NEXT: ret void
1054
1055 a[2][3] -= 1.0;
1056 }
1057
// Wrapper struct used to test matrix element access through a struct member
// (see insert_compound_stmt_field below).
struct Foo {
  fx2x3_t mat;
};
1061
insert_compound_stmt_field(struct Foo * a,float f,unsigned i,unsigned j)1062 void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
1063 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(%struct.Foo* %a, float %f, i32 %i, i32 %j)
1064 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
1065 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
1066 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
1067 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1068 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1069 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1070 // CHECK-NEXT: [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
1071 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
1072 // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1073 // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1074 // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
1075 // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1076 // CHECK-NEXT: store <6 x float> [[INS]], <6 x float>* [[MAT_PTR]], align 4
1077 // CHECK-NEXT: ret void
1078
1079 a->mat[i][j] += f;
1080 }
1081
// Matrix elements used as subscript indices for another matrix: the row
// index of the store into b is a[i][j] (flattened as j * 9 + i in the
// 9x3 matrix a) and the column index is a[j][i] + 2 (flattened i * 9 + j).
// The final store position in 5x5 b is column * 5 + row.
void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> %a, i32 %i, i32 %j, <25 x double> %b)
  // First index expression: a[i][j] -> extract at j*9 + i, sign-extend to i64.
  // CHECK: [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
  // CHECK-NEXT: [[J1:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
  // CHECK-NEXT: [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
  // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
  // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
  // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
  // Second index expression: a[j][i] + 2 -> extract at i*9 + j, add 2.
  // CHECK-NEXT: [[J2:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
  // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
  // CHECK-NEXT: [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
  // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
  // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
  // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
  // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
  // Store 1.5 into b at column * 5 + row using the two computed indices.
  // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
  // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
  // CHECK-NEXT: [[B:%.*]] = load <25 x double>, <25 x double>* [[B_PTR:%.*]], align 8
  // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
  // CHECK-NEXT: store <25 x double> [[INS]], <25 x double>* [[B_PTR]], align 8
  b[a[i][j]][a[j][i] + 2] = 1.5;
}
1110