1 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
2 
3 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
4 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
5 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
6 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
7 
8 // Floating point matrix/scalar additions.
9 
// 5x5 double matrices are lowered to flattened <25 x double> vectors; whole-
// matrix + / - becomes a single vector fadd/fsub on the loaded values.
void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> %a, <25 x double> %b, <25 x double> %c)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = b + c;
}

// Compound assignment: the RHS matrix is loaded before the LHS.
void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> %a, <25 x double> %b)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += b;
}

void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> %a, <25 x double> %b)
  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= b;
}
39 
// Same matrix/matrix checks for a 2x3 float matrix (flattened to <6 x float>).
void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> %a, <6 x float> %b, <6 x float> %c)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[C:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a = b + c;
}

void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> %a, <6 x float> %b)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a += b;
}

void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> %a, <6 x float> %b)
  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  a -= b;
}
69 
// Matrix + float scalar: the scalar is fpext'ed to the element type (double),
// splatted across the vector (insertelement + shufflevector), then combined.
// For a binary `a + vf` the matrix is loaded first; for compound assignment
// the scalar RHS is evaluated first.
void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vf;
}

void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a += vf;
}

void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> %a, float %vf)
  // CHECK:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a -= vf;
}
108 
// Matrix + same-type scalar: no conversion, just splat and fadd.
void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8

  a = a + vd;
}
120 
// Compound matrix += double scalar: scalar loaded first, splatted, added.
// Fix: the final store check was missing its `CHECK-NEXT:` prefix, leaving it
// an inert comment that FileCheck never verified (siblings all check it).
void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a += vd;
}
131 
// Compound matrix -= double scalar: scalar loaded first, splatted, subtracted.
// Fix: the final store check was missing its `CHECK-NEXT:` prefix, leaving it
// an inert comment that FileCheck never verified (siblings all check it).
void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> %a, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  a -= vd;
}
142 
// Float matrix + float scalar: no conversion needed, splat and fadd/fsub.
void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vf;
}

void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vf;
}

void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> %b, float %vf)
  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vf;
}
176 
// Float matrix + double scalar: the scalar is fptrunc'ed down to float (the
// matrix element type) before the splat.
void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4

  b = b + vd;
}

void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b += vd;
}

void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> %b, double %vd)
  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
  b -= vd;
}
213 
214 // Integer matrix/scalar additions
215 
// Integer matrix/matrix arithmetic: lowered to integer vector add/sub on the
// flattened <27 x i32> value.
void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b, <27 x i32> %c)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a = b + c;
}

void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a += b;
}

void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b)
  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK:       [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
  a -= b;
}
242 
// Same integer matrix/matrix checks for a 4x2 unsigned long long matrix
// (flattened <8 x i64>); signedness does not change add/sub lowering.
void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[C:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a = b + c;
}

void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a += b;
}

void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b)
  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  a -= b;
}
272 
// int matrix + short scalar: the scalar is sign-extended to i32, splatted,
// and added; the result is stored back through the same matrix address.
void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:   [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vs;
}
285 
// Compound int matrix += short scalar.
// Fixes: (1) the store referenced [[MAT_ADDR]] which is never defined in this
// CHECK-LABEL block (it leaked from a previous function and breaks under
// FileCheck's --enable-var-scope); capture the address at the load instead.
// (2) [[SCALAR_EXT:%.*]] in the insertelement line silently REdefined the
// variable rather than checking it is the sext result; use [[SCALAR_EXT]].
void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a += vs;
}
298 
// Compound int matrix -= short scalar.
// Fixes (same as the add_compound sibling): capture the matrix address with
// [[MATRIX_ADDR]] instead of referencing [[MAT_ADDR]], which is undefined in
// this CHECK-LABEL block; and use [[SCALAR_EXT]] rather than redefining it.
void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a -= vs;
}
311 
// int matrix + long scalar: the scalar is truncated from i64 to the i32
// element type before the splat.
void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vli;
}
324 
// Compound int matrix += long scalar.
// Fixes: (1) `trunc i64 %1` hard-coded an SSA value number instead of using
// the captured [[SCALAR]]; (2) the store referenced [[MAT_ADDR]], which is
// undefined in this CHECK-LABEL block (leaked from an earlier function and
// breaks under --enable-var-scope) — capture the address at the matrix load.
void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a += vli;
}
337 
// Compound int matrix -= long scalar.
// Fixes (same as the add_compound sibling): use [[SCALAR]] in the trunc check
// instead of the hard-coded `%1`, and capture/reuse [[MATRIX_ADDR]] instead
// of referencing the undefined [[MAT_ADDR]].
void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a -= vli;
}
350 
// int matrix + unsigned long long scalar: truncated to i32 and splatted;
// lowering is identical to the signed-long case.
void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4

  a = a + vulli;
}
363 
// Compound int matrix += unsigned long long scalar.
// Fix: the matrix address was captured as [[MATRIX_ADDR]] but the store
// referenced the mismatched name [[MAT_ADDR]] (undefined in this CHECK-LABEL
// block); both load and store go through the same pointer, so the store must
// reuse [[MATRIX_ADDR]].
void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a += vulli;
}
376 
// Compound int matrix -= unsigned long long scalar.
// Fix (same as the add_compound sibling): the store referenced [[MAT_ADDR]]
// instead of the [[MATRIX_ADDR]] variable actually defined at the load.
void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MATRIX_ADDR]], align 4

  a -= vulli;
}
389 
// i64 matrix with a short scalar: scalar sign-extended to i64, splatted.
// For `vs + b` the splat is the FIRST add operand; for compound forms the
// matrix comes first.
void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK:         [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:    [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vs + b;
}

void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vs;
}

void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vs;
}
428 
// i64 matrix with a long scalar: both are 64-bit on this target, so no
// conversion is emitted — just the splat and the add/sub.
void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK:         [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b = vli + b;
}

void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vli;
}

void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vli;
}
464 
// scalar + matrix where the scalar already has the element type (unsigned
// long long): no conversion; the splat ends up as the LHS of the add.
void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
  b = vulli + b;
}
475 
// `+=` with an unsigned long long scalar; element types match, so only the
// splat is needed and the matrix is the LHS of the add.
void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b += vulli;
}
487 
// `-=` with an unsigned long long scalar; element types match, splat then sub.
void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8

  b -= vulli;
}
499 
500 // Tests for matrix multiplication.
501 
// 5x5 * 5x5 double matrix multiply lowers to the llvm.matrix.multiply
// intrinsic with explicit row/inner/column dimensions.
void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_matrix_matrix_double(
  // CHECK:         [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT:    [[A_ADDR:%.*]] = bitcast [25 x double]* %a to <25 x double>*
  // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* [[A_ADDR]], align 8
  // CHECK-NEXT:    ret void
  //

  dx5x5_t a;
  a = b * c;
}
515 
// `*=` for square matrices also lowers to llvm.matrix.multiply; note the RHS
// (c) is loaded before the LHS (b) for the compound form.
void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
  // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
  // CHECK:        [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:   [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:   [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
  // CHECK-NEXT:   store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:   ret void
  b *= c;
}
525 
526 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
527 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
528 // CHECK-LABEL: @multiply_matrix_matrix_int(
529 // CHECK:         [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
530 // CHECK-NEXT:    [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
531 // CHECK-NEXT:    [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
532 // CHECK-NEXT:    [[A_ADDR:%.*]] = bitcast [81 x i32]* %a to <81 x i32>*
533 // CHECK-NEXT:    store <81 x i32> [[RES]], <81 x i32>* [[A_ADDR]], align 4
534 // CHECK-NEXT:    ret void
535 //
// Non-square integer multiply: 9x3 * 3x9 -> 9x9 (CHECK lines above).
void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
  ix9x9_t a;
  a = b * c;
}
540 
541 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
542 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
543 // CHECK-NEXT:    [[S:%.*]] = load float, float* %s.addr, align 4
544 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
545 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
546 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
547 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
548 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
549 // CHECK-NEXT:    ret void
550 //
// matrix * scalar: the float scalar is fpext'ed to double before the
// splat-multiply (CHECK lines above).
void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a * s;
}
554 
555 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
556 // CHECK:         [[S:%.*]] = load float, float* %s.addr, align 4
557 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
558 // CHECK-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
559 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
560 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
561 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
562 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
563 // CHECK-NEXT:    ret void
564 //
// `*=` with a float scalar on a double matrix; scalar is loaded and extended
// before the matrix load (CHECK lines above).
void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
  a *= s;
}
568 
569 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
570 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
571 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
572 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
573 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
574 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
575 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
576 // CHECK-NEXT:    ret void
577 //
// matrix * scalar with matching double element type: splat with no
// conversion (CHECK lines above).
void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a * s;
}
581 
582 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
583 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
584 // CHECK-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
585 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
586 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
587 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
588 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
589 // CHECK-NEXT:    ret void
// `*=` with a double scalar; no conversion needed (CHECK lines above).
void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
  a *= s;
}
593 
594 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
595 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
596 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
597 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
598 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
599 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
600 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
601 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
602 // CHECK-NEXT:    ret void
603 //
// scalar * matrix with a narrowing conversion: the double scalar is
// fptrunc'ed to float, and the splat is the LHS of the fmul (CHECK lines above).
void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = s * b;
}
607 
608 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
609 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
610 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
611 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
612 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
613 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
614 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
615 // store <6 x float> %3, <6 x float>* %0, align 4
616 // ret void
multiply_compound_float_matrix_scalar_double(fx2x3_t b,double s)617 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
618   b *= s;
619 }
620 
621 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
622 // CHECK:         [[S:%.*]] = load i16, i16* %s.addr, align 2
623 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
624 // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
625 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
626 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
627 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
628 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
629 // CHECK-NEXT:    ret void
630 //
// scalar * matrix with integer promotion: the short is sext'ed to i32 and the
// splat is the LHS of the mul (CHECK lines above).
void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = s * b;
}
634 
635 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
636 // CHECK:        [[S:%.*]] = load i16, i16* %s.addr, align 2
637 // CHECK-NEXT:   [[S_EXT:%.*]] = sext i16 [[S]] to i32
638 // CHECK-NEXT:   [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
639 // CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
640 // CHECK-NEXT:   [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
641 // CHECK-NEXT:   [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
642 // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
643 // CHECK-NEXT:   ret void
644 //
// `*=` with a short scalar on an int matrix; the scalar is sign-extended to
// i32 before the splat (CHECK lines above).
void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
  b *= s;
}
648 
649 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
650 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
651 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
652 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
653 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
654 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
655 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
656 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
657 // CHECK-NEXT:    ret void
658 //
// matrix * scalar with a narrowing integer conversion: the unsigned long long
// is trunc'ed to i32 before the splat (CHECK lines above).
void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b * s;
}
662 
// `*=` with an unsigned long long scalar on an int matrix: the scalar is
// truncated to i32 before the splat-multiply.
void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
  // CHECK:         [[S:%.*]] = load i64, i64* %s.addr, align 8
  // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
  // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
  // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
  // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b *= s;
}
676 
677 // CHECK-LABEL: @multiply_float_matrix_constant(
678 // CHECK-NEXT:  entry:
679 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
680 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
681 // CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
682 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
683 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
684 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
685 // CHECK-NEXT:    ret void
686 //
// matrix * floating-point literal: the constant is folded into a splat vector
// operand, no insert/shuffle emitted (CHECK lines above).
void multiply_float_matrix_constant(fx2x3_t a) {
  a = a * 2.5;
}
690 
691 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
692 // CHECK-NEXT:  entry:
693 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
694 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
695 // CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
696 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
697 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
698 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
699 // CHECK-NEXT:    ret void
// `*=` with a floating-point literal; constant splat folded at compile time
// (CHECK lines above).
void multiply_compound_float_matrix_constant(fx2x3_t a) {
  a *= 2.5;
}
703 
704 // CHECK-LABEL: @multiply_int_matrix_constant(
705 // CHECK-NEXT:  entry:
706 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
707 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
708 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
709 // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
710 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
711 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
712 // CHECK-NEXT:    ret void
713 //
// literal * matrix: the integer constant splat is the LHS of the mul
// (CHECK lines above).
void multiply_int_matrix_constant(ix9x3_t a) {
  a = 5 * a;
}
717 
718 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
719 // CHECK-NEXT:  entry:
720 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
721 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
722 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
723 // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
724 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
725 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
726 // CHECK-NEXT:    ret void
727 //
// `*=` with an integer literal; constant splat is the RHS of the mul
// (CHECK lines above).
void multiply_compound_int_matrix_constant(ix9x3_t a) {
  a *= 5;
}
731 
732 // CHECK-LABEL: @divide_double_matrix_scalar_float(
733 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
734 // CHECK-NEXT:    [[S:%.*]] = load float, float* %s.addr, align 4
735 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
736 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
737 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
738 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
739 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
740 // CHECK-NEXT:    ret void
741 //
// matrix / scalar: float scalar fpext'ed to double, then splat fdiv
// (CHECK lines above).
void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
  a = a / s;
}
745 
746 // CHECK-LABEL: @divide_double_matrix_scalar_double(
747 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
748 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
749 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
750 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
751 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
752 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
753 // CHECK-NEXT:    ret void
754 //
// matrix / scalar with matching double element type: no conversion
// (CHECK lines above).
void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
  a = a / s;
}
758 
759 // CHECK-LABEL: @divide_float_matrix_scalar_double(
760 // CHECK:         [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
761 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
762 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
763 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
764 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
765 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
766 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
767 // CHECK-NEXT:    ret void
768 //
// matrix / scalar with narrowing: double scalar fptrunc'ed to float
// (CHECK lines above).
void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
  b = b / s;
}
772 
773 // CHECK-LABEL: @divide_int_matrix_scalar_short(
774 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
775 // CHECK-NEXT:    [[S:%.*]] = load i16, i16* %s.addr, align 2
776 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
777 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
778 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
779 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
780 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
781 // CHECK-NEXT:    ret void
782 //
// int matrix / short scalar: signed division (sdiv) after sext of the scalar
// (CHECK lines above).
void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
  b = b / s;
}
786 
787 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
788 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
789 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
790 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
791 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
792 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
793 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
794 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
795 // CHECK-NEXT:    ret void
796 //
// int matrix / unsigned long long scalar: the scalar is truncated to i32 and
// the division stays sdiv since the element type is signed (CHECK lines above).
void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
  b = b / s;
}
800 
801 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
802 // CHECK:         [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8
803 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
804 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0
805 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
806 // CHECK-NEXT:    [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
807 // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8
808 // CHECK-NEXT:    ret void
809 //
// unsigned matrix / unsigned scalar: unsigned division (udiv)
// (CHECK lines above).
void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
  b = b / s;
}
813 
814 // CHECK-LABEL: @divide_float_matrix_constant(
815 // CHECK-NEXT:  entry:
816 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
817 // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
818 // CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
819 // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
820 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
821 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
822 // CHECK-NEXT:    ret void
823 //
// matrix / literal: the constant is folded into a splat divisor
// (CHECK lines above).
void divide_float_matrix_constant(fx2x3_t a) {
  a = a / 2.5;
}
827 
828 // Tests for the matrix type operators.
829 
830 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
831 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
832 
833 // Check that we can use matrix index expression on different floating point
834 // matrixes and indices.
// Constant indices (0ll, 1u) fold to a single flat element index:
// row 0, column 1 of a 5x5 column-major matrix is element 5.
void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
  // CHECK:         [[D:%.*]] = load double, double* %d.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
  // CHECK-NEXT:    store <25 x double> [[MATINS]], <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    ret void

  a[0ll][1u] = d;
}
845 
// Mixed int/unsigned constant indices fold the same way: [1][4u] in a 5x5
// matrix is flat element 4*5+1 = 21.
void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
  // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
  // CHECK:         [[D:%.*]] = load double, double* %d.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, <25 x double>* [[MAT_ADDR:%.*]], align 8
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
  // CHECK-NEXT:    store <25 x double> [[MATINS]], <25 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    ret void

  a[1][4u] = d;
}
856 
// [1ull][1] in a 2x3 matrix folds to flat element 1*2+1 = 3.
void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
  // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
  // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[1ull][1] = e;
}
867 
// Variable indices: row index is sext'ed, column index zext'ed to i64, then
// the flat index is column * rows + row (column-major layout).
void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
  // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:    [[J_EXT:%.*]] = sext i32 [[J]] to i64
  // CHECK-NEXT:    [[K:%.*]] = load i32, i32* %k.addr, align 4
  // CHECK-NEXT:    [[K_EXT:%.*]] = zext i32 [[K]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[j][k] = e;
}
884 
// Short/unsigned-long-long index pair; the i64 index needs no extension, and
// a parenthesized base expression indexes the same way.
void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
  // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[J:%.*]] = load i16, i16* %j.addr, align 2
  // CHECK-NEXT:    [[J_EXT:%.*]] = sext i16 [[J]] to i64
  // CHECK-NEXT:    [[K:%.*]] = load i64, i64* %k.addr, align 8
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  (b)[j][k] = e;
}
900 
901 // Check that we can can use matrix index expressions on integer matrixes.
902 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index expressions: the constant column (1+1u -> 2, so offset 18 = 2*9) is
// folded while the variable row (4 + i) is computed at run time.
void insert_int_idx_expr(ix9x3_t a, int i) {
  // CHECK-LABEL: @insert_int_idx_expr(
  // CHECK:         [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
  // CHECK-NEXT:    [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
  // CHECK-NEXT:    store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  a[4 + i][1 + 1u] = i;
}
917 
918 // Check that we can can use matrix index expressions on FP and integer
919 // matrixes.
920 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Index expression through a pointer to a matrix: the pointee array type is
// bitcast to the vector type before the load/insert/store sequence.
void insert_float_into_int_matrix(ix9x3_t *a, int i) {
  // CHECK-LABEL: @insert_float_into_int_matrix(
  // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[MAT_ADDR1:%.*]] = load [27 x i32]*, [27 x i32]** %a.addr, align 8
  // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [27 x i32]* [[MAT_ADDR1]] to <27 x i32>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR2]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
  // CHECK-NEXT:    store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR2]], align 4
  // CHECK-NEXT:    ret void

  (*a)[4][1] = i;
}
933 
934 // Check that we can use overloaded matrix index expressions on matrixes with
935 // matching dimensions, but different element types.
936 typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
937 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// 3x3 double matrix insert: [2u][1u] folds to flat element 1*3+2 = 5.
void insert_matching_dimensions1(dx3x3_t a, double i) {
  // CHECK-LABEL: @insert_matching_dimensions1(
  // CHECK:         [[I:%.*]] = load double, double* %i.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x double>, <9 x double>* [[MAT_ADDR:%.*]], align 8
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
  // CHECK-NEXT:    store <9 x double> [[MATINS]], <9 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    ret void

  a[2u][1u] = i;
}
948 
// Same 3x3 dimensions, float element type: [1u][2u] folds to 2*3+1 = 7.
void insert_matching_dimensions(fx3x3_t b, float e) {
  // CHECK-LABEL: @insert_matching_dimensions(
  // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
  // CHECK-NEXT:    store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[1u][2u] = e;
}
959 
// Matrix element read with constant-folded indices: [2][3-1u] in a 5x5
// matrix is flat element 2*5+2 = 12.
double extract_double(dx5x5_t a) {
  // CHECK-LABEL: @extract_double(
  // CHECK:         [[MAT:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
  // CHECK-NEXT:    ret double [[MATEXT]]

  return a[2][3 - 1u];
}
968 
// Extract a float element and implicitly convert to the double return type:
// the fpext is emitted after the extractelement.
double extract_float(fx3x3_t b) {
  // CHECK-LABEL: @extract_float(
  // CHECK:         [[MAT:%.*]] = load <9 x float>, <9 x float>* {{.*}}, align 4
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
  // CHECK-NEXT:    [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
  // CHECK-NEXT:    ret double [[TO_DOUBLE]]

  return b[2][1];
}
978 
// The same variable is used for both indices, so it is loaded twice; the flat
// index is j*9 + j (column-major, 9 rows).
int extract_int(ix9x3_t c, unsigned long j) {
  // CHECK-LABEL: @extract_int(
  // CHECK:         [[J1:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[J2:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J2]], 9
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    ret i32 [[MATEXT]]

  return c[j][j];
}
991 
992 typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
993 
// Matrix index expression after two levels of pointer indexing: ordinary GEPs
// locate the matrix, then the vector is loaded and indexed (constant row 1 of
// a 3x2 matrix contributes the flat base offset 3 = 1*3... column-major:
// column 1 * 3 rows; variable row j is added at run time).
double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
  // CHECK-LABEL: @test_extract_matrix_pointer1(
  // CHECK:         [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT:    [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
  // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 1
  // CHECK-NEXT:    [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
  // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 2
  // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    [[IDX:%.*]] = add i64 3, [[J_EXT]]
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
  // CHECK-NEXT:    ret double [[MATEXT]]

  return ptr[1][2][j][1];
}
1010 
// Tests indexing through explicit pointer arithmetic; all indices are
// integer-constant expressions, so the element index folds to a constant
// (2 + (1 * 3 - 2) * 3 = 5).
double test_extract_matrix_pointer2(dx3x2_t **ptr) {
  // CHECK-LABEL: @test_extract_matrix_pointer2(
  // CHECK-NEXT:  entry:
  // CHECK:         [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
  // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 4
  // CHECK-NEXT:    [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
  // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 6
  // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
  // CHECK-NEXT:    ret double [[MATEXT]]

  return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
}
1025 
// Tests inserting an extracted element back into the same matrix: b[0][k]
// (runtime column k, sign-extended from short) is read and stored into
// b[2][j] (runtime column j). Each flat index is row + column * 3 rows.
//
// Fix: the extractelement directive referenced FileCheck variable [[IDX]],
// which is never defined; the computed source index is bound as [[IDX2]]
// two lines above, so the directive must use [[IDX2]].
void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
  // CHECK-LABEL: @insert_extract(
  // CHECK:         [[K:%.*]] = load i16, i16* %k.addr, align 2
  // CHECK-NEXT:    [[K_EXT:%.*]] = sext i16 [[K]] to i64
  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], 0
  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    [[J:%.*]] = load i64, i64* %j.addr, align 8
  // CHECK-NEXT:    [[IDX3:%.*]] = mul i64 [[J]], 3
  // CHECK-NEXT:    [[IDX4:%.*]] = add i64 [[IDX3]], 2
  // CHECK-NEXT:    [[MAT2:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
  // CHECK-NEXT:    store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
  // CHECK-NEXT:    ret void

  b[2][j] = b[0][k];
}
1044 
// Tests a compound assignment (-=) on a matrix element with constant
// indices: extract, subtract, then insert at the same flat index
// (2 + 3 * 5 rows = 17).
void insert_compound_stmt(dx5x5_t a) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> %a)
  // CHECK:        [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8
  // CHECK-NEXT:   [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
  // CHECK-NEXT:   [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
  // CHECK-NEXT:   [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8
  // CHECK-NEXT:   [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
  // CHECK-NEXT:   store <25 x double> [[INS]], <25 x double>* [[A_PTR]], align 8
  // CHECK-NEXT:   ret void

  a[2][3] -= 1.0;
}
1057 
// Wraps a 2x3 float matrix so the tests below can index a matrix reached
// through a struct member access.
struct Foo {
  fx2x3_t mat;
};
1061 
// Tests a compound assignment (+=) on a matrix element accessed through a
// struct member, with runtime row i and column j (both zero-extended from
// unsigned); the flat index is i + j * 2 rows.
void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
  // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(%struct.Foo* %a, float %f, i32 %i, i32 %j)
  // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
  // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
  // CHECK-NEXT:    [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
  // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
  // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
  // CHECK-NEXT:    [[MAT2:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
  // CHECK-NEXT:    [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
  // CHECK-NEXT:    store <6 x float> [[INS]], <6 x float>* [[MAT_PTR]], align 4
  // CHECK-NEXT:    ret void

  a->mat[i][j] += f;
}
1081 
// Tests using matrix element extractions (from the int matrix a) as the
// row and column indices of an insertion into a second matrix b. The two
// extractions from a use flat indices i + j * 9 and j + i * 9; the final
// insertion into b uses a[i][j] as row and a[j][i] + 2 as column, i.e.
// flat index a[i][j] + (a[j][i] + 2) * 5.
void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
  // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> %a, i32 %i, i32 %j, <25 x double> %b)
  // CHECK:       [[I1:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:  [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
  // CHECK-NEXT:  [[J1:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:  [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
  // CHECK-NEXT:  [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
  // CHECK-NEXT:  [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
  // CHECK-NEXT:  [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
  // CHECK-NEXT:  [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
  // CHECK-NEXT:  [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
  // CHECK-NEXT:  [[J2:%.*]] = load i32, i32* %j.addr, align 4
  // CHECK-NEXT:  [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
  // CHECK-NEXT:  [[I2:%.*]] = load i32, i32* %i.addr, align 4
  // CHECK-NEXT:  [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
  // CHECK-NEXT:  [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
  // CHECK-NEXT:  [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
  // CHECK-NEXT:  [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
  // CHECK-NEXT:  [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
  // CHECK-NEXT:  [[MI3:%.*]] = add nsw i32 [[MI2]], 2
  // CHECK-NEXT:  [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
  // CHECK-NEXT:  [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
  // CHECK-NEXT:  [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
  // CHECK-NEXT:  [[B:%.*]] = load <25 x double>, <25 x double>* [[B_PTR:%.*]], align 8
  // CHECK-NEXT:  [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
  // CHECK-NEXT:  store <25 x double> [[INS]], <25 x double>* [[B_PTR]], align 8
  b[a[i][j]][a[j][i] + 2] = 1.5;
}
1110