1; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+i8mm -asm-verbose=0 < %s -o - 2>%t | FileCheck %s
2; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
3
4; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
5; WARN-NOT: warning
6
; SMMLA: signed 8-bit integer matrix multiply-accumulate (+i8mm).
; Verifies the intrinsic lowers to a single unpredicated instruction with
; the accumulator in z0 and the two i8 source vectors in z1/z2.
define <vscale x 4 x i32> @smmla(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: smmla:
; CHECK-NEXT:  smmla   z0.s, z1.b, z2.b
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 4 x i32> %val
}
15
; UMMLA: unsigned 8-bit integer matrix multiply-accumulate (+i8mm).
define <vscale x 4 x i32> @ummla(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: ummla:
; CHECK-NEXT:  ummla   z0.s, z1.b, z2.b
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 4 x i32> %val
}
24
; USMMLA: mixed-sign matrix multiply-accumulate — first source unsigned,
; second source signed (+i8mm).
define <vscale x 4 x i32> @usmmla(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: usmmla:
; CHECK-NEXT:  usmmla   z0.s, z1.b, z2.b
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usmmla.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 4 x i32> %val
}
33
; USDOT (vectors form): mixed-sign dot product, unsigned x signed i8
; accumulating into i32 lanes (+i8mm).
define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: usdot:
; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 4 x i32> %val
}
42
; USDOT (indexed form), lane index 0 — lower bound of the index range
; exercised by this file (0-3).
define <vscale x 4 x i32> @usdot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: usdot_lane_0:
; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[0]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
  ret <vscale x 4 x i32> %val
}
51
; USDOT (indexed form), lane index 1.
define <vscale x 4 x i32> @usdot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: usdot_lane_1:
; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[1]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
  ret <vscale x 4 x i32> %val
}
60
; USDOT (indexed form), lane index 2.
define <vscale x 4 x i32> @usdot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: usdot_lane_2:
; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[2]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
  ret <vscale x 4 x i32> %val
}
69
; USDOT (indexed form), lane index 3 — upper bound of the index range
; exercised by this file (0-3).
define <vscale x 4 x i32> @usdot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: usdot_lane_3:
; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[3]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
  ret <vscale x 4 x i32> %val
}
78
; SUDOT (indexed form): signed x unsigned mixed-sign dot product,
; lane index 0. NOTE(review): only the indexed form is tested here;
; presumably no vectors-form sudot intrinsic exists — confirm.
define <vscale x 4 x i32> @sudot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: sudot_lane_0:
; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[0]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
  ret <vscale x 4 x i32> %val
}
87
; SUDOT (indexed form), lane index 1.
define <vscale x 4 x i32> @sudot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: sudot_lane_1:
; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[1]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
  ret <vscale x 4 x i32> %val
}
96
; SUDOT (indexed form), lane index 2.
define <vscale x 4 x i32> @sudot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: sudot_lane_2:
; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[2]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
  ret <vscale x 4 x i32> %val
}
105
; SUDOT (indexed form), lane index 3 — upper bound of the index range
; exercised by this file (0-3).
define <vscale x 4 x i32> @sudot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
entry:
; CHECK-LABEL: sudot_lane_3:
; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[3]
; CHECK-NEXT:  ret
  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
  ret <vscale x 4 x i32> %val
}
114
115
; Matrix multiply-accumulate intrinsics (accumulator, i8 src, i8 src).
declare <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usmmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)

; Dot-product intrinsics; the .lane variants take a trailing i32 lane index.
declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
123
124