; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; CADD (complex integer add, rotating the second operand by 90 or 270 degrees)
;

define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cadd_b:
; CHECK: cadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cadd_h:
; CHECK: cadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cadd_s:
; CHECK: cadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cadd_d:
; CHECK: cadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  i32 270)
  ret <vscale x 2 x i64> %out
}

;
; SQCADD (saturating complex integer add)
;

define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqcadd_b:
; CHECK: sqcadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b,
                                                                    i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqcadd_h:
; CHECK: sqcadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqcadd_s:
; CHECK: sqcadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqcadd_d:
; CHECK: sqcadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    i32 270)
  ret <vscale x 2 x i64> %out
}

;
; CMLA (complex integer multiply-add; rotation may be 0, 90, 180 or 270)
;

define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: cmla_b:
; CHECK: cmla z0.b, z1.b, z2.b, #90
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c,
                                                                  i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_h:
; CHECK: cmla z0.h, z1.h, z2.h, #180
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c,
                                                                  i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_s:
; CHECK: cmla z0.s, z1.s, z2.s, #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c,
                                                                  i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: cmla_d:
; CHECK: cmla z0.d, z1.d, z2.d, #0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c,
                                                                  i32 0)
  ret <vscale x 2 x i64> %out
}

;
; CMLA_LANE (indexed form; only h and s element sizes exist)
;

define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_lane_h:
; CHECK: cmla z0.h, z1.h, z2.h[1], #180
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       <vscale x 8 x i16> %c,
                                                                       i32 1,
                                                                       i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_lane_s:
; CHECK: cmla z0.s, z1.s, z2.s[0], #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       <vscale x 4 x i32> %c,
                                                                       i32 0,
                                                                       i32 270)
  ret <vscale x 4 x i32> %out
}

;
; QRDCMLAH (saturating rounding doubling complex multiply-add)
;

define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdcmlah_b:
; CHECK: sqrdcmlah z0.b, z1.b, z2.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                       <vscale x 16 x i8> %b,
                                                                       <vscale x 16 x i8> %c,
                                                                       i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_h:
; CHECK: sqrdcmlah z0.h, z1.h, z2.h, #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       <vscale x 8 x i16> %c,
                                                                       i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_s:
; CHECK: sqrdcmlah z0.s, z1.s, z2.s, #180
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       <vscale x 4 x i32> %c,
                                                                       i32 180)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdcmlah_d:
; CHECK: sqrdcmlah z0.d, z1.d, z2.d, #270
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                       <vscale x 2 x i64> %b,
                                                                       <vscale x 2 x i64> %c,
                                                                       i32 270)
  ret <vscale x 2 x i64> %out
}

;
; QRDCMLAH_LANE (indexed form; only h and s element sizes exist)
;

define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_lane_h:
; CHECK: sqrdcmlah z0.h, z1.h, z2.h[1], #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                            <vscale x 8 x i16> %b,
                                                                            <vscale x 8 x i16> %c,
                                                                            i32 1,
                                                                            i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_lane_s:
; CHECK: sqrdcmlah z0.s, z1.s, z2.s[0], #180
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                            <vscale x 4 x i32> %b,
                                                                            <vscale x 4 x i32> %c,
                                                                            i32 0,
                                                                            i32 180)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)