; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
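; This file checks the SVE calling convention for vector tuple types: a tuple
; built with the llvm.aarch64.sve.tuple.create{2,3,4} intrinsics is returned
; and passed in consecutive Z registers starting at z0. Because the tuple
; elements arrive in higher-numbered argument registers in these tests, the
; expected codegen is a short sequence of "mov zN.d, zM.d" copies that shifts
; them down before the ret or bl.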
;
; svint8x2_t
;

define <vscale x 32 x i8> @ret_svint8x2_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
; CHECK-LABEL: ret_svint8x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
  ret <vscale x 32 x i8> %tuple
}

define void @call_svint8x2_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %dummy_z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: call_svint8x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint8x2_t
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z3)
  call void @callee_svint8x2_t(<vscale x 32 x i8> %tuple)
  ret void
}
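; For reference, a source-level equivalent of the pair of functions above,
; sketched with the ACLE intrinsics from arm_sve.h (svcreate2 is the C
; counterpart of llvm.aarch64.sve.tuple.create2; callee_svint8x2_t is this
; test's own callee, not a library function):
;
;   #include <arm_sve.h>
;   void callee_svint8x2_t(svint8x2_t);
;   svint8x2_t ret_svint8x2_t(svint8_t unused_z0, svint8_t z1, svint8_t z2) {
;     return svcreate2(z1, z2);   // tuple occupies z0-z1 on return
;   }
;   void call_svint8x2_t(svint8_t d0, svint8_t z1, svint8_t d2, svint8_t z3) {
;     callee_svint8x2_t(svcreate2(z1, z3));   // tuple passed in z0-z1
;   }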
;
; svint16x2_t
;

define <vscale x 16 x i16> @ret_svint16x2_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
; CHECK-LABEL: ret_svint16x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
  ret <vscale x 16 x i16> %tuple
}

define void @call_svint16x2_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %dummy_z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: call_svint16x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint16x2_t
  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z3)
  call void @callee_svint16x2_t(<vscale x 16 x i16> %tuple)
  ret void
}

;
; svint32x2_t
;

define <vscale x 8 x i32> @ret_svint32x2_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
; CHECK-LABEL: ret_svint32x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  ret <vscale x 8 x i32> %tuple
}

define void @call_svint32x2_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %dummy_z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: call_svint32x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint32x2_t
  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z3)
  call void @callee_svint32x2_t(<vscale x 8 x i32> %tuple)
  ret void
}

;
; svint64x2_t
;

define <vscale x 4 x i64> @ret_svint64x2_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
; CHECK-LABEL: ret_svint64x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
  ret <vscale x 4 x i64> %tuple
}

define void @call_svint64x2_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %dummy_z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: call_svint64x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint64x2_t
  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z3)
  call void @callee_svint64x2_t(<vscale x 4 x i64> %tuple)
  ret void
}

;
; svfloatx2_t
;

define <vscale x 8 x float> @ret_svfloatx2_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
; CHECK-LABEL: ret_svfloatx2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
  ret <vscale x 8 x float> %tuple
}

define void @call_svfloatx2_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %dummy_z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: call_svfloatx2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svfloatx2_t
  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z3)
  call void @callee_svfloatx2_t(<vscale x 8 x float> %tuple)
  ret void
}

;
; svdoublex2_t
;

define <vscale x 4 x double> @ret_svdoublex2_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
; CHECK-LABEL: ret_svdoublex2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
  ret <vscale x 4 x double> %tuple
}

define void @call_svdoublex2_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %dummy_z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: call_svdoublex2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svdoublex2_t
  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z3)
  call void @callee_svdoublex2_t(<vscale x 4 x double> %tuple)
  ret void
}

;
; svint8x3_t
;

define <vscale x 48 x i8> @ret_svint8x3_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: ret_svint8x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
  ret <vscale x 48 x i8> %tuple
}

define void @call_svint8x3_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4) #0 {
; CHECK-LABEL: call_svint8x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint8x3_t
  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4)
  call void @callee_svint8x3_t(<vscale x 48 x i8> %tuple)
  ret void
}
;
; svint16x3_t
;

define <vscale x 24 x i16> @ret_svint16x3_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: ret_svint16x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
  ret <vscale x 24 x i16> %tuple
}

define void @call_svint16x3_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4) #0 {
; CHECK-LABEL: call_svint16x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint16x3_t
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4)
  call void @callee_svint16x3_t(<vscale x 24 x i16> %tuple)
  ret void
}

;
; svint32x3_t
;

define <vscale x 12 x i32> @ret_svint32x3_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: ret_svint32x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  ret <vscale x 12 x i32> %tuple
}

define void @call_svint32x3_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4) #0 {
; CHECK-LABEL: call_svint32x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint32x3_t
  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4)
  call void @callee_svint32x3_t(<vscale x 12 x i32> %tuple)
  ret void
}

;
; svint64x3_t
;

define <vscale x 6 x i64> @ret_svint64x3_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: ret_svint64x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
  ret <vscale x 6 x i64> %tuple
}

define void @call_svint64x3_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4) #0 {
; CHECK-LABEL: call_svint64x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint64x3_t
  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4)
  call void @callee_svint64x3_t(<vscale x 6 x i64> %tuple)
  ret void
}

;
; svfloatx3_t
;

define <vscale x 12 x float> @ret_svfloatx3_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: ret_svfloatx3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
  ret <vscale x 12 x float> %tuple
}

define void @call_svfloatx3_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4) #0 {
; CHECK-LABEL: call_svfloatx3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svfloatx3_t
  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4)
  call void @callee_svfloatx3_t(<vscale x 12 x float> %tuple)
  ret void
}

;
; svdoublex3_t
;

define <vscale x 6 x double> @ret_svdoublex3_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: ret_svdoublex3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
  ret <vscale x 6 x double> %tuple
}

define void @call_svdoublex3_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4) #0 {
; CHECK-LABEL: call_svdoublex3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svdoublex3_t
  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4)
  call void @callee_svdoublex3_t(<vscale x 6 x double> %tuple)
  ret void
}

;
; svint8x4_t
;

define <vscale x 64 x i8> @ret_svint8x4_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) #0 {
; CHECK-LABEL: ret_svint8x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
  ret <vscale x 64 x i8> %tuple
}

define void @call_svint8x4_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5) #0 {
; CHECK-LABEL: call_svint8x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint8x4_t
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5)
  call void @callee_svint8x4_t(<vscale x 64 x i8> %tuple)
  ret void
}
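; Note: in the x4 call tests the copies are not checked in ascending register
; order; "mov z3.d, z5.d" is pinned first, matching what the compiler currently
; emits. Semantically, any copy order is correct as long as no source register
; is overwritten before it is read (z1 and z2 are both sources and destinations
; in this sequence, so their copies must stay in ascending order).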
;
; svint16x4_t
;

define <vscale x 32 x i16> @ret_svint16x4_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4) #0 {
; CHECK-LABEL: ret_svint16x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4)
  ret <vscale x 32 x i16> %tuple
}

define void @call_svint16x4_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5) #0 {
; CHECK-LABEL: call_svint16x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint16x4_t
  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5)
  call void @callee_svint16x4_t(<vscale x 32 x i16> %tuple)
  ret void
}

;
; svint32x4_t
;

define <vscale x 16 x i32> @ret_svint32x4_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4) #0 {
; CHECK-LABEL: ret_svint32x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4)
  ret <vscale x 16 x i32> %tuple
}

define void @call_svint32x4_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: call_svint32x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint32x4_t
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5)
  call void @callee_svint32x4_t(<vscale x 16 x i32> %tuple)
  ret void
}

;
; svint64x4_t
;

define <vscale x 8 x i64> @ret_svint64x4_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4) #0 {
; CHECK-LABEL: ret_svint64x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4)
  ret <vscale x 8 x i64> %tuple
}

define void @call_svint64x4_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5) #0 {
; CHECK-LABEL: call_svint64x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint64x4_t
  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5)
  call void @callee_svint64x4_t(<vscale x 8 x i64> %tuple)
  ret void
}
;
; svfloatx4_t
;

define <vscale x 16 x float> @ret_svfloatx4_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4) #0 {
; CHECK-LABEL: ret_svfloatx4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4)
  ret <vscale x 16 x float> %tuple
}

define void @call_svfloatx4_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5) #0 {
; CHECK-LABEL: call_svfloatx4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svfloatx4_t
  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5)
  call void @callee_svfloatx4_t(<vscale x 16 x float> %tuple)
  ret void
}

;
; svdoublex4_t
;

define <vscale x 8 x double> @ret_svdoublex4_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4) #0 {
; CHECK-LABEL: ret_svdoublex4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4)
  ret <vscale x 8 x double> %tuple
}

define void @call_svdoublex4_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5) #0 {
; CHECK-LABEL: call_svdoublex4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svdoublex4_t
  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5)
  call void @callee_svdoublex4_t(<vscale x 8 x double> %tuple)
  ret void
}

attributes #0 = { nounwind "target-features"="+sve" }
declare void @callee_svint8x2_t(<vscale x 32 x i8>)
declare void @callee_svint16x2_t(<vscale x 16 x i16>)
declare void @callee_svint32x2_t(<vscale x 8 x i32>)
declare void @callee_svint64x2_t(<vscale x 4 x i64>)
declare void @callee_svfloatx2_t(<vscale x 8 x float>)
declare void @callee_svdoublex2_t(<vscale x 4 x double>)

declare void @callee_svint8x3_t(<vscale x 48 x i8>)
declare void @callee_svint16x3_t(<vscale x 24 x i16>)
declare void @callee_svint32x3_t(<vscale x 12 x i32>)
declare void @callee_svint64x3_t(<vscale x 6 x i64>)
declare void @callee_svfloatx3_t(<vscale x 12 x float>)
declare void @callee_svdoublex3_t(<vscale x 6 x double>)

declare void @callee_svint8x4_t(<vscale x 64 x i8>)
declare void @callee_svint16x4_t(<vscale x 32 x i16>)
declare void @callee_svint32x4_t(<vscale x 16 x i32>)
declare void @callee_svint64x4_t(<vscale x 8 x i64>)
declare void @callee_svfloatx4_t(<vscale x 16 x float>)
declare void @callee_svdoublex4_t(<vscale x 8 x double>)

; x2
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; x3
declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

; x4
declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)