; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s

; Unless noted otherwise, each test creates a vector tuple, inserts z5 into
; one of its elements, and finally extracts that same element from the wide
; vector to return it. The checks ensure that z5 is the value that is returned.
; The "_ret_" tests instead read back an element that was not written, and the
; "get_" tests extract from a tuple that is passed in as an argument.

;
; Insert into two element tuples
;

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  {     z5     , tuple2.res1 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  { tuple2.res0,     z5      }
; extract z5:                    ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  { tuple2.res0,     z5      }
; extract z0:       ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; Element 0 is still %z0, which the checks expect to already be in the
  ; return register, so no move is expected before the return.
  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
  ; The checks expect element 1 of the incoming tuple to arrive in z1.
  ; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
  ; CHECK-NEXT: mov z0.d, z1.d
  ; CHECK-NEXT: ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
  ret <vscale x 4 x i32> %ext
}

;
; Insert into three element tuples
;

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  {     z5     , tuple3.res1, tuple3.res2 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  { tuple3.res0,     z5     , tuple3.res2 }
; extract z5:                    ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  { tuple3.res0, tuple3.res1,     z5      }
; extract z5:                                 ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  { tuple3.res0,     z5     , tuple3.res2 }
; extract z2:                                 ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; Element 1 is overwritten with %z5, but element 2 is read back, so the
  ; checks expect the original %z2 to be returned.
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
  ; CHECK-NEXT: mov z0.d, z2.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
  ; The leading %z0 argument comes before the tuple; the checks expect
  ; element 2 of the tuple to arrive in z3.
  ; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
  ; CHECK-NEXT: mov z0.d, z3.d
  ; CHECK-NEXT: ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
  ret <vscale x 4 x i32> %ext
}

;
; Insert into four element tuples
;

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
; extract z5:                    ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
; extract z5:                                 ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
; extract z5:                                              ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
; extract z2:                                 ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; Element 3 is overwritten with %z5, but element 2 is read back, so the
  ; checks expect the original %z2 to be returned.
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
  ; CHECK-NEXT: mov z0.d, z2.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
  ; The checks expect element 3 of the incoming tuple to arrive in z3.
  ; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
  ; CHECK-NEXT: mov z0.d, z3.d
  ; CHECK-NEXT: ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
  ret <vscale x 4 x i32> %ext
}

; Attribute group shared by every test function in this file.
attributes #0 = { nounwind "target-features"="+sve" }

; Two-element tuple intrinsics (2 x nxv4i32 -> nxv8i32).
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)

; Three-element tuple intrinsics (3 x nxv4i32 -> nxv12i32).
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)

; Four-element tuple intrinsics (4 x nxv4i32 -> nxv16i32).
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)