; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Every test below builds a splat (lane 0 replicated into all lanes) and
; verifies that llc selects a single AVX-512 broadcast instruction for it.

; i32 scalar in a GPR splatted into 16 lanes.
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <16 x i32> undef, i32 %a, i32 0
  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %c
}

; i64 scalar in a GPR splatted into 8 lanes.
define <8 x i64> @_inreg8xi64(i64 %a) {
; CHECK-LABEL: _inreg8xi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %c
}

; Lane 0 of a <4 x float> argument widened and splatted to 16 lanes.
define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
; CHECK-LABEL: _ss16xfloat_v4:
; CHECK: vbroadcastss %xmm0, %zmm0
; CHECK: ret
  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %b
}

; float scalar argument splatted into 16 lanes.
define <16 x float> @_inreg16xfloat(float %a) {
; CHECK-LABEL: _inreg16xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %c
}

; Float splat selected against pass-through vector %i: expects a
; merge-masked broadcast.
define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_mask:
; CHECK: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK: ret
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
  ret <16 x float> %r
}

; Float splat selected against zero: expects a zero-masked broadcast.
define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_maskz:
; CHECK: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK: ret
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %r
}

; Float loaded from memory and splatted: the load is expected to be folded
; into the broadcast's memory operand.
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
; CHECK-LABEL: _ss16xfloat_load:
; CHECK: vbroadcastss (%rdi), %zmm0
; CHECK: ret
  %a = load float* %a.ptr
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %c
}

; Folded-load float splat, merge-masked against %i.
define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_mask_load:
; CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK: ret
  %a = load float* %a.ptr
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
  ret <16 x float> %r
}

; Folded-load float splat, zero-masked.
define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_maskz_load:
; CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK: ret
  %a = load float* %a.ptr
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %r
}

; double scalar argument splatted into 8 lanes.
define <8 x double> @_inreg8xdouble(double %a) {
; CHECK-LABEL: _inreg8xdouble:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %c
}

; Double splat selected against pass-through vector %i: expects a
; merge-masked broadcast.
define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_mask:
; CHECK: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK: ret
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
  ret <8 x double> %r
}

; Double splat selected against zero: expects a zero-masked broadcast.
define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_maskz:
; CHECK: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK: ret
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Double loaded from memory and splatted: the load is expected to be folded
; into the broadcast's memory operand.
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
; CHECK-LABEL: _sd8xdouble_load:
; CHECK: vbroadcastsd (%rdi), %zmm0
; CHECK: ret
  %a = load double* %a.ptr
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %c
}

; Folded-load double splat, merge-masked against %i.
define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_mask_load:
; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK: ret
  %a = load double* %a.ptr
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
  ret <8 x double> %r
}

; Folded-load double splat, zero-masked.
define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_maskz_load:
; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK: ret
  %a = load double* %a.ptr
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Splat of lane 0 of a full-width <16 x i32> argument.
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
; CHECK-LABEL: _xmm16xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %b
}

; Splat of lane 0 of a full-width <16 x float> argument.
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
; CHECK-LABEL: _xmm16xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %b
}

; Select between two sign-extended <16 x i1> vectors, keyed on an unordered
; compare of undef against zero. The check lines pin the exact instruction
; sequence, including the zero-masked broadcast of a constant loaded
; RIP-relative.
define <16 x i32> @test_vbroadcast() {
; CHECK-LABEL: test_vbroadcast:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vcmpunordps %zmm0, %zmm0, %k1
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK-NEXT: knotw %k1, %k1
; CHECK-NEXT: vmovdqu32 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
entry:
  %0 = sext <16 x i1> zeroinitializer to <16 x i32>
  %1 = fcmp uno <16 x float> undef, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i32>
  %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
  ret <16 x i32> %3
}

; We implement the set1 intrinsics with vector initializers.  Verify that the
; IR generated will produce broadcasts at the end.
; All eight double lanes are written with %d, one insertelement per lane;
; the expected lowering is a single vbroadcastsd.
define <8 x double> @test_set1_pd(double %d) #2 {
; CHECK-LABEL: test_set1_pd:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %lane0 = insertelement <8 x double> undef, double %d, i32 0
  %lane1 = insertelement <8 x double> %lane0, double %d, i32 1
  %lane2 = insertelement <8 x double> %lane1, double %d, i32 2
  %lane3 = insertelement <8 x double> %lane2, double %d, i32 3
  %lane4 = insertelement <8 x double> %lane3, double %d, i32 4
  %lane5 = insertelement <8 x double> %lane4, double %d, i32 5
  %lane6 = insertelement <8 x double> %lane5, double %d, i32 6
  %lane7 = insertelement <8 x double> %lane6, double %d, i32 7
  ret <8 x double> %lane7
}

; All eight i64 lanes are written with %d; the expected lowering is a single
; vpbroadcastq from the GPR holding the argument.
define <8 x i64> @test_set1_epi64(i64 %d) #2 {
; CHECK-LABEL: test_set1_epi64:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
; CHECK-NEXT: retq
entry:
  %lane0 = insertelement <8 x i64> undef, i64 %d, i32 0
  %lane1 = insertelement <8 x i64> %lane0, i64 %d, i32 1
  %lane2 = insertelement <8 x i64> %lane1, i64 %d, i32 2
  %lane3 = insertelement <8 x i64> %lane2, i64 %d, i32 3
  %lane4 = insertelement <8 x i64> %lane3, i64 %d, i32 4
  %lane5 = insertelement <8 x i64> %lane4, i64 %d, i32 5
  %lane6 = insertelement <8 x i64> %lane5, i64 %d, i32 6
  %lane7 = insertelement <8 x i64> %lane6, i64 %d, i32 7
  ret <8 x i64> %lane7
}

; All sixteen float lanes are written with %f; the expected lowering is a
; single vbroadcastss.
define <16 x float> @test_set1_ps(float %f) #2 {
; CHECK-LABEL: test_set1_ps:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %lane0 = insertelement <16 x float> undef, float %f, i32 0
  %lane1 = insertelement <16 x float> %lane0, float %f, i32 1
  %lane2 = insertelement <16 x float> %lane1, float %f, i32 2
  %lane3 = insertelement <16 x float> %lane2, float %f, i32 3
  %lane4 = insertelement <16 x float> %lane3, float %f, i32 4
  %lane5 = insertelement <16 x float> %lane4, float %f, i32 5
  %lane6 = insertelement <16 x float> %lane5, float %f, i32 6
  %lane7 = insertelement <16 x float> %lane6, float %f, i32 7
  %lane8 = insertelement <16 x float> %lane7, float %f, i32 8
  %lane9 = insertelement <16 x float> %lane8, float %f, i32 9
  %lane10 = insertelement <16 x float> %lane9, float %f, i32 10
  %lane11 = insertelement <16 x float> %lane10, float %f, i32 11
  %lane12 = insertelement <16 x float> %lane11, float %f, i32 12
  %lane13 = insertelement <16 x float> %lane12, float %f, i32 13
  %lane14 = insertelement <16 x float> %lane13, float %f, i32 14
  %lane15 = insertelement <16 x float> %lane14, float %f, i32 15
  ret <16 x float> %lane15
}

; All sixteen i32 lanes are written with %f; the expected lowering is a
; single vpbroadcastd from the GPR holding the argument.
define <16 x i32> @test_set1_epi32(i32 %f) #2 {
; CHECK-LABEL: test_set1_epi32:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
; CHECK-NEXT: retq
entry:
  %lane0 = insertelement <16 x i32> undef, i32 %f, i32 0
  %lane1 = insertelement <16 x i32> %lane0, i32 %f, i32 1
  %lane2 = insertelement <16 x i32> %lane1, i32 %f, i32 2
  %lane3 = insertelement <16 x i32> %lane2, i32 %f, i32 3
  %lane4 = insertelement <16 x i32> %lane3, i32 %f, i32 4
  %lane5 = insertelement <16 x i32> %lane4, i32 %f, i32 5
  %lane6 = insertelement <16 x i32> %lane5, i32 %f, i32 6
  %lane7 = insertelement <16 x i32> %lane6, i32 %f, i32 7
  %lane8 = insertelement <16 x i32> %lane7, i32 %f, i32 8
  %lane9 = insertelement <16 x i32> %lane8, i32 %f, i32 9
  %lane10 = insertelement <16 x i32> %lane9, i32 %f, i32 10
  %lane11 = insertelement <16 x i32> %lane10, i32 %f, i32 11
  %lane12 = insertelement <16 x i32> %lane11, i32 %f, i32 12
  %lane13 = insertelement <16 x i32> %lane12, i32 %f, i32 13
  %lane14 = insertelement <16 x i32> %lane13, i32 %f, i32 14
  %lane15 = insertelement <16 x i32> %lane14, i32 %f, i32 15
  ret <16 x i32> %lane15
}

; The scalar broadcast intrinsics are likewise expressed as vector
; initializers: element 0 of the 128-bit source is extracted and written to
; every lane. Verify that this IR still ends up as one broadcast.
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
; CHECK-LABEL: test_mm512_broadcastsd_pd:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %low = extractelement <2 x double> %a, i32 0
  %lane0 = insertelement <8 x double> undef, double %low, i32 0
  %lane1 = insertelement <8 x double> %lane0, double %low, i32 1
  %lane2 = insertelement <8 x double> %lane1, double %low, i32 2
  %lane3 = insertelement <8 x double> %lane2, double %low, i32 3
  %lane4 = insertelement <8 x double> %lane3, double %low, i32 4
  %lane5 = insertelement <8 x double> %lane4, double %low, i32 5
  %lane6 = insertelement <8 x double> %lane5, double %low, i32 6
  %lane7 = insertelement <8 x double> %lane6, double %low, i32 7
  ret <8 x double> %lane7
}