; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Register-register add of two <8 x double> (512-bit) vectors; the expected
; output contains vaddpd followed by ret.
; CHECK-LABEL: addpd512
; CHECK: vaddpd
; CHECK: ret
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %sum = fadd <8 x double> %x, %y
  ret <8 x double> %sum
}

; <8 x double> add with a non-splat constant vector; the expected vaddpd takes
; its operand through an LCP label addressed relative to %rip.
; CHECK-LABEL: addpd512fold
; CHECK: vaddpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @addpd512fold(<8 x double> %y) {
entry:
  %sum = fadd <8 x double> %y,
      <double 4.500000e+00, double 3.400000e+00,
       double 2.300000e+00, double 1.200000e+00,
       double 4.500000e+00, double 3.800000e+00,
       double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %sum
}

; Register-register add of two <16 x float> (512-bit) vectors; expects vaddps.
; CHECK-LABEL: addps512
; CHECK: vaddps
; CHECK: ret
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
entry:
  %sum = fadd <16 x float> %x, %y
  ret <16 x float> %sum
}

; <16 x float> add with a non-splat constant vector; the expected vaddps takes
; its operand through an LCP label addressed relative to %rip.
; CHECK-LABEL: addps512fold
; CHECK: vaddps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @addps512fold(<16 x float> %y) {
entry:
  %sum = fadd <16 x float> %y,
      <float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 4.500000e+00,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %sum
}

; Register-register subtract (%x - %y) of <8 x double> vectors; expects vsubpd.
; CHECK-LABEL: subpd512
; CHECK: vsubpd
; CHECK: ret
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %diff = fsub <8 x double> %x, %y
  ret <8 x double> %diff
}

; <8 x double> subtract whose right-hand operand is loaded through a pointer
; argument; the expected vsubpd has a memory operand of the form "(%reg".
; CHECK-LABEL: @subpd512fold
; CHECK: vsubpd (%
; CHECK: ret
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
entry:
  %rhs = load <8 x double>* %x, align 8
  %diff = fsub <8 x double> %y, %rhs
  ret <8 x double> %diff
}

; Register-register subtract (%x - %y) of <16 x float> vectors; expects vsubps.
; CHECK-LABEL: @subps512
; CHECK: vsubps
; CHECK: ret
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
entry:
  %diff = fsub <16 x float> %x, %y
  ret <16 x float> %diff
}

; <16 x float> subtract whose right-hand operand is loaded through a pointer
; argument; the expected vsubps has a memory operand of the form "(%reg".
; CHECK-LABEL: subps512fold
; CHECK: vsubps (%
; CHECK: ret
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
entry:
  %rhs = load <16 x float>* %x, align 4
  %diff = fsub <16 x float> %y, %rhs
  ret <16 x float> %diff
}

; Integer multiply of <8 x i64> vectors; the expected output contains
; vpmuludq at least twice before ret.
; CHECK-LABEL: imulq512
; CHECK: vpmuludq
; CHECK: vpmuludq
; CHECK: ret
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
  %prod = mul <8 x i64> %x, %y
  ret <8 x i64> %prod
}

; Register-register multiply of <8 x double> vectors; expects vmulpd.
; CHECK-LABEL: mulpd512
; CHECK: vmulpd
; CHECK: ret
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %prod = fmul <8 x double> %x, %y
  ret <8 x double> %prod
}

; <8 x double> multiply by a non-splat constant vector; the expected vmulpd
; takes its operand through an LCP label addressed relative to %rip.
; CHECK-LABEL: mulpd512fold
; CHECK: vmulpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @mulpd512fold(<8 x double> %y) {
entry:
  %prod = fmul <8 x double> %y,
      <double 4.500000e+00, double 3.400000e+00,
       double 2.300000e+00, double 1.200000e+00,
       double 4.500000e+00, double 3.400000e+00,
       double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %prod
}

; Register-register multiply of <16 x float> vectors; expects vmulps.
; CHECK-LABEL: mulps512
; CHECK: vmulps
; CHECK: ret
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
entry:
  %prod = fmul <16 x float> %x, %y
  ret <16 x float> %prod
}

; <16 x float> multiply by a non-splat constant vector; the expected vmulps
; takes its operand through an LCP label addressed relative to %rip.
; CHECK-LABEL: mulps512fold
; CHECK: vmulps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @mulps512fold(<16 x float> %y) {
entry:
  %prod = fmul <16 x float> %y,
      <float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %prod
}

; Register-register divide (%x / %y) of <8 x double> vectors; expects vdivpd.
; CHECK-LABEL: divpd512
; CHECK: vdivpd
; CHECK: ret
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %quot = fdiv <8 x double> %x, %y
  ret <8 x double> %quot
}

; <8 x double> divide by a non-splat constant vector; the expected vdivpd
; takes its operand through an LCP label addressed relative to %rip.
; CHECK-LABEL: divpd512fold
; CHECK: vdivpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @divpd512fold(<8 x double> %y) {
entry:
  %quot = fdiv <8 x double> %y,
      <double 4.500000e+00, double 3.400000e+00,
       double 2.300000e+00, double 1.200000e+00,
       double 4.500000e+00, double 3.400000e+00,
       double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %quot
}

; Register-register divide (%x / %y) of <16 x float> vectors; expects vdivps.
; CHECK-LABEL: divps512
; CHECK: vdivps
; CHECK: ret
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
entry:
  %quot = fdiv <16 x float> %x, %y
  ret <16 x float> %quot
}

; <16 x float> divide by a non-splat constant vector; the expected vdivps
; takes its operand through an LCP label addressed relative to %rip.
; CHECK-LABEL: divps512fold
; CHECK: vdivps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @divps512fold(<16 x float> %y) {
entry:
  %quot = fdiv <16 x float> %y,
      <float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 4.500000e+00,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000,
       float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 4.500000e+00,
       float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %quot
}

; Integer add of <8 x i64> vectors; expects vpaddq with a %zmm operand.
; CHECK-LABEL: vpaddq_test
; CHECK: vpaddq %zmm
; CHECK: ret
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %sum = add <8 x i64> %i, %j
  ret <8 x i64> %sum
}

; Integer add of <16 x i32> vectors; expects vpaddd with a %zmm operand.
; CHECK-LABEL: vpaddd_test
; CHECK: vpaddd %zmm
; CHECK: ret
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %sum = add <16 x i32> %i, %j
  ret <16 x i32> %sum
}

; Integer subtract of <8 x i64> vectors; expects vpsubq with a %zmm operand.
; CHECK-LABEL: vpsubq_test
; CHECK: vpsubq %zmm
; CHECK: ret
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %diff = sub <8 x i64> %i, %j
  ret <8 x i64> %diff
}

; Integer subtract of <16 x i32> vectors; expects vpsubd.
; CHECK-LABEL: vpsubd_test
; CHECK: vpsubd
; CHECK: ret
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %diff = sub <16 x i32> %i, %j
  ret <16 x i32> %diff
}

; Integer multiply of <16 x i32> vectors; expects vpmulld with a %zmm operand.
; CHECK-LABEL: vpmulld_test
; CHECK: vpmulld %zmm
; CHECK: ret
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
  %prod = mul <16 x i32> %i, %j
  ret <16 x i32> %prod
}

; Scalar float square root through a tail call to the external readnone
; function @sqrtf; expects vsqrtssz.
; CHECK-LABEL: sqrtA
; CHECK: vsqrtssz
; CHECK: ret
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
entry:
  %root = tail call float @sqrtf(float %a) nounwind readnone
  ret float %root
}

; Scalar double square root through a tail call to the external readnone
; function @sqrt; expects vsqrtsdz.
; CHECK-LABEL: sqrtB
; CHECK: vsqrtsdz
; CHECK: ret
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
entry:
  %root = tail call double @sqrt(double %a) nounwind readnone
  ret double %root
}

; Scalar float square root through the @llvm.sqrt.f32 intrinsic; expects
; vsqrtssz.
; CHECK-LABEL: sqrtC
; CHECK: vsqrtssz
; CHECK: ret
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root
}

; <16 x float> add with a constant whose sixteen lanes are all identical; the
; expected operand is an LCP label with a {1to16} broadcast, applied to %zmm0.
; CHECK-LABEL: fadd_broadcast
; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK: ret
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
  %sum = fadd <16 x float> %a,
      <float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %sum
}

; <8 x i64> add with an all-2 constant vector; expects vpaddq with an LCP
; operand using a {1to8} broadcast, applied to %zmm0.
; CHECK-LABEL: addq_broadcast
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
  %sum = add <8 x i64> %a,
      <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %sum
}

; <8 x i64> bitwise OR with an all-2 constant vector; expects vporq with an
; LCP operand using a {1to8} broadcast, applied to %zmm0.
; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
  %bits = or <8 x i64> %a,
      <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %bits
}

; <16 x i32> bitwise AND whose second operand is loaded through a pointer
; argument; the expected vpandd has a memory operand of the form "(%reg".
; CHECK-LABEL: andd512fold
; CHECK: vpandd (%
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
  %mem = load <16 x i32>* %x, align 4
  %masked = and <16 x i32> %y, %mem
  ret <16 x i32> %masked
}

; Loads one i64, inserts it into lane 0 and splats it to all eight lanes with
; a zeroinitializer shuffle mask, then ANDs it with %p1; expects vpandq with a
; (%rdi){1to8} broadcast memory operand.
; CHECK-LABEL: andqbrst
; CHECK: vpandq (%rdi){1to8}, %zmm
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
  %scalar = load i64* %ap, align 8
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
  %masked = and <8 x i64> %p1, %splat
  ret <8 x i64> %masked
}