; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK

; This file checks that FMA4 (four-operand, AMD-style) instructions are
; selected for the fma4 scalar intrinsics and for llvm.fma.* patterns, with
; the expected VEX encodings.  The two RUN lines differ only in whether the
; three-operand FMA feature is also enabled; both must produce identical
; FMA4 code (shared CHECK prefix), i.e. +fma must not change selection here.

; VFMADD
; Scalar vfmadd via the FMA4-specific intrinsic; the *_bac_* variants swap
; the first two intrinsic operands and expect the register operands (and the
; VEX.vvvv field in the encoding) to swap accordingly.
define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)

; Packed fmadd from the generic llvm.fma.* intrinsic, 128- and 256-bit.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %1
}

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %1
}

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %1
}

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %1
}

; VFMSUB
; The addend is negated in IR via `fsub <-0.0, ...>, %a2`; selection must
; fold that negation into a single vfmsub instruction.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
  ret <4 x double> %2
}

; VFNMADD
; Here the first multiplicand is negated in IR; selection must fold it
; into vfnmadd.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %2
}

; VFNMSUB
; Both a multiplicand and the addend are negated in IR; selection must fold
; both negations into a single vfnmsub.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
  ret <4 x float> %3
}

define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
  ret <2 x double> %3
}

define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
  ret <8 x float> %3
}

define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
  ret <4 x double> %3
}

; VFMADDSUB
; Two fma calls (one with the addend negated) interleaved by a
; shufflevector — even lanes from the subtracting fma, odd lanes from the
; adding fma — must combine into a single vfmaddsub.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
  %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %4
}

define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
  %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %4
}

define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %4
}

; VFMSUBADD
; Same interleave pattern with the shufflevector operands in the opposite
; order (even lanes from the adding fma, odd from the subtracting fma), so
; a single vfmsubadd must be formed instead.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
  %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %4
}

define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
  %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %4
}

define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %4
}

; NOTE(review): attribute group #2 is referenced below but no
; `attributes #2 = { ... }` definition is visible in this file — confirm it
; exists (or is intentionally absent) in the checked-in copy.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #2
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #2
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #2

attributes #0 = { nounwind }