1//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes FMA (Fused Multiply-Add) instructions.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// FMA3 - Intel 3 operand Fused Multiply-Add instructions
16//===----------------------------------------------------------------------===//
17
18let Constraints = "$src1 = $dst" in {
// Packed FMA3 instruction forms for a single opcode.
//
// Defines four records:
//   r  - 128-bit, all operands in VR128 registers
//   m  - 128-bit, $src3 is an f128mem memory operand
//   rY - 256-bit, all operands in VR256 registers (tagged VEX_L)
//   mY - 256-bit, $src3 is an f256mem memory operand (tagged VEX_L)
//
// Parameters:
//   opc        - opcode byte handed to the FMA3 class.
//   OpcodeStr  - mnemonic; the operand string is appended via !strconcat.
//   MemFrag128,
//   MemFrag256 - load fragments applied to addr:$src3 in the memory forms.
//   OpVT128,
//   OpVT256    - value types wrapped around the pattern result.
//   Op         - pattern operator; defaults to null_frag.
//
// In every pattern Op is applied to ($src2, $src1, $src3), i.e. the first two
// inputs appear swapped relative to the (ins ...) list.
//
// NOTE(review): isCommutable is set on the register forms for whatever opcode
// the caller passes in - confirm this is intended for instantiations that use
// a non-213 operand order.
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
                    PatFrag MemFrag128, PatFrag MemFrag256,
                    ValueType OpVT128, ValueType OpVT256,
                    SDPatternOperator Op = null_frag> {
  // 128-bit register form.
  let isCommutable = 1 in
  def r     : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, VR128:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR128:$dst, (OpVT128 (Op VR128:$src2,
                                               VR128:$src1, VR128:$src3)))]>;

  // 128-bit memory form. mayLoad is stated explicitly; presumably because it
  // cannot be inferred when Op is left as null_frag - TODO confirm.
  let mayLoad = 1 in
  def m     : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, f128mem:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
                                               (MemFrag128 addr:$src3))))]>;

  // 256-bit register form.
  let isCommutable = 1 in
  def rY    : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2, VR256:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
                                               VR256:$src3)))]>, VEX_L;

  // 256-bit memory form.
  let mayLoad = 1 in
  def mY    : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2, f256mem:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR256:$dst,
                     (OpVT256 (Op VR256:$src2, VR256:$src1,
                               (MemFrag256 addr:$src3))))]>, VEX_L;
}
56} // Constraints = "$src1 = $dst"
57
// Instantiates the three operand-order variants (132, 213, 231) of one packed
// FMA3 operation. Each variant's mnemonic is OpcodeStr + order digits + PackTy.
// Only the 213 variant forwards Op to fma3p_rm; the other two rely on
// fma3p_rm's default pattern operator and are flagged neverHasSideEffects.
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpcodeStr, string PackTy,
                       PatFrag MemFrag128, PatFrag MemFrag256,
                       SDNode Op, ValueType OpTy128, ValueType OpTy256> {
  // 213 order: carries the selection pattern built from Op.
  defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, "213", PackTy),
                       MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;

  // 132 and 231 orders: no pattern operator is supplied.
  let neverHasSideEffects = 1 in {
    defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, "132", PackTy),
                         MemFrag128, MemFrag256, OpTy128, OpTy256>;
    defm r231 : fma3p_rm<opc231, !strconcat(OpcodeStr, "231", PackTy),
                         MemFrag128, MemFrag256, OpTy128, OpTy256>;
  } // neverHasSideEffects = 1
}
74
75// Fused Multiply-Add
// Packed single-precision instantiations. The three opcode arguments are in
// (132, 213, 231) order, matching the parameter list of fma3p_forms, followed
// by the mnemonic stem, the "ps" suffix, the 128/256-bit load fragments, the
// DAG node and the 128/256-bit value types.
let ExeDomain = SSEPackedSingle in {
  defm VFMADDPS    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", loadv4f32,
                                 loadv8f32, X86Fmadd, v4f32, v8f32>;
  defm VFMSUBPS    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", loadv4f32,
                                 loadv8f32, X86Fmsub, v4f32, v8f32>;
  defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
                                 loadv4f32, loadv8f32, X86Fmaddsub,
                                 v4f32, v8f32>;
  defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
                                 loadv4f32, loadv8f32, X86Fmsubadd,
                                 v4f32, v8f32>;
}

// Packed double-precision instantiations. These reuse the same opcode bytes
// as the "ps" definitions above and additionally carry VEX_W.
let ExeDomain = SSEPackedDouble in {
  defm VFMADDPD    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", loadv2f64,
                                 loadv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
  defm VFMSUBPD    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", loadv2f64,
                                 loadv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
  defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
                                 loadv2f64, loadv4f64, X86Fmaddsub,
                                 v2f64, v4f64>, VEX_W;
  defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
                                 loadv2f64, loadv4f64, X86Fmsubadd,
                                 v2f64, v4f64>, VEX_W;
}
101
102// Fused Negative Multiply-Add
// Packed single-precision negated forms; opcode arguments are in
// (132, 213, 231) order as declared by fma3p_forms.
let ExeDomain = SSEPackedSingle in {
  defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps",  loadv4f32,
                               loadv8f32, X86Fnmadd, v4f32, v8f32>;
  defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps",  loadv4f32,
                               loadv8f32, X86Fnmsub, v4f32, v8f32>;
}
// Packed double-precision negated forms: same opcode bytes, plus VEX_W.
let ExeDomain = SSEPackedDouble in {
  defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", loadv2f64,
                               loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
  defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
                               loadv2f64, loadv4f64, X86Fnmsub, v2f64,
                               v4f64>, VEX_W;
}
116
117let Constraints = "$src1 = $dst" in {
// Scalar FMA3 instruction forms for a single opcode, operating on register
// class RC.
//
// Defines two records:
//   r - all three inputs are RC registers
//   m - $src3 is an x86memop memory operand loaded through mem_frag
//
// Parameters:
//   opc       - opcode byte handed to the FMA3 class.
//   OpcodeStr - mnemonic; the operand string is appended via !strconcat.
//   x86memop  - memory operand type used for $src3 in the m form.
//   RC        - register class of $dst and the register inputs.
//   OpVT      - value type wrapped around the pattern result.
//   mem_frag  - load fragment applied to addr:$src3 in the m form.
//   OpNode    - pattern operator; defaults to null_frag.
//
// As in the patterns below, OpNode is applied to ($src2, $src1, $src3).
//
// NOTE(review): isCommutable is set on the register form for whatever opcode
// the caller passes in - confirm this is intended for instantiations that use
// a non-213 operand order.
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
                    RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
                    SDPatternOperator OpNode = null_frag> {
  let isCommutable = 1 in
  def r     : FMA3<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  // mayLoad is stated explicitly; presumably because it cannot be inferred
  // when OpNode is left as null_frag - TODO confirm.
  let mayLoad = 1 in
  def m     : FMA3<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1,
                            (mem_frag addr:$src3))))]>;
}
137
// Intrinsic-selected ("_Int") scalar FMA3 forms for a single opcode. Unlike
// fma3s_rm, these operate on whole VR128 values.
//
// Defines two records:
//   r_Int - all three inputs are VR128 registers
//   m_Int - $src3 is a memop memory operand matched through mem_cpat
//
// Parameters:
//   opc       - opcode byte handed to the FMA3 class.
//   OpcodeStr - mnemonic; the operand string is appended via !strconcat.
//   memop     - memory operand type used for $src3 in the m_Int form.
//   mem_cpat  - complex pattern used to match the memory operand.
//   IntId     - intrinsic selected by both patterns; it is applied to
//               ($src2, $src1, $src3).
//   RC        - accepted for interface compatibility; not referenced here.
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
                        ComplexPattern mem_cpat, Intrinsic IntId,
                        RegisterClass RC> {
  // Deliberately NOT marked isCommutable. The result is a full VR128 value
  // whose $dst is tied to $src1, and the scalar FMA3 instructions leave the
  // upper elements of the destination unchanged (see the Intel SDM). Swapping
  // $src1 and $src2 would therefore change which register supplies the upper
  // elements of the result, so the two operand orders are not equivalent.
  def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, VR128:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
                     VR128:$src3))]>;
  def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, memop:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set VR128:$dst,
                     (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
}
155} // Constraints = "$src1 = $dst"
156
// Instantiates the three operand-order variants (132, 213, 231) of one scalar
// FMA3 operation. Each variant's mnemonic is OpStr + order digits + PackTy.
// The 132 and 231 variants get no pattern operator and are flagged
// neverHasSideEffects; the 213 variant receives OpNode and additionally pulls
// in the intrinsic-based records from fma3s_rm_int.
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpStr, string PackTy, Intrinsic Int,
                       SDNode OpNode, RegisterClass RC, ValueType OpVT,
                       X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
                       ComplexPattern mem_cpat> {
  // 132 and 231 orders: no pattern operator is supplied.
  let neverHasSideEffects = 1 in {
    defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
                         x86memop, RC, OpVT, mem_frag>;
    defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
                         x86memop, RC, OpVT, mem_frag>;
  }

  // 213 order: DAG-node forms plus the intrinsic forms, under one defm.
  defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
                       x86memop, RC, OpVT, mem_frag, OpNode>,
              fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
                           memop, mem_cpat, Int, RC>;
}
174
// Expands one scalar FMA3 operation into its single-precision (SS) and
// double-precision (SD) families. Both share the opcode bytes, mnemonic stem
// and OpNode; they differ in intrinsic, register class, value type and
// memory operand/fragments, and the SD family additionally carries VEX_W.
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                 string OpStr, Intrinsic IntF32, Intrinsic IntF64,
                 SDNode OpNode> {
  // Single precision: FR32 / f32.
  defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss",
                        IntF32, OpNode, FR32, f32,
                        f32mem, ssmem, loadf32, sse_load_f32>;

  // Double precision: FR64 / f64.
  defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd",
                        IntF64, OpNode, FR64, f64,
                        f64mem, sdmem, loadf64, sse_load_f64>,
            VEX_W;
}
183
// Scalar FMA3 instantiations. Each defm expands through fma3s into SS and SD
// families. Arguments: opcode bytes in (132, 213, 231) order, mnemonic stem,
// the f32 and f64 intrinsics, and the DAG node. All are tagged VEX_LIG.
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
                    int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
                    int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;

// Negated-multiply scalar variants.
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
                     int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
                     int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
193
194
195//===----------------------------------------------------------------------===//
196// FMA4 - AMD 4 operand Fused Multiply-Add instructions
197//===----------------------------------------------------------------------===//
198
199
// Scalar FMA4 instruction forms for a single opcode over register class RC.
//
// Defines four records:
//   rr     - three register inputs; selected from OpNode
//   rm     - $src3 is a memory operand loaded through mem_frag
//   mr     - $src2 is a memory operand loaded through mem_frag
//   rr_REV - register-only record with no pattern, kept for the disassembler
//
// rr and rm carry VEX_W and MemOp4; mr and rr_REV do not. All four are
// tagged VEX_LIG.
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                 X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
                 PatFrag mem_frag> {
  // Register, register, register.
  let isCommutable = 1 in
  def rr : FMA4<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set RC:$dst,
                      (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
           VEX_W, VEX_LIG, MemOp4;

  // Register, register, memory.
  def rm : FMA4<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, x86memop:$src3),
                !strconcat(OpcodeStr,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set RC:$dst,
                      (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3)))]>,
           VEX_W, VEX_LIG, MemOp4;

  // Register, memory, register.
  def mr : FMA4<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set RC:$dst,
                      (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>,
           VEX_LIG;

  // For disassembler
  let isCodeGenOnly = 1, hasSideEffects = 0 in
  def rr_REV : FMA4<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3),
                    !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>,
               VEX_LIG;
}
230
// Intrinsic-selected scalar FMA4 forms for a single opcode. All operands are
// VR128 values; the memory forms match their operand through mem_cpat.
//
// Defines three records:
//   rr_Int - three register inputs
//   rm_Int - $src3 is a memop memory operand
//   mr_Int - $src2 is a memop memory operand
//
// rr_Int and rm_Int carry VEX_W and MemOp4; mr_Int does not. All three are
// tagged VEX_LIG.
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                     ComplexPattern mem_cpat, Intrinsic Int> {
  // Register, register, register.
  let isCommutable = 1 in
  def rr_Int : FMA4<opc, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, VR128:$src3),
                    !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    [(set VR128:$dst,
                          (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
               VEX_W, VEX_LIG, MemOp4;

  // Register, register, memory.
  def rm_Int : FMA4<opc, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, memop:$src3),
                    !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    [(set VR128:$dst,
                          (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>,
               VEX_W, VEX_LIG, MemOp4;

  // Register, memory, register.
  def mr_Int : FMA4<opc, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, memop:$src2, VR128:$src3),
                    !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    [(set VR128:$dst,
                          (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>,
               VEX_LIG;
}
253
// Packed FMA4 instruction forms for a single opcode.
//
// Defines eight records:
//   rr,  rm,  mr   - 128-bit (VR128): all-register, $src3 in memory,
//                    $src2 in memory
//   rrY, rmY, mrY  - 256-bit (VR256) counterparts, tagged VEX_L
//   rr_REV, rrY_REV - register-only records with no pattern, kept for the
//                    disassembler
//
// Parameters:
//   opc         - opcode byte handed to the FMA4 class.
//   OpcodeStr   - mnemonic; the operand string is appended via !strconcat.
//   OpNode      - DAG node used in the selection patterns.
//   OpVT128,
//   OpVT256     - value types wrapped around the register-form patterns.
//   ld_frag128,
//   ld_frag256  - load fragments applied to the memory operand.
//
// The rr/rm (and rrY/rmY) records carry VEX_W and MemOp4; the mr/mrY and
// *_REV records do not.
// NOTE(review): presumably VEX.W distinguishes which source may be the memory
// operand in the FMA4 encoding - confirm against the ISA manual.
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 ValueType OpVT128, ValueType OpVT256,
                 PatFrag ld_frag128, PatFrag ld_frag256> {
  // 128-bit forms.
  let isCommutable = 1 in
  def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst,
             (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
           VEX_W, MemOp4;
  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, f128mem:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
                              (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, f128mem:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst,
             (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
  // 256-bit forms.
  let isCommutable = 1 in
  def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, VR256:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR256:$dst,
             (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
           VEX_W, MemOp4, VEX_L;
  def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, f256mem:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
                              (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L;
  def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f256mem:$src2, VR256:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR256:$dst, (OpNode VR256:$src1,
                              (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
// For disassembler
// These records have an empty pattern list and are marked isCodeGenOnly.
let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, VR128:$src3),
               !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
  def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
                (ins VR256:$src1, VR256:$src2, VR256:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
                VEX_L;
} // isCodeGenOnly = 1
}
310
// Scalar FMA4 instantiations. Each defm combines two multiclasses under the
// same opcode and mnemonic:
//   fma4s     - FR32/FR64 forms selected from the DAG node
//   fma4s_int - VR128 forms selected from the matching intrinsic
defm VFMADDSS4  : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
                  fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
                            int_x86_fma_vfmadd_ss>;
defm VFMADDSD4  : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
                  fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
                            int_x86_fma_vfmadd_sd>;
defm VFMSUBSS4  : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
                  fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
                            int_x86_fma_vfmsub_ss>;
defm VFMSUBSD4  : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
                  fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
                            int_x86_fma_vfmsub_sd>;
// Negated-multiply scalar variants.
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
                        X86Fnmadd, loadf32>,
                  fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
                            int_x86_fma_vfnmadd_ss>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
                        X86Fnmadd, loadf64>,
                  fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
                            int_x86_fma_vfnmadd_sd>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
                        X86Fnmsub, loadf32>,
                  fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
                            int_x86_fma_vfnmsub_ss>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
                        X86Fnmsub, loadf64>,
                  fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
                            int_x86_fma_vfnmsub_sd>;
339
// Packed single-precision FMA4 instantiations. Arguments to fma4p: opcode
// byte, full mnemonic, DAG node, 128/256-bit value types, and the 128/256-bit
// load fragments.
let ExeDomain = SSEPackedSingle in {
  defm VFMADDPS4    : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
}

// Packed double-precision FMA4 instantiations. Unlike the FMA3 definitions,
// these use distinct opcode bytes from their "ps" counterparts rather than
// an extra VEX_W on the defm.
let ExeDomain = SSEPackedDouble in {
  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
}
369
370