1//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the instructions that make up the Intel AMX instruction
10// set.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// AMX instructions
16
// Defines five tile instruction records: LDTILECFG, STTILECFG, TILELOADD,
// TILELOADDT1 and TILESTORED.
//   Suffix  - string appended to every record name with '#'.
//   HasEGPR - predicate placed in the Predicates list next to HasAMXTILE and
//             In64BitMode.
// No VEX/EVEX class is attached inside the multiclass; the defm sites visible
// in this file append those (VEX, or EVEX + NoCD8).
17multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
18let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
  // LDTILECFG/STTILECFG share opcode 0x49 and form MRM0m; they differ in the
  // prefix class (PS vs. PD). Only LDTILECFG lists TMM0-TMM7 in Defs.
19  let hasSideEffects = 1,
20      Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
21  def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
22                           "ldtilecfg\t$src",
23                           [(int_x86_ldtilecfg addr:$src)]>,
24                         T8, PS;
25  let hasSideEffects = 1 in
26  def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
27                           "sttilecfg\t$src",
28                           [(int_x86_sttilecfg addr:$src)]>,
29                         T8, PD;
  // The three records below share opcode 0x4b and carry an empty pattern
  // list. The two loads use MRMSrcMemFSIB and differ only in prefix class
  // (XD vs. PD); the store uses MRMDestMemFSIB with XS.
30  let mayLoad = 1 in
31  def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
32                           (ins sibmem:$src),
33                           "tileloadd\t{$src, $dst|$dst, $src}", []>,
34                         T8, XD;
35  let mayLoad = 1 in
36  def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
37                             (ins sibmem:$src),
38                             "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
39                           T8, PD;
40  let mayStore = 1 in
41  def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
42                            (ins sibmem:$dst, TILE:$src),
43                            "tilestored\t{$src, $dst|$dst, $src}", []>,
44                          T8, XS;
45} // Predicates
46} // multiclass AMX_TILE_COMMON
47
// Tile instructions and their pseudos; every record in this scope gets
// SchedRW = [WriteSystem].
48let SchedRW = [WriteSystem] in {
  // Two instantiations of AMX_TILE_COMMON: unsuffixed names with NoEGPR and
  // VEX, and "_EVEX"-suffixed names with HasEGPR, EVEX and NoCD8.
49  defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
50  defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;
51
52  let Predicates = [HasAMXTILE, In64BitMode] in {
    // This brace-less 'let' covers only the next def (TILERELEASE);
    // TILEZERO below does not receive the Defs list.
53    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
54    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
55                        "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
56    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
57                     "tilezero\t$dst", []>,
58                     VEX, T8, XD;
59
60    // Pseudo instructions for RA.
    // The *V pseudos below take GR16 operands in addition to the memory/tile
    // operands (presumably the tile shape -- TODO confirm against the
    // lowering code). Only PTILEZEROV carries a selection pattern.
61    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
62        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
63    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
64    let isPseudo = true, mayLoad = 1 in
65    def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
66                                                     GR16:$src2,
67                                                     opaquemem:$src3), []>;
68    let isPseudo = true, mayLoad = 1 in
69    def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
70                                                       GR16:$src2,
71                                                       opaquemem:$src3), []>;
72    let isPseudo = true, mayStore = 1 in
73    def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
74                                            GR16:$src2, opaquemem:$src3,
75                                            TILE:$src4), []>;
    // Selected from int_x86_tilezero_internal; flagged rematerializable,
    // as cheap as a move, and foldable as a load.
76    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
77        canFoldAsLoad = 1 in
78      def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
79                                [(set TILE:$dst, (int_x86_tilezero_internal
80                                  GR16:$src1, GR16:$src2))]>;
81
82    let usesCustomInserter = 1 in {
83      // Pseudo instructions, using immediates instead of tile registers.
84      // To be translated to the actual instructions in X86ISelLowering.cpp
85      let mayLoad = 1 in
86      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
87      let mayLoad = 1 in
88      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
89                                          sibmem:$src2), []>;
      // NOTE(review): the store pseudo uses i8mem while the two load pseudos
      // above use sibmem -- confirm this difference is intended.
90      let mayStore = 1 in
91      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
92      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
93                              [(int_x86_tilezero timm:$src)]>;
94    }
95  } // Predicates
96} // SchedRW
97
// AMX-INT8 records (TDPBSSD/TDPBSUD/TDPBUSD/TDPBUUD) and their pseudos, gated
// on HasAMXINT8 and In64BitMode.
98let Predicates = [HasAMXINT8, In64BitMode] in {
99  let SchedRW = [WriteSystem] in {
    // All four records share opcode 0x5e, form MRMSrcReg4VOp3 and an empty
    // pattern list; they are distinguished by the prefix class
    // (XD, XS, PD, or none for TDPBUUD). $src1 is tied to $dst.
100    let Constraints = "$src1 = $dst" in {
101      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
102                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
103                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
104                      VEX, VVVV, T8, XD;
105      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
106                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
107                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
108                      VEX, VVVV, T8, XS;
109      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
110                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
111                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
112                      VEX, VVVV, T8, PD;
113      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
114                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
115                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
116                      VEX, VVVV, T8;
117    }
118
119    // Pseudo instructions for RA.
    // Each is selected from the matching int_x86_tdpb*_internal intrinsic
    // with three GR16 operands followed by three TILE operands; the first
    // TILE input ($src4) is tied to $dst.
120    let isPseudo = true, Constraints = "$src4 = $dst" in {
121      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
122                              GR16:$src2, GR16:$src3, TILE:$src4,
123                              TILE:$src5, TILE:$src6),
124                              [(set TILE: $dst,
125                              (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
126                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
127      def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
128                              GR16:$src2, GR16:$src3, TILE:$src4,
129                              TILE:$src5, TILE:$src6),
130                              [(set TILE: $dst,
131                              (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
132                               GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
133      def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
134                              GR16:$src2, GR16:$src3, TILE:$src4,
135                              TILE:$src5, TILE:$src6),
136                              [(set TILE: $dst,
137                              (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
138                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
139      def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
140                              GR16:$src2, GR16:$src3, TILE:$src4,
141                              TILE:$src5, TILE:$src6),
142                              [(set TILE: $dst,
143                              (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
144                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
145    }
146
147    let usesCustomInserter = 1 in {
148      // Pseudo instructions, using immediates instead of tile registers.
149      // To be translated to the actual instructions in X86ISelLowering.cpp
150      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
151                             u8imm:$src2, u8imm:$src3),
152                             [(int_x86_tdpbssd timm:$src1,
153                               timm:$src2, timm:$src3)]>;
154      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
155                             u8imm:$src2, u8imm:$src3),
156                             [(int_x86_tdpbsud timm:$src1,
157                               timm:$src2, timm:$src3)]>;
158      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
159                             u8imm:$src2, u8imm:$src3),
160                             [(int_x86_tdpbusd timm:$src1,
161                               timm:$src2, timm:$src3)]>;
162      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
163                             u8imm:$src2, u8imm:$src3),
164                             [(int_x86_tdpbuud timm:$src1,
165                               timm:$src2, timm:$src3)]>;
166    }
167  } // SchedRW
168} // Predicates = [HasAMXINT8, In64BitMode]
169
// AMX-BF16 record (TDPBF16PS) and its pseudos, gated on HasAMXBF16 and
// In64BitMode.
170let Predicates = [HasAMXBF16, In64BitMode] in {
171  let SchedRW = [WriteSystem] in {
    // Opcode 0x5c, form MRMSrcReg4VOp3, prefix class XS; no selection
    // pattern. $src1 is tied to $dst.
172    let Constraints = "$src1 = $dst" in
173    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
174                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
175                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
176                      []>, VEX, VVVV, T8, XS;
177
178    // Pseudo instruction for RA.
    // Selected from int_x86_tdpbf16ps_internal; $src4 is tied to $dst.
179    let isPseudo = true, Constraints = "$src4 = $dst" in
180      def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
181                                 GR16:$src2, GR16:$src3, TILE:$src4,
182                                 TILE:$src5, TILE:$src6),
183                                 [(set TILE: $dst,
184                                  (int_x86_tdpbf16ps_internal GR16:$src1,
185                                   GR16:$src2, GR16:$src3, TILE:$src4,
186                                   TILE:$src5, TILE:$src6))]>;
187
188    let usesCustomInserter = 1 in {
189      // Pseudo instructions, using immediates instead of tile registers.
190      // To be translated to the actual instructions in X86ISelLowering.cpp
191      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
192                               u8imm:$src2, u8imm:$src3),
193                               [(int_x86_tdpbf16ps timm:$src1,
194                                 timm:$src2, timm:$src3)]>;
195    }
196  } // SchedRW
197} // Predicates = [HasAMXBF16, In64BitMode]
198
199//AMX-FP16
// AMX-FP16 record (TDPFP16PS) and its pseudos, gated on HasAMXFP16 and
// In64BitMode.
200let Predicates = [HasAMXFP16, In64BitMode] in {
201  let SchedRW = [WriteSystem] in {
    // Opcode 0x5c, form MRMSrcReg4VOp3, prefix class XD; no selection
    // pattern. $src1 is tied to $dst.
    // NOTE(review): the asm string names $src1 where TDPBF16PS and the
    // TDPB* records name $dst; the operands are tied by the constraint, but
    // confirm the spelling difference is intentional.
202    let Constraints = "$src1 = $dst" in {
203      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
204                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
205                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
206                        []>, VEX, VVVV, T8, XD;
207    }
208
209    // Pseudo instruction for RA.
    // Selected from int_x86_tdpfp16ps_internal; $src4 is tied to $dst.
210    let isPseudo = true, Constraints = "$src4 = $dst" in {
211      def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
212                                 GR16:$src2, GR16:$src3, TILE:$src4,
213                                 TILE:$src5, TILE:$src6),
214                                 [(set TILE: $dst,
215                                  (int_x86_tdpfp16ps_internal GR16:$src1,
216                                   GR16:$src2, GR16:$src3, TILE:$src4,
217                                   TILE:$src5, TILE:$src6))]>;
218    }
219
    // Immediate-operand pseudo selected from int_x86_tdpfp16ps; expanded by
    // a custom inserter.
220    let  usesCustomInserter = 1 in {
221      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
222                               u8imm:$src2, u8imm:$src3),
223                               [(int_x86_tdpfp16ps timm:$src1,
224                                 timm:$src2, timm:$src3)]>;
225    }
226  } // SchedRW
227} // Predicates = [HasAMXFP16, In64BitMode]
228
// AMX-COMPLEX records (TCMMIMFP16PS, TCMMRLFP16PS) and their pseudos, gated
// on HasAMXCOMPLEX and In64BitMode.
229let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
230  let SchedRW = [WriteSystem] in {
    // Both records share opcode 0x6c and form MRMSrcReg4VOp3 with an empty
    // pattern list. TCMMIMFP16PS carries PD; TCMMRLFP16PS carries WIG and no
    // PD/XS/XD class. $src1 is tied to $dst.
231    let Constraints = "$src1 = $dst" in {
232      def TCMMIMFP16PS   : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
233                            (ins TILE:$src1, TILE:$src2, TILE:$src3),
234                            "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
235                            []>, T8, PD, VEX, VVVV;
236      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
237                            (ins TILE:$src1, TILE:$src2, TILE:$src3),
238                            "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
239                            []>, VEX, VVVV, WIG, T8;
240
241    } // Constraints = "$src1 = $dst"
242
    // Pseudos selected from the int_x86_tcmm*fp16ps_internal intrinsics;
    // $src4 is tied to $dst.
    // NOTE(review): unlike the corresponding *V pseudo groups for INT8, BF16
    // and FP16 in this file, this 'let' does not set isPseudo = true --
    // confirm whether that omission is intended.
243    let Constraints = "$src4 = $dst" in {
244      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
245                                  GR16:$src2, GR16:$src3, TILE:$src4,
246                                  TILE:$src5, TILE:$src6),
247                                  [(set TILE: $dst,
248                                  (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
249                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
250      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
251                                  GR16:$src2, GR16:$src3, TILE:$src4,
252                                  TILE:$src5, TILE:$src6),
253                                  [(set TILE: $dst,
254                                  (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
255                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
256    }
257
    // Immediate-operand pseudos selected from int_x86_tcmmimfp16ps and
    // int_x86_tcmmrlfp16ps; expanded by a custom inserter.
258    let usesCustomInserter = 1 in {
259      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
260                                u8imm:$src2, u8imm:$src3),
261                                [(int_x86_tcmmimfp16ps timm:$src1,
262                                  timm:$src2, timm:$src3)]>;
263      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
264                                u8imm:$src2, u8imm:$src3),
265                                [(int_x86_tcmmrlfp16ps timm:$src1,
266                                  timm:$src2, timm:$src3)]>;
267    }
268  } // SchedRW = [WriteSystem]
269} // Predicates = [HasAMXCOMPLEX, In64BitMode]
270