//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the classes providing information
// about existing X86 FMA3 opcodes, classifying and grouping them.
//
//===----------------------------------------------------------------------===//

#include "X86InstrFMA3Info.h"
#include "X86InstrInfo.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

// Table-building macros.
//
// Every FMA3GROUP* macro expands to a run of X86InstrFMA3Group initializers,
// each followed by a comma, so that invocations can simply be stacked inside
// an array initializer. The FMA3 tables in this file are asserted to be sorted
// (see verifyTables) and are binary searched by getFMA3Group, so the order in
// which the entries are emitted below is significant: do not reorder them.

// One group: the 132, 213 and 231 opcodes built from Name and Suf, followed by
// the attribute bits Attrs.
#define FMA3GROUP(Name, Suf, Attrs) \
  { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },

// Three groups: the plain form, the "k" form tagged KMergeMasked and the "kz"
// form tagged KZeroMasked.
#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
  FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)

// Masked groups for the Z128, Z256 and Z widths, each in its "m" and "r" form.
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr, Attrs)

// The Y forms, then every Z-width form, then the unsuffixed-width forms.
#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Ym, Attrs) \
  FMA3GROUP(Name, Suf##Yr, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##r, Attrs)

// All packed groups for Name: PD and PS in every width, PH in Z widths only.
#define FMA3GROUP_PACKED(Name, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)

// Scalar Z forms: a plain group plus masked "_Int" groups (tagged Intrinsic)
// for both the "m" and the "r" form.
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##Zr, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic)

// The scalar Z forms followed by the non-Z forms; "_Int" groups are tagged
// Intrinsic.
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##r, Attrs) \
  FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)

// All scalar groups for Name: SD and SS in every form, SH in Z forms only.
#define FMA3GROUP_SCALAR(Name, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)

// Packed groups followed by scalar groups.
#define FMA3GROUP_FULL(Name, Attrs) \
  FMA3GROUP_PACKED(Name, Attrs) \
  FMA3GROUP_SCALAR(Name, Attrs)

730b57cec5SDimitry Andric static const X86InstrFMA3Group Groups[] = {
740b57cec5SDimitry Andric   FMA3GROUP_FULL(VFMADD, 0)
750b57cec5SDimitry Andric   FMA3GROUP_PACKED(VFMADDSUB, 0)
760b57cec5SDimitry Andric   FMA3GROUP_FULL(VFMSUB, 0)
770b57cec5SDimitry Andric   FMA3GROUP_PACKED(VFMSUBADD, 0)
780b57cec5SDimitry Andric   FMA3GROUP_FULL(VFNMADD, 0)
790b57cec5SDimitry Andric   FMA3GROUP_FULL(VFNMSUB, 0)
800b57cec5SDimitry Andric };
810b57cec5SDimitry Andric 
// Macros for the BroadcastGroups and RoundGroups tables. As with the macros
// above, each expands to comma-terminated X86InstrFMA3Group initializers and
// the emission order must match the sorted order the tables are checked for.

// Masked groups for Type in the Z128, Z256 and Z widths, all with suffix Suf.
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)

// The above for the PD, PH and PS types, in that order.
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)

// Masked groups for the Z width only, for the PD, PH and PS types.
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)

// For each of SD, SH and SS: one plain Z group followed by the masked "_Int"
// groups. Attrs is applied unchanged to every group.
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP(Name, SDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
  FMA3GROUP(Name, SHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
  FMA3GROUP(Name, SSZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)

1050b57cec5SDimitry Andric static const X86InstrFMA3Group BroadcastGroups[] = {
1060b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
1070b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
1080b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
1090b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
1100b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
1110b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
1120b57cec5SDimitry Andric };
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric static const X86InstrFMA3Group RoundGroups[] = {
1150b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
1160b57cec5SDimitry Andric   FMA3GROUP_SCALAR_AVX512_ROUND(VFMADD, rb, X86InstrFMA3Group::Intrinsic)
1170b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
1180b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
1190b57cec5SDimitry Andric   FMA3GROUP_SCALAR_AVX512_ROUND(VFMSUB, rb, X86InstrFMA3Group::Intrinsic)
1200b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
1210b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
1220b57cec5SDimitry Andric   FMA3GROUP_SCALAR_AVX512_ROUND(VFNMADD, rb, X86InstrFMA3Group::Intrinsic)
1230b57cec5SDimitry Andric   FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
1240b57cec5SDimitry Andric   FMA3GROUP_SCALAR_AVX512_ROUND(VFNMSUB, rb, X86InstrFMA3Group::Intrinsic)
1250b57cec5SDimitry Andric };
1260b57cec5SDimitry Andric 
verifyTables()1270b57cec5SDimitry Andric static void verifyTables() {
1280b57cec5SDimitry Andric #ifndef NDEBUG
1290b57cec5SDimitry Andric   static std::atomic<bool> TableChecked(false);
1300b57cec5SDimitry Andric   if (!TableChecked.load(std::memory_order_relaxed)) {
1315ffd83dbSDimitry Andric     assert(llvm::is_sorted(Groups) && llvm::is_sorted(RoundGroups) &&
1325ffd83dbSDimitry Andric            llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
1330b57cec5SDimitry Andric     TableChecked.store(true, std::memory_order_relaxed);
1340b57cec5SDimitry Andric   }
1350b57cec5SDimitry Andric #endif
1360b57cec5SDimitry Andric }
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric /// Returns a reference to a group of FMA3 opcodes to where the given
1390b57cec5SDimitry Andric /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
1400b57cec5SDimitry Andric /// and not included into any FMA3 group, then nullptr is returned.
getFMA3Group(unsigned Opcode,uint64_t TSFlags)1410b57cec5SDimitry Andric const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric   // FMA3 instructions have a well defined encoding pattern we can exploit.
1440b57cec5SDimitry Andric   uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
145349cc55cSDimitry Andric   bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
1460b57cec5SDimitry Andric                        (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
1470b57cec5SDimitry Andric                        (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
148349cc55cSDimitry Andric   bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
149349cc55cSDimitry Andric                          (TSFlags & X86II::OpMapMask) == X86II::T8) ||
150349cc55cSDimitry Andric                         ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
151349cc55cSDimitry Andric                          ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
152349cc55cSDimitry Andric                           (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
153349cc55cSDimitry Andric   bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
154349cc55cSDimitry Andric   if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
1550b57cec5SDimitry Andric     return nullptr;
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric   verifyTables();
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric   ArrayRef<X86InstrFMA3Group> Table;
1600b57cec5SDimitry Andric   if (TSFlags & X86II::EVEX_RC)
161bdd1243dSDimitry Andric     Table = ArrayRef(RoundGroups);
1620b57cec5SDimitry Andric   else if (TSFlags & X86II::EVEX_B)
163bdd1243dSDimitry Andric     Table = ArrayRef(BroadcastGroups);
1640b57cec5SDimitry Andric   else
165bdd1243dSDimitry Andric     Table = ArrayRef(Groups);
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric   // FMA 132 instructions have an opcode of 0x96-0x9F
1680b57cec5SDimitry Andric   // FMA 213 instructions have an opcode of 0xA6-0xAF
1690b57cec5SDimitry Andric   // FMA 231 instructions have an opcode of 0xB6-0xBF
1700b57cec5SDimitry Andric   unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
1730b57cec5SDimitry Andric     return Group.Opcodes[FormIndex] < Opcode;
1740b57cec5SDimitry Andric   });
1750b57cec5SDimitry Andric   assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
1760b57cec5SDimitry Andric          "Couldn't find FMA3 opcode!");
1770b57cec5SDimitry Andric   return I;
1780b57cec5SDimitry Andric }
179