1 //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the X86 memory folding tables.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "X86InstrFoldTables.h"
14 #include "X86InstrInfo.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include <atomic>
17 #include <vector>
18 
19 using namespace llvm;
20 
// These tables are sorted by their RegOp value, allowing them to be binary
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing the table.
25 #include "X86GenFoldTables.inc"
26 static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
27   { X86::VADDPDZ128rr,   X86::VADDPDZ128rmb,   TB_BCAST_SD },
28   { X86::VADDPDZ256rr,   X86::VADDPDZ256rmb,   TB_BCAST_SD },
29   { X86::VADDPDZrr,      X86::VADDPDZrmb,      TB_BCAST_SD },
30   { X86::VADDPSZ128rr,   X86::VADDPSZ128rmb,   TB_BCAST_SS },
31   { X86::VADDPSZ256rr,   X86::VADDPSZ256rmb,   TB_BCAST_SS },
32   { X86::VADDPSZrr,      X86::VADDPSZrmb,      TB_BCAST_SS },
33   { X86::VANDNPDZ128rr,  X86::VANDNPDZ128rmb,  TB_BCAST_SD },
34   { X86::VANDNPDZ256rr,  X86::VANDNPDZ256rmb,  TB_BCAST_SD },
35   { X86::VANDNPDZrr,     X86::VANDNPDZrmb,     TB_BCAST_SD },
36   { X86::VANDNPSZ128rr,  X86::VANDNPSZ128rmb,  TB_BCAST_SS },
37   { X86::VANDNPSZ256rr,  X86::VANDNPSZ256rmb,  TB_BCAST_SS },
38   { X86::VANDNPSZrr,     X86::VANDNPSZrmb,     TB_BCAST_SS },
39   { X86::VANDPDZ128rr,   X86::VANDPDZ128rmb,   TB_BCAST_SD },
40   { X86::VANDPDZ256rr,   X86::VANDPDZ256rmb,   TB_BCAST_SD },
41   { X86::VANDPDZrr,      X86::VANDPDZrmb,      TB_BCAST_SD },
42   { X86::VANDPSZ128rr,   X86::VANDPSZ128rmb,   TB_BCAST_SS },
43   { X86::VANDPSZ256rr,   X86::VANDPSZ256rmb,   TB_BCAST_SS },
44   { X86::VANDPSZrr,      X86::VANDPSZrmb,      TB_BCAST_SS },
45   { X86::VCMPPDZ128rri,  X86::VCMPPDZ128rmbi,  TB_BCAST_SD },
46   { X86::VCMPPDZ256rri,  X86::VCMPPDZ256rmbi,  TB_BCAST_SD },
47   { X86::VCMPPDZrri,     X86::VCMPPDZrmbi,     TB_BCAST_SD },
48   { X86::VCMPPSZ128rri,  X86::VCMPPSZ128rmbi,  TB_BCAST_SS },
49   { X86::VCMPPSZ256rri,  X86::VCMPPSZ256rmbi,  TB_BCAST_SS },
50   { X86::VCMPPSZrri,     X86::VCMPPSZrmbi,     TB_BCAST_SS },
51   { X86::VDIVPDZ128rr,   X86::VDIVPDZ128rmb,   TB_BCAST_SD },
52   { X86::VDIVPDZ256rr,   X86::VDIVPDZ256rmb,   TB_BCAST_SD },
53   { X86::VDIVPDZrr,      X86::VDIVPDZrmb,      TB_BCAST_SD },
54   { X86::VDIVPSZ128rr,   X86::VDIVPSZ128rmb,   TB_BCAST_SS },
55   { X86::VDIVPSZ256rr,   X86::VDIVPSZ256rmb,   TB_BCAST_SS },
56   { X86::VDIVPSZrr,      X86::VDIVPSZrmb,      TB_BCAST_SS },
57   { X86::VMAXCPDZ128rr,  X86::VMAXCPDZ128rmb,  TB_BCAST_SD },
58   { X86::VMAXCPDZ256rr,  X86::VMAXCPDZ256rmb,  TB_BCAST_SD },
59   { X86::VMAXCPDZrr,     X86::VMAXCPDZrmb,     TB_BCAST_SD },
60   { X86::VMAXCPSZ128rr,  X86::VMAXCPSZ128rmb,  TB_BCAST_SS },
61   { X86::VMAXCPSZ256rr,  X86::VMAXCPSZ256rmb,  TB_BCAST_SS },
62   { X86::VMAXCPSZrr,     X86::VMAXCPSZrmb,     TB_BCAST_SS },
63   { X86::VMAXPDZ128rr,   X86::VMAXPDZ128rmb,   TB_BCAST_SD },
64   { X86::VMAXPDZ256rr,   X86::VMAXPDZ256rmb,   TB_BCAST_SD },
65   { X86::VMAXPDZrr,      X86::VMAXPDZrmb,      TB_BCAST_SD },
66   { X86::VMAXPSZ128rr,   X86::VMAXPSZ128rmb,   TB_BCAST_SS },
67   { X86::VMAXPSZ256rr,   X86::VMAXPSZ256rmb,   TB_BCAST_SS },
68   { X86::VMAXPSZrr,      X86::VMAXPSZrmb,      TB_BCAST_SS },
69   { X86::VMINCPDZ128rr,  X86::VMINCPDZ128rmb,  TB_BCAST_SD },
70   { X86::VMINCPDZ256rr,  X86::VMINCPDZ256rmb,  TB_BCAST_SD },
71   { X86::VMINCPDZrr,     X86::VMINCPDZrmb,     TB_BCAST_SD },
72   { X86::VMINCPSZ128rr,  X86::VMINCPSZ128rmb,  TB_BCAST_SS },
73   { X86::VMINCPSZ256rr,  X86::VMINCPSZ256rmb,  TB_BCAST_SS },
74   { X86::VMINCPSZrr,     X86::VMINCPSZrmb,     TB_BCAST_SS },
75   { X86::VMINPDZ128rr,   X86::VMINPDZ128rmb,   TB_BCAST_SD },
76   { X86::VMINPDZ256rr,   X86::VMINPDZ256rmb,   TB_BCAST_SD },
77   { X86::VMINPDZrr,      X86::VMINPDZrmb,      TB_BCAST_SD },
78   { X86::VMINPSZ128rr,   X86::VMINPSZ128rmb,   TB_BCAST_SS },
79   { X86::VMINPSZ256rr,   X86::VMINPSZ256rmb,   TB_BCAST_SS },
80   { X86::VMINPSZrr,      X86::VMINPSZrmb,      TB_BCAST_SS },
81   { X86::VMULPDZ128rr,   X86::VMULPDZ128rmb,   TB_BCAST_SD },
82   { X86::VMULPDZ256rr,   X86::VMULPDZ256rmb,   TB_BCAST_SD },
83   { X86::VMULPDZrr,      X86::VMULPDZrmb,      TB_BCAST_SD },
84   { X86::VMULPSZ128rr,   X86::VMULPSZ128rmb,   TB_BCAST_SS },
85   { X86::VMULPSZ256rr,   X86::VMULPSZ256rmb,   TB_BCAST_SS },
86   { X86::VMULPSZrr,      X86::VMULPSZrmb,      TB_BCAST_SS },
87   { X86::VORPDZ128rr,    X86::VORPDZ128rmb,    TB_BCAST_SD },
88   { X86::VORPDZ256rr,    X86::VORPDZ256rmb,    TB_BCAST_SD },
89   { X86::VORPDZrr,       X86::VORPDZrmb,       TB_BCAST_SD },
90   { X86::VORPSZ128rr,    X86::VORPSZ128rmb,    TB_BCAST_SS },
91   { X86::VORPSZ256rr,    X86::VORPSZ256rmb,    TB_BCAST_SS },
92   { X86::VORPSZrr,       X86::VORPSZrmb,       TB_BCAST_SS },
93   { X86::VPADDDZ128rr,   X86::VPADDDZ128rmb,   TB_BCAST_D },
94   { X86::VPADDDZ256rr,   X86::VPADDDZ256rmb,   TB_BCAST_D },
95   { X86::VPADDDZrr,      X86::VPADDDZrmb,      TB_BCAST_D },
96   { X86::VPADDQZ128rr,   X86::VPADDQZ128rmb,   TB_BCAST_Q },
97   { X86::VPADDQZ256rr,   X86::VPADDQZ256rmb,   TB_BCAST_Q },
98   { X86::VPADDQZrr,      X86::VPADDQZrmb,      TB_BCAST_Q },
99   { X86::VPANDDZ128rr,   X86::VPANDDZ128rmb,   TB_BCAST_D },
100   { X86::VPANDDZ256rr,   X86::VPANDDZ256rmb,   TB_BCAST_D },
101   { X86::VPANDDZrr,      X86::VPANDDZrmb,      TB_BCAST_D },
102   { X86::VPANDNDZ128rr,  X86::VPANDNDZ128rmb,  TB_BCAST_D },
103   { X86::VPANDNDZ256rr,  X86::VPANDNDZ256rmb,  TB_BCAST_D },
104   { X86::VPANDNDZrr,     X86::VPANDNDZrmb,     TB_BCAST_D },
105   { X86::VPANDNQZ128rr,  X86::VPANDNQZ128rmb,  TB_BCAST_Q },
106   { X86::VPANDNQZ256rr,  X86::VPANDNQZ256rmb,  TB_BCAST_Q },
107   { X86::VPANDNQZrr,     X86::VPANDNQZrmb,     TB_BCAST_Q },
108   { X86::VPANDQZ128rr,   X86::VPANDQZ128rmb,   TB_BCAST_Q },
109   { X86::VPANDQZ256rr,   X86::VPANDQZ256rmb,   TB_BCAST_Q },
110   { X86::VPANDQZrr,      X86::VPANDQZrmb,      TB_BCAST_Q },
111   { X86::VPCMPDZ128rri,  X86::VPCMPDZ128rmib,  TB_BCAST_D },
112   { X86::VPCMPDZ256rri,  X86::VPCMPDZ256rmib,  TB_BCAST_D },
113   { X86::VPCMPDZrri,     X86::VPCMPDZrmib,     TB_BCAST_D },
114   { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D },
115   { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D },
116   { X86::VPCMPEQDZrr,    X86::VPCMPEQDZrmb,    TB_BCAST_D },
117   { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q },
118   { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q },
119   { X86::VPCMPEQQZrr,    X86::VPCMPEQQZrmb,    TB_BCAST_Q },
120   { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D },
121   { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D },
122   { X86::VPCMPGTDZrr,    X86::VPCMPGTDZrmb,    TB_BCAST_D },
123   { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q },
124   { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q },
125   { X86::VPCMPGTQZrr,    X86::VPCMPGTQZrmb,    TB_BCAST_Q },
126   { X86::VPCMPQZ128rri,  X86::VPCMPQZ128rmib,  TB_BCAST_Q },
127   { X86::VPCMPQZ256rri,  X86::VPCMPQZ256rmib,  TB_BCAST_Q },
128   { X86::VPCMPQZrri,     X86::VPCMPQZrmib,     TB_BCAST_Q },
129   { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D },
130   { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D },
131   { X86::VPCMPUDZrri,    X86::VPCMPUDZrmib,    TB_BCAST_D },
132   { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q },
133   { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q },
134   { X86::VPCMPUQZrri,    X86::VPCMPUQZrmib,    TB_BCAST_Q },
135   { X86::VPMAXSDZ128rr,  X86::VPMAXSDZ128rmb,  TB_BCAST_D },
136   { X86::VPMAXSDZ256rr,  X86::VPMAXSDZ256rmb,  TB_BCAST_D },
137   { X86::VPMAXSDZrr,     X86::VPMAXSDZrmb,     TB_BCAST_D },
138   { X86::VPMAXSQZ128rr,  X86::VPMAXSQZ128rmb,  TB_BCAST_Q },
139   { X86::VPMAXSQZ256rr,  X86::VPMAXSQZ256rmb,  TB_BCAST_Q },
140   { X86::VPMAXSQZrr,     X86::VPMAXSQZrmb,     TB_BCAST_Q },
141   { X86::VPMAXUDZ128rr,  X86::VPMAXUDZ128rmb,  TB_BCAST_D },
142   { X86::VPMAXUDZ256rr,  X86::VPMAXUDZ256rmb,  TB_BCAST_D },
143   { X86::VPMAXUDZrr,     X86::VPMAXUDZrmb,     TB_BCAST_D },
144   { X86::VPMAXUQZ128rr,  X86::VPMAXUQZ128rmb,  TB_BCAST_Q },
145   { X86::VPMAXUQZ256rr,  X86::VPMAXUQZ256rmb,  TB_BCAST_Q },
146   { X86::VPMAXUQZrr,     X86::VPMAXUQZrmb,     TB_BCAST_Q },
147   { X86::VPMINSDZ128rr,  X86::VPMINSDZ128rmb,  TB_BCAST_D },
148   { X86::VPMINSDZ256rr,  X86::VPMINSDZ256rmb,  TB_BCAST_D },
149   { X86::VPMINSDZrr,     X86::VPMINSDZrmb,     TB_BCAST_D },
150   { X86::VPMINSQZ128rr,  X86::VPMINSQZ128rmb,  TB_BCAST_Q },
151   { X86::VPMINSQZ256rr,  X86::VPMINSQZ256rmb,  TB_BCAST_Q },
152   { X86::VPMINSQZrr,     X86::VPMINSQZrmb,     TB_BCAST_Q },
153   { X86::VPMINUDZ128rr,  X86::VPMINUDZ128rmb,  TB_BCAST_D },
154   { X86::VPMINUDZ256rr,  X86::VPMINUDZ256rmb,  TB_BCAST_D },
155   { X86::VPMINUDZrr,     X86::VPMINUDZrmb,     TB_BCAST_D },
156   { X86::VPMINUQZ128rr,  X86::VPMINUQZ128rmb,  TB_BCAST_Q },
157   { X86::VPMINUQZ256rr,  X86::VPMINUQZ256rmb,  TB_BCAST_Q },
158   { X86::VPMINUQZrr,     X86::VPMINUQZrmb,     TB_BCAST_Q },
159   { X86::VPMULLDZ128rr,  X86::VPMULLDZ128rmb,  TB_BCAST_D },
160   { X86::VPMULLDZ256rr,  X86::VPMULLDZ256rmb,  TB_BCAST_D },
161   { X86::VPMULLDZrr,     X86::VPMULLDZrmb,     TB_BCAST_D },
162   { X86::VPMULLQZ128rr,  X86::VPMULLQZ128rmb,  TB_BCAST_Q },
163   { X86::VPMULLQZ256rr,  X86::VPMULLQZ256rmb,  TB_BCAST_Q },
164   { X86::VPMULLQZrr,     X86::VPMULLQZrmb,     TB_BCAST_Q },
165   { X86::VPORDZ128rr,    X86::VPORDZ128rmb,    TB_BCAST_D },
166   { X86::VPORDZ256rr,    X86::VPORDZ256rmb,    TB_BCAST_D },
167   { X86::VPORDZrr,       X86::VPORDZrmb,       TB_BCAST_D },
168   { X86::VPORQZ128rr,    X86::VPORQZ128rmb,    TB_BCAST_Q },
169   { X86::VPORQZ256rr,    X86::VPORQZ256rmb,    TB_BCAST_Q },
170   { X86::VPORQZrr,       X86::VPORQZrmb,       TB_BCAST_Q },
171   { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
172   { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
173   { X86::VPTESTMDZrr,    X86::VPTESTMDZrmb,    TB_BCAST_D },
174   { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
175   { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
176   { X86::VPTESTMQZrr,    X86::VPTESTMQZrmb,    TB_BCAST_Q },
177   { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D },
178   { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D },
179   { X86::VPTESTNMDZrr,   X86::VPTESTNMDZrmb,   TB_BCAST_D },
180   { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q },
181   { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q },
182   { X86::VPTESTNMQZrr,   X86::VPTESTNMQZrmb,   TB_BCAST_Q },
183   { X86::VPXORDZ128rr,   X86::VPXORDZ128rmb,   TB_BCAST_D },
184   { X86::VPXORDZ256rr,   X86::VPXORDZ256rmb,   TB_BCAST_D },
185   { X86::VPXORDZrr,      X86::VPXORDZrmb,      TB_BCAST_D },
186   { X86::VPXORQZ128rr,   X86::VPXORQZ128rmb,   TB_BCAST_Q },
187   { X86::VPXORQZ256rr,   X86::VPXORQZ256rmb,   TB_BCAST_Q },
188   { X86::VPXORQZrr,      X86::VPXORQZrmb,      TB_BCAST_Q },
189   { X86::VSUBPDZ128rr,   X86::VSUBPDZ128rmb,   TB_BCAST_SD },
190   { X86::VSUBPDZ256rr,   X86::VSUBPDZ256rmb,   TB_BCAST_SD },
191   { X86::VSUBPDZrr,      X86::VSUBPDZrmb,      TB_BCAST_SD },
192   { X86::VSUBPSZ128rr,   X86::VSUBPSZ128rmb,   TB_BCAST_SS },
193   { X86::VSUBPSZ256rr,   X86::VSUBPSZ256rmb,   TB_BCAST_SS },
194   { X86::VSUBPSZrr,      X86::VSUBPSZrmb,      TB_BCAST_SS },
195   { X86::VXORPDZ128rr,   X86::VXORPDZ128rmb,   TB_BCAST_SD },
196   { X86::VXORPDZ256rr,   X86::VXORPDZ256rmb,   TB_BCAST_SD },
197   { X86::VXORPDZrr,      X86::VXORPDZrmb,      TB_BCAST_SD },
198   { X86::VXORPSZ128rr,   X86::VXORPSZ128rmb,   TB_BCAST_SS },
199   { X86::VXORPSZ256rr,   X86::VXORPSZ256rmb,   TB_BCAST_SS },
200   { X86::VXORPSZrr,      X86::VXORPSZrmb,      TB_BCAST_SS },
201 };
202 
203 static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
204   { X86::VFMADD132PDZ128r,     X86::VFMADD132PDZ128mb,    TB_BCAST_SD },
205   { X86::VFMADD132PDZ256r,     X86::VFMADD132PDZ256mb,    TB_BCAST_SD },
206   { X86::VFMADD132PDZr,        X86::VFMADD132PDZmb,       TB_BCAST_SD },
207   { X86::VFMADD132PSZ128r,     X86::VFMADD132PSZ128mb,    TB_BCAST_SS },
208   { X86::VFMADD132PSZ256r,     X86::VFMADD132PSZ256mb,    TB_BCAST_SS },
209   { X86::VFMADD132PSZr,        X86::VFMADD132PSZmb,       TB_BCAST_SS },
210   { X86::VFMADD213PDZ128r,     X86::VFMADD213PDZ128mb,    TB_BCAST_SD },
211   { X86::VFMADD213PDZ256r,     X86::VFMADD213PDZ256mb,    TB_BCAST_SD },
212   { X86::VFMADD213PDZr,        X86::VFMADD213PDZmb,       TB_BCAST_SD },
213   { X86::VFMADD213PSZ128r,     X86::VFMADD213PSZ128mb,    TB_BCAST_SS },
214   { X86::VFMADD213PSZ256r,     X86::VFMADD213PSZ256mb,    TB_BCAST_SS },
215   { X86::VFMADD213PSZr,        X86::VFMADD213PSZmb,       TB_BCAST_SS },
216   { X86::VFMADD231PDZ128r,     X86::VFMADD231PDZ128mb,    TB_BCAST_SD },
217   { X86::VFMADD231PDZ256r,     X86::VFMADD231PDZ256mb,    TB_BCAST_SD },
218   { X86::VFMADD231PDZr,        X86::VFMADD231PDZmb,       TB_BCAST_SD },
219   { X86::VFMADD231PSZ128r,     X86::VFMADD231PSZ128mb,    TB_BCAST_SS },
220   { X86::VFMADD231PSZ256r,     X86::VFMADD231PSZ256mb,    TB_BCAST_SS },
221   { X86::VFMADD231PSZr,        X86::VFMADD231PSZmb,       TB_BCAST_SS },
222   { X86::VFMADDSUB132PDZ128r,  X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD },
223   { X86::VFMADDSUB132PDZ256r,  X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD },
224   { X86::VFMADDSUB132PDZr,     X86::VFMADDSUB132PDZmb,    TB_BCAST_SD },
225   { X86::VFMADDSUB132PSZ128r,  X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS },
226   { X86::VFMADDSUB132PSZ256r,  X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS },
227   { X86::VFMADDSUB132PSZr,     X86::VFMADDSUB132PSZmb,    TB_BCAST_SS },
228   { X86::VFMADDSUB213PDZ128r,  X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD },
229   { X86::VFMADDSUB213PDZ256r,  X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD },
230   { X86::VFMADDSUB213PDZr,     X86::VFMADDSUB213PDZmb,    TB_BCAST_SD },
231   { X86::VFMADDSUB213PSZ128r,  X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS },
232   { X86::VFMADDSUB213PSZ256r,  X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS },
233   { X86::VFMADDSUB213PSZr,     X86::VFMADDSUB213PSZmb,    TB_BCAST_SS },
234   { X86::VFMADDSUB231PDZ128r,  X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD },
235   { X86::VFMADDSUB231PDZ256r,  X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD },
236   { X86::VFMADDSUB231PDZr,     X86::VFMADDSUB231PDZmb,    TB_BCAST_SD },
237   { X86::VFMADDSUB231PSZ128r,  X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS },
238   { X86::VFMADDSUB231PSZ256r,  X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS },
239   { X86::VFMADDSUB231PSZr,     X86::VFMADDSUB231PSZmb,    TB_BCAST_SS },
240   { X86::VFMSUB132PDZ128r,     X86::VFMSUB132PDZ128mb,    TB_BCAST_SD },
241   { X86::VFMSUB132PDZ256r,     X86::VFMSUB132PDZ256mb,    TB_BCAST_SD },
242   { X86::VFMSUB132PDZr,        X86::VFMSUB132PDZmb,       TB_BCAST_SD },
243   { X86::VFMSUB132PSZ128r,     X86::VFMSUB132PSZ128mb,    TB_BCAST_SS },
244   { X86::VFMSUB132PSZ256r,     X86::VFMSUB132PSZ256mb,    TB_BCAST_SS },
245   { X86::VFMSUB132PSZr,        X86::VFMSUB132PSZmb,       TB_BCAST_SS },
246   { X86::VFMSUB213PDZ128r,     X86::VFMSUB213PDZ128mb,    TB_BCAST_SD },
247   { X86::VFMSUB213PDZ256r,     X86::VFMSUB213PDZ256mb,    TB_BCAST_SD },
248   { X86::VFMSUB213PDZr,        X86::VFMSUB213PDZmb,       TB_BCAST_SD },
249   { X86::VFMSUB213PSZ128r,     X86::VFMSUB213PSZ128mb,    TB_BCAST_SS },
250   { X86::VFMSUB213PSZ256r,     X86::VFMSUB213PSZ256mb,    TB_BCAST_SS },
251   { X86::VFMSUB213PSZr,        X86::VFMSUB213PSZmb,       TB_BCAST_SS },
252   { X86::VFMSUB231PDZ128r,     X86::VFMSUB231PDZ128mb,    TB_BCAST_SD },
253   { X86::VFMSUB231PDZ256r,     X86::VFMSUB231PDZ256mb,    TB_BCAST_SD },
254   { X86::VFMSUB231PDZr,        X86::VFMSUB231PDZmb,       TB_BCAST_SD },
255   { X86::VFMSUB231PSZ128r,     X86::VFMSUB231PSZ128mb,    TB_BCAST_SS },
256   { X86::VFMSUB231PSZ256r,     X86::VFMSUB231PSZ256mb,    TB_BCAST_SS },
257   { X86::VFMSUB231PSZr,        X86::VFMSUB231PSZmb,       TB_BCAST_SS },
258   { X86::VFMSUBADD132PDZ128r,  X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD },
259   { X86::VFMSUBADD132PDZ256r,  X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD },
260   { X86::VFMSUBADD132PDZr,     X86::VFMSUBADD132PDZmb,    TB_BCAST_SD },
261   { X86::VFMSUBADD132PSZ128r,  X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS },
262   { X86::VFMSUBADD132PSZ256r,  X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS },
263   { X86::VFMSUBADD132PSZr,     X86::VFMSUBADD132PSZmb,    TB_BCAST_SS },
264   { X86::VFMSUBADD213PDZ128r,  X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD },
265   { X86::VFMSUBADD213PDZ256r,  X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD },
266   { X86::VFMSUBADD213PDZr,     X86::VFMSUBADD213PDZmb,    TB_BCAST_SD },
267   { X86::VFMSUBADD213PSZ128r,  X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS },
268   { X86::VFMSUBADD213PSZ256r,  X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS },
269   { X86::VFMSUBADD213PSZr,     X86::VFMSUBADD213PSZmb,    TB_BCAST_SS },
270   { X86::VFMSUBADD231PDZ128r,  X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD },
271   { X86::VFMSUBADD231PDZ256r,  X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD },
272   { X86::VFMSUBADD231PDZr,     X86::VFMSUBADD231PDZmb,    TB_BCAST_SD },
273   { X86::VFMSUBADD231PSZ128r,  X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS },
274   { X86::VFMSUBADD231PSZ256r,  X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS },
275   { X86::VFMSUBADD231PSZr,     X86::VFMSUBADD231PSZmb,    TB_BCAST_SS },
276   { X86::VFNMADD132PDZ128r,    X86::VFNMADD132PDZ128mb,   TB_BCAST_SD },
277   { X86::VFNMADD132PDZ256r,    X86::VFNMADD132PDZ256mb,   TB_BCAST_SD },
278   { X86::VFNMADD132PDZr,       X86::VFNMADD132PDZmb,      TB_BCAST_SD },
279   { X86::VFNMADD132PSZ128r,    X86::VFNMADD132PSZ128mb,   TB_BCAST_SS },
280   { X86::VFNMADD132PSZ256r,    X86::VFNMADD132PSZ256mb,   TB_BCAST_SS },
281   { X86::VFNMADD132PSZr,       X86::VFNMADD132PSZmb,      TB_BCAST_SS },
282   { X86::VFNMADD213PDZ128r,    X86::VFNMADD213PDZ128mb,   TB_BCAST_SD },
283   { X86::VFNMADD213PDZ256r,    X86::VFNMADD213PDZ256mb,   TB_BCAST_SD },
284   { X86::VFNMADD213PDZr,       X86::VFNMADD213PDZmb,      TB_BCAST_SD },
285   { X86::VFNMADD213PSZ128r,    X86::VFNMADD213PSZ128mb,   TB_BCAST_SS },
286   { X86::VFNMADD213PSZ256r,    X86::VFNMADD213PSZ256mb,   TB_BCAST_SS },
287   { X86::VFNMADD213PSZr,       X86::VFNMADD213PSZmb,      TB_BCAST_SS },
288   { X86::VFNMADD231PDZ128r,    X86::VFNMADD231PDZ128mb,   TB_BCAST_SD },
289   { X86::VFNMADD231PDZ256r,    X86::VFNMADD231PDZ256mb,   TB_BCAST_SD },
290   { X86::VFNMADD231PDZr,       X86::VFNMADD231PDZmb,      TB_BCAST_SD },
291   { X86::VFNMADD231PSZ128r,    X86::VFNMADD231PSZ128mb,   TB_BCAST_SS },
292   { X86::VFNMADD231PSZ256r,    X86::VFNMADD231PSZ256mb,   TB_BCAST_SS },
293   { X86::VFNMADD231PSZr,       X86::VFNMADD231PSZmb,      TB_BCAST_SS },
294   { X86::VFNMSUB132PDZ128r,    X86::VFNMSUB132PDZ128mb,   TB_BCAST_SD },
295   { X86::VFNMSUB132PDZ256r,    X86::VFNMSUB132PDZ256mb,   TB_BCAST_SD },
296   { X86::VFNMSUB132PDZr,       X86::VFNMSUB132PDZmb,      TB_BCAST_SD },
297   { X86::VFNMSUB132PSZ128r,    X86::VFNMSUB132PSZ128mb,   TB_BCAST_SS },
298   { X86::VFNMSUB132PSZ256r,    X86::VFNMSUB132PSZ256mb,   TB_BCAST_SS },
299   { X86::VFNMSUB132PSZr,       X86::VFNMSUB132PSZmb,      TB_BCAST_SS },
300   { X86::VFNMSUB213PDZ128r,    X86::VFNMSUB213PDZ128mb,   TB_BCAST_SD },
301   { X86::VFNMSUB213PDZ256r,    X86::VFNMSUB213PDZ256mb,   TB_BCAST_SD },
302   { X86::VFNMSUB213PDZr,       X86::VFNMSUB213PDZmb,      TB_BCAST_SD },
303   { X86::VFNMSUB213PSZ128r,    X86::VFNMSUB213PSZ128mb,   TB_BCAST_SS },
304   { X86::VFNMSUB213PSZ256r,    X86::VFNMSUB213PSZ256mb,   TB_BCAST_SS },
305   { X86::VFNMSUB213PSZr,       X86::VFNMSUB213PSZmb,      TB_BCAST_SS },
306   { X86::VFNMSUB231PDZ128r,    X86::VFNMSUB231PDZ128mb,   TB_BCAST_SD },
307   { X86::VFNMSUB231PDZ256r,    X86::VFNMSUB231PDZ256mb,   TB_BCAST_SD },
308   { X86::VFNMSUB231PDZr,       X86::VFNMSUB231PDZmb,      TB_BCAST_SD },
309   { X86::VFNMSUB231PSZ128r,    X86::VFNMSUB231PSZ128mb,   TB_BCAST_SS },
310   { X86::VFNMSUB231PSZ256r,    X86::VFNMSUB231PSZ256mb,   TB_BCAST_SS },
311   { X86::VFNMSUB231PSZr,       X86::VFNMSUB231PSZmb,      TB_BCAST_SS },
312   { X86::VPTERNLOGDZ128rri,    X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D },
313   { X86::VPTERNLOGDZ256rri,    X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D },
314   { X86::VPTERNLOGDZrri,       X86::VPTERNLOGDZrmbi,      TB_BCAST_D },
315   { X86::VPTERNLOGQZ128rri,    X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q },
316   { X86::VPTERNLOGQZ256rri,    X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q },
317   { X86::VPTERNLOGQZrri,       X86::VPTERNLOGQZrmbi,      TB_BCAST_Q },
318 };
319 
320 // Table to map instructions safe to broadcast using a different width from the
321 // element width.
322 static const X86MemoryFoldTableEntry BroadcastSizeFoldTable2[] = {
323   { X86::VANDNPDZ128rr,        X86::VANDNPSZ128rmb,       TB_BCAST_SS },
324   { X86::VANDNPDZ256rr,        X86::VANDNPSZ256rmb,       TB_BCAST_SS },
325   { X86::VANDNPDZrr,           X86::VANDNPSZrmb,          TB_BCAST_SS },
326   { X86::VANDNPSZ128rr,        X86::VANDNPDZ128rmb,       TB_BCAST_SD },
327   { X86::VANDNPSZ256rr,        X86::VANDNPDZ256rmb,       TB_BCAST_SD },
328   { X86::VANDNPSZrr,           X86::VANDNPDZrmb,          TB_BCAST_SD },
329   { X86::VANDPDZ128rr,         X86::VANDPSZ128rmb,        TB_BCAST_SS },
330   { X86::VANDPDZ256rr,         X86::VANDPSZ256rmb,        TB_BCAST_SS },
331   { X86::VANDPDZrr,            X86::VANDPSZrmb,           TB_BCAST_SS },
332   { X86::VANDPSZ128rr,         X86::VANDPDZ128rmb,        TB_BCAST_SD },
333   { X86::VANDPSZ256rr,         X86::VANDPDZ256rmb,        TB_BCAST_SD },
334   { X86::VANDPSZrr,            X86::VANDPDZrmb,           TB_BCAST_SD },
335   { X86::VORPDZ128rr,          X86::VORPSZ128rmb,         TB_BCAST_SS },
336   { X86::VORPDZ256rr,          X86::VORPSZ256rmb,         TB_BCAST_SS },
337   { X86::VORPDZrr,             X86::VORPSZrmb,            TB_BCAST_SS },
338   { X86::VORPSZ128rr,          X86::VORPDZ128rmb,         TB_BCAST_SD },
339   { X86::VORPSZ256rr,          X86::VORPDZ256rmb,         TB_BCAST_SD },
340   { X86::VORPSZrr,             X86::VORPDZrmb,            TB_BCAST_SD },
341   { X86::VPANDDZ128rr,         X86::VPANDQZ128rmb,        TB_BCAST_Q },
342   { X86::VPANDDZ256rr,         X86::VPANDQZ256rmb,        TB_BCAST_Q },
343   { X86::VPANDDZrr,            X86::VPANDQZrmb,           TB_BCAST_Q },
344   { X86::VPANDNDZ128rr,        X86::VPANDNQZ128rmb,       TB_BCAST_Q },
345   { X86::VPANDNDZ256rr,        X86::VPANDNQZ256rmb,       TB_BCAST_Q },
346   { X86::VPANDNDZrr,           X86::VPANDNQZrmb,          TB_BCAST_Q },
347   { X86::VPANDNQZ128rr,        X86::VPANDNDZ128rmb,       TB_BCAST_D },
348   { X86::VPANDNQZ256rr,        X86::VPANDNDZ256rmb,       TB_BCAST_D },
349   { X86::VPANDNQZrr,           X86::VPANDNDZrmb,          TB_BCAST_D },
350   { X86::VPANDQZ128rr,         X86::VPANDDZ128rmb,        TB_BCAST_D },
351   { X86::VPANDQZ256rr,         X86::VPANDDZ256rmb,        TB_BCAST_D },
352   { X86::VPANDQZrr,            X86::VPANDDZrmb,           TB_BCAST_D },
353   { X86::VPORDZ128rr,          X86::VPORQZ128rmb,         TB_BCAST_Q },
354   { X86::VPORDZ256rr,          X86::VPORQZ256rmb,         TB_BCAST_Q },
355   { X86::VPORDZrr,             X86::VPORQZrmb,            TB_BCAST_Q },
356   { X86::VPORQZ128rr,          X86::VPORDZ128rmb,         TB_BCAST_D },
357   { X86::VPORQZ256rr,          X86::VPORDZ256rmb,         TB_BCAST_D },
358   { X86::VPORQZrr,             X86::VPORDZrmb,            TB_BCAST_D },
359   { X86::VPXORDZ128rr,         X86::VPXORQZ128rmb,        TB_BCAST_Q },
360   { X86::VPXORDZ256rr,         X86::VPXORQZ256rmb,        TB_BCAST_Q },
361   { X86::VPXORDZrr,            X86::VPXORQZrmb,           TB_BCAST_Q },
362   { X86::VPXORQZ128rr,         X86::VPXORDZ128rmb,        TB_BCAST_D },
363   { X86::VPXORQZ256rr,         X86::VPXORDZ256rmb,        TB_BCAST_D },
364   { X86::VPXORQZrr,            X86::VPXORDZrmb,           TB_BCAST_D },
365   { X86::VXORPDZ128rr,         X86::VXORPSZ128rmb,        TB_BCAST_SS },
366   { X86::VXORPDZ256rr,         X86::VXORPSZ256rmb,        TB_BCAST_SS },
367   { X86::VXORPDZrr,            X86::VXORPSZrmb,           TB_BCAST_SS },
368   { X86::VXORPSZ128rr,         X86::VXORPDZ128rmb,        TB_BCAST_SD },
369   { X86::VXORPSZ256rr,         X86::VXORPDZ256rmb,        TB_BCAST_SD },
370   { X86::VXORPSZrr,            X86::VXORPDZrmb,           TB_BCAST_SD },
371 };
372 
373 static const X86MemoryFoldTableEntry BroadcastSizeFoldTable3[] = {
374   { X86::VPTERNLOGDZ128rri,    X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q },
375   { X86::VPTERNLOGDZ256rri,    X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q },
376   { X86::VPTERNLOGDZrri,       X86::VPTERNLOGQZrmbi,      TB_BCAST_Q },
377   { X86::VPTERNLOGQZ128rri,    X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D },
378   { X86::VPTERNLOGQZ256rri,    X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D },
379   { X86::VPTERNLOGQZrri,       X86::VPTERNLOGDZrmbi,      TB_BCAST_D },
380 };
381 
382 static const X86MemoryFoldTableEntry *
383 lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
384 #ifndef NDEBUG
385   // Make sure the tables are sorted.
386   static std::atomic<bool> FoldTablesChecked(false);
387   if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
388     assert(llvm::is_sorted(MemoryFoldTable2Addr) &&
389            std::adjacent_find(std::begin(MemoryFoldTable2Addr),
390                               std::end(MemoryFoldTable2Addr)) ==
391                std::end(MemoryFoldTable2Addr) &&
392            "MemoryFoldTable2Addr is not sorted and unique!");
393     assert(llvm::is_sorted(MemoryFoldTable0) &&
394            std::adjacent_find(std::begin(MemoryFoldTable0),
395                               std::end(MemoryFoldTable0)) ==
396                std::end(MemoryFoldTable0) &&
397            "MemoryFoldTable0 is not sorted and unique!");
398     assert(llvm::is_sorted(MemoryFoldTable1) &&
399            std::adjacent_find(std::begin(MemoryFoldTable1),
400                               std::end(MemoryFoldTable1)) ==
401                std::end(MemoryFoldTable1) &&
402            "MemoryFoldTable1 is not sorted and unique!");
403     assert(llvm::is_sorted(MemoryFoldTable2) &&
404            std::adjacent_find(std::begin(MemoryFoldTable2),
405                               std::end(MemoryFoldTable2)) ==
406                std::end(MemoryFoldTable2) &&
407            "MemoryFoldTable2 is not sorted and unique!");
408     assert(llvm::is_sorted(MemoryFoldTable3) &&
409            std::adjacent_find(std::begin(MemoryFoldTable3),
410                               std::end(MemoryFoldTable3)) ==
411                std::end(MemoryFoldTable3) &&
412            "MemoryFoldTable3 is not sorted and unique!");
413     assert(llvm::is_sorted(MemoryFoldTable4) &&
414            std::adjacent_find(std::begin(MemoryFoldTable4),
415                               std::end(MemoryFoldTable4)) ==
416                std::end(MemoryFoldTable4) &&
417            "MemoryFoldTable4 is not sorted and unique!");
418     assert(llvm::is_sorted(BroadcastFoldTable2) &&
419            std::adjacent_find(std::begin(BroadcastFoldTable2),
420                               std::end(BroadcastFoldTable2)) ==
421                std::end(BroadcastFoldTable2) &&
422            "BroadcastFoldTable2 is not sorted and unique!");
423     assert(llvm::is_sorted(BroadcastFoldTable3) &&
424            std::adjacent_find(std::begin(BroadcastFoldTable3),
425                               std::end(BroadcastFoldTable3)) ==
426                std::end(BroadcastFoldTable3) &&
427            "BroadcastFoldTable3 is not sorted and unique!");
428     assert(llvm::is_sorted(BroadcastSizeFoldTable2) &&
429            std::adjacent_find(std::begin(BroadcastSizeFoldTable2),
430                               std::end(BroadcastSizeFoldTable2)) ==
431                std::end(BroadcastSizeFoldTable2) &&
432            "BroadcastSizeFoldTable2 is not sorted and unique!");
433     assert(llvm::is_sorted(BroadcastSizeFoldTable3) &&
434            std::adjacent_find(std::begin(BroadcastSizeFoldTable3),
435                               std::end(BroadcastSizeFoldTable3)) ==
436                std::end(BroadcastSizeFoldTable3) &&
437            "BroadcastSizeFoldTable3 is not sorted and unique!");
438     FoldTablesChecked.store(true, std::memory_order_relaxed);
439   }
440 #endif
441 
442   const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
443   if (Data != Table.end() && Data->KeyOp == RegOp &&
444       !(Data->Flags & TB_NO_FORWARD))
445     return Data;
446   return nullptr;
447 }
448 
449 const X86MemoryFoldTableEntry *
450 llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
451   return lookupFoldTableImpl(MemoryFoldTable2Addr, RegOp);
452 }
453 
454 const X86MemoryFoldTableEntry *
455 llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
456   ArrayRef<X86MemoryFoldTableEntry> FoldTable;
457   if (OpNum == 0)
458     FoldTable = ArrayRef(MemoryFoldTable0);
459   else if (OpNum == 1)
460     FoldTable = ArrayRef(MemoryFoldTable1);
461   else if (OpNum == 2)
462     FoldTable = ArrayRef(MemoryFoldTable2);
463   else if (OpNum == 3)
464     FoldTable = ArrayRef(MemoryFoldTable3);
465   else if (OpNum == 4)
466     FoldTable = ArrayRef(MemoryFoldTable4);
467   else
468     return nullptr;
469 
470   return lookupFoldTableImpl(FoldTable, RegOp);
471 }
472 
473 namespace {
474 
475 // This class stores the memory unfolding tables. It is instantiated as a
476 // function scope static variable to lazily init the unfolding table.
477 struct X86MemUnfoldTable {
478   // Stores memory unfolding tables entries sorted by opcode.
479   std::vector<X86MemoryFoldTableEntry> Table;
480 
481   X86MemUnfoldTable() {
482     for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2Addr)
483       // Index 0, folded load and store, no alignment requirement.
484       addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
485 
486     for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable0)
487       // Index 0, mix of loads and stores.
488       addTableEntry(Entry, TB_INDEX_0);
489 
490     for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable1)
491       // Index 1, folded load
492       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
493 
494     for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2)
495       // Index 2, folded load
496       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
497 
498     for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable3)
499       // Index 3, folded load
500       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
501 
502     for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable4)
503       // Index 4, folded load
504       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
505 
506     // Broadcast tables.
507     for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable2)
508       // Index 2, folded broadcast
509       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
510 
511     for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3)
512       // Index 3, folded broadcast
513       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
514 
515     // Sort the memory->reg unfold table.
516     array_pod_sort(Table.begin(), Table.end());
517 
518     // Now that it's sorted, ensure its unique.
519     assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
520            "Memory unfolding table is not unique!");
521   }
522 
523   void addTableEntry(const X86MemoryFoldTableEntry &Entry,
524                      uint16_t ExtraFlags) {
525     // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
526     if ((Entry.Flags & TB_NO_REVERSE) == 0)
527       Table.push_back({Entry.DstOp, Entry.KeyOp,
528                       static_cast<uint16_t>(Entry.Flags | ExtraFlags) });
529   }
530 };
531 }
532 
533 const X86MemoryFoldTableEntry *
534 llvm::lookupUnfoldTable(unsigned MemOp) {
535   static X86MemUnfoldTable MemUnfoldTable;
536   auto &Table = MemUnfoldTable.Table;
537   auto I = llvm::lower_bound(Table, MemOp);
538   if (I != Table.end() && I->KeyOp == MemOp)
539     return &*I;
540   return nullptr;
541 }
542 
543 namespace {
544 
545 // This class stores the memory -> broadcast folding tables. It is instantiated
546 // as a function scope static variable to lazily init the folding table.
547 struct X86MemBroadcastFoldTable {
548   // Stores memory broadcast folding tables entries sorted by opcode.
549   std::vector<X86MemoryFoldTableEntry> Table;
550 
551   X86MemBroadcastFoldTable() {
552     // Broadcast tables.
553     for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable2) {
554       unsigned RegOp = Reg2Bcst.KeyOp;
555       unsigned BcstOp = Reg2Bcst.DstOp;
556       if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
557         unsigned MemOp = Reg2Mem->DstOp;
558         uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
559                          TB_FOLDED_LOAD | TB_FOLDED_BCAST;
560         Table.push_back({MemOp, BcstOp, Flags});
561       }
562     }
563     for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable2) {
564       unsigned RegOp = Reg2Bcst.KeyOp;
565       unsigned BcstOp = Reg2Bcst.DstOp;
566       if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
567         unsigned MemOp = Reg2Mem->DstOp;
568         uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
569                          TB_FOLDED_LOAD | TB_FOLDED_BCAST;
570         Table.push_back({MemOp, BcstOp, Flags});
571       }
572     }
573 
574     for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable3) {
575       unsigned RegOp = Reg2Bcst.KeyOp;
576       unsigned BcstOp = Reg2Bcst.DstOp;
577       if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
578         unsigned MemOp = Reg2Mem->DstOp;
579         uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
580                          TB_FOLDED_LOAD | TB_FOLDED_BCAST;
581         Table.push_back({MemOp, BcstOp, Flags});
582       }
583     }
584     for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable3) {
585       unsigned RegOp = Reg2Bcst.KeyOp;
586       unsigned BcstOp = Reg2Bcst.DstOp;
587       if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
588         unsigned MemOp = Reg2Mem->DstOp;
589         uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
590                          TB_FOLDED_LOAD | TB_FOLDED_BCAST;
591         Table.push_back({MemOp, BcstOp, Flags});
592       }
593     }
594 
595     // Sort the memory->broadcast fold table.
596     array_pod_sort(Table.begin(), Table.end());
597   }
598 };
599 } // namespace
600 
601 static bool matchBroadcastSize(const X86MemoryFoldTableEntry &Entry,
602                                unsigned BroadcastBits) {
603   switch (Entry.Flags & TB_BCAST_MASK) {
604   case TB_BCAST_SD:
605   case TB_BCAST_Q:
606     return BroadcastBits == 64;
607   case TB_BCAST_SS:
608   case TB_BCAST_D:
609     return BroadcastBits == 32;
610   }
611   return false;
612 }
613 
614 const X86MemoryFoldTableEntry *
615 llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) {
616   static X86MemBroadcastFoldTable MemBroadcastFoldTable;
617   auto &Table = MemBroadcastFoldTable.Table;
618   for (auto I = llvm::lower_bound(Table, MemOp);
619        I != Table.end() && I->KeyOp == MemOp; ++I) {
620     if (matchBroadcastSize(*I, BroadcastBits))
621       return &*I;
622   }
623   return nullptr;
624 }
625