1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DebugInfo.h"
19 #include "llvm/IR/DiagnosticInfo.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/InstVisitor.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/Intrinsics.h"
26 #include "llvm/IR/IntrinsicsAArch64.h"
27 #include "llvm/IR/IntrinsicsARM.h"
28 #include "llvm/IR/IntrinsicsX86.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/Verifier.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/Regex.h"
34 #include <cstring>
35 using namespace llvm;
36 
37 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
38 
39 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
40 // changed their type from v4f32 to v2i64.
41 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
42                                   Function *&NewFn) {
43   // Check whether this is an old version of the function, which received
44   // v4f32 arguments.
45   Type *Arg0Type = F->getFunctionType()->getParamType(0);
46   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
47     return false;
48 
49   // Yes, it's old, replace it with new version.
50   rename(F);
51   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
52   return true;
53 }
54 
55 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
56 // arguments have changed their type from i32 to i8.
57 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
58                                              Function *&NewFn) {
59   // Check that the last argument is an i32.
60   Type *LastArgType = F->getFunctionType()->getParamType(
61      F->getFunctionType()->getNumParams() - 1);
62   if (!LastArgType->isIntegerTy(32))
63     return false;
64 
65   // Move this function aside and map down.
66   rename(F);
67   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
68   return true;
69 }
70 
71 // Upgrade the declaration of fp compare intrinsics that change return type
72 // from scalar to vXi1 mask.
73 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
74                                       Function *&NewFn) {
75   // Check if the return type is a vector.
76   if (F->getReturnType()->isVectorTy())
77     return false;
78 
79   rename(F);
80   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
81   return true;
82 }
83 
84 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
85   // All of the intrinsics matches below should be marked with which llvm
86   // version started autoupgrading them. At some point in the future we would
87   // like to use this information to remove upgrade code for some older
88   // intrinsics. It is currently undecided how we will determine that future
89   // point.
90   if (Name == "addcarryx.u32" || // Added in 8.0
91       Name == "addcarryx.u64" || // Added in 8.0
92       Name == "addcarry.u32" || // Added in 8.0
93       Name == "addcarry.u64" || // Added in 8.0
94       Name == "subborrow.u32" || // Added in 8.0
95       Name == "subborrow.u64" || // Added in 8.0
96       Name.startswith("sse2.padds.") || // Added in 8.0
97       Name.startswith("sse2.psubs.") || // Added in 8.0
98       Name.startswith("sse2.paddus.") || // Added in 8.0
99       Name.startswith("sse2.psubus.") || // Added in 8.0
100       Name.startswith("avx2.padds.") || // Added in 8.0
101       Name.startswith("avx2.psubs.") || // Added in 8.0
102       Name.startswith("avx2.paddus.") || // Added in 8.0
103       Name.startswith("avx2.psubus.") || // Added in 8.0
104       Name.startswith("avx512.padds.") || // Added in 8.0
105       Name.startswith("avx512.psubs.") || // Added in 8.0
106       Name.startswith("avx512.mask.padds.") || // Added in 8.0
107       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
108       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
109       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
110       Name=="ssse3.pabs.b.128" || // Added in 6.0
111       Name=="ssse3.pabs.w.128" || // Added in 6.0
112       Name=="ssse3.pabs.d.128" || // Added in 6.0
113       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
114       Name.startswith("fma.vfmadd.") || // Added in 7.0
115       Name.startswith("fma.vfmsub.") || // Added in 7.0
116       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
117       Name.startswith("fma.vfnmadd.") || // Added in 7.0
118       Name.startswith("fma.vfnmsub.") || // Added in 7.0
119       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
120       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
121       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
122       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
123       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
124       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
125       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
126       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
127       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
128       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
129       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
130       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
131       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
132       Name.startswith("avx512.kunpck") || //added in 6.0
133       Name.startswith("avx2.pabs.") || // Added in 6.0
134       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
135       Name.startswith("avx512.broadcastm") || // Added in 6.0
136       Name == "sse.sqrt.ss" || // Added in 7.0
137       Name == "sse2.sqrt.sd" || // Added in 7.0
138       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
139       Name.startswith("avx.sqrt.p") || // Added in 7.0
140       Name.startswith("sse2.sqrt.p") || // Added in 7.0
141       Name.startswith("sse.sqrt.p") || // Added in 7.0
142       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
143       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
144       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
145       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
146       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
147       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
148       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
149       Name.startswith("avx.vperm2f128.") || // Added in 6.0
150       Name == "avx2.vperm2i128" || // Added in 6.0
151       Name == "sse.add.ss" || // Added in 4.0
152       Name == "sse2.add.sd" || // Added in 4.0
153       Name == "sse.sub.ss" || // Added in 4.0
154       Name == "sse2.sub.sd" || // Added in 4.0
155       Name == "sse.mul.ss" || // Added in 4.0
156       Name == "sse2.mul.sd" || // Added in 4.0
157       Name == "sse.div.ss" || // Added in 4.0
158       Name == "sse2.div.sd" || // Added in 4.0
159       Name == "sse41.pmaxsb" || // Added in 3.9
160       Name == "sse2.pmaxs.w" || // Added in 3.9
161       Name == "sse41.pmaxsd" || // Added in 3.9
162       Name == "sse2.pmaxu.b" || // Added in 3.9
163       Name == "sse41.pmaxuw" || // Added in 3.9
164       Name == "sse41.pmaxud" || // Added in 3.9
165       Name == "sse41.pminsb" || // Added in 3.9
166       Name == "sse2.pmins.w" || // Added in 3.9
167       Name == "sse41.pminsd" || // Added in 3.9
168       Name == "sse2.pminu.b" || // Added in 3.9
169       Name == "sse41.pminuw" || // Added in 3.9
170       Name == "sse41.pminud" || // Added in 3.9
171       Name == "avx512.kand.w" || // Added in 7.0
172       Name == "avx512.kandn.w" || // Added in 7.0
173       Name == "avx512.knot.w" || // Added in 7.0
174       Name == "avx512.kor.w" || // Added in 7.0
175       Name == "avx512.kxor.w" || // Added in 7.0
176       Name == "avx512.kxnor.w" || // Added in 7.0
177       Name == "avx512.kortestc.w" || // Added in 7.0
178       Name == "avx512.kortestz.w" || // Added in 7.0
179       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
180       Name.startswith("avx2.pmax") || // Added in 3.9
181       Name.startswith("avx2.pmin") || // Added in 3.9
182       Name.startswith("avx512.mask.pmax") || // Added in 4.0
183       Name.startswith("avx512.mask.pmin") || // Added in 4.0
184       Name.startswith("avx2.vbroadcast") || // Added in 3.8
185       Name.startswith("avx2.pbroadcast") || // Added in 3.8
186       Name.startswith("avx.vpermil.") || // Added in 3.1
187       Name.startswith("sse2.pshuf") || // Added in 3.9
188       Name.startswith("avx512.pbroadcast") || // Added in 3.9
189       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
190       Name.startswith("avx512.mask.movddup") || // Added in 3.9
191       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
192       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
193       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
194       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
195       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
196       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
197       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
198       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
199       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
200       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
201       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
202       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
203       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
204       Name.startswith("avx512.mask.pand.") || // Added in 3.9
205       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
206       Name.startswith("avx512.mask.por.") || // Added in 3.9
207       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
208       Name.startswith("avx512.mask.and.") || // Added in 3.9
209       Name.startswith("avx512.mask.andn.") || // Added in 3.9
210       Name.startswith("avx512.mask.or.") || // Added in 3.9
211       Name.startswith("avx512.mask.xor.") || // Added in 3.9
212       Name.startswith("avx512.mask.padd.") || // Added in 4.0
213       Name.startswith("avx512.mask.psub.") || // Added in 4.0
214       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
215       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
216       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
217       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
218       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
219       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
220       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
221       Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
222       Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
223       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
224       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
225       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
226       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
227       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
228       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
229       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
230       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
231       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
232       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
233       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
234       Name == "avx512.cvtusi2sd" || // Added in 7.0
235       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
236       Name == "sse2.pmulu.dq" || // Added in 7.0
237       Name == "sse41.pmuldq" || // Added in 7.0
238       Name == "avx2.pmulu.dq" || // Added in 7.0
239       Name == "avx2.pmul.dq" || // Added in 7.0
240       Name == "avx512.pmulu.dq.512" || // Added in 7.0
241       Name == "avx512.pmul.dq.512" || // Added in 7.0
242       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
243       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
244       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
245       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
246       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
247       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
248       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
249       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
250       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
251       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
252       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
253       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
254       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
255       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
256       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
257       Name.startswith("avx512.cmp.p") || // Added in 12.0
258       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
259       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
260       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
261       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
262       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
263       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
264       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
265       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
266       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
267       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
268       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
269       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
270       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
271       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
272       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
273       Name.startswith("avx512.mask.pslli") || // Added in 4.0
274       Name.startswith("avx512.mask.psrai") || // Added in 4.0
275       Name.startswith("avx512.mask.psrli") || // Added in 4.0
276       Name.startswith("avx512.mask.psllv") || // Added in 4.0
277       Name.startswith("avx512.mask.psrav") || // Added in 4.0
278       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
279       Name.startswith("sse41.pmovsx") || // Added in 3.8
280       Name.startswith("sse41.pmovzx") || // Added in 3.9
281       Name.startswith("avx2.pmovsx") || // Added in 3.9
282       Name.startswith("avx2.pmovzx") || // Added in 3.9
283       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
284       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
285       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
286       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
287       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
288       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
289       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
290       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
291       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
292       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
293       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
294       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
295       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
296       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
297       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
298       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
299       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
300       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
301       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
302       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
303       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
304       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
305       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
306       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
307       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
308       Name.startswith("avx512.vpshld.") || // Added in 8.0
309       Name.startswith("avx512.vpshrd.") || // Added in 8.0
310       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
311       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
312       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
313       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
314       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
315       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
316       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
317       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
318       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
319       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
320       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
321       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
322       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
323       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
324       Name == "sse.cvtsi2ss" || // Added in 7.0
325       Name == "sse.cvtsi642ss" || // Added in 7.0
326       Name == "sse2.cvtsi2sd" || // Added in 7.0
327       Name == "sse2.cvtsi642sd" || // Added in 7.0
328       Name == "sse2.cvtss2sd" || // Added in 7.0
329       Name == "sse2.cvtdq2pd" || // Added in 3.9
330       Name == "sse2.cvtdq2ps" || // Added in 7.0
331       Name == "sse2.cvtps2pd" || // Added in 3.9
332       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
333       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
334       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
335       Name.startswith("vcvtph2ps.") || // Added in 11.0
336       Name.startswith("avx.vinsertf128.") || // Added in 3.7
337       Name == "avx2.vinserti128" || // Added in 3.7
338       Name.startswith("avx512.mask.insert") || // Added in 4.0
339       Name.startswith("avx.vextractf128.") || // Added in 3.7
340       Name == "avx2.vextracti128" || // Added in 3.7
341       Name.startswith("avx512.mask.vextract") || // Added in 4.0
342       Name.startswith("sse4a.movnt.") || // Added in 3.9
343       Name.startswith("avx.movnt.") || // Added in 3.2
344       Name.startswith("avx512.storent.") || // Added in 3.9
345       Name == "sse41.movntdqa" || // Added in 5.0
346       Name == "avx2.movntdqa" || // Added in 5.0
347       Name == "avx512.movntdqa" || // Added in 5.0
348       Name == "sse2.storel.dq" || // Added in 3.9
349       Name.startswith("sse.storeu.") || // Added in 3.9
350       Name.startswith("sse2.storeu.") || // Added in 3.9
351       Name.startswith("avx.storeu.") || // Added in 3.9
352       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
353       Name.startswith("avx512.mask.store.p") || // Added in 3.9
354       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
355       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
356       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
357       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
358       Name == "avx512.mask.store.ss" || // Added in 7.0
359       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
360       Name.startswith("avx512.mask.load.") || // Added in 3.9
361       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
362       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
363       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
364       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
365       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
366       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
367       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
368       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
369       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
370       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
371       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
372       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
373       Name == "sse42.crc32.64.8" || // Added in 3.4
374       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
375       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
376       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
377       Name.startswith("avx512.mask.valign.") || // Added in 4.0
378       Name.startswith("sse2.psll.dq") || // Added in 3.7
379       Name.startswith("sse2.psrl.dq") || // Added in 3.7
380       Name.startswith("avx2.psll.dq") || // Added in 3.7
381       Name.startswith("avx2.psrl.dq") || // Added in 3.7
382       Name.startswith("avx512.psll.dq") || // Added in 3.9
383       Name.startswith("avx512.psrl.dq") || // Added in 3.9
384       Name == "sse41.pblendw" || // Added in 3.7
385       Name.startswith("sse41.blendp") || // Added in 3.7
386       Name.startswith("avx.blend.p") || // Added in 3.7
387       Name == "avx2.pblendw" || // Added in 3.7
388       Name.startswith("avx2.pblendd.") || // Added in 3.7
389       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
390       Name == "avx2.vbroadcasti128" || // Added in 3.7
391       Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
392       Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
393       Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
394       Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
395       Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
396       Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
397       Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
398       Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
399       Name == "xop.vpcmov" || // Added in 3.8
400       Name == "xop.vpcmov.256" || // Added in 5.0
401       Name.startswith("avx512.mask.move.s") || // Added in 4.0
402       Name.startswith("avx512.cvtmask2") || // Added in 5.0
403       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
404       Name.startswith("xop.vprot") || // Added in 8.0
405       Name.startswith("avx512.prol") || // Added in 8.0
406       Name.startswith("avx512.pror") || // Added in 8.0
407       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
408       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
409       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
410       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
411       Name.startswith("avx512.ptestm") || //Added in 6.0
412       Name.startswith("avx512.ptestnm") || //Added in 6.0
413       Name.startswith("avx512.mask.pavg")) // Added in 6.0
414     return true;
415 
416   return false;
417 }
418 
419 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
420                                         Function *&NewFn) {
421   // Only handle intrinsics that start with "x86.".
422   if (!Name.startswith("x86."))
423     return false;
424   // Remove "x86." prefix.
425   Name = Name.substr(4);
426 
427   if (ShouldUpgradeX86Intrinsic(F, Name)) {
428     NewFn = nullptr;
429     return true;
430   }
431 
432   if (Name == "rdtscp") { // Added in 8.0
433     // If this intrinsic has 0 operands, it's the new version.
434     if (F->getFunctionType()->getNumParams() == 0)
435       return false;
436 
437     rename(F);
438     NewFn = Intrinsic::getDeclaration(F->getParent(),
439                                       Intrinsic::x86_rdtscp);
440     return true;
441   }
442 
443   // SSE4.1 ptest functions may have an old signature.
444   if (Name.startswith("sse41.ptest")) { // Added in 3.2
445     if (Name.substr(11) == "c")
446       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
447     if (Name.substr(11) == "z")
448       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
449     if (Name.substr(11) == "nzc")
450       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
451   }
452   // Several blend and other instructions with masks used the wrong number of
453   // bits.
454   if (Name == "sse41.insertps") // Added in 3.6
455     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
456                                             NewFn);
457   if (Name == "sse41.dppd") // Added in 3.6
458     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
459                                             NewFn);
460   if (Name == "sse41.dpps") // Added in 3.6
461     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
462                                             NewFn);
463   if (Name == "sse41.mpsadbw") // Added in 3.6
464     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
465                                             NewFn);
466   if (Name == "avx.dp.ps.256") // Added in 3.6
467     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
468                                             NewFn);
469   if (Name == "avx2.mpsadbw") // Added in 3.6
470     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
471                                             NewFn);
472   if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
473     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
474                                      NewFn);
475   if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
476     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
477                                      NewFn);
478   if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
479     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
480                                      NewFn);
481   if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
482     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
483                                      NewFn);
484   if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
485     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
486                                      NewFn);
487   if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
488     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
489                                      NewFn);
490 
491   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
492   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
493     rename(F);
494     NewFn = Intrinsic::getDeclaration(F->getParent(),
495                                       Intrinsic::x86_xop_vfrcz_ss);
496     return true;
497   }
498   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
499     rename(F);
500     NewFn = Intrinsic::getDeclaration(F->getParent(),
501                                       Intrinsic::x86_xop_vfrcz_sd);
502     return true;
503   }
504   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
505   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
506     auto Idx = F->getFunctionType()->getParamType(2);
507     if (Idx->isFPOrFPVectorTy()) {
508       rename(F);
509       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
510       unsigned EltSize = Idx->getScalarSizeInBits();
511       Intrinsic::ID Permil2ID;
512       if (EltSize == 64 && IdxSize == 128)
513         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
514       else if (EltSize == 32 && IdxSize == 128)
515         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
516       else if (EltSize == 64 && IdxSize == 256)
517         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
518       else
519         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
520       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
521       return true;
522     }
523   }
524 
525   if (Name == "seh.recoverfp") {
526     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
527     return true;
528   }
529 
530   return false;
531 }
532 
533 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
534   assert(F && "Illegal to upgrade a non-existent Function.");
535 
536   // Quickly eliminate it, if it's not a candidate.
537   StringRef Name = F->getName();
538   if (Name.size() <= 8 || !Name.startswith("llvm."))
539     return false;
540   Name = Name.substr(5); // Strip off "llvm."
541 
542   switch (Name[0]) {
543   default: break;
544   case 'a': {
545     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
546       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
547                                         F->arg_begin()->getType());
548       return true;
549     }
550     if (Name.startswith("aarch64.neon.frintn")) {
551       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
552                                         F->arg_begin()->getType());
553       return true;
554     }
555     if (Name.startswith("aarch64.neon.rbit")) {
556       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
557                                         F->arg_begin()->getType());
558       return true;
559     }
560     if (Name.startswith("arm.neon.vclz")) {
561       Type* args[2] = {
562         F->arg_begin()->getType(),
563         Type::getInt1Ty(F->getContext())
564       };
565       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
566       // the end of the name. Change name from llvm.arm.neon.vclz.* to
567       //  llvm.ctlz.*
568       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
569       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
570                                "llvm.ctlz." + Name.substr(14), F->getParent());
571       return true;
572     }
573     if (Name.startswith("arm.neon.vcnt")) {
574       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
575                                         F->arg_begin()->getType());
576       return true;
577     }
578     static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
579     if (vldRegex.match(Name)) {
580       auto fArgs = F->getFunctionType()->params();
581       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
582       // Can't use Intrinsic::getDeclaration here as the return types might
583       // then only be structurally equal.
584       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
585       StringRef Suffix =
586           F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
587       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
588                                "llvm." + Name + "." + Suffix, F->getParent());
589       return true;
590     }
591     static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
592     if (vstRegex.match(Name)) {
593       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
594                                                 Intrinsic::arm_neon_vst2,
595                                                 Intrinsic::arm_neon_vst3,
596                                                 Intrinsic::arm_neon_vst4};
597 
598       static const Intrinsic::ID StoreLaneInts[] = {
599         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
600         Intrinsic::arm_neon_vst4lane
601       };
602 
603       auto fArgs = F->getFunctionType()->params();
604       Type *Tys[] = {fArgs[0], fArgs[1]};
605       if (!Name.contains("lane"))
606         NewFn = Intrinsic::getDeclaration(F->getParent(),
607                                           StoreInts[fArgs.size() - 3], Tys);
608       else
609         NewFn = Intrinsic::getDeclaration(F->getParent(),
610                                           StoreLaneInts[fArgs.size() - 5], Tys);
611       return true;
612     }
613     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
614       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
615       return true;
616     }
617     if (Name.startswith("arm.neon.vqadds.")) {
618       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
619                                         F->arg_begin()->getType());
620       return true;
621     }
622     if (Name.startswith("arm.neon.vqaddu.")) {
623       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
624                                         F->arg_begin()->getType());
625       return true;
626     }
627     if (Name.startswith("arm.neon.vqsubs.")) {
628       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
629                                         F->arg_begin()->getType());
630       return true;
631     }
632     if (Name.startswith("arm.neon.vqsubu.")) {
633       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
634                                         F->arg_begin()->getType());
635       return true;
636     }
637     if (Name.startswith("aarch64.neon.addp")) {
638       if (F->arg_size() != 2)
639         break; // Invalid IR.
640       VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
641       if (Ty && Ty->getElementType()->isFloatingPointTy()) {
642         NewFn = Intrinsic::getDeclaration(F->getParent(),
643                                           Intrinsic::aarch64_neon_faddp, Ty);
644         return true;
645       }
646     }
647 
648     // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
649     // respectively
650     if ((Name.startswith("arm.neon.bfdot.") ||
651          Name.startswith("aarch64.neon.bfdot.")) &&
652         Name.endswith("i8")) {
653       Intrinsic::ID IID =
654           StringSwitch<Intrinsic::ID>(Name)
655               .Cases("arm.neon.bfdot.v2f32.v8i8",
656                      "arm.neon.bfdot.v4f32.v16i8",
657                      Intrinsic::arm_neon_bfdot)
658               .Cases("aarch64.neon.bfdot.v2f32.v8i8",
659                      "aarch64.neon.bfdot.v4f32.v16i8",
660                      Intrinsic::aarch64_neon_bfdot)
661               .Default(Intrinsic::not_intrinsic);
662       if (IID == Intrinsic::not_intrinsic)
663         break;
664 
665       size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
666       assert((OperandWidth == 64 || OperandWidth == 128) &&
667              "Unexpected operand width");
668       LLVMContext &Ctx = F->getParent()->getContext();
669       std::array<Type *, 2> Tys {{
670         F->getReturnType(),
671         FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
672       }};
673       NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
674       return true;
675     }
676 
677     // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
678     // and accept v8bf16 instead of v16i8
679     if ((Name.startswith("arm.neon.bfm") ||
680          Name.startswith("aarch64.neon.bfm")) &&
681         Name.endswith(".v4f32.v16i8")) {
682       Intrinsic::ID IID =
683           StringSwitch<Intrinsic::ID>(Name)
684               .Case("arm.neon.bfmmla.v4f32.v16i8",
685                     Intrinsic::arm_neon_bfmmla)
686               .Case("arm.neon.bfmlalb.v4f32.v16i8",
687                     Intrinsic::arm_neon_bfmlalb)
688               .Case("arm.neon.bfmlalt.v4f32.v16i8",
689                     Intrinsic::arm_neon_bfmlalt)
690               .Case("aarch64.neon.bfmmla.v4f32.v16i8",
691                     Intrinsic::aarch64_neon_bfmmla)
692               .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
693                     Intrinsic::aarch64_neon_bfmlalb)
694               .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
695                     Intrinsic::aarch64_neon_bfmlalt)
696               .Default(Intrinsic::not_intrinsic);
697       if (IID == Intrinsic::not_intrinsic)
698         break;
699 
700       std::array<Type *, 0> Tys;
701       NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
702       return true;
703     }
704 
705     if (Name == "arm.mve.vctp64" &&
706         cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
707       // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
708       // function and deal with it below in UpgradeIntrinsicCall.
709       rename(F);
710       return true;
711     }
712     // These too are changed to accept a v2i1 insteead of the old v4i1.
713     if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
714         Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
715         Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
716         Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
717         Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
718         Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
719         Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
720         Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
721         Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
722         Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
723         Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
724         Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
725         Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
726         Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
727       return true;
728 
729     if (Name == "amdgcn.alignbit") {
730       // Target specific intrinsic became redundant
731       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
732                                         {F->getReturnType()});
733       return true;
734     }
735 
736     break;
737   }
738 
739   case 'c': {
740     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
741       rename(F);
742       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
743                                         F->arg_begin()->getType());
744       return true;
745     }
746     if (Name.startswith("cttz.") && F->arg_size() == 1) {
747       rename(F);
748       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
749                                         F->arg_begin()->getType());
750       return true;
751     }
752     break;
753   }
754   case 'd': {
755     if (Name == "dbg.value" && F->arg_size() == 4) {
756       rename(F);
757       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
758       return true;
759     }
760     break;
761   }
762   case 'e': {
763     SmallVector<StringRef, 2> Groups;
764     static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
765     if (R.match(Name, &Groups)) {
766       Intrinsic::ID ID;
767       ID = StringSwitch<Intrinsic::ID>(Groups[1])
768                .Case("add", Intrinsic::vector_reduce_add)
769                .Case("mul", Intrinsic::vector_reduce_mul)
770                .Case("and", Intrinsic::vector_reduce_and)
771                .Case("or", Intrinsic::vector_reduce_or)
772                .Case("xor", Intrinsic::vector_reduce_xor)
773                .Case("smax", Intrinsic::vector_reduce_smax)
774                .Case("smin", Intrinsic::vector_reduce_smin)
775                .Case("umax", Intrinsic::vector_reduce_umax)
776                .Case("umin", Intrinsic::vector_reduce_umin)
777                .Case("fmax", Intrinsic::vector_reduce_fmax)
778                .Case("fmin", Intrinsic::vector_reduce_fmin)
779                .Default(Intrinsic::not_intrinsic);
780       if (ID != Intrinsic::not_intrinsic) {
781         rename(F);
782         auto Args = F->getFunctionType()->params();
783         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
784         return true;
785       }
786     }
787     static const Regex R2(
788         "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
789     Groups.clear();
790     if (R2.match(Name, &Groups)) {
791       Intrinsic::ID ID = Intrinsic::not_intrinsic;
792       if (Groups[1] == "fadd")
793         ID = Intrinsic::vector_reduce_fadd;
794       if (Groups[1] == "fmul")
795         ID = Intrinsic::vector_reduce_fmul;
796       if (ID != Intrinsic::not_intrinsic) {
797         rename(F);
798         auto Args = F->getFunctionType()->params();
799         Type *Tys[] = {Args[1]};
800         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
801         return true;
802       }
803     }
804     break;
805   }
806   case 'i':
807   case 'l': {
808     bool IsLifetimeStart = Name.startswith("lifetime.start");
809     if (IsLifetimeStart || Name.startswith("invariant.start")) {
810       Intrinsic::ID ID = IsLifetimeStart ?
811         Intrinsic::lifetime_start : Intrinsic::invariant_start;
812       auto Args = F->getFunctionType()->params();
813       Type* ObjectPtr[1] = {Args[1]};
814       if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
815         rename(F);
816         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
817         return true;
818       }
819     }
820 
821     bool IsLifetimeEnd = Name.startswith("lifetime.end");
822     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
823       Intrinsic::ID ID = IsLifetimeEnd ?
824         Intrinsic::lifetime_end : Intrinsic::invariant_end;
825 
826       auto Args = F->getFunctionType()->params();
827       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
828       if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
829         rename(F);
830         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
831         return true;
832       }
833     }
834     if (Name.startswith("invariant.group.barrier")) {
835       // Rename invariant.group.barrier to launder.invariant.group
836       auto Args = F->getFunctionType()->params();
837       Type* ObjectPtr[1] = {Args[0]};
838       rename(F);
839       NewFn = Intrinsic::getDeclaration(F->getParent(),
840           Intrinsic::launder_invariant_group, ObjectPtr);
841       return true;
842 
843     }
844 
845     break;
846   }
847   case 'm': {
848     if (Name.startswith("masked.load.")) {
849       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
850       if (F->getName() !=
851           Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
852         rename(F);
853         NewFn = Intrinsic::getDeclaration(F->getParent(),
854                                           Intrinsic::masked_load,
855                                           Tys);
856         return true;
857       }
858     }
859     if (Name.startswith("masked.store.")) {
860       auto Args = F->getFunctionType()->params();
861       Type *Tys[] = { Args[0], Args[1] };
862       if (F->getName() !=
863           Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
864         rename(F);
865         NewFn = Intrinsic::getDeclaration(F->getParent(),
866                                           Intrinsic::masked_store,
867                                           Tys);
868         return true;
869       }
870     }
871     // Renaming gather/scatter intrinsics with no address space overloading
872     // to the new overload which includes an address space
873     if (Name.startswith("masked.gather.")) {
874       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
875       if (F->getName() !=
876           Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
877         rename(F);
878         NewFn = Intrinsic::getDeclaration(F->getParent(),
879                                           Intrinsic::masked_gather, Tys);
880         return true;
881       }
882     }
883     if (Name.startswith("masked.scatter.")) {
884       auto Args = F->getFunctionType()->params();
885       Type *Tys[] = {Args[0], Args[1]};
886       if (F->getName() !=
887           Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
888         rename(F);
889         NewFn = Intrinsic::getDeclaration(F->getParent(),
890                                           Intrinsic::masked_scatter, Tys);
891         return true;
892       }
893     }
894     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
895     // alignment parameter to embedding the alignment as an attribute of
896     // the pointer args.
897     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
898       rename(F);
899       // Get the types of dest, src, and len
900       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
901       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
902                                         ParamTypes);
903       return true;
904     }
905     if (Name.startswith("memmove.") && F->arg_size() == 5) {
906       rename(F);
907       // Get the types of dest, src, and len
908       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
909       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
910                                         ParamTypes);
911       return true;
912     }
913     if (Name.startswith("memset.") && F->arg_size() == 5) {
914       rename(F);
915       // Get the types of dest, and len
916       const auto *FT = F->getFunctionType();
917       Type *ParamTypes[2] = {
918           FT->getParamType(0), // Dest
919           FT->getParamType(2)  // len
920       };
921       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
922                                         ParamTypes);
923       return true;
924     }
925     break;
926   }
927   case 'n': {
928     if (Name.startswith("nvvm.")) {
929       Name = Name.substr(5);
930 
931       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
932       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
933                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
934                               .Case("clz.i", Intrinsic::ctlz)
935                               .Case("popc.i", Intrinsic::ctpop)
936                               .Default(Intrinsic::not_intrinsic);
937       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
938         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
939                                           {F->getReturnType()});
940         return true;
941       }
942 
943       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
944       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
945       //
946       // TODO: We could add lohi.i2d.
947       bool Expand = StringSwitch<bool>(Name)
948                         .Cases("abs.i", "abs.ll", true)
949                         .Cases("clz.ll", "popc.ll", "h2f", true)
950                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
951                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
952                         .StartsWith("atomic.load.add.f32.p", true)
953                         .StartsWith("atomic.load.add.f64.p", true)
954                         .Default(false);
955       if (Expand) {
956         NewFn = nullptr;
957         return true;
958       }
959     }
960     break;
961   }
962   case 'o':
963     // We only need to change the name to match the mangling including the
964     // address space.
965     if (Name.startswith("objectsize.")) {
966       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
967       if (F->arg_size() == 2 || F->arg_size() == 3 ||
968           F->getName() !=
969               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
970         rename(F);
971         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
972                                           Tys);
973         return true;
974       }
975     }
976     break;
977 
978   case 'p':
979     if (Name == "prefetch") {
980       // Handle address space overloading.
981       Type *Tys[] = {F->arg_begin()->getType()};
982       if (F->getName() !=
983           Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
984         rename(F);
985         NewFn =
986             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
987         return true;
988       }
989     } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
990       rename(F);
991       NewFn = Intrinsic::getDeclaration(F->getParent(),
992                                         Intrinsic::ptr_annotation,
993                                         F->arg_begin()->getType());
994       return true;
995     }
996     break;
997 
998   case 's':
999     if (Name == "stackprotectorcheck") {
1000       NewFn = nullptr;
1001       return true;
1002     }
1003     break;
1004 
1005   case 'v': {
1006     if (Name == "var.annotation" && F->arg_size() == 4) {
1007       rename(F);
1008       NewFn = Intrinsic::getDeclaration(F->getParent(),
1009                                         Intrinsic::var_annotation);
1010       return true;
1011     }
1012     break;
1013   }
1014 
1015   case 'x':
1016     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1017       return true;
1018   }
1019   // Remangle our intrinsic since we upgrade the mangling
1020   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1021   if (Result != None) {
1022     NewFn = Result.getValue();
1023     return true;
1024   }
1025 
1026   //  This may not belong here. This function is effectively being overloaded
1027   //  to both detect an intrinsic which needs upgrading, and to provide the
1028   //  upgraded form of the intrinsic. We should perhaps have two separate
1029   //  functions for this.
1030   return false;
1031 }
1032 
1033 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1034   NewFn = nullptr;
1035   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1036   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1037 
1038   // Upgrade intrinsic attributes.  This does not change the function.
1039   if (NewFn)
1040     F = NewFn;
1041   if (Intrinsic::ID id = F->getIntrinsicID())
1042     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1043   return Upgraded;
1044 }
1045 
1046 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1047   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1048                           GV->getName() == "llvm.global_dtors")) ||
1049       !GV->hasInitializer())
1050     return nullptr;
1051   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1052   if (!ATy)
1053     return nullptr;
1054   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1055   if (!STy || STy->getNumElements() != 2)
1056     return nullptr;
1057 
1058   LLVMContext &C = GV->getContext();
1059   IRBuilder<> IRB(C);
1060   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1061                                IRB.getInt8PtrTy());
1062   Constant *Init = GV->getInitializer();
1063   unsigned N = Init->getNumOperands();
1064   std::vector<Constant *> NewCtors(N);
1065   for (unsigned i = 0; i != N; ++i) {
1066     auto Ctor = cast<Constant>(Init->getOperand(i));
1067     NewCtors[i] = ConstantStruct::get(
1068         EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1069         Constant::getNullValue(IRB.getInt8PtrTy()));
1070   }
1071   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1072 
1073   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1074                             NewInit, GV->getName());
1075 }
1076 
1077 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1078 // to byte shuffles.
1079 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1080                                          Value *Op, unsigned Shift) {
1081   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1082   unsigned NumElts = ResultTy->getNumElements() * 8;
1083 
1084   // Bitcast from a 64-bit element type to a byte element type.
1085   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1086   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1087 
1088   // We'll be shuffling in zeroes.
1089   Value *Res = Constant::getNullValue(VecTy);
1090 
1091   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1092   // we'll just return the zero vector.
1093   if (Shift < 16) {
1094     int Idxs[64];
1095     // 256/512-bit version is split into 2/4 16-byte lanes.
1096     for (unsigned l = 0; l != NumElts; l += 16)
1097       for (unsigned i = 0; i != 16; ++i) {
1098         unsigned Idx = NumElts + i - Shift;
1099         if (Idx < NumElts)
1100           Idx -= NumElts - 16; // end of lane, switch operand.
1101         Idxs[l + i] = Idx + l;
1102       }
1103 
1104     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1105   }
1106 
1107   // Bitcast back to a 64-bit element type.
1108   return Builder.CreateBitCast(Res, ResultTy, "cast");
1109 }
1110 
1111 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1112 // to byte shuffles.
1113 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1114                                          unsigned Shift) {
1115   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1116   unsigned NumElts = ResultTy->getNumElements() * 8;
1117 
1118   // Bitcast from a 64-bit element type to a byte element type.
1119   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1120   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1121 
1122   // We'll be shuffling in zeroes.
1123   Value *Res = Constant::getNullValue(VecTy);
1124 
1125   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1126   // we'll just return the zero vector.
1127   if (Shift < 16) {
1128     int Idxs[64];
1129     // 256/512-bit version is split into 2/4 16-byte lanes.
1130     for (unsigned l = 0; l != NumElts; l += 16)
1131       for (unsigned i = 0; i != 16; ++i) {
1132         unsigned Idx = i + Shift;
1133         if (Idx >= 16)
1134           Idx += NumElts - 16; // end of lane, switch operand.
1135         Idxs[l + i] = Idx + l;
1136       }
1137 
1138     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1139   }
1140 
1141   // Bitcast back to a 64-bit element type.
1142   return Builder.CreateBitCast(Res, ResultTy, "cast");
1143 }
1144 
1145 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1146                             unsigned NumElts) {
1147   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1148   llvm::VectorType *MaskTy = FixedVectorType::get(
1149       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1150   Mask = Builder.CreateBitCast(Mask, MaskTy);
1151 
1152   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1153   // i8 and we need to extract down to the right number of elements.
1154   if (NumElts <= 4) {
1155     int Indices[4];
1156     for (unsigned i = 0; i != NumElts; ++i)
1157       Indices[i] = i;
1158     Mask = Builder.CreateShuffleVector(
1159         Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1160   }
1161 
1162   return Mask;
1163 }
1164 
1165 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1166                             Value *Op0, Value *Op1) {
1167   // If the mask is all ones just emit the first operation.
1168   if (const auto *C = dyn_cast<Constant>(Mask))
1169     if (C->isAllOnesValue())
1170       return Op0;
1171 
1172   Mask = getX86MaskVec(Builder, Mask,
1173                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1174   return Builder.CreateSelect(Mask, Op0, Op1);
1175 }
1176 
1177 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1178                                   Value *Op0, Value *Op1) {
1179   // If the mask is all ones just emit the first operation.
1180   if (const auto *C = dyn_cast<Constant>(Mask))
1181     if (C->isAllOnesValue())
1182       return Op0;
1183 
1184   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1185                                       Mask->getType()->getIntegerBitWidth());
1186   Mask = Builder.CreateBitCast(Mask, MaskTy);
1187   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1188   return Builder.CreateSelect(Mask, Op0, Op1);
1189 }
1190 
1191 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1192 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1193 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1194 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1195                                         Value *Op1, Value *Shift,
1196                                         Value *Passthru, Value *Mask,
1197                                         bool IsVALIGN) {
1198   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1199 
1200   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1201   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1202   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1203   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1204 
1205   // Mask the immediate for VALIGN.
1206   if (IsVALIGN)
1207     ShiftVal &= (NumElts - 1);
1208 
1209   // If palignr is shifting the pair of vectors more than the size of two
1210   // lanes, emit zero.
1211   if (ShiftVal >= 32)
1212     return llvm::Constant::getNullValue(Op0->getType());
1213 
1214   // If palignr is shifting the pair of input vectors more than one lane,
1215   // but less than two lanes, convert to shifting in zeroes.
1216   if (ShiftVal > 16) {
1217     ShiftVal -= 16;
1218     Op1 = Op0;
1219     Op0 = llvm::Constant::getNullValue(Op0->getType());
1220   }
1221 
1222   int Indices[64];
1223   // 256-bit palignr operates on 128-bit lanes so we need to handle that
1224   for (unsigned l = 0; l < NumElts; l += 16) {
1225     for (unsigned i = 0; i != 16; ++i) {
1226       unsigned Idx = ShiftVal + i;
1227       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1228         Idx += NumElts - 16; // End of lane, switch operand.
1229       Indices[l + i] = Idx + l;
1230     }
1231   }
1232 
1233   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1234                                              makeArrayRef(Indices, NumElts),
1235                                              "palignr");
1236 
1237   return EmitX86Select(Builder, Mask, Align, Passthru);
1238 }
1239 
1240 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
1241                                           bool ZeroMask, bool IndexForm) {
1242   Type *Ty = CI.getType();
1243   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1244   unsigned EltWidth = Ty->getScalarSizeInBits();
1245   bool IsFloat = Ty->isFPOrFPVectorTy();
1246   Intrinsic::ID IID;
1247   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1248     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1249   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1250     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1251   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1252     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1253   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1254     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1255   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1256     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1257   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1258     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1259   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1260     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1261   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1262     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1263   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1264     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1265   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1266     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1267   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1268     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1269   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1270     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1271   else if (VecWidth == 128 && EltWidth == 16)
1272     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1273   else if (VecWidth == 256 && EltWidth == 16)
1274     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1275   else if (VecWidth == 512 && EltWidth == 16)
1276     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1277   else if (VecWidth == 128 && EltWidth == 8)
1278     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1279   else if (VecWidth == 256 && EltWidth == 8)
1280     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1281   else if (VecWidth == 512 && EltWidth == 8)
1282     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1283   else
1284     llvm_unreachable("Unexpected intrinsic");
1285 
1286   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1287                     CI.getArgOperand(2) };
1288 
1289   // If this isn't index form we need to swap operand 0 and 1.
1290   if (!IndexForm)
1291     std::swap(Args[0], Args[1]);
1292 
1293   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1294                                 Args);
1295   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1296                              : Builder.CreateBitCast(CI.getArgOperand(1),
1297                                                      Ty);
1298   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1299 }
1300 
1301 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1302                                          Intrinsic::ID IID) {
1303   Type *Ty = CI.getType();
1304   Value *Op0 = CI.getOperand(0);
1305   Value *Op1 = CI.getOperand(1);
1306   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1307   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1308 
1309   if (CI.arg_size() == 4) { // For masked intrinsics.
1310     Value *VecSrc = CI.getOperand(2);
1311     Value *Mask = CI.getOperand(3);
1312     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1313   }
1314   return Res;
1315 }
1316 
1317 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1318                                bool IsRotateRight) {
1319   Type *Ty = CI.getType();
1320   Value *Src = CI.getArgOperand(0);
1321   Value *Amt = CI.getArgOperand(1);
1322 
1323   // Amount may be scalar immediate, in which case create a splat vector.
1324   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1325   // we only care about the lowest log2 bits anyway.
1326   if (Amt->getType() != Ty) {
1327     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1328     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1329     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1330   }
1331 
1332   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1333   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1334   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1335 
1336   if (CI.arg_size() == 4) { // For masked intrinsics.
1337     Value *VecSrc = CI.getOperand(2);
1338     Value *Mask = CI.getOperand(3);
1339     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1340   }
1341   return Res;
1342 }
1343 
1344 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1345                               bool IsSigned) {
1346   Type *Ty = CI.getType();
1347   Value *LHS = CI.getArgOperand(0);
1348   Value *RHS = CI.getArgOperand(1);
1349 
1350   CmpInst::Predicate Pred;
1351   switch (Imm) {
1352   case 0x0:
1353     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1354     break;
1355   case 0x1:
1356     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1357     break;
1358   case 0x2:
1359     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1360     break;
1361   case 0x3:
1362     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1363     break;
1364   case 0x4:
1365     Pred = ICmpInst::ICMP_EQ;
1366     break;
1367   case 0x5:
1368     Pred = ICmpInst::ICMP_NE;
1369     break;
1370   case 0x6:
1371     return Constant::getNullValue(Ty); // FALSE
1372   case 0x7:
1373     return Constant::getAllOnesValue(Ty); // TRUE
1374   default:
1375     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1376   }
1377 
1378   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1379   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1380   return Ext;
1381 }
1382 
1383 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1384                                     bool IsShiftRight, bool ZeroMask) {
1385   Type *Ty = CI.getType();
1386   Value *Op0 = CI.getArgOperand(0);
1387   Value *Op1 = CI.getArgOperand(1);
1388   Value *Amt = CI.getArgOperand(2);
1389 
1390   if (IsShiftRight)
1391     std::swap(Op0, Op1);
1392 
1393   // Amount may be scalar immediate, in which case create a splat vector.
1394   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1395   // we only care about the lowest log2 bits anyway.
1396   if (Amt->getType() != Ty) {
1397     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1398     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1399     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1400   }
1401 
1402   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1403   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1404   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1405 
1406   unsigned NumArgs = CI.arg_size();
1407   if (NumArgs >= 4) { // For masked intrinsics.
1408     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1409                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1410                                    CI.getArgOperand(0);
1411     Value *Mask = CI.getOperand(NumArgs - 1);
1412     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1413   }
1414   return Res;
1415 }
1416 
1417 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1418                                  Value *Ptr, Value *Data, Value *Mask,
1419                                  bool Aligned) {
1420   // Cast the pointer to the right type.
1421   Ptr = Builder.CreateBitCast(Ptr,
1422                               llvm::PointerType::getUnqual(Data->getType()));
1423   const Align Alignment =
1424       Aligned
1425           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1426           : Align(1);
1427 
1428   // If the mask is all ones just emit a regular store.
1429   if (const auto *C = dyn_cast<Constant>(Mask))
1430     if (C->isAllOnesValue())
1431       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1432 
1433   // Convert the mask from an integer type to a vector of i1.
1434   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1435   Mask = getX86MaskVec(Builder, Mask, NumElts);
1436   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1437 }
1438 
1439 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1440                                 Value *Ptr, Value *Passthru, Value *Mask,
1441                                 bool Aligned) {
1442   Type *ValTy = Passthru->getType();
1443   // Cast the pointer to the right type.
1444   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1445   const Align Alignment =
1446       Aligned
1447           ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1448                   8)
1449           : Align(1);
1450 
1451   // If the mask is all ones just emit a regular store.
1452   if (const auto *C = dyn_cast<Constant>(Mask))
1453     if (C->isAllOnesValue())
1454       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1455 
1456   // Convert the mask from an integer type to a vector of i1.
1457   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1458   Mask = getX86MaskVec(Builder, Mask, NumElts);
1459   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1460 }
1461 
1462 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1463   Type *Ty = CI.getType();
1464   Value *Op0 = CI.getArgOperand(0);
1465   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1466   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1467   if (CI.arg_size() == 3)
1468     Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1469   return Res;
1470 }
1471 
1472 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1473   Type *Ty = CI.getType();
1474 
1475   // Arguments have a vXi32 type so cast to vXi64.
1476   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1477   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1478 
1479   if (IsSigned) {
1480     // Shift left then arithmetic shift right.
1481     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1482     LHS = Builder.CreateShl(LHS, ShiftAmt);
1483     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1484     RHS = Builder.CreateShl(RHS, ShiftAmt);
1485     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1486   } else {
1487     // Clear the upper bits.
1488     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1489     LHS = Builder.CreateAnd(LHS, Mask);
1490     RHS = Builder.CreateAnd(RHS, Mask);
1491   }
1492 
1493   Value *Res = Builder.CreateMul(LHS, RHS);
1494 
1495   if (CI.arg_size() == 4)
1496     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1497 
1498   return Res;
1499 }
1500 
1501 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1502 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1503                                      Value *Mask) {
1504   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1505   if (Mask) {
1506     const auto *C = dyn_cast<Constant>(Mask);
1507     if (!C || !C->isAllOnesValue())
1508       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1509   }
1510 
1511   if (NumElts < 8) {
1512     int Indices[8];
1513     for (unsigned i = 0; i != NumElts; ++i)
1514       Indices[i] = i;
1515     for (unsigned i = NumElts; i != 8; ++i)
1516       Indices[i] = NumElts + i % NumElts;
1517     Vec = Builder.CreateShuffleVector(Vec,
1518                                       Constant::getNullValue(Vec->getType()),
1519                                       Indices);
1520   }
1521   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1522 }
1523 
1524 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1525                                    unsigned CC, bool Signed) {
1526   Value *Op0 = CI.getArgOperand(0);
1527   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1528 
1529   Value *Cmp;
1530   if (CC == 3) {
1531     Cmp = Constant::getNullValue(
1532         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1533   } else if (CC == 7) {
1534     Cmp = Constant::getAllOnesValue(
1535         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1536   } else {
1537     ICmpInst::Predicate Pred;
1538     switch (CC) {
1539     default: llvm_unreachable("Unknown condition code");
1540     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1541     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1542     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1543     case 4: Pred = ICmpInst::ICMP_NE;  break;
1544     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1545     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1546     }
1547     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1548   }
1549 
1550   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1551 
1552   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1553 }
1554 
1555 // Replace a masked intrinsic with an older unmasked intrinsic.
1556 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1557                                     Intrinsic::ID IID) {
1558   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1559   Value *Rep = Builder.CreateCall(Intrin,
1560                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1561   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1562 }
1563 
1564 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1565   Value* A = CI.getArgOperand(0);
1566   Value* B = CI.getArgOperand(1);
1567   Value* Src = CI.getArgOperand(2);
1568   Value* Mask = CI.getArgOperand(3);
1569 
1570   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1571   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1572   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1573   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1574   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1575   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1576 }
1577 
1578 
1579 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1580   Value* Op = CI.getArgOperand(0);
1581   Type* ReturnOp = CI.getType();
1582   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1583   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1584   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1585 }
1586 
1587 // Replace intrinsic with unmasked version and a select.
1588 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1589                                       CallInst &CI, Value *&Rep) {
1590   Name = Name.substr(12); // Remove avx512.mask.
1591 
1592   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1593   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1594   Intrinsic::ID IID;
1595   if (Name.startswith("max.p")) {
1596     if (VecWidth == 128 && EltWidth == 32)
1597       IID = Intrinsic::x86_sse_max_ps;
1598     else if (VecWidth == 128 && EltWidth == 64)
1599       IID = Intrinsic::x86_sse2_max_pd;
1600     else if (VecWidth == 256 && EltWidth == 32)
1601       IID = Intrinsic::x86_avx_max_ps_256;
1602     else if (VecWidth == 256 && EltWidth == 64)
1603       IID = Intrinsic::x86_avx_max_pd_256;
1604     else
1605       llvm_unreachable("Unexpected intrinsic");
1606   } else if (Name.startswith("min.p")) {
1607     if (VecWidth == 128 && EltWidth == 32)
1608       IID = Intrinsic::x86_sse_min_ps;
1609     else if (VecWidth == 128 && EltWidth == 64)
1610       IID = Intrinsic::x86_sse2_min_pd;
1611     else if (VecWidth == 256 && EltWidth == 32)
1612       IID = Intrinsic::x86_avx_min_ps_256;
1613     else if (VecWidth == 256 && EltWidth == 64)
1614       IID = Intrinsic::x86_avx_min_pd_256;
1615     else
1616       llvm_unreachable("Unexpected intrinsic");
1617   } else if (Name.startswith("pshuf.b.")) {
1618     if (VecWidth == 128)
1619       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1620     else if (VecWidth == 256)
1621       IID = Intrinsic::x86_avx2_pshuf_b;
1622     else if (VecWidth == 512)
1623       IID = Intrinsic::x86_avx512_pshuf_b_512;
1624     else
1625       llvm_unreachable("Unexpected intrinsic");
1626   } else if (Name.startswith("pmul.hr.sw.")) {
1627     if (VecWidth == 128)
1628       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1629     else if (VecWidth == 256)
1630       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1631     else if (VecWidth == 512)
1632       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1633     else
1634       llvm_unreachable("Unexpected intrinsic");
1635   } else if (Name.startswith("pmulh.w.")) {
1636     if (VecWidth == 128)
1637       IID = Intrinsic::x86_sse2_pmulh_w;
1638     else if (VecWidth == 256)
1639       IID = Intrinsic::x86_avx2_pmulh_w;
1640     else if (VecWidth == 512)
1641       IID = Intrinsic::x86_avx512_pmulh_w_512;
1642     else
1643       llvm_unreachable("Unexpected intrinsic");
1644   } else if (Name.startswith("pmulhu.w.")) {
1645     if (VecWidth == 128)
1646       IID = Intrinsic::x86_sse2_pmulhu_w;
1647     else if (VecWidth == 256)
1648       IID = Intrinsic::x86_avx2_pmulhu_w;
1649     else if (VecWidth == 512)
1650       IID = Intrinsic::x86_avx512_pmulhu_w_512;
1651     else
1652       llvm_unreachable("Unexpected intrinsic");
1653   } else if (Name.startswith("pmaddw.d.")) {
1654     if (VecWidth == 128)
1655       IID = Intrinsic::x86_sse2_pmadd_wd;
1656     else if (VecWidth == 256)
1657       IID = Intrinsic::x86_avx2_pmadd_wd;
1658     else if (VecWidth == 512)
1659       IID = Intrinsic::x86_avx512_pmaddw_d_512;
1660     else
1661       llvm_unreachable("Unexpected intrinsic");
1662   } else if (Name.startswith("pmaddubs.w.")) {
1663     if (VecWidth == 128)
1664       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1665     else if (VecWidth == 256)
1666       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1667     else if (VecWidth == 512)
1668       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1669     else
1670       llvm_unreachable("Unexpected intrinsic");
1671   } else if (Name.startswith("packsswb.")) {
1672     if (VecWidth == 128)
1673       IID = Intrinsic::x86_sse2_packsswb_128;
1674     else if (VecWidth == 256)
1675       IID = Intrinsic::x86_avx2_packsswb;
1676     else if (VecWidth == 512)
1677       IID = Intrinsic::x86_avx512_packsswb_512;
1678     else
1679       llvm_unreachable("Unexpected intrinsic");
1680   } else if (Name.startswith("packssdw.")) {
1681     if (VecWidth == 128)
1682       IID = Intrinsic::x86_sse2_packssdw_128;
1683     else if (VecWidth == 256)
1684       IID = Intrinsic::x86_avx2_packssdw;
1685     else if (VecWidth == 512)
1686       IID = Intrinsic::x86_avx512_packssdw_512;
1687     else
1688       llvm_unreachable("Unexpected intrinsic");
1689   } else if (Name.startswith("packuswb.")) {
1690     if (VecWidth == 128)
1691       IID = Intrinsic::x86_sse2_packuswb_128;
1692     else if (VecWidth == 256)
1693       IID = Intrinsic::x86_avx2_packuswb;
1694     else if (VecWidth == 512)
1695       IID = Intrinsic::x86_avx512_packuswb_512;
1696     else
1697       llvm_unreachable("Unexpected intrinsic");
1698   } else if (Name.startswith("packusdw.")) {
1699     if (VecWidth == 128)
1700       IID = Intrinsic::x86_sse41_packusdw;
1701     else if (VecWidth == 256)
1702       IID = Intrinsic::x86_avx2_packusdw;
1703     else if (VecWidth == 512)
1704       IID = Intrinsic::x86_avx512_packusdw_512;
1705     else
1706       llvm_unreachable("Unexpected intrinsic");
1707   } else if (Name.startswith("vpermilvar.")) {
1708     if (VecWidth == 128 && EltWidth == 32)
1709       IID = Intrinsic::x86_avx_vpermilvar_ps;
1710     else if (VecWidth == 128 && EltWidth == 64)
1711       IID = Intrinsic::x86_avx_vpermilvar_pd;
1712     else if (VecWidth == 256 && EltWidth == 32)
1713       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1714     else if (VecWidth == 256 && EltWidth == 64)
1715       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1716     else if (VecWidth == 512 && EltWidth == 32)
1717       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1718     else if (VecWidth == 512 && EltWidth == 64)
1719       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1720     else
1721       llvm_unreachable("Unexpected intrinsic");
1722   } else if (Name == "cvtpd2dq.256") {
1723     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1724   } else if (Name == "cvtpd2ps.256") {
1725     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1726   } else if (Name == "cvttpd2dq.256") {
1727     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1728   } else if (Name == "cvttps2dq.128") {
1729     IID = Intrinsic::x86_sse2_cvttps2dq;
1730   } else if (Name == "cvttps2dq.256") {
1731     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1732   } else if (Name.startswith("permvar.")) {
1733     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1734     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1735       IID = Intrinsic::x86_avx2_permps;
1736     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1737       IID = Intrinsic::x86_avx2_permd;
1738     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1739       IID = Intrinsic::x86_avx512_permvar_df_256;
1740     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1741       IID = Intrinsic::x86_avx512_permvar_di_256;
1742     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1743       IID = Intrinsic::x86_avx512_permvar_sf_512;
1744     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1745       IID = Intrinsic::x86_avx512_permvar_si_512;
1746     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1747       IID = Intrinsic::x86_avx512_permvar_df_512;
1748     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1749       IID = Intrinsic::x86_avx512_permvar_di_512;
1750     else if (VecWidth == 128 && EltWidth == 16)
1751       IID = Intrinsic::x86_avx512_permvar_hi_128;
1752     else if (VecWidth == 256 && EltWidth == 16)
1753       IID = Intrinsic::x86_avx512_permvar_hi_256;
1754     else if (VecWidth == 512 && EltWidth == 16)
1755       IID = Intrinsic::x86_avx512_permvar_hi_512;
1756     else if (VecWidth == 128 && EltWidth == 8)
1757       IID = Intrinsic::x86_avx512_permvar_qi_128;
1758     else if (VecWidth == 256 && EltWidth == 8)
1759       IID = Intrinsic::x86_avx512_permvar_qi_256;
1760     else if (VecWidth == 512 && EltWidth == 8)
1761       IID = Intrinsic::x86_avx512_permvar_qi_512;
1762     else
1763       llvm_unreachable("Unexpected intrinsic");
1764   } else if (Name.startswith("dbpsadbw.")) {
1765     if (VecWidth == 128)
1766       IID = Intrinsic::x86_avx512_dbpsadbw_128;
1767     else if (VecWidth == 256)
1768       IID = Intrinsic::x86_avx512_dbpsadbw_256;
1769     else if (VecWidth == 512)
1770       IID = Intrinsic::x86_avx512_dbpsadbw_512;
1771     else
1772       llvm_unreachable("Unexpected intrinsic");
1773   } else if (Name.startswith("pmultishift.qb.")) {
1774     if (VecWidth == 128)
1775       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1776     else if (VecWidth == 256)
1777       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1778     else if (VecWidth == 512)
1779       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1780     else
1781       llvm_unreachable("Unexpected intrinsic");
1782   } else if (Name.startswith("conflict.")) {
1783     if (Name[9] == 'd' && VecWidth == 128)
1784       IID = Intrinsic::x86_avx512_conflict_d_128;
1785     else if (Name[9] == 'd' && VecWidth == 256)
1786       IID = Intrinsic::x86_avx512_conflict_d_256;
1787     else if (Name[9] == 'd' && VecWidth == 512)
1788       IID = Intrinsic::x86_avx512_conflict_d_512;
1789     else if (Name[9] == 'q' && VecWidth == 128)
1790       IID = Intrinsic::x86_avx512_conflict_q_128;
1791     else if (Name[9] == 'q' && VecWidth == 256)
1792       IID = Intrinsic::x86_avx512_conflict_q_256;
1793     else if (Name[9] == 'q' && VecWidth == 512)
1794       IID = Intrinsic::x86_avx512_conflict_q_512;
1795     else
1796       llvm_unreachable("Unexpected intrinsic");
1797   } else if (Name.startswith("pavg.")) {
1798     if (Name[5] == 'b' && VecWidth == 128)
1799       IID = Intrinsic::x86_sse2_pavg_b;
1800     else if (Name[5] == 'b' && VecWidth == 256)
1801       IID = Intrinsic::x86_avx2_pavg_b;
1802     else if (Name[5] == 'b' && VecWidth == 512)
1803       IID = Intrinsic::x86_avx512_pavg_b_512;
1804     else if (Name[5] == 'w' && VecWidth == 128)
1805       IID = Intrinsic::x86_sse2_pavg_w;
1806     else if (Name[5] == 'w' && VecWidth == 256)
1807       IID = Intrinsic::x86_avx2_pavg_w;
1808     else if (Name[5] == 'w' && VecWidth == 512)
1809       IID = Intrinsic::x86_avx512_pavg_w_512;
1810     else
1811       llvm_unreachable("Unexpected intrinsic");
1812   } else
1813     return false;
1814 
1815   SmallVector<Value *, 4> Args(CI.args());
1816   Args.pop_back();
1817   Args.pop_back();
1818   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1819                            Args);
1820   unsigned NumArgs = CI.arg_size();
1821   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1822                       CI.getArgOperand(NumArgs - 2));
1823   return true;
1824 }
1825 
1826 /// Upgrade comment in call to inline asm that represents an objc retain release
1827 /// marker.
1828 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1829   size_t Pos;
1830   if (AsmStr->find("mov\tfp") == 0 &&
1831       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1832       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1833     AsmStr->replace(Pos, 1, ";");
1834   }
1835 }
1836 
1837 static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F,
1838                                       IRBuilder<> &Builder) {
1839   if (Name == "mve.vctp64.old") {
1840     // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
1841     // correct type.
1842     Value *VCTP = Builder.CreateCall(
1843         Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
1844         CI->getArgOperand(0), CI->getName());
1845     Value *C1 = Builder.CreateCall(
1846         Intrinsic::getDeclaration(
1847             F->getParent(), Intrinsic::arm_mve_pred_v2i,
1848             {VectorType::get(Builder.getInt1Ty(), 2, false)}),
1849         VCTP);
1850     return Builder.CreateCall(
1851         Intrinsic::getDeclaration(
1852             F->getParent(), Intrinsic::arm_mve_pred_i2v,
1853             {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1854         C1);
1855   } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
1856              Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
1857              Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
1858              Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
1859              Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
1860              Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
1861              Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
1862              Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
1863              Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
1864              Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
1865              Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
1866              Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
1867              Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
1868              Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
1869     std::vector<Type *> Tys;
1870     unsigned ID = CI->getIntrinsicID();
1871     Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
1872     switch (ID) {
1873     case Intrinsic::arm_mve_mull_int_predicated:
1874     case Intrinsic::arm_mve_vqdmull_predicated:
1875     case Intrinsic::arm_mve_vldr_gather_base_predicated:
1876       Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
1877       break;
1878     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
1879     case Intrinsic::arm_mve_vstr_scatter_base_predicated:
1880     case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
1881       Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
1882              V2I1Ty};
1883       break;
1884     case Intrinsic::arm_mve_vldr_gather_offset_predicated:
1885       Tys = {CI->getType(), CI->getOperand(0)->getType(),
1886              CI->getOperand(1)->getType(), V2I1Ty};
1887       break;
1888     case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
1889       Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
1890              CI->getOperand(2)->getType(), V2I1Ty};
1891       break;
1892     case Intrinsic::arm_cde_vcx1q_predicated:
1893     case Intrinsic::arm_cde_vcx1qa_predicated:
1894     case Intrinsic::arm_cde_vcx2q_predicated:
1895     case Intrinsic::arm_cde_vcx2qa_predicated:
1896     case Intrinsic::arm_cde_vcx3q_predicated:
1897     case Intrinsic::arm_cde_vcx3qa_predicated:
1898       Tys = {CI->getOperand(1)->getType(), V2I1Ty};
1899       break;
1900     default:
1901       llvm_unreachable("Unhandled Intrinsic!");
1902     }
1903 
1904     std::vector<Value *> Ops;
1905     for (Value *Op : CI->args()) {
1906       Type *Ty = Op->getType();
1907       if (Ty->getScalarSizeInBits() == 1) {
1908         Value *C1 = Builder.CreateCall(
1909             Intrinsic::getDeclaration(
1910                 F->getParent(), Intrinsic::arm_mve_pred_v2i,
1911                 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1912             Op);
1913         Op = Builder.CreateCall(
1914             Intrinsic::getDeclaration(F->getParent(),
1915                                       Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
1916             C1);
1917       }
1918       Ops.push_back(Op);
1919     }
1920 
1921     Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1922     return Builder.CreateCall(Fn, Ops, CI->getName());
1923   }
1924   llvm_unreachable("Unknown function for ARM CallInst upgrade.");
1925 }
1926 
1927 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1928 /// provided to seamlessly integrate with existing context.
1929 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1930   Function *F = CI->getCalledFunction();
1931   LLVMContext &C = CI->getContext();
1932   IRBuilder<> Builder(C);
1933   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1934 
1935   assert(F && "Intrinsic call is not direct?");
1936 
1937   if (!NewFn) {
1938     // Get the Function's name.
1939     StringRef Name = F->getName();
1940 
1941     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1942     Name = Name.substr(5);
1943 
1944     bool IsX86 = Name.startswith("x86.");
1945     if (IsX86)
1946       Name = Name.substr(4);
1947     bool IsNVVM = Name.startswith("nvvm.");
1948     if (IsNVVM)
1949       Name = Name.substr(5);
1950     bool IsARM = Name.startswith("arm.");
1951     if (IsARM)
1952       Name = Name.substr(4);
1953 
1954     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1955       Module *M = F->getParent();
1956       SmallVector<Metadata *, 1> Elts;
1957       Elts.push_back(
1958           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1959       MDNode *Node = MDNode::get(C, Elts);
1960 
1961       Value *Arg0 = CI->getArgOperand(0);
1962       Value *Arg1 = CI->getArgOperand(1);
1963 
1964       // Nontemporal (unaligned) store of the 0'th element of the float/double
1965       // vector.
1966       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1967       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1968       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1969       Value *Extract =
1970           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1971 
1972       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1973       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1974 
1975       // Remove intrinsic.
1976       CI->eraseFromParent();
1977       return;
1978     }
1979 
1980     if (IsX86 && (Name.startswith("avx.movnt.") ||
1981                   Name.startswith("avx512.storent."))) {
1982       Module *M = F->getParent();
1983       SmallVector<Metadata *, 1> Elts;
1984       Elts.push_back(
1985           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1986       MDNode *Node = MDNode::get(C, Elts);
1987 
1988       Value *Arg0 = CI->getArgOperand(0);
1989       Value *Arg1 = CI->getArgOperand(1);
1990 
1991       // Convert the type of the pointer to a pointer to the stored type.
1992       Value *BC = Builder.CreateBitCast(Arg0,
1993                                         PointerType::getUnqual(Arg1->getType()),
1994                                         "cast");
1995       StoreInst *SI = Builder.CreateAlignedStore(
1996           Arg1, BC,
1997           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1998       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1999 
2000       // Remove intrinsic.
2001       CI->eraseFromParent();
2002       return;
2003     }
2004 
2005     if (IsX86 && Name == "sse2.storel.dq") {
2006       Value *Arg0 = CI->getArgOperand(0);
2007       Value *Arg1 = CI->getArgOperand(1);
2008 
2009       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2010       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2011       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2012       Value *BC = Builder.CreateBitCast(Arg0,
2013                                         PointerType::getUnqual(Elt->getType()),
2014                                         "cast");
2015       Builder.CreateAlignedStore(Elt, BC, Align(1));
2016 
2017       // Remove intrinsic.
2018       CI->eraseFromParent();
2019       return;
2020     }
2021 
2022     if (IsX86 && (Name.startswith("sse.storeu.") ||
2023                   Name.startswith("sse2.storeu.") ||
2024                   Name.startswith("avx.storeu."))) {
2025       Value *Arg0 = CI->getArgOperand(0);
2026       Value *Arg1 = CI->getArgOperand(1);
2027 
2028       Arg0 = Builder.CreateBitCast(Arg0,
2029                                    PointerType::getUnqual(Arg1->getType()),
2030                                    "cast");
2031       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2032 
2033       // Remove intrinsic.
2034       CI->eraseFromParent();
2035       return;
2036     }
2037 
2038     if (IsX86 && Name == "avx512.mask.store.ss") {
2039       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2040       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2041                          Mask, false);
2042 
2043       // Remove intrinsic.
2044       CI->eraseFromParent();
2045       return;
2046     }
2047 
2048     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2049       // "avx512.mask.storeu." or "avx512.mask.store."
2050       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2051       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2052                          CI->getArgOperand(2), Aligned);
2053 
2054       // Remove intrinsic.
2055       CI->eraseFromParent();
2056       return;
2057     }
2058 
2059     Value *Rep;
2060     // Upgrade packed integer vector compare intrinsics to compare instructions.
2061     if (IsX86 && (Name.startswith("sse2.pcmp") ||
2062                   Name.startswith("avx2.pcmp"))) {
2063       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2064       bool CmpEq = Name[9] == 'e';
2065       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2066                                CI->getArgOperand(0), CI->getArgOperand(1));
2067       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2068     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2069       Type *ExtTy = Type::getInt32Ty(C);
2070       if (CI->getOperand(0)->getType()->isIntegerTy(8))
2071         ExtTy = Type::getInt64Ty(C);
2072       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2073                          ExtTy->getPrimitiveSizeInBits();
2074       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2075       Rep = Builder.CreateVectorSplat(NumElts, Rep);
2076     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2077                          Name == "sse2.sqrt.sd")) {
2078       Value *Vec = CI->getArgOperand(0);
2079       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2080       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2081                                                  Intrinsic::sqrt, Elt0->getType());
2082       Elt0 = Builder.CreateCall(Intr, Elt0);
2083       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2084     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2085                          Name.startswith("sse2.sqrt.p") ||
2086                          Name.startswith("sse.sqrt.p"))) {
2087       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2088                                                          Intrinsic::sqrt,
2089                                                          CI->getType()),
2090                                {CI->getArgOperand(0)});
2091     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2092       if (CI->arg_size() == 4 &&
2093           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2094            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2095         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2096                                             : Intrinsic::x86_avx512_sqrt_pd_512;
2097 
2098         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2099         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2100                                                            IID), Args);
2101       } else {
2102         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2103                                                            Intrinsic::sqrt,
2104                                                            CI->getType()),
2105                                  {CI->getArgOperand(0)});
2106       }
2107       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2108                           CI->getArgOperand(1));
2109     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2110                          Name.startswith("avx512.ptestnm"))) {
2111       Value *Op0 = CI->getArgOperand(0);
2112       Value *Op1 = CI->getArgOperand(1);
2113       Value *Mask = CI->getArgOperand(2);
2114       Rep = Builder.CreateAnd(Op0, Op1);
2115       llvm::Type *Ty = Op0->getType();
2116       Value *Zero = llvm::Constant::getNullValue(Ty);
2117       ICmpInst::Predicate Pred =
2118         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2119       Rep = Builder.CreateICmp(Pred, Rep, Zero);
2120       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2121     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2122       unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2123                              ->getNumElements();
2124       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2125       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2126                           CI->getArgOperand(1));
2127     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2128       unsigned NumElts = CI->getType()->getScalarSizeInBits();
2129       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2130       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2131       int Indices[64];
2132       for (unsigned i = 0; i != NumElts; ++i)
2133         Indices[i] = i;
2134 
2135       // First extract half of each vector. This gives better codegen than
2136       // doing it in a single shuffle.
2137       LHS = Builder.CreateShuffleVector(LHS, LHS,
2138                                         makeArrayRef(Indices, NumElts / 2));
2139       RHS = Builder.CreateShuffleVector(RHS, RHS,
2140                                         makeArrayRef(Indices, NumElts / 2));
2141       // Concat the vectors.
2142       // NOTE: Operands have to be swapped to match intrinsic definition.
2143       Rep = Builder.CreateShuffleVector(RHS, LHS,
2144                                         makeArrayRef(Indices, NumElts));
2145       Rep = Builder.CreateBitCast(Rep, CI->getType());
2146     } else if (IsX86 && Name == "avx512.kand.w") {
2147       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2148       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2149       Rep = Builder.CreateAnd(LHS, RHS);
2150       Rep = Builder.CreateBitCast(Rep, CI->getType());
2151     } else if (IsX86 && Name == "avx512.kandn.w") {
2152       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2153       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2154       LHS = Builder.CreateNot(LHS);
2155       Rep = Builder.CreateAnd(LHS, RHS);
2156       Rep = Builder.CreateBitCast(Rep, CI->getType());
2157     } else if (IsX86 && Name == "avx512.kor.w") {
2158       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2159       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2160       Rep = Builder.CreateOr(LHS, RHS);
2161       Rep = Builder.CreateBitCast(Rep, CI->getType());
2162     } else if (IsX86 && Name == "avx512.kxor.w") {
2163       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2164       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2165       Rep = Builder.CreateXor(LHS, RHS);
2166       Rep = Builder.CreateBitCast(Rep, CI->getType());
2167     } else if (IsX86 && Name == "avx512.kxnor.w") {
2168       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2169       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2170       LHS = Builder.CreateNot(LHS);
2171       Rep = Builder.CreateXor(LHS, RHS);
2172       Rep = Builder.CreateBitCast(Rep, CI->getType());
2173     } else if (IsX86 && Name == "avx512.knot.w") {
2174       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2175       Rep = Builder.CreateNot(Rep);
2176       Rep = Builder.CreateBitCast(Rep, CI->getType());
2177     } else if (IsX86 &&
2178                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2179       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2180       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2181       Rep = Builder.CreateOr(LHS, RHS);
2182       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2183       Value *C;
2184       if (Name[14] == 'c')
2185         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2186       else
2187         C = ConstantInt::getNullValue(Builder.getInt16Ty());
2188       Rep = Builder.CreateICmpEQ(Rep, C);
2189       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2190     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2191                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2192                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2193                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2194       Type *I32Ty = Type::getInt32Ty(C);
2195       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2196                                                  ConstantInt::get(I32Ty, 0));
2197       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2198                                                  ConstantInt::get(I32Ty, 0));
2199       Value *EltOp;
2200       if (Name.contains(".add."))
2201         EltOp = Builder.CreateFAdd(Elt0, Elt1);
2202       else if (Name.contains(".sub."))
2203         EltOp = Builder.CreateFSub(Elt0, Elt1);
2204       else if (Name.contains(".mul."))
2205         EltOp = Builder.CreateFMul(Elt0, Elt1);
2206       else
2207         EltOp = Builder.CreateFDiv(Elt0, Elt1);
2208       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2209                                         ConstantInt::get(I32Ty, 0));
2210     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2211       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2212       bool CmpEq = Name[16] == 'e';
2213       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2214     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2215       Type *OpTy = CI->getArgOperand(0)->getType();
2216       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2217       Intrinsic::ID IID;
2218       switch (VecWidth) {
2219       default: llvm_unreachable("Unexpected intrinsic");
2220       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2221       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2222       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2223       }
2224 
2225       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2226                                { CI->getOperand(0), CI->getArgOperand(1) });
2227       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2228     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2229       Type *OpTy = CI->getArgOperand(0)->getType();
2230       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2231       unsigned EltWidth = OpTy->getScalarSizeInBits();
2232       Intrinsic::ID IID;
2233       if (VecWidth == 128 && EltWidth == 32)
2234         IID = Intrinsic::x86_avx512_fpclass_ps_128;
2235       else if (VecWidth == 256 && EltWidth == 32)
2236         IID = Intrinsic::x86_avx512_fpclass_ps_256;
2237       else if (VecWidth == 512 && EltWidth == 32)
2238         IID = Intrinsic::x86_avx512_fpclass_ps_512;
2239       else if (VecWidth == 128 && EltWidth == 64)
2240         IID = Intrinsic::x86_avx512_fpclass_pd_128;
2241       else if (VecWidth == 256 && EltWidth == 64)
2242         IID = Intrinsic::x86_avx512_fpclass_pd_256;
2243       else if (VecWidth == 512 && EltWidth == 64)
2244         IID = Intrinsic::x86_avx512_fpclass_pd_512;
2245       else
2246         llvm_unreachable("Unexpected intrinsic");
2247 
2248       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2249                                { CI->getOperand(0), CI->getArgOperand(1) });
2250       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2251     } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2252       SmallVector<Value *, 4> Args(CI->args());
2253       Type *OpTy = Args[0]->getType();
2254       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2255       unsigned EltWidth = OpTy->getScalarSizeInBits();
2256       Intrinsic::ID IID;
2257       if (VecWidth == 128 && EltWidth == 32)
2258         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2259       else if (VecWidth == 256 && EltWidth == 32)
2260         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2261       else if (VecWidth == 512 && EltWidth == 32)
2262         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2263       else if (VecWidth == 128 && EltWidth == 64)
2264         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2265       else if (VecWidth == 256 && EltWidth == 64)
2266         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2267       else if (VecWidth == 512 && EltWidth == 64)
2268         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2269       else
2270         llvm_unreachable("Unexpected intrinsic");
2271 
2272       Value *Mask = Constant::getAllOnesValue(CI->getType());
2273       if (VecWidth == 512)
2274         std::swap(Mask, Args.back());
2275       Args.push_back(Mask);
2276 
2277       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2278                                Args);
2279     } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2280       // Integer compare intrinsics.
2281       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2282       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2283     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2284       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2285       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2286     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2287                          Name.startswith("avx512.cvtw2mask.") ||
2288                          Name.startswith("avx512.cvtd2mask.") ||
2289                          Name.startswith("avx512.cvtq2mask."))) {
2290       Value *Op = CI->getArgOperand(0);
2291       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2292       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2293       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2294     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2295                         Name == "ssse3.pabs.w.128" ||
2296                         Name == "ssse3.pabs.d.128" ||
2297                         Name.startswith("avx2.pabs") ||
2298                         Name.startswith("avx512.mask.pabs"))) {
2299       Rep = upgradeAbs(Builder, *CI);
2300     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2301                          Name == "sse2.pmaxs.w" ||
2302                          Name == "sse41.pmaxsd" ||
2303                          Name.startswith("avx2.pmaxs") ||
2304                          Name.startswith("avx512.mask.pmaxs"))) {
2305       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2306     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2307                          Name == "sse41.pmaxuw" ||
2308                          Name == "sse41.pmaxud" ||
2309                          Name.startswith("avx2.pmaxu") ||
2310                          Name.startswith("avx512.mask.pmaxu"))) {
2311       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2312     } else if (IsX86 && (Name == "sse41.pminsb" ||
2313                          Name == "sse2.pmins.w" ||
2314                          Name == "sse41.pminsd" ||
2315                          Name.startswith("avx2.pmins") ||
2316                          Name.startswith("avx512.mask.pmins"))) {
2317       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2318     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2319                          Name == "sse41.pminuw" ||
2320                          Name == "sse41.pminud" ||
2321                          Name.startswith("avx2.pminu") ||
2322                          Name.startswith("avx512.mask.pminu"))) {
2323       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2324     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2325                          Name == "avx2.pmulu.dq" ||
2326                          Name == "avx512.pmulu.dq.512" ||
2327                          Name.startswith("avx512.mask.pmulu.dq."))) {
2328       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2329     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2330                          Name == "avx2.pmul.dq" ||
2331                          Name == "avx512.pmul.dq.512" ||
2332                          Name.startswith("avx512.mask.pmul.dq."))) {
2333       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2334     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2335                          Name == "sse2.cvtsi2sd" ||
2336                          Name == "sse.cvtsi642ss" ||
2337                          Name == "sse2.cvtsi642sd")) {
2338       Rep = Builder.CreateSIToFP(
2339           CI->getArgOperand(1),
2340           cast<VectorType>(CI->getType())->getElementType());
2341       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2342     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2343       Rep = Builder.CreateUIToFP(
2344           CI->getArgOperand(1),
2345           cast<VectorType>(CI->getType())->getElementType());
2346       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2347     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2348       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2349       Rep = Builder.CreateFPExt(
2350           Rep, cast<VectorType>(CI->getType())->getElementType());
2351       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2352     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2353                          Name == "sse2.cvtdq2ps" ||
2354                          Name == "avx.cvtdq2.pd.256" ||
2355                          Name == "avx.cvtdq2.ps.256" ||
2356                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2357                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2358                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2359                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2360                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2361                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2362                          Name == "avx512.mask.cvtqq2ps.256" ||
2363                          Name == "avx512.mask.cvtqq2ps.512" ||
2364                          Name == "avx512.mask.cvtuqq2ps.256" ||
2365                          Name == "avx512.mask.cvtuqq2ps.512" ||
2366                          Name == "sse2.cvtps2pd" ||
2367                          Name == "avx.cvt.ps2.pd.256" ||
2368                          Name == "avx512.mask.cvtps2pd.128" ||
2369                          Name == "avx512.mask.cvtps2pd.256")) {
2370       auto *DstTy = cast<FixedVectorType>(CI->getType());
2371       Rep = CI->getArgOperand(0);
2372       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2373 
2374       unsigned NumDstElts = DstTy->getNumElements();
2375       if (NumDstElts < SrcTy->getNumElements()) {
2376         assert(NumDstElts == 2 && "Unexpected vector size");
2377         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2378       }
2379 
2380       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2381       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2382       if (IsPS2PD)
2383         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2384       else if (CI->arg_size() == 4 &&
2385                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2386                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2387         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2388                                        : Intrinsic::x86_avx512_sitofp_round;
2389         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2390                                                 { DstTy, SrcTy });
2391         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2392       } else {
2393         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2394                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2395       }
2396 
2397       if (CI->arg_size() >= 3)
2398         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2399                             CI->getArgOperand(1));
2400     } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2401                          Name.startswith("vcvtph2ps."))) {
2402       auto *DstTy = cast<FixedVectorType>(CI->getType());
2403       Rep = CI->getArgOperand(0);
2404       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2405       unsigned NumDstElts = DstTy->getNumElements();
2406       if (NumDstElts != SrcTy->getNumElements()) {
2407         assert(NumDstElts == 4 && "Unexpected vector size");
2408         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2409       }
2410       Rep = Builder.CreateBitCast(
2411           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2412       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2413       if (CI->arg_size() >= 3)
2414         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2415                             CI->getArgOperand(1));
2416     } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2417       // "avx512.mask.loadu." or "avx512.mask.load."
2418       bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2419       Rep =
2420           UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2421                             CI->getArgOperand(2), Aligned);
2422     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2423       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2424       Type *PtrTy = ResultTy->getElementType();
2425 
2426       // Cast the pointer to element type.
2427       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2428                                          llvm::PointerType::getUnqual(PtrTy));
2429 
2430       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2431                                      ResultTy->getNumElements());
2432 
2433       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2434                                                 Intrinsic::masked_expandload,
2435                                                 ResultTy);
2436       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2437     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2438       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2439       Type *PtrTy = ResultTy->getElementType();
2440 
2441       // Cast the pointer to element type.
2442       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2443                                          llvm::PointerType::getUnqual(PtrTy));
2444 
2445       Value *MaskVec =
2446           getX86MaskVec(Builder, CI->getArgOperand(2),
2447                         cast<FixedVectorType>(ResultTy)->getNumElements());
2448 
2449       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2450                                                 Intrinsic::masked_compressstore,
2451                                                 ResultTy);
2452       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2453     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2454                          Name.startswith("avx512.mask.expand."))) {
2455       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2456 
2457       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2458                                      ResultTy->getNumElements());
2459 
2460       bool IsCompress = Name[12] == 'c';
2461       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2462                                      : Intrinsic::x86_avx512_mask_expand;
2463       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2464       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2465                                        MaskVec });
2466     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2467       bool IsSigned;
2468       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2469           Name.endswith("uq"))
2470         IsSigned = false;
2471       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2472                Name.endswith("q"))
2473         IsSigned = true;
2474       else
2475         llvm_unreachable("Unknown suffix");
2476 
2477       unsigned Imm;
2478       if (CI->arg_size() == 3) {
2479         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2480       } else {
2481         Name = Name.substr(9); // strip off "xop.vpcom"
2482         if (Name.startswith("lt"))
2483           Imm = 0;
2484         else if (Name.startswith("le"))
2485           Imm = 1;
2486         else if (Name.startswith("gt"))
2487           Imm = 2;
2488         else if (Name.startswith("ge"))
2489           Imm = 3;
2490         else if (Name.startswith("eq"))
2491           Imm = 4;
2492         else if (Name.startswith("ne"))
2493           Imm = 5;
2494         else if (Name.startswith("false"))
2495           Imm = 6;
2496         else if (Name.startswith("true"))
2497           Imm = 7;
2498         else
2499           llvm_unreachable("Unknown condition");
2500       }
2501 
2502       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2503     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2504       Value *Sel = CI->getArgOperand(2);
2505       Value *NotSel = Builder.CreateNot(Sel);
2506       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2507       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2508       Rep = Builder.CreateOr(Sel0, Sel1);
2509     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2510                          Name.startswith("avx512.prol") ||
2511                          Name.startswith("avx512.mask.prol"))) {
2512       Rep = upgradeX86Rotate(Builder, *CI, false);
2513     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2514                          Name.startswith("avx512.mask.pror"))) {
2515       Rep = upgradeX86Rotate(Builder, *CI, true);
2516     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2517                          Name.startswith("avx512.mask.vpshld") ||
2518                          Name.startswith("avx512.maskz.vpshld"))) {
2519       bool ZeroMask = Name[11] == 'z';
2520       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2521     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2522                          Name.startswith("avx512.mask.vpshrd") ||
2523                          Name.startswith("avx512.maskz.vpshrd"))) {
2524       bool ZeroMask = Name[11] == 'z';
2525       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2526     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2527       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2528                                                Intrinsic::x86_sse42_crc32_32_8);
2529       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2530       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2531       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2532     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2533                          Name.startswith("avx512.vbroadcast.s"))) {
2534       // Replace broadcasts with a series of insertelements.
2535       auto *VecTy = cast<FixedVectorType>(CI->getType());
2536       Type *EltTy = VecTy->getElementType();
2537       unsigned EltNum = VecTy->getNumElements();
2538       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2539                                           EltTy->getPointerTo());
2540       Value *Load = Builder.CreateLoad(EltTy, Cast);
2541       Type *I32Ty = Type::getInt32Ty(C);
2542       Rep = PoisonValue::get(VecTy);
2543       for (unsigned I = 0; I < EltNum; ++I)
2544         Rep = Builder.CreateInsertElement(Rep, Load,
2545                                           ConstantInt::get(I32Ty, I));
2546     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2547                          Name.startswith("sse41.pmovzx") ||
2548                          Name.startswith("avx2.pmovsx") ||
2549                          Name.startswith("avx2.pmovzx") ||
2550                          Name.startswith("avx512.mask.pmovsx") ||
2551                          Name.startswith("avx512.mask.pmovzx"))) {
2552       auto *DstTy = cast<FixedVectorType>(CI->getType());
2553       unsigned NumDstElts = DstTy->getNumElements();
2554 
2555       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2556       SmallVector<int, 8> ShuffleMask(NumDstElts);
2557       for (unsigned i = 0; i != NumDstElts; ++i)
2558         ShuffleMask[i] = i;
2559 
2560       Value *SV =
2561           Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2562 
2563       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2564       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2565                    : Builder.CreateZExt(SV, DstTy);
2566       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2567       if (CI->arg_size() == 3)
2568         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2569                             CI->getArgOperand(1));
2570     } else if (Name == "avx512.mask.pmov.qd.256" ||
2571                Name == "avx512.mask.pmov.qd.512" ||
2572                Name == "avx512.mask.pmov.wb.256" ||
2573                Name == "avx512.mask.pmov.wb.512") {
2574       Type *Ty = CI->getArgOperand(1)->getType();
2575       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2576       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2577                           CI->getArgOperand(1));
2578     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2579                          Name == "avx2.vbroadcasti128")) {
2580       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2581       Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2582       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2583       auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2584       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2585                                             PointerType::getUnqual(VT));
2586       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2587       if (NumSrcElts == 2)
2588         Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2589       else
2590         Rep = Builder.CreateShuffleVector(
2591             Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2592     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2593                          Name.startswith("avx512.mask.shuf.f"))) {
2594       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2595       Type *VT = CI->getType();
2596       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2597       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2598       unsigned ControlBitsMask = NumLanes - 1;
2599       unsigned NumControlBits = NumLanes / 2;
2600       SmallVector<int, 8> ShuffleMask(0);
2601 
2602       for (unsigned l = 0; l != NumLanes; ++l) {
2603         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2604         // We actually need the other source.
2605         if (l >= NumLanes / 2)
2606           LaneMask += NumLanes;
2607         for (unsigned i = 0; i != NumElementsInLane; ++i)
2608           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2609       }
2610       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2611                                         CI->getArgOperand(1), ShuffleMask);
2612       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2613                           CI->getArgOperand(3));
2614     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2615                          Name.startswith("avx512.mask.broadcasti"))) {
2616       unsigned NumSrcElts =
2617           cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2618               ->getNumElements();
2619       unsigned NumDstElts =
2620           cast<FixedVectorType>(CI->getType())->getNumElements();
2621 
2622       SmallVector<int, 8> ShuffleMask(NumDstElts);
2623       for (unsigned i = 0; i != NumDstElts; ++i)
2624         ShuffleMask[i] = i % NumSrcElts;
2625 
2626       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2627                                         CI->getArgOperand(0),
2628                                         ShuffleMask);
2629       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2630                           CI->getArgOperand(1));
2631     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2632                          Name.startswith("avx2.vbroadcast") ||
2633                          Name.startswith("avx512.pbroadcast") ||
2634                          Name.startswith("avx512.mask.broadcast.s"))) {
2635       // Replace vp?broadcasts with a vector shuffle.
2636       Value *Op = CI->getArgOperand(0);
2637       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2638       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2639       SmallVector<int, 8> M;
2640       ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2641       Rep = Builder.CreateShuffleVector(Op, M);
2642 
2643       if (CI->arg_size() == 3)
2644         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2645                             CI->getArgOperand(1));
2646     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2647                          Name.startswith("avx2.padds.") ||
2648                          Name.startswith("avx512.padds.") ||
2649                          Name.startswith("avx512.mask.padds."))) {
2650       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2651     } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2652                          Name.startswith("avx2.psubs.") ||
2653                          Name.startswith("avx512.psubs.") ||
2654                          Name.startswith("avx512.mask.psubs."))) {
2655       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2656     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2657                          Name.startswith("avx2.paddus.") ||
2658                          Name.startswith("avx512.mask.paddus."))) {
2659       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2660     } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2661                          Name.startswith("avx2.psubus.") ||
2662                          Name.startswith("avx512.mask.psubus."))) {
2663       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2664     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2665       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2666                                       CI->getArgOperand(1),
2667                                       CI->getArgOperand(2),
2668                                       CI->getArgOperand(3),
2669                                       CI->getArgOperand(4),
2670                                       false);
2671     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2672       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2673                                       CI->getArgOperand(1),
2674                                       CI->getArgOperand(2),
2675                                       CI->getArgOperand(3),
2676                                       CI->getArgOperand(4),
2677                                       true);
2678     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2679                          Name == "avx2.psll.dq")) {
2680       // 128/256-bit shift left specified in bits.
2681       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2682       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2683                                        Shift / 8); // Shift is in bits.
2684     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2685                          Name == "avx2.psrl.dq")) {
2686       // 128/256-bit shift right specified in bits.
2687       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2688       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2689                                        Shift / 8); // Shift is in bits.
2690     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2691                          Name == "avx2.psll.dq.bs" ||
2692                          Name == "avx512.psll.dq.512")) {
2693       // 128/256/512-bit shift left specified in bytes.
2694       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2695       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2696     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2697                          Name == "avx2.psrl.dq.bs" ||
2698                          Name == "avx512.psrl.dq.512")) {
2699       // 128/256/512-bit shift right specified in bytes.
2700       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2701       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2702     } else if (IsX86 && (Name == "sse41.pblendw" ||
2703                          Name.startswith("sse41.blendp") ||
2704                          Name.startswith("avx.blend.p") ||
2705                          Name == "avx2.pblendw" ||
2706                          Name.startswith("avx2.pblendd."))) {
2707       Value *Op0 = CI->getArgOperand(0);
2708       Value *Op1 = CI->getArgOperand(1);
2709       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2710       auto *VecTy = cast<FixedVectorType>(CI->getType());
2711       unsigned NumElts = VecTy->getNumElements();
2712 
2713       SmallVector<int, 16> Idxs(NumElts);
2714       for (unsigned i = 0; i != NumElts; ++i)
2715         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2716 
2717       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2718     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2719                          Name == "avx2.vinserti128" ||
2720                          Name.startswith("avx512.mask.insert"))) {
2721       Value *Op0 = CI->getArgOperand(0);
2722       Value *Op1 = CI->getArgOperand(1);
2723       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2724       unsigned DstNumElts =
2725           cast<FixedVectorType>(CI->getType())->getNumElements();
2726       unsigned SrcNumElts =
2727           cast<FixedVectorType>(Op1->getType())->getNumElements();
2728       unsigned Scale = DstNumElts / SrcNumElts;
2729 
2730       // Mask off the high bits of the immediate value; hardware ignores those.
2731       Imm = Imm % Scale;
2732 
2733       // Extend the second operand into a vector the size of the destination.
2734       SmallVector<int, 8> Idxs(DstNumElts);
2735       for (unsigned i = 0; i != SrcNumElts; ++i)
2736         Idxs[i] = i;
2737       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2738         Idxs[i] = SrcNumElts;
2739       Rep = Builder.CreateShuffleVector(Op1, Idxs);
2740 
2741       // Insert the second operand into the first operand.
2742 
2743       // Note that there is no guarantee that instruction lowering will actually
2744       // produce a vinsertf128 instruction for the created shuffles. In
2745       // particular, the 0 immediate case involves no lane changes, so it can
2746       // be handled as a blend.
2747 
2748       // Example of shuffle mask for 32-bit elements:
2749       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2750       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2751 
2752       // First fill with identify mask.
2753       for (unsigned i = 0; i != DstNumElts; ++i)
2754         Idxs[i] = i;
2755       // Then replace the elements where we need to insert.
2756       for (unsigned i = 0; i != SrcNumElts; ++i)
2757         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2758       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2759 
2760       // If the intrinsic has a mask operand, handle that.
2761       if (CI->arg_size() == 5)
2762         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2763                             CI->getArgOperand(3));
2764     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2765                          Name == "avx2.vextracti128" ||
2766                          Name.startswith("avx512.mask.vextract"))) {
2767       Value *Op0 = CI->getArgOperand(0);
2768       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2769       unsigned DstNumElts =
2770           cast<FixedVectorType>(CI->getType())->getNumElements();
2771       unsigned SrcNumElts =
2772           cast<FixedVectorType>(Op0->getType())->getNumElements();
2773       unsigned Scale = SrcNumElts / DstNumElts;
2774 
2775       // Mask off the high bits of the immediate value; hardware ignores those.
2776       Imm = Imm % Scale;
2777 
2778       // Get indexes for the subvector of the input vector.
2779       SmallVector<int, 8> Idxs(DstNumElts);
2780       for (unsigned i = 0; i != DstNumElts; ++i) {
2781         Idxs[i] = i + (Imm * DstNumElts);
2782       }
2783       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2784 
2785       // If the intrinsic has a mask operand, handle that.
2786       if (CI->arg_size() == 4)
2787         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2788                             CI->getArgOperand(2));
2789     } else if (!IsX86 && Name == "stackprotectorcheck") {
2790       Rep = nullptr;
2791     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2792                          Name.startswith("avx512.mask.perm.di."))) {
2793       Value *Op0 = CI->getArgOperand(0);
2794       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2795       auto *VecTy = cast<FixedVectorType>(CI->getType());
2796       unsigned NumElts = VecTy->getNumElements();
2797 
2798       SmallVector<int, 8> Idxs(NumElts);
2799       for (unsigned i = 0; i != NumElts; ++i)
2800         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2801 
2802       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2803 
2804       if (CI->arg_size() == 4)
2805         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2806                             CI->getArgOperand(2));
2807     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2808                          Name == "avx2.vperm2i128")) {
2809       // The immediate permute control byte looks like this:
2810       //    [1:0] - select 128 bits from sources for low half of destination
2811       //    [2]   - ignore
2812       //    [3]   - zero low half of destination
2813       //    [5:4] - select 128 bits from sources for high half of destination
2814       //    [6]   - ignore
2815       //    [7]   - zero high half of destination
2816 
2817       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2818 
2819       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2820       unsigned HalfSize = NumElts / 2;
2821       SmallVector<int, 8> ShuffleMask(NumElts);
2822 
2823       // Determine which operand(s) are actually in use for this instruction.
2824       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2825       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2826 
2827       // If needed, replace operands based on zero mask.
2828       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2829       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2830 
2831       // Permute low half of result.
2832       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2833       for (unsigned i = 0; i < HalfSize; ++i)
2834         ShuffleMask[i] = StartIndex + i;
2835 
2836       // Permute high half of result.
2837       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2838       for (unsigned i = 0; i < HalfSize; ++i)
2839         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2840 
2841       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2842 
2843     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2844                          Name == "sse2.pshuf.d" ||
2845                          Name.startswith("avx512.mask.vpermil.p") ||
2846                          Name.startswith("avx512.mask.pshuf.d."))) {
2847       Value *Op0 = CI->getArgOperand(0);
2848       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2849       auto *VecTy = cast<FixedVectorType>(CI->getType());
2850       unsigned NumElts = VecTy->getNumElements();
2851       // Calculate the size of each index in the immediate.
2852       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2853       unsigned IdxMask = ((1 << IdxSize) - 1);
2854 
2855       SmallVector<int, 8> Idxs(NumElts);
2856       // Lookup the bits for this element, wrapping around the immediate every
2857       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2858       // to offset by the first index of each group.
2859       for (unsigned i = 0; i != NumElts; ++i)
2860         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2861 
2862       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2863 
2864       if (CI->arg_size() == 4)
2865         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2866                             CI->getArgOperand(2));
2867     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2868                          Name.startswith("avx512.mask.pshufl.w."))) {
2869       Value *Op0 = CI->getArgOperand(0);
2870       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2871       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2872 
2873       SmallVector<int, 16> Idxs(NumElts);
2874       for (unsigned l = 0; l != NumElts; l += 8) {
2875         for (unsigned i = 0; i != 4; ++i)
2876           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2877         for (unsigned i = 4; i != 8; ++i)
2878           Idxs[i + l] = i + l;
2879       }
2880 
2881       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2882 
2883       if (CI->arg_size() == 4)
2884         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2885                             CI->getArgOperand(2));
2886     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2887                          Name.startswith("avx512.mask.pshufh.w."))) {
2888       Value *Op0 = CI->getArgOperand(0);
2889       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2890       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2891 
2892       SmallVector<int, 16> Idxs(NumElts);
2893       for (unsigned l = 0; l != NumElts; l += 8) {
2894         for (unsigned i = 0; i != 4; ++i)
2895           Idxs[i + l] = i + l;
2896         for (unsigned i = 0; i != 4; ++i)
2897           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2898       }
2899 
2900       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2901 
2902       if (CI->arg_size() == 4)
2903         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2904                             CI->getArgOperand(2));
2905     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2906       Value *Op0 = CI->getArgOperand(0);
2907       Value *Op1 = CI->getArgOperand(1);
2908       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2909       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2910 
2911       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2912       unsigned HalfLaneElts = NumLaneElts / 2;
2913 
2914       SmallVector<int, 16> Idxs(NumElts);
2915       for (unsigned i = 0; i != NumElts; ++i) {
2916         // Base index is the starting element of the lane.
2917         Idxs[i] = i - (i % NumLaneElts);
2918         // If we are half way through the lane switch to the other source.
2919         if ((i % NumLaneElts) >= HalfLaneElts)
2920           Idxs[i] += NumElts;
2921         // Now select the specific element. By adding HalfLaneElts bits from
2922         // the immediate. Wrapping around the immediate every 8-bits.
2923         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2924       }
2925 
2926       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2927 
2928       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2929                           CI->getArgOperand(3));
2930     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2931                          Name.startswith("avx512.mask.movshdup") ||
2932                          Name.startswith("avx512.mask.movsldup"))) {
2933       Value *Op0 = CI->getArgOperand(0);
2934       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2935       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2936 
2937       unsigned Offset = 0;
2938       if (Name.startswith("avx512.mask.movshdup."))
2939         Offset = 1;
2940 
2941       SmallVector<int, 16> Idxs(NumElts);
2942       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2943         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2944           Idxs[i + l + 0] = i + l + Offset;
2945           Idxs[i + l + 1] = i + l + Offset;
2946         }
2947 
2948       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2949 
2950       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2951                           CI->getArgOperand(1));
2952     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2953                          Name.startswith("avx512.mask.unpckl."))) {
2954       Value *Op0 = CI->getArgOperand(0);
2955       Value *Op1 = CI->getArgOperand(1);
2956       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2957       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2958 
2959       SmallVector<int, 64> Idxs(NumElts);
2960       for (int l = 0; l != NumElts; l += NumLaneElts)
2961         for (int i = 0; i != NumLaneElts; ++i)
2962           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2963 
2964       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2965 
2966       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2967                           CI->getArgOperand(2));
2968     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2969                          Name.startswith("avx512.mask.unpckh."))) {
2970       Value *Op0 = CI->getArgOperand(0);
2971       Value *Op1 = CI->getArgOperand(1);
2972       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2973       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2974 
2975       SmallVector<int, 64> Idxs(NumElts);
2976       for (int l = 0; l != NumElts; l += NumLaneElts)
2977         for (int i = 0; i != NumLaneElts; ++i)
2978           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2979 
2980       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2981 
2982       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2983                           CI->getArgOperand(2));
2984     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2985                          Name.startswith("avx512.mask.pand."))) {
2986       VectorType *FTy = cast<VectorType>(CI->getType());
2987       VectorType *ITy = VectorType::getInteger(FTy);
2988       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2989                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2990       Rep = Builder.CreateBitCast(Rep, FTy);
2991       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2992                           CI->getArgOperand(2));
2993     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2994                          Name.startswith("avx512.mask.pandn."))) {
2995       VectorType *FTy = cast<VectorType>(CI->getType());
2996       VectorType *ITy = VectorType::getInteger(FTy);
2997       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2998       Rep = Builder.CreateAnd(Rep,
2999                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3000       Rep = Builder.CreateBitCast(Rep, FTy);
3001       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3002                           CI->getArgOperand(2));
3003     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
3004                          Name.startswith("avx512.mask.por."))) {
3005       VectorType *FTy = cast<VectorType>(CI->getType());
3006       VectorType *ITy = VectorType::getInteger(FTy);
3007       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3008                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3009       Rep = Builder.CreateBitCast(Rep, FTy);
3010       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3011                           CI->getArgOperand(2));
3012     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3013                          Name.startswith("avx512.mask.pxor."))) {
3014       VectorType *FTy = cast<VectorType>(CI->getType());
3015       VectorType *ITy = VectorType::getInteger(FTy);
3016       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3017                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3018       Rep = Builder.CreateBitCast(Rep, FTy);
3019       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3020                           CI->getArgOperand(2));
3021     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3022       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3023       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3024                           CI->getArgOperand(2));
3025     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3026       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3027       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3028                           CI->getArgOperand(2));
3029     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3030       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3031       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3032                           CI->getArgOperand(2));
3033     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3034       if (Name.endswith(".512")) {
3035         Intrinsic::ID IID;
3036         if (Name[17] == 's')
3037           IID = Intrinsic::x86_avx512_add_ps_512;
3038         else
3039           IID = Intrinsic::x86_avx512_add_pd_512;
3040 
3041         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3042                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3043                                    CI->getArgOperand(4) });
3044       } else {
3045         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3046       }
3047       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3048                           CI->getArgOperand(2));
3049     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3050       if (Name.endswith(".512")) {
3051         Intrinsic::ID IID;
3052         if (Name[17] == 's')
3053           IID = Intrinsic::x86_avx512_div_ps_512;
3054         else
3055           IID = Intrinsic::x86_avx512_div_pd_512;
3056 
3057         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3058                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3059                                    CI->getArgOperand(4) });
3060       } else {
3061         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3062       }
3063       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3064                           CI->getArgOperand(2));
3065     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3066       if (Name.endswith(".512")) {
3067         Intrinsic::ID IID;
3068         if (Name[17] == 's')
3069           IID = Intrinsic::x86_avx512_mul_ps_512;
3070         else
3071           IID = Intrinsic::x86_avx512_mul_pd_512;
3072 
3073         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3074                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3075                                    CI->getArgOperand(4) });
3076       } else {
3077         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3078       }
3079       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3080                           CI->getArgOperand(2));
3081     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3082       if (Name.endswith(".512")) {
3083         Intrinsic::ID IID;
3084         if (Name[17] == 's')
3085           IID = Intrinsic::x86_avx512_sub_ps_512;
3086         else
3087           IID = Intrinsic::x86_avx512_sub_pd_512;
3088 
3089         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3090                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3091                                    CI->getArgOperand(4) });
3092       } else {
3093         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3094       }
3095       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3096                           CI->getArgOperand(2));
3097     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3098                          Name.startswith("avx512.mask.min.p")) &&
3099                Name.drop_front(18) == ".512") {
3100       bool IsDouble = Name[17] == 'd';
3101       bool IsMin = Name[13] == 'i';
3102       static const Intrinsic::ID MinMaxTbl[2][2] = {
3103         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3104         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3105       };
3106       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3107 
3108       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3109                                { CI->getArgOperand(0), CI->getArgOperand(1),
3110                                  CI->getArgOperand(4) });
3111       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3112                           CI->getArgOperand(2));
3113     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3114       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3115                                                          Intrinsic::ctlz,
3116                                                          CI->getType()),
3117                                { CI->getArgOperand(0), Builder.getInt1(false) });
3118       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3119                           CI->getArgOperand(1));
3120     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3121       bool IsImmediate = Name[16] == 'i' ||
3122                          (Name.size() > 18 && Name[18] == 'i');
3123       bool IsVariable = Name[16] == 'v';
3124       char Size = Name[16] == '.' ? Name[17] :
3125                   Name[17] == '.' ? Name[18] :
3126                   Name[18] == '.' ? Name[19] :
3127                                     Name[20];
3128 
3129       Intrinsic::ID IID;
3130       if (IsVariable && Name[17] != '.') {
3131         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3132           IID = Intrinsic::x86_avx2_psllv_q;
3133         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3134           IID = Intrinsic::x86_avx2_psllv_q_256;
3135         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3136           IID = Intrinsic::x86_avx2_psllv_d;
3137         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3138           IID = Intrinsic::x86_avx2_psllv_d_256;
3139         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3140           IID = Intrinsic::x86_avx512_psllv_w_128;
3141         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3142           IID = Intrinsic::x86_avx512_psllv_w_256;
3143         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3144           IID = Intrinsic::x86_avx512_psllv_w_512;
3145         else
3146           llvm_unreachable("Unexpected size");
3147       } else if (Name.endswith(".128")) {
3148         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3149           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3150                             : Intrinsic::x86_sse2_psll_d;
3151         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3152           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3153                             : Intrinsic::x86_sse2_psll_q;
3154         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3155           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3156                             : Intrinsic::x86_sse2_psll_w;
3157         else
3158           llvm_unreachable("Unexpected size");
3159       } else if (Name.endswith(".256")) {
3160         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3161           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3162                             : Intrinsic::x86_avx2_psll_d;
3163         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3164           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3165                             : Intrinsic::x86_avx2_psll_q;
3166         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3167           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3168                             : Intrinsic::x86_avx2_psll_w;
3169         else
3170           llvm_unreachable("Unexpected size");
3171       } else {
3172         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3173           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3174                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
3175                               Intrinsic::x86_avx512_psll_d_512;
3176         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3177           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3178                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
3179                               Intrinsic::x86_avx512_psll_q_512;
3180         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3181           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3182                             : Intrinsic::x86_avx512_psll_w_512;
3183         else
3184           llvm_unreachable("Unexpected size");
3185       }
3186 
3187       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3188     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3189       bool IsImmediate = Name[16] == 'i' ||
3190                          (Name.size() > 18 && Name[18] == 'i');
3191       bool IsVariable = Name[16] == 'v';
3192       char Size = Name[16] == '.' ? Name[17] :
3193                   Name[17] == '.' ? Name[18] :
3194                   Name[18] == '.' ? Name[19] :
3195                                     Name[20];
3196 
3197       Intrinsic::ID IID;
3198       if (IsVariable && Name[17] != '.') {
3199         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3200           IID = Intrinsic::x86_avx2_psrlv_q;
3201         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3202           IID = Intrinsic::x86_avx2_psrlv_q_256;
3203         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3204           IID = Intrinsic::x86_avx2_psrlv_d;
3205         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3206           IID = Intrinsic::x86_avx2_psrlv_d_256;
3207         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3208           IID = Intrinsic::x86_avx512_psrlv_w_128;
3209         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3210           IID = Intrinsic::x86_avx512_psrlv_w_256;
3211         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3212           IID = Intrinsic::x86_avx512_psrlv_w_512;
3213         else
3214           llvm_unreachable("Unexpected size");
3215       } else if (Name.endswith(".128")) {
3216         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3217           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3218                             : Intrinsic::x86_sse2_psrl_d;
3219         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3220           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3221                             : Intrinsic::x86_sse2_psrl_q;
3222         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3223           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3224                             : Intrinsic::x86_sse2_psrl_w;
3225         else
3226           llvm_unreachable("Unexpected size");
3227       } else if (Name.endswith(".256")) {
3228         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3229           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3230                             : Intrinsic::x86_avx2_psrl_d;
3231         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3232           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3233                             : Intrinsic::x86_avx2_psrl_q;
3234         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3235           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3236                             : Intrinsic::x86_avx2_psrl_w;
3237         else
3238           llvm_unreachable("Unexpected size");
3239       } else {
3240         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3241           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3242                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
3243                               Intrinsic::x86_avx512_psrl_d_512;
3244         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3245           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3246                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3247                               Intrinsic::x86_avx512_psrl_q_512;
3248         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3249           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3250                             : Intrinsic::x86_avx512_psrl_w_512;
3251         else
3252           llvm_unreachable("Unexpected size");
3253       }
3254 
3255       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3256     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3257       bool IsImmediate = Name[16] == 'i' ||
3258                          (Name.size() > 18 && Name[18] == 'i');
3259       bool IsVariable = Name[16] == 'v';
3260       char Size = Name[16] == '.' ? Name[17] :
3261                   Name[17] == '.' ? Name[18] :
3262                   Name[18] == '.' ? Name[19] :
3263                                     Name[20];
3264 
3265       Intrinsic::ID IID;
3266       if (IsVariable && Name[17] != '.') {
3267         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3268           IID = Intrinsic::x86_avx2_psrav_d;
3269         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3270           IID = Intrinsic::x86_avx2_psrav_d_256;
3271         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3272           IID = Intrinsic::x86_avx512_psrav_w_128;
3273         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3274           IID = Intrinsic::x86_avx512_psrav_w_256;
3275         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3276           IID = Intrinsic::x86_avx512_psrav_w_512;
3277         else
3278           llvm_unreachable("Unexpected size");
3279       } else if (Name.endswith(".128")) {
3280         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3281           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3282                             : Intrinsic::x86_sse2_psra_d;
3283         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3284           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3285                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3286                               Intrinsic::x86_avx512_psra_q_128;
3287         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3288           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3289                             : Intrinsic::x86_sse2_psra_w;
3290         else
3291           llvm_unreachable("Unexpected size");
3292       } else if (Name.endswith(".256")) {
3293         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3294           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3295                             : Intrinsic::x86_avx2_psra_d;
3296         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3297           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3298                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3299                               Intrinsic::x86_avx512_psra_q_256;
3300         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3301           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3302                             : Intrinsic::x86_avx2_psra_w;
3303         else
3304           llvm_unreachable("Unexpected size");
3305       } else {
3306         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3307           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3308                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3309                               Intrinsic::x86_avx512_psra_d_512;
3310         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3311           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3312                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3313                               Intrinsic::x86_avx512_psra_q_512;
3314         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3315           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3316                             : Intrinsic::x86_avx512_psra_w_512;
3317         else
3318           llvm_unreachable("Unexpected size");
3319       }
3320 
3321       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3322     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3323       Rep = upgradeMaskedMove(Builder, *CI);
3324     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3325       Rep = UpgradeMaskToInt(Builder, *CI);
3326     } else if (IsX86 && Name.endswith(".movntdqa")) {
3327       Module *M = F->getParent();
3328       MDNode *Node = MDNode::get(
3329           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3330 
3331       Value *Ptr = CI->getArgOperand(0);
3332 
3333       // Convert the type of the pointer to a pointer to the stored type.
3334       Value *BC = Builder.CreateBitCast(
3335           Ptr, PointerType::getUnqual(CI->getType()), "cast");
3336       LoadInst *LI = Builder.CreateAlignedLoad(
3337           CI->getType(), BC,
3338           Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3339       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3340       Rep = LI;
3341     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3342                          Name.startswith("fma.vfmsub.") ||
3343                          Name.startswith("fma.vfnmadd.") ||
3344                          Name.startswith("fma.vfnmsub."))) {
3345       bool NegMul = Name[6] == 'n';
3346       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3347       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3348 
3349       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3350                        CI->getArgOperand(2) };
3351 
3352       if (IsScalar) {
3353         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3354         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3355         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3356       }
3357 
3358       if (NegMul && !IsScalar)
3359         Ops[0] = Builder.CreateFNeg(Ops[0]);
3360       if (NegMul && IsScalar)
3361         Ops[1] = Builder.CreateFNeg(Ops[1]);
3362       if (NegAcc)
3363         Ops[2] = Builder.CreateFNeg(Ops[2]);
3364 
3365       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3366                                                          Intrinsic::fma,
3367                                                          Ops[0]->getType()),
3368                                Ops);
3369 
3370       if (IsScalar)
3371         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3372                                           (uint64_t)0);
3373     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3374       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3375                        CI->getArgOperand(2) };
3376 
3377       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3378       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3379       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3380 
3381       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3382                                                          Intrinsic::fma,
3383                                                          Ops[0]->getType()),
3384                                Ops);
3385 
3386       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3387                                         Rep, (uint64_t)0);
3388     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3389                          Name.startswith("avx512.maskz.vfmadd.s") ||
3390                          Name.startswith("avx512.mask3.vfmadd.s") ||
3391                          Name.startswith("avx512.mask3.vfmsub.s") ||
3392                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3393       bool IsMask3 = Name[11] == '3';
3394       bool IsMaskZ = Name[11] == 'z';
3395       // Drop the "avx512.mask." to make it easier.
3396       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3397       bool NegMul = Name[2] == 'n';
3398       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3399 
3400       Value *A = CI->getArgOperand(0);
3401       Value *B = CI->getArgOperand(1);
3402       Value *C = CI->getArgOperand(2);
3403 
3404       if (NegMul && (IsMask3 || IsMaskZ))
3405         A = Builder.CreateFNeg(A);
3406       if (NegMul && !(IsMask3 || IsMaskZ))
3407         B = Builder.CreateFNeg(B);
3408       if (NegAcc)
3409         C = Builder.CreateFNeg(C);
3410 
3411       A = Builder.CreateExtractElement(A, (uint64_t)0);
3412       B = Builder.CreateExtractElement(B, (uint64_t)0);
3413       C = Builder.CreateExtractElement(C, (uint64_t)0);
3414 
3415       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3416           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3417         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3418 
3419         Intrinsic::ID IID;
3420         if (Name.back() == 'd')
3421           IID = Intrinsic::x86_avx512_vfmadd_f64;
3422         else
3423           IID = Intrinsic::x86_avx512_vfmadd_f32;
3424         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3425         Rep = Builder.CreateCall(FMA, Ops);
3426       } else {
3427         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3428                                                   Intrinsic::fma,
3429                                                   A->getType());
3430         Rep = Builder.CreateCall(FMA, { A, B, C });
3431       }
3432 
3433       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3434                         IsMask3 ? C : A;
3435 
3436       // For Mask3 with NegAcc, we need to create a new extractelement that
3437       // avoids the negation above.
3438       if (NegAcc && IsMask3)
3439         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3440                                                 (uint64_t)0);
3441 
3442       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3443                                 Rep, PassThru);
3444       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3445                                         Rep, (uint64_t)0);
3446     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3447                          Name.startswith("avx512.mask.vfnmadd.p") ||
3448                          Name.startswith("avx512.mask.vfnmsub.p") ||
3449                          Name.startswith("avx512.mask3.vfmadd.p") ||
3450                          Name.startswith("avx512.mask3.vfmsub.p") ||
3451                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3452                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3453       bool IsMask3 = Name[11] == '3';
3454       bool IsMaskZ = Name[11] == 'z';
3455       // Drop the "avx512.mask." to make it easier.
3456       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3457       bool NegMul = Name[2] == 'n';
3458       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3459 
3460       Value *A = CI->getArgOperand(0);
3461       Value *B = CI->getArgOperand(1);
3462       Value *C = CI->getArgOperand(2);
3463 
3464       if (NegMul && (IsMask3 || IsMaskZ))
3465         A = Builder.CreateFNeg(A);
3466       if (NegMul && !(IsMask3 || IsMaskZ))
3467         B = Builder.CreateFNeg(B);
3468       if (NegAcc)
3469         C = Builder.CreateFNeg(C);
3470 
3471       if (CI->arg_size() == 5 &&
3472           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3473            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3474         Intrinsic::ID IID;
3475         // Check the character before ".512" in string.
3476         if (Name[Name.size()-5] == 's')
3477           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3478         else
3479           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3480 
3481         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3482                                  { A, B, C, CI->getArgOperand(4) });
3483       } else {
3484         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3485                                                   Intrinsic::fma,
3486                                                   A->getType());
3487         Rep = Builder.CreateCall(FMA, { A, B, C });
3488       }
3489 
3490       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3491                         IsMask3 ? CI->getArgOperand(2) :
3492                                   CI->getArgOperand(0);
3493 
3494       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3495     } else if (IsX86 &&  Name.startswith("fma.vfmsubadd.p")) {
3496       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3497       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3498       Intrinsic::ID IID;
3499       if (VecWidth == 128 && EltWidth == 32)
3500         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3501       else if (VecWidth == 256 && EltWidth == 32)
3502         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3503       else if (VecWidth == 128 && EltWidth == 64)
3504         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3505       else if (VecWidth == 256 && EltWidth == 64)
3506         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3507       else
3508         llvm_unreachable("Unexpected intrinsic");
3509 
3510       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3511                        CI->getArgOperand(2) };
3512       Ops[2] = Builder.CreateFNeg(Ops[2]);
3513       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3514                                Ops);
3515     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3516                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3517                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3518                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3519       bool IsMask3 = Name[11] == '3';
3520       bool IsMaskZ = Name[11] == 'z';
3521       // Drop the "avx512.mask." to make it easier.
3522       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3523       bool IsSubAdd = Name[3] == 's';
3524       if (CI->arg_size() == 5) {
3525         Intrinsic::ID IID;
3526         // Check the character before ".512" in string.
3527         if (Name[Name.size()-5] == 's')
3528           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3529         else
3530           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3531 
3532         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3533                          CI->getArgOperand(2), CI->getArgOperand(4) };
3534         if (IsSubAdd)
3535           Ops[2] = Builder.CreateFNeg(Ops[2]);
3536 
3537         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3538                                  Ops);
3539       } else {
3540         int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3541 
3542         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3543                          CI->getArgOperand(2) };
3544 
3545         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3546                                                   Ops[0]->getType());
3547         Value *Odd = Builder.CreateCall(FMA, Ops);
3548         Ops[2] = Builder.CreateFNeg(Ops[2]);
3549         Value *Even = Builder.CreateCall(FMA, Ops);
3550 
3551         if (IsSubAdd)
3552           std::swap(Even, Odd);
3553 
3554         SmallVector<int, 32> Idxs(NumElts);
3555         for (int i = 0; i != NumElts; ++i)
3556           Idxs[i] = i + (i % 2) * NumElts;
3557 
3558         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3559       }
3560 
3561       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3562                         IsMask3 ? CI->getArgOperand(2) :
3563                                   CI->getArgOperand(0);
3564 
3565       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3566     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3567                          Name.startswith("avx512.maskz.pternlog."))) {
3568       bool ZeroMask = Name[11] == 'z';
3569       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3570       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3571       Intrinsic::ID IID;
3572       if (VecWidth == 128 && EltWidth == 32)
3573         IID = Intrinsic::x86_avx512_pternlog_d_128;
3574       else if (VecWidth == 256 && EltWidth == 32)
3575         IID = Intrinsic::x86_avx512_pternlog_d_256;
3576       else if (VecWidth == 512 && EltWidth == 32)
3577         IID = Intrinsic::x86_avx512_pternlog_d_512;
3578       else if (VecWidth == 128 && EltWidth == 64)
3579         IID = Intrinsic::x86_avx512_pternlog_q_128;
3580       else if (VecWidth == 256 && EltWidth == 64)
3581         IID = Intrinsic::x86_avx512_pternlog_q_256;
3582       else if (VecWidth == 512 && EltWidth == 64)
3583         IID = Intrinsic::x86_avx512_pternlog_q_512;
3584       else
3585         llvm_unreachable("Unexpected intrinsic");
3586 
3587       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3588                         CI->getArgOperand(2), CI->getArgOperand(3) };
3589       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3590                                Args);
3591       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3592                                  : CI->getArgOperand(0);
3593       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3594     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3595                          Name.startswith("avx512.maskz.vpmadd52"))) {
3596       bool ZeroMask = Name[11] == 'z';
3597       bool High = Name[20] == 'h' || Name[21] == 'h';
3598       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3599       Intrinsic::ID IID;
3600       if (VecWidth == 128 && !High)
3601         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3602       else if (VecWidth == 256 && !High)
3603         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3604       else if (VecWidth == 512 && !High)
3605         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3606       else if (VecWidth == 128 && High)
3607         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3608       else if (VecWidth == 256 && High)
3609         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3610       else if (VecWidth == 512 && High)
3611         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3612       else
3613         llvm_unreachable("Unexpected intrinsic");
3614 
3615       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3616                         CI->getArgOperand(2) };
3617       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3618                                Args);
3619       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3620                                  : CI->getArgOperand(0);
3621       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3622     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3623                          Name.startswith("avx512.mask.vpermt2var.") ||
3624                          Name.startswith("avx512.maskz.vpermt2var."))) {
3625       bool ZeroMask = Name[11] == 'z';
3626       bool IndexForm = Name[17] == 'i';
3627       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3628     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3629                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3630                          Name.startswith("avx512.mask.vpdpbusds.") ||
3631                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3632       bool ZeroMask = Name[11] == 'z';
3633       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3634       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3635       Intrinsic::ID IID;
3636       if (VecWidth == 128 && !IsSaturating)
3637         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3638       else if (VecWidth == 256 && !IsSaturating)
3639         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3640       else if (VecWidth == 512 && !IsSaturating)
3641         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3642       else if (VecWidth == 128 && IsSaturating)
3643         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3644       else if (VecWidth == 256 && IsSaturating)
3645         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3646       else if (VecWidth == 512 && IsSaturating)
3647         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3648       else
3649         llvm_unreachable("Unexpected intrinsic");
3650 
3651       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3652                         CI->getArgOperand(2)  };
3653       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3654                                Args);
3655       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3656                                  : CI->getArgOperand(0);
3657       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3658     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3659                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3660                          Name.startswith("avx512.mask.vpdpwssds.") ||
3661                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3662       bool ZeroMask = Name[11] == 'z';
3663       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3664       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3665       Intrinsic::ID IID;
3666       if (VecWidth == 128 && !IsSaturating)
3667         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3668       else if (VecWidth == 256 && !IsSaturating)
3669         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3670       else if (VecWidth == 512 && !IsSaturating)
3671         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3672       else if (VecWidth == 128 && IsSaturating)
3673         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3674       else if (VecWidth == 256 && IsSaturating)
3675         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3676       else if (VecWidth == 512 && IsSaturating)
3677         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3678       else
3679         llvm_unreachable("Unexpected intrinsic");
3680 
3681       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3682                         CI->getArgOperand(2)  };
3683       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3684                                Args);
3685       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3686                                  : CI->getArgOperand(0);
3687       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3688     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3689                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3690                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3691       Intrinsic::ID IID;
3692       if (Name[0] == 'a' && Name.back() == '2')
3693         IID = Intrinsic::x86_addcarry_32;
3694       else if (Name[0] == 'a' && Name.back() == '4')
3695         IID = Intrinsic::x86_addcarry_64;
3696       else if (Name[0] == 's' && Name.back() == '2')
3697         IID = Intrinsic::x86_subborrow_32;
3698       else if (Name[0] == 's' && Name.back() == '4')
3699         IID = Intrinsic::x86_subborrow_64;
3700       else
3701         llvm_unreachable("Unexpected intrinsic");
3702 
3703       // Make a call with 3 operands.
3704       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3705                         CI->getArgOperand(2)};
3706       Value *NewCall = Builder.CreateCall(
3707                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3708                                 Args);
3709 
3710       // Extract the second result and store it.
3711       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3712       // Cast the pointer to the right type.
3713       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3714                                  llvm::PointerType::getUnqual(Data->getType()));
3715       Builder.CreateAlignedStore(Data, Ptr, Align(1));
3716       // Replace the original call result with the first result of the new call.
3717       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3718 
3719       CI->replaceAllUsesWith(CF);
3720       Rep = nullptr;
3721     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3722                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3723       // Rep will be updated by the call in the condition.
3724     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3725       Value *Arg = CI->getArgOperand(0);
3726       Value *Neg = Builder.CreateNeg(Arg, "neg");
3727       Value *Cmp = Builder.CreateICmpSGE(
3728           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3729       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3730     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3731                           Name.startswith("atomic.load.add.f64.p"))) {
3732       Value *Ptr = CI->getArgOperand(0);
3733       Value *Val = CI->getArgOperand(1);
3734       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3735                                     AtomicOrdering::SequentiallyConsistent);
3736     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3737                           Name == "max.ui" || Name == "max.ull")) {
3738       Value *Arg0 = CI->getArgOperand(0);
3739       Value *Arg1 = CI->getArgOperand(1);
3740       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3741                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3742                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3743       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3744     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3745                           Name == "min.ui" || Name == "min.ull")) {
3746       Value *Arg0 = CI->getArgOperand(0);
3747       Value *Arg1 = CI->getArgOperand(1);
3748       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3749                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3750                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3751       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3752     } else if (IsNVVM && Name == "clz.ll") {
3753       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3754       Value *Arg = CI->getArgOperand(0);
3755       Value *Ctlz = Builder.CreateCall(
3756           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3757                                     {Arg->getType()}),
3758           {Arg, Builder.getFalse()}, "ctlz");
3759       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3760     } else if (IsNVVM && Name == "popc.ll") {
3761       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3762       // i64.
3763       Value *Arg = CI->getArgOperand(0);
3764       Value *Popc = Builder.CreateCall(
3765           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3766                                     {Arg->getType()}),
3767           Arg, "ctpop");
3768       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3769     } else if (IsNVVM && Name == "h2f") {
3770       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3771                                    F->getParent(), Intrinsic::convert_from_fp16,
3772                                    {Builder.getFloatTy()}),
3773                                CI->getArgOperand(0), "h2f");
3774     } else if (IsARM) {
3775       Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3776     } else {
3777       llvm_unreachable("Unknown function for CallInst upgrade.");
3778     }
3779 
3780     if (Rep)
3781       CI->replaceAllUsesWith(Rep);
3782     CI->eraseFromParent();
3783     return;
3784   }
3785 
3786   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3787     // Handle generic mangling change, but nothing else
3788     assert(
3789         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3790         "Unknown function for CallInst upgrade and isn't just a name change");
3791     CI->setCalledFunction(NewFn);
3792   };
3793   CallInst *NewCall = nullptr;
3794   switch (NewFn->getIntrinsicID()) {
3795   default: {
3796     DefaultCase();
3797     return;
3798   }
3799   case Intrinsic::arm_neon_vld1:
3800   case Intrinsic::arm_neon_vld2:
3801   case Intrinsic::arm_neon_vld3:
3802   case Intrinsic::arm_neon_vld4:
3803   case Intrinsic::arm_neon_vld2lane:
3804   case Intrinsic::arm_neon_vld3lane:
3805   case Intrinsic::arm_neon_vld4lane:
3806   case Intrinsic::arm_neon_vst1:
3807   case Intrinsic::arm_neon_vst2:
3808   case Intrinsic::arm_neon_vst3:
3809   case Intrinsic::arm_neon_vst4:
3810   case Intrinsic::arm_neon_vst2lane:
3811   case Intrinsic::arm_neon_vst3lane:
3812   case Intrinsic::arm_neon_vst4lane: {
3813     SmallVector<Value *, 4> Args(CI->args());
3814     NewCall = Builder.CreateCall(NewFn, Args);
3815     break;
3816   }
3817 
3818   case Intrinsic::arm_neon_bfdot:
3819   case Intrinsic::arm_neon_bfmmla:
3820   case Intrinsic::arm_neon_bfmlalb:
3821   case Intrinsic::arm_neon_bfmlalt:
3822   case Intrinsic::aarch64_neon_bfdot:
3823   case Intrinsic::aarch64_neon_bfmmla:
3824   case Intrinsic::aarch64_neon_bfmlalb:
3825   case Intrinsic::aarch64_neon_bfmlalt: {
3826     SmallVector<Value *, 3> Args;
3827     assert(CI->arg_size() == 3 &&
3828            "Mismatch between function args and call args");
3829     size_t OperandWidth =
3830         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3831     assert((OperandWidth == 64 || OperandWidth == 128) &&
3832            "Unexpected operand width");
3833     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3834     auto Iter = CI->args().begin();
3835     Args.push_back(*Iter++);
3836     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3837     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3838     NewCall = Builder.CreateCall(NewFn, Args);
3839     break;
3840   }
3841 
3842   case Intrinsic::bitreverse:
3843     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3844     break;
3845 
3846   case Intrinsic::ctlz:
3847   case Intrinsic::cttz:
3848     assert(CI->arg_size() == 1 &&
3849            "Mismatch between function args and call args");
3850     NewCall =
3851         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3852     break;
3853 
3854   case Intrinsic::objectsize: {
3855     Value *NullIsUnknownSize =
3856         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
3857     Value *Dynamic =
3858         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3859     NewCall = Builder.CreateCall(
3860         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3861     break;
3862   }
3863 
3864   case Intrinsic::ctpop:
3865     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3866     break;
3867 
3868   case Intrinsic::convert_from_fp16:
3869     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3870     break;
3871 
3872   case Intrinsic::dbg_value:
3873     // Upgrade from the old version that had an extra offset argument.
3874     assert(CI->arg_size() == 4);
3875     // Drop nonzero offsets instead of attempting to upgrade them.
3876     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3877       if (Offset->isZeroValue()) {
3878         NewCall = Builder.CreateCall(
3879             NewFn,
3880             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3881         break;
3882       }
3883     CI->eraseFromParent();
3884     return;
3885 
3886   case Intrinsic::ptr_annotation:
3887     // Upgrade from versions that lacked the annotation attribute argument.
3888     assert(CI->arg_size() == 4 &&
3889            "Before LLVM 12.0 this intrinsic took four arguments");
3890     // Create a new call with an added null annotation attribute argument.
3891     NewCall = Builder.CreateCall(
3892         NewFn,
3893         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3894          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3895     NewCall->takeName(CI);
3896     CI->replaceAllUsesWith(NewCall);
3897     CI->eraseFromParent();
3898     return;
3899 
3900   case Intrinsic::var_annotation:
3901     // Upgrade from versions that lacked the annotation attribute argument.
3902     assert(CI->arg_size() == 4 &&
3903            "Before LLVM 12.0 this intrinsic took four arguments");
3904     // Create a new call with an added null annotation attribute argument.
3905     NewCall = Builder.CreateCall(
3906         NewFn,
3907         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3908          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3909     CI->eraseFromParent();
3910     return;
3911 
3912   case Intrinsic::x86_xop_vfrcz_ss:
3913   case Intrinsic::x86_xop_vfrcz_sd:
3914     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3915     break;
3916 
3917   case Intrinsic::x86_xop_vpermil2pd:
3918   case Intrinsic::x86_xop_vpermil2ps:
3919   case Intrinsic::x86_xop_vpermil2pd_256:
3920   case Intrinsic::x86_xop_vpermil2ps_256: {
3921     SmallVector<Value *, 4> Args(CI->args());
3922     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3923     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3924     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3925     NewCall = Builder.CreateCall(NewFn, Args);
3926     break;
3927   }
3928 
3929   case Intrinsic::x86_sse41_ptestc:
3930   case Intrinsic::x86_sse41_ptestz:
3931   case Intrinsic::x86_sse41_ptestnzc: {
3932     // The arguments for these intrinsics used to be v4f32, and changed
3933     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3934     // So, the only thing required is a bitcast for both arguments.
3935     // First, check the arguments have the old type.
3936     Value *Arg0 = CI->getArgOperand(0);
3937     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3938       return;
3939 
3940     // Old intrinsic, add bitcasts
3941     Value *Arg1 = CI->getArgOperand(1);
3942 
3943     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3944 
3945     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3946     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3947 
3948     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3949     break;
3950   }
3951 
3952   case Intrinsic::x86_rdtscp: {
3953     // This used to take 1 arguments. If we have no arguments, it is already
3954     // upgraded.
3955     if (CI->getNumOperands() == 0)
3956       return;
3957 
3958     NewCall = Builder.CreateCall(NewFn);
3959     // Extract the second result and store it.
3960     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3961     // Cast the pointer to the right type.
3962     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3963                                  llvm::PointerType::getUnqual(Data->getType()));
3964     Builder.CreateAlignedStore(Data, Ptr, Align(1));
3965     // Replace the original call result with the first result of the new call.
3966     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3967 
3968     NewCall->takeName(CI);
3969     CI->replaceAllUsesWith(TSC);
3970     CI->eraseFromParent();
3971     return;
3972   }
3973 
3974   case Intrinsic::x86_sse41_insertps:
3975   case Intrinsic::x86_sse41_dppd:
3976   case Intrinsic::x86_sse41_dpps:
3977   case Intrinsic::x86_sse41_mpsadbw:
3978   case Intrinsic::x86_avx_dp_ps_256:
3979   case Intrinsic::x86_avx2_mpsadbw: {
3980     // Need to truncate the last argument from i32 to i8 -- this argument models
3981     // an inherently 8-bit immediate operand to these x86 instructions.
3982     SmallVector<Value *, 4> Args(CI->args());
3983 
3984     // Replace the last argument with a trunc.
3985     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3986     NewCall = Builder.CreateCall(NewFn, Args);
3987     break;
3988   }
3989 
3990   case Intrinsic::x86_avx512_mask_cmp_pd_128:
3991   case Intrinsic::x86_avx512_mask_cmp_pd_256:
3992   case Intrinsic::x86_avx512_mask_cmp_pd_512:
3993   case Intrinsic::x86_avx512_mask_cmp_ps_128:
3994   case Intrinsic::x86_avx512_mask_cmp_ps_256:
3995   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3996     SmallVector<Value *, 4> Args(CI->args());
3997     unsigned NumElts =
3998         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3999     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4000 
4001     NewCall = Builder.CreateCall(NewFn, Args);
4002     Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4003 
4004     NewCall->takeName(CI);
4005     CI->replaceAllUsesWith(Res);
4006     CI->eraseFromParent();
4007     return;
4008   }
4009 
4010   case Intrinsic::thread_pointer: {
4011     NewCall = Builder.CreateCall(NewFn, {});
4012     break;
4013   }
4014 
4015   case Intrinsic::invariant_start:
4016   case Intrinsic::invariant_end: {
4017     SmallVector<Value *, 4> Args(CI->args());
4018     NewCall = Builder.CreateCall(NewFn, Args);
4019     break;
4020   }
4021   case Intrinsic::masked_load:
4022   case Intrinsic::masked_store:
4023   case Intrinsic::masked_gather:
4024   case Intrinsic::masked_scatter: {
4025     SmallVector<Value *, 4> Args(CI->args());
4026     NewCall = Builder.CreateCall(NewFn, Args);
4027     NewCall->copyMetadata(*CI);
4028     break;
4029   }
4030 
4031   case Intrinsic::memcpy:
4032   case Intrinsic::memmove:
4033   case Intrinsic::memset: {
4034     // We have to make sure that the call signature is what we're expecting.
4035     // We only want to change the old signatures by removing the alignment arg:
4036     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4037     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4038     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4039     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4040     // Note: i8*'s in the above can be any pointer type
4041     if (CI->arg_size() != 5) {
4042       DefaultCase();
4043       return;
4044     }
4045     // Remove alignment argument (3), and add alignment attributes to the
4046     // dest/src pointers.
4047     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4048                       CI->getArgOperand(2), CI->getArgOperand(4)};
4049     NewCall = Builder.CreateCall(NewFn, Args);
4050     auto *MemCI = cast<MemIntrinsic>(NewCall);
4051     // All mem intrinsics support dest alignment.
4052     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4053     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4054     // Memcpy/Memmove also support source alignment.
4055     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4056       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4057     break;
4058   }
4059   }
4060   assert(NewCall && "Should have either set this variable or returned through "
4061                     "the default case");
4062   NewCall->takeName(CI);
4063   CI->replaceAllUsesWith(NewCall);
4064   CI->eraseFromParent();
4065 }
4066 
4067 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4068   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4069 
4070   // Check if this function should be upgraded and get the replacement function
4071   // if there is one.
4072   Function *NewFn;
4073   if (UpgradeIntrinsicFunction(F, NewFn)) {
4074     // Replace all users of the old function with the new function or new
4075     // instructions. This is not a range loop because the call is deleted.
4076     for (User *U : make_early_inc_range(F->users()))
4077       if (CallInst *CI = dyn_cast<CallInst>(U))
4078         UpgradeIntrinsicCall(CI, NewFn);
4079 
4080     // Remove old function, no longer used, from the module.
4081     F->eraseFromParent();
4082   }
4083 }
4084 
4085 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4086   // Check if the tag uses struct-path aware TBAA format.
4087   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4088     return &MD;
4089 
4090   auto &Context = MD.getContext();
4091   if (MD.getNumOperands() == 3) {
4092     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4093     MDNode *ScalarType = MDNode::get(Context, Elts);
4094     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4095     Metadata *Elts2[] = {ScalarType, ScalarType,
4096                          ConstantAsMetadata::get(
4097                              Constant::getNullValue(Type::getInt64Ty(Context))),
4098                          MD.getOperand(2)};
4099     return MDNode::get(Context, Elts2);
4100   }
4101   // Create a MDNode <MD, MD, offset 0>
4102   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4103                                     Type::getInt64Ty(Context)))};
4104   return MDNode::get(Context, Elts);
4105 }
4106 
4107 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4108                                       Instruction *&Temp) {
4109   if (Opc != Instruction::BitCast)
4110     return nullptr;
4111 
4112   Temp = nullptr;
4113   Type *SrcTy = V->getType();
4114   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4115       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4116     LLVMContext &Context = V->getContext();
4117 
4118     // We have no information about target data layout, so we assume that
4119     // the maximum pointer size is 64bit.
4120     Type *MidTy = Type::getInt64Ty(Context);
4121     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4122 
4123     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4124   }
4125 
4126   return nullptr;
4127 }
4128 
4129 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4130   if (Opc != Instruction::BitCast)
4131     return nullptr;
4132 
4133   Type *SrcTy = C->getType();
4134   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4135       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4136     LLVMContext &Context = C->getContext();
4137 
4138     // We have no information about target data layout, so we assume that
4139     // the maximum pointer size is 64bit.
4140     Type *MidTy = Type::getInt64Ty(Context);
4141 
4142     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4143                                      DestTy);
4144   }
4145 
4146   return nullptr;
4147 }
4148 
4149 /// Check the debug info version number, if it is out-dated, drop the debug
4150 /// info. Return true if module is modified.
4151 bool llvm::UpgradeDebugInfo(Module &M) {
4152   unsigned Version = getDebugMetadataVersionFromModule(M);
4153   if (Version == DEBUG_METADATA_VERSION) {
4154     bool BrokenDebugInfo = false;
4155     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4156       report_fatal_error("Broken module found, compilation aborted!");
4157     if (!BrokenDebugInfo)
4158       // Everything is ok.
4159       return false;
4160     else {
4161       // Diagnose malformed debug info.
4162       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4163       M.getContext().diagnose(Diag);
4164     }
4165   }
4166   bool Modified = StripDebugInfo(M);
4167   if (Modified && Version != DEBUG_METADATA_VERSION) {
4168     // Diagnose a version mismatch.
4169     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4170     M.getContext().diagnose(DiagVersion);
4171   }
4172   return Modified;
4173 }
4174 
4175 /// This checks for objc retain release marker which should be upgraded. It
4176 /// returns true if module is modified.
4177 static bool UpgradeRetainReleaseMarker(Module &M) {
4178   bool Changed = false;
4179   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4180   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4181   if (ModRetainReleaseMarker) {
4182     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4183     if (Op) {
4184       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4185       if (ID) {
4186         SmallVector<StringRef, 4> ValueComp;
4187         ID->getString().split(ValueComp, "#");
4188         if (ValueComp.size() == 2) {
4189           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4190           ID = MDString::get(M.getContext(), NewValue);
4191         }
4192         M.addModuleFlag(Module::Error, MarkerKey, ID);
4193         M.eraseNamedMetadata(ModRetainReleaseMarker);
4194         Changed = true;
4195       }
4196     }
4197   }
4198   return Changed;
4199 }
4200 
4201 void llvm::UpgradeARCRuntime(Module &M) {
4202   // This lambda converts normal function calls to ARC runtime functions to
4203   // intrinsic calls.
4204   auto UpgradeToIntrinsic = [&](const char *OldFunc,
4205                                 llvm::Intrinsic::ID IntrinsicFunc) {
4206     Function *Fn = M.getFunction(OldFunc);
4207 
4208     if (!Fn)
4209       return;
4210 
4211     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4212 
4213     for (User *U : make_early_inc_range(Fn->users())) {
4214       CallInst *CI = dyn_cast<CallInst>(U);
4215       if (!CI || CI->getCalledFunction() != Fn)
4216         continue;
4217 
4218       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4219       FunctionType *NewFuncTy = NewFn->getFunctionType();
4220       SmallVector<Value *, 2> Args;
4221 
4222       // Don't upgrade the intrinsic if it's not valid to bitcast the return
4223       // value to the return type of the old function.
4224       if (NewFuncTy->getReturnType() != CI->getType() &&
4225           !CastInst::castIsValid(Instruction::BitCast, CI,
4226                                  NewFuncTy->getReturnType()))
4227         continue;
4228 
4229       bool InvalidCast = false;
4230 
4231       for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4232         Value *Arg = CI->getArgOperand(I);
4233 
4234         // Bitcast argument to the parameter type of the new function if it's
4235         // not a variadic argument.
4236         if (I < NewFuncTy->getNumParams()) {
4237           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4238           // to the parameter type of the new function.
4239           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4240                                      NewFuncTy->getParamType(I))) {
4241             InvalidCast = true;
4242             break;
4243           }
4244           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4245         }
4246         Args.push_back(Arg);
4247       }
4248 
4249       if (InvalidCast)
4250         continue;
4251 
4252       // Create a call instruction that calls the new function.
4253       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4254       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4255       NewCall->takeName(CI);
4256 
4257       // Bitcast the return value back to the type of the old call.
4258       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4259 
4260       if (!CI->use_empty())
4261         CI->replaceAllUsesWith(NewRetVal);
4262       CI->eraseFromParent();
4263     }
4264 
4265     if (Fn->use_empty())
4266       Fn->eraseFromParent();
4267   };
4268 
4269   // Unconditionally convert a call to "clang.arc.use" to a call to
4270   // "llvm.objc.clang.arc.use".
4271   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4272 
4273   // Upgrade the retain release marker. If there is no need to upgrade
4274   // the marker, that means either the module is already new enough to contain
4275   // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4276   if (!UpgradeRetainReleaseMarker(M))
4277     return;
4278 
4279   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4280       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4281       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4282       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4283       {"objc_autoreleaseReturnValue",
4284        llvm::Intrinsic::objc_autoreleaseReturnValue},
4285       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4286       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4287       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4288       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4289       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4290       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4291       {"objc_release", llvm::Intrinsic::objc_release},
4292       {"objc_retain", llvm::Intrinsic::objc_retain},
4293       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4294       {"objc_retainAutoreleaseReturnValue",
4295        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4296       {"objc_retainAutoreleasedReturnValue",
4297        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4298       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4299       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4300       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4301       {"objc_unsafeClaimAutoreleasedReturnValue",
4302        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4303       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4304       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4305       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4306       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4307       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4308       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4309       {"objc_arc_annotation_topdown_bbstart",
4310        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4311       {"objc_arc_annotation_topdown_bbend",
4312        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4313       {"objc_arc_annotation_bottomup_bbstart",
4314        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4315       {"objc_arc_annotation_bottomup_bbend",
4316        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4317 
4318   for (auto &I : RuntimeFuncs)
4319     UpgradeToIntrinsic(I.first, I.second);
4320 }
4321 
4322 bool llvm::UpgradeModuleFlags(Module &M) {
4323   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4324   if (!ModFlags)
4325     return false;
4326 
4327   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4328   bool HasSwiftVersionFlag = false;
4329   uint8_t SwiftMajorVersion, SwiftMinorVersion;
4330   uint32_t SwiftABIVersion;
4331   auto Int8Ty = Type::getInt8Ty(M.getContext());
4332   auto Int32Ty = Type::getInt32Ty(M.getContext());
4333 
4334   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4335     MDNode *Op = ModFlags->getOperand(I);
4336     if (Op->getNumOperands() != 3)
4337       continue;
4338     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4339     if (!ID)
4340       continue;
4341     if (ID->getString() == "Objective-C Image Info Version")
4342       HasObjCFlag = true;
4343     if (ID->getString() == "Objective-C Class Properties")
4344       HasClassProperties = true;
4345     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4346     // field was Error and now they are Max.
4347     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4348       if (auto *Behavior =
4349               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4350         if (Behavior->getLimitedValue() == Module::Error) {
4351           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4352           Metadata *Ops[3] = {
4353               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4354               MDString::get(M.getContext(), ID->getString()),
4355               Op->getOperand(2)};
4356           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4357           Changed = true;
4358         }
4359       }
4360     }
4361     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
4362     // section name so that llvm-lto will not complain about mismatching
4363     // module flags that is functionally the same.
4364     if (ID->getString() == "Objective-C Image Info Section") {
4365       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4366         SmallVector<StringRef, 4> ValueComp;
4367         Value->getString().split(ValueComp, " ");
4368         if (ValueComp.size() != 1) {
4369           std::string NewValue;
4370           for (auto &S : ValueComp)
4371             NewValue += S.str();
4372           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4373                               MDString::get(M.getContext(), NewValue)};
4374           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4375           Changed = true;
4376         }
4377       }
4378     }
4379 
4380     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
4381     // If the higher bits are set, it adds new module flag for swift info.
4382     if (ID->getString() == "Objective-C Garbage Collection") {
4383       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4384       if (Md) {
4385         assert(Md->getValue() && "Expected non-empty metadata");
4386         auto Type = Md->getValue()->getType();
4387         if (Type == Int8Ty)
4388           continue;
4389         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
4390         if ((Val & 0xff) != Val) {
4391           HasSwiftVersionFlag = true;
4392           SwiftABIVersion = (Val & 0xff00) >> 8;
4393           SwiftMajorVersion = (Val & 0xff000000) >> 24;
4394           SwiftMinorVersion = (Val & 0xff0000) >> 16;
4395         }
4396         Metadata *Ops[3] = {
4397           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
4398           Op->getOperand(1),
4399           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
4400         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4401         Changed = true;
4402       }
4403     }
4404   }
4405 
4406   // "Objective-C Class Properties" is recently added for Objective-C. We
4407   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4408   // flag of value 0, so we can correclty downgrade this flag when trying to
4409   // link an ObjC bitcode without this module flag with an ObjC bitcode with
4410   // this module flag.
4411   if (HasObjCFlag && !HasClassProperties) {
4412     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4413                     (uint32_t)0);
4414     Changed = true;
4415   }
4416 
4417   if (HasSwiftVersionFlag) {
4418     M.addModuleFlag(Module::Error, "Swift ABI Version",
4419                     SwiftABIVersion);
4420     M.addModuleFlag(Module::Error, "Swift Major Version",
4421                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
4422     M.addModuleFlag(Module::Error, "Swift Minor Version",
4423                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
4424     Changed = true;
4425   }
4426 
4427   return Changed;
4428 }
4429 
4430 void llvm::UpgradeSectionAttributes(Module &M) {
4431   auto TrimSpaces = [](StringRef Section) -> std::string {
4432     SmallVector<StringRef, 5> Components;
4433     Section.split(Components, ',');
4434 
4435     SmallString<32> Buffer;
4436     raw_svector_ostream OS(Buffer);
4437 
4438     for (auto Component : Components)
4439       OS << ',' << Component.trim();
4440 
4441     return std::string(OS.str().substr(1));
4442   };
4443 
4444   for (auto &GV : M.globals()) {
4445     if (!GV.hasSection())
4446       continue;
4447 
4448     StringRef Section = GV.getSection();
4449 
4450     if (!Section.startswith("__DATA, __objc_catlist"))
4451       continue;
4452 
4453     // __DATA, __objc_catlist, regular, no_dead_strip
4454     // __DATA,__objc_catlist,regular,no_dead_strip
4455     GV.setSection(TrimSpaces(Section));
4456   }
4457 }
4458 
4459 namespace {
4460 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
4461 // callsites within a function that did not also have the strictfp attribute.
4462 // Since 10.0, if strict FP semantics are needed within a function, the
4463 // function must have the strictfp attribute and all calls within the function
4464 // must also have the strictfp attribute. This latter restriction is
4465 // necessary to prevent unwanted libcall simplification when a function is
4466 // being cloned (such as for inlining).
4467 //
4468 // The "dangling" strictfp attribute usage was only used to prevent constant
4469 // folding and other libcall simplification. The nobuiltin attribute on the
4470 // callsite has the same effect.
4471 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
4472   StrictFPUpgradeVisitor() {}
4473 
4474   void visitCallBase(CallBase &Call) {
4475     if (!Call.isStrictFP())
4476       return;
4477     if (isa<ConstrainedFPIntrinsic>(&Call))
4478       return;
4479     // If we get here, the caller doesn't have the strictfp attribute
4480     // but this callsite does. Replace the strictfp attribute with nobuiltin.
4481     Call.removeFnAttr(Attribute::StrictFP);
4482     Call.addFnAttr(Attribute::NoBuiltin);
4483   }
4484 };
4485 } // namespace
4486 
4487 void llvm::UpgradeFunctionAttributes(Function &F) {
4488   // If a function definition doesn't have the strictfp attribute,
4489   // convert any callsite strictfp attributes to nobuiltin.
4490   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
4491     StrictFPUpgradeVisitor SFPV;
4492     SFPV.visit(F);
4493   }
4494 
4495   if (F.getCallingConv() == CallingConv::X86_INTR &&
4496       !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
4497     Type *ByValTy = F.getArg(0)->getType()->getPointerElementType();
4498     Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
4499     F.addParamAttr(0, NewAttr);
4500   }
4501 
4502   // Remove all incompatibile attributes from function.
4503   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
4504   for (auto &Arg : F.args())
4505     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
4506 }
4507 
4508 static bool isOldLoopArgument(Metadata *MD) {
4509   auto *T = dyn_cast_or_null<MDTuple>(MD);
4510   if (!T)
4511     return false;
4512   if (T->getNumOperands() < 1)
4513     return false;
4514   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4515   if (!S)
4516     return false;
4517   return S->getString().startswith("llvm.vectorizer.");
4518 }
4519 
4520 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4521   StringRef OldPrefix = "llvm.vectorizer.";
4522   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4523 
4524   if (OldTag == "llvm.vectorizer.unroll")
4525     return MDString::get(C, "llvm.loop.interleave.count");
4526 
4527   return MDString::get(
4528       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4529              .str());
4530 }
4531 
4532 static Metadata *upgradeLoopArgument(Metadata *MD) {
4533   auto *T = dyn_cast_or_null<MDTuple>(MD);
4534   if (!T)
4535     return MD;
4536   if (T->getNumOperands() < 1)
4537     return MD;
4538   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4539   if (!OldTag)
4540     return MD;
4541   if (!OldTag->getString().startswith("llvm.vectorizer."))
4542     return MD;
4543 
4544   // This has an old tag.  Upgrade it.
4545   SmallVector<Metadata *, 8> Ops;
4546   Ops.reserve(T->getNumOperands());
4547   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4548   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4549     Ops.push_back(T->getOperand(I));
4550 
4551   return MDTuple::get(T->getContext(), Ops);
4552 }
4553 
4554 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4555   auto *T = dyn_cast<MDTuple>(&N);
4556   if (!T)
4557     return &N;
4558 
4559   if (none_of(T->operands(), isOldLoopArgument))
4560     return &N;
4561 
4562   SmallVector<Metadata *, 8> Ops;
4563   Ops.reserve(T->getNumOperands());
4564   for (Metadata *MD : T->operands())
4565     Ops.push_back(upgradeLoopArgument(MD));
4566 
4567   return MDTuple::get(T->getContext(), Ops);
4568 }
4569 
4570 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4571   Triple T(TT);
4572   // For AMDGPU we uprgrade older DataLayouts to include the default globals
4573   // address space of 1.
4574   if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
4575     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
4576   }
4577 
4578   std::string Res = DL.str();
4579   if (!T.isX86())
4580     return Res;
4581 
4582   // If the datalayout matches the expected format, add pointer size address
4583   // spaces to the datalayout.
4584   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4585   if (!DL.contains(AddrSpaces)) {
4586     SmallVector<StringRef, 4> Groups;
4587     Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4588     if (R.match(DL, &Groups))
4589       Res = (Groups[1] + AddrSpaces + Groups[3]).str();
4590   }
4591 
4592   // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
4593   // Raising the alignment is safe because Clang did not produce f80 values in
4594   // the MSVC environment before this upgrade was added.
4595   if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
4596     StringRef Ref = Res;
4597     auto I = Ref.find("-f80:32-");
4598     if (I != StringRef::npos)
4599       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
4600   }
4601 
4602   return Res;
4603 }
4604 
4605 void llvm::UpgradeAttributes(AttrBuilder &B) {
4606   StringRef FramePointer;
4607   Attribute A = B.getAttribute("no-frame-pointer-elim");
4608   if (A.isValid()) {
4609     // The value can be "true" or "false".
4610     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
4611     B.removeAttribute("no-frame-pointer-elim");
4612   }
4613   if (B.contains("no-frame-pointer-elim-non-leaf")) {
4614     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4615     if (FramePointer != "all")
4616       FramePointer = "non-leaf";
4617     B.removeAttribute("no-frame-pointer-elim-non-leaf");
4618   }
4619   if (!FramePointer.empty())
4620     B.addAttribute("frame-pointer", FramePointer);
4621 
4622   A = B.getAttribute("null-pointer-is-valid");
4623   if (A.isValid()) {
4624     // The value can be "true" or "false".
4625     bool NullPointerIsValid = A.getValueAsString() == "true";
4626     B.removeAttribute("null-pointer-is-valid");
4627     if (NullPointerIsValid)
4628       B.addAttribute(Attribute::NullPointerIsValid);
4629   }
4630 }
4631