//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
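// Each Predicate below gates instruction selection; the paired
// AssemblerPredicate gates assembly matching, and its second argument is the
// user-facing feature name that shows up in assembler diagnostics.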
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                 AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                                 AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                                 AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                 AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicate<"FeatureVH", "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicate<"FeatureLOR", "lor">;

def HasPA            : Predicate<"Subtarget->hasPA()">,
                       AssemblerPredicate<"FeaturePA", "pa">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicate<"FeatureJS", "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicate<"FeatureCCIDX", "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicate<"FeatureComplxNum", "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicate<"FeatureNV", "nv">;

def HasRASv8_4       : Predicate<"Subtarget->hasRASv8_4()">,
                       AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicate<"FeatureMPAM", "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicate<"FeatureDIT", "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicate<"FeatureAM", "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicate<"FeatureSEL2", "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;

def HasFMI           : Predicate<"Subtarget->hasFMI()">,
                       AssemblerPredicate<"FeatureFMI", "fmi">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPCImm()">,
                       AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                               AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                 AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                                 AssemblerPredicate<"FeatureSM4", "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                                 AssemblerPredicate<"FeatureSHA3", "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                                 AssemblerPredicate<"FeatureSHA2", "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                                 AssemblerPredicate<"FeatureAES", "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                 AssemblerPredicate<"FeatureCRC", "crc">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                                 AssemblerPredicate<"FeatureLSE", "lse">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                 AssemblerPredicate<"FeatureRAS", "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                                 AssemblerPredicate<"FeatureRDM", "rdm">;
def HasPerfMon       : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                                 AssemblerPredicate<"FeatureFP16FML", "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                                 AssemblerPredicate<"FeatureSPE", "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                 AssemblerPredicate<"FeatureFuseAES",
                                 "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                                 AssemblerPredicate<"FeatureSVE", "sve">;
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                 AssemblerPredicate<"FeatureRCPC", "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicate<"FeatureAltFPCmp", "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicate<"FeatureFRInt3264", "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicate<"FeatureSB", "sb">;
def HasPredRes       : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicate<"FeaturePredRes", "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicate<"FeatureCacheDeepPersist", "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicate<"FeatureBranchTargetId", "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicate<"FeatureMTE", "mte">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
                                             "NegativeImmediates">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                              [SDTCisSameAs<0, 2>,
                                               SDTCisSameAs<0, 3>,
                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS_IN
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                            [SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>,
                                             SDTCisInt<0>,
                                             SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES, FLAGS_OUT = op LHS, RHS, FLAGS_IN
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                            [SDTCisSameAs<0, 2>,
                                             SDTCisSameAs<0, 3>,
                                             SDTCisInt<0>,
                                             SDTCisVT<1, i32>,
                                             SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                      SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisInt<3>,
                                    SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
                                   [SDTCisFP<0>,
                                    SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>,
                                           SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                 SDTCisPtrTy<1>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The profile has no results and a single operand (the variable).
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                          [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;


// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;
def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag       : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn>;
def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp      : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;

def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                        (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                               [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                    SDTCisSameAs<1, 2>]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize   : Predicate<"MF->getFunction().optForSize()">;
  def NotForCodeSize   : Predicate<"!MF->getFunction().optForSize()">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;

  def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
  def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to an empty list because we expect these instructions to simply
// be removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                            tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                             tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                             tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                             tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                            texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow GPR64:$src,
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
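// A sketch of the idea (illustrative only; the real sequence is emitted by the
// pseudo-expansion pass):
//   ldrsw $scratch, [$table, $entry, lsl #2]   // load offset from the table
//   add   $dst, $table, $scratch               // table acts as the PC-relative base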
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

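// Pseudos that harden a value against speculative execution; they are expanded
// later (a sketch: by the speculative-load-hardening machinery) and must not
// be moved or deleted, hence hasSideEffects = 1.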
let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}


//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;
  let Inst{12}   = 0;
  let Predicates = [HasTRACEV8_4];
}
}

// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0, 0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;
} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
} // HasSM4

let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. No predicate here; that is done
// inside the multiclass, as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
                                               "fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                           "fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
                                       null_frag>;

// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space, so they can also be used
// on plain ARMv8 targets, where they execute as NOPs.
let Uses = [LR], Defs = [LR] in {
  def PACIAZ   : SystemNoOperands<0b000, "paciaz">;
  def PACIBZ   : SystemNoOperands<0b010, "pacibz">;
  def AUTIAZ   : SystemNoOperands<0b100, "autiaz">;
  def AUTIBZ   : SystemNoOperands<0b110, "autibz">;
}
let Uses = [LR, SP], Defs = [LR] in {
  def PACIASP  : SystemNoOperands<0b001, "paciasp">;
  def PACIBSP  : SystemNoOperands<0b011, "pacibsp">;
  def AUTIASP  : SystemNoOperands<0b101, "autiasp">;
  def AUTIBSP  : SystemNoOperands<0b111, "autibsp">;
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
  def PACIA1716  : SystemNoOperands<0b000, "pacia1716">;
  def PACIB1716  : SystemNoOperands<0b010, "pacib1716">;
  def AUTIA1716  : SystemNoOperands<0b100, "autia1716">;
  def AUTIB1716  : SystemNoOperands<0b110, "autib1716">;
}

let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
  def XPACLRI   : SystemNoOperands<0b111, "xpaclri">;
}

// These pointer authentication instructions require ARMv8.3a.
let Predicates = [HasPA] in {
  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>;
    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>;
    def IZA  : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>;
    def DZA  : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>;
    def IZB  : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>;
    def DZB  : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>;
  }

  defm PAC : SignAuth<0b000, 0b010, "pac">;
  defm AUT : SignAuth<0b001, 0b011, "aut">;

  def XPACI : SignAuthZero<0b100, 0b00, "xpaci">;
  def XPACD : SignAuthZero<0b100, 0b01, "xpacd">;
  def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;

  // Combined Instructions
  def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
  def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
  def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
  def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;

  def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
  def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
  def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
  def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;

  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
    def RETAA   : AuthReturn<0b010, 0, "retaa">;
    def RETAB   : AuthReturn<0b010, 1, "retab">;
    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
  }

  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;

}

// v8.3a floating-point conversion for JavaScript
let Predicates = [HasJS, HasFPARMv8] in
def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
                                      "fjcvtzs", []> {
  let Inst{31} = 0;
} // HasJS, HasFPARMv8

// v8.4 Flag manipulation instructions
let Predicates = [HasFMI] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
  let Inst{20-5} = 0b0000001000000000;
}
def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
                        "{\t$Rn, $imm, $mask}">;
} // HasFMI

// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {

def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b001;
}

def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b010;
}
} // HasAltNZCV


// Armv8.5-A speculation barrier
def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
  let Inst{20-5} = 0b0001100110000111;
  let Unpredictable{11-8} = 0b1111;
  let Predicates = [HasSB];
  let hasSideEffects = 1;
}

def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;

def MRS    : MRSI;
def MSR    : MSRI;
def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
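// (A sketch of the eventual expansion: "mrs $dst, TPIDR_EL0".)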
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;

// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
def : Pat<(readcyclecounter), (MRS 0xdce8)>;

// FPCR register
def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
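// For example, "movk x0, #1" is accepted as if written "movk x0, #1, lsl #0".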
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
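// For example, "mov w0, #0x10000" can be encoded as MOVZWi with a shift of 16
// (illustrative; the asm-operand predicates below decide which form matches).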
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly


// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def s64imm_32bit : ImmLeaf<i64, [{
  int64_t Imm64 = static_cast<int64_t>(Imm);
  return Imm64 >= std::numeric_limits<int32_t>::min() &&
         Imm64 <= std::numeric_limits<int32_t>::max();
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
  GISDNodeXFormEquiv<trunc_imm>;

def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
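// For example (illustrative): an i64 constant 0x12345678 becomes
//   (SUBREG_TO_REG (i64 0), (MOVi32imm 0x12345678), sub_32)
// rather than going through the 64-bit MOVi64imm expansion.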

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;


def : Pat<(f32 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;


// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                             tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
                                  tglobaladdr:$g1, 16),
                          tglobaladdr:$g2, 32),
                  tglobaladdr:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                             tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
                                  tblockaddress:$g1, 16),
                          tblockaddress:$g2, 32),
                  tblockaddress:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                             tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
                                  tconstpool:$g1, 16),
                          tconstpool:$g2, 32),
                  tconstpool:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                             tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
                                  tjumptable:$g1, 16),
                          tjumptable:$g2, 32),
                  tjumptable:$g3, 48)>;

//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;

// Add/subtract
defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;

def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;

// Use SUBS instead of SUB to enable CSE between SUBS and SUB: if both the
// result and the flags of a subtraction are needed, selecting SUBS for the
// plain subtraction lets the two be merged.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
          (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
}

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
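// For example (illustrative): (add GPR32:$x, -1) is selected as
// (SUBSWri $x, 1), since 1 fits the unsigned immediate field while -1 does not.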
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

// Likewise for the flag-setting variants.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;

def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
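// A plain multiply has no accumulator operand, so the patterns below select
// MADD/MSUB with the zero register (printed via the "mul"/"mneg" aliases
// defined further down).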
let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;

def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 5

let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
} // AddedComplexity = 5
1133
1134def : MulAccumWAlias<"mul", MADDWrrr>;
1135def : MulAccumXAlias<"mul", MADDXrrr>;
1136def : MulAccumWAlias<"mneg", MSUBWrrr>;
1137def : MulAccumXAlias<"mneg", MSUBXrrr>;
1138def : WideMulAccumAlias<"smull", SMADDLrrr>;
1139def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
1140def : WideMulAccumAlias<"umull", UMADDLrrr>;
1141def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
1142
1143// Multiply-high
1144def SMULHrr : MulHi<0b010, "smulh", mulhs>;
1145def UMULHrr : MulHi<0b110, "umulh", mulhu>;
1146
1147// CRC32
1148def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
1149def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
1150def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
1151def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
1152
1153def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
1154def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
1155def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
1156def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
1157
1158// v8.1 atomic CAS
1159defm CAS   : CompareAndSwap<0, 0, "">;
1160defm CASA  : CompareAndSwap<1, 0, "a">;
1161defm CASL  : CompareAndSwap<0, 1, "l">;
1162defm CASAL : CompareAndSwap<1, 1, "al">;
1163
1164// v8.1 atomic CASP
1165defm CASP   : CompareAndSwapPair<0, 0, "">;
1166defm CASPA  : CompareAndSwapPair<1, 0, "a">;
1167defm CASPL  : CompareAndSwapPair<0, 1, "l">;
1168defm CASPAL : CompareAndSwapPair<1, 1, "al">;
1169
1170// v8.1 atomic SWP
1171defm SWP   : Swap<0, 0, "">;
1172defm SWPA  : Swap<1, 0, "a">;
1173defm SWPL  : Swap<0, 1, "l">;
1174defm SWPAL : Swap<1, 1, "al">;
1175
1176// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
1177defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
1178defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
1179defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
1180defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
1181
1182defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
1183defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
1184defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
1185defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
1186
1187defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
1188defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
1189defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
1190defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
1191
1192defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
1193defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
1194defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
1195defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
1196
1197defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
1198defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
1199defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
1200defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
1201
1202defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
1203defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
1204defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
1205defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
1206
1207defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
1208defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
1209defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
1210defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
1211
1212defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
1213defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
1214defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
1215defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
1216
1217// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR"
1218defm : STOPregister<"stadd","LDADD">; // STADDx
1219defm : STOPregister<"stclr","LDCLR">; // STCLRx
1220defm : STOPregister<"steor","LDEOR">; // STEORx
1221defm : STOPregister<"stset","LDSET">; // STSETx
1222defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
1223defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
1224defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
1225defm : STOPregister<"stumin","LDUMIN">;// STUMINx
1226
1227// v8.5 Memory Tagging Extension
1228let Predicates = [HasMTE] in {
1229
1230def IRG   : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>,
1231            Sched<[]>{
1232  let Inst{31} = 1;
1233}
1234def GMI   : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{
1235  let Inst{31} = 1;
1236  let isNotDuplicable = 1;
1237}
1238def ADDG  : AddSubG<0, "addg", null_frag>;
1239def SUBG  : AddSubG<1, "subg", null_frag>;
1240
1241def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
1242
1243def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>;
1244def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
1245  let Defs = [NZCV];
1246}
1247
1248def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
1249
1250def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
1251def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
1252
1253def LDGV : MemTagVector<1, "ldgv", "\t$Rt, [$Rn]!",
1254                   (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn)> {
1255  let DecoderMethod = "DecodeLoadAllocTagArrayInstruction";
1256}
1257def STGV : MemTagVector<0, "stgv", "\t$Rt, [$Rn]!",
1258                   (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn)>;
1259
1260defm STG   : MemTagStore<0b00, "stg">;
1261defm STZG  : MemTagStore<0b01, "stzg">;
1262defm ST2G  : MemTagStore<0b10, "st2g">;
1263defm STZ2G : MemTagStore<0b11, "stz2g">;
1264
1265defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
1266def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
1267def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
1268
1269} // Predicates = [HasMTE]
1270
1271//===----------------------------------------------------------------------===//
1272// Logical instructions.
1273//===----------------------------------------------------------------------===//
1274
1275// (immediate)
1276defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
1277defm AND  : LogicalImm<0b00, "and", and, "bic">;
1278defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
1279defm ORR  : LogicalImm<0b01, "orr", or, "orn">;
1280
1281// FIXME: these aliases *are* canonical sometimes (when movz can't be
1282// used). Actually, it seems to be working right now, but putting logical_immXX
1283// here is a bit dodgy on the AsmParser side too.
1284def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
1285                                          logical_imm32:$imm), 0>;
1286def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
1287                                          logical_imm64:$imm), 0>;
1288
1289
1290// (register)
1291defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
1292defm BICS : LogicalRegS<0b11, 1, "bics",
1293                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
1294defm AND  : LogicalReg<0b00, 0, "and", and>;
1295defm BIC  : LogicalReg<0b00, 1, "bic",
1296                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
1297defm EON  : LogicalReg<0b10, 1, "eon",
1298                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
1299defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
1300defm ORN  : LogicalReg<0b01, 1, "orn",
1301                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
1302defm ORR  : LogicalReg<0b01, 0, "orr", or>;
1303
1304def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
1305def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
1306
1307def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
1308def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
1309
1310def : InstAlias<"mvn $Wd, $Wm$sh",
1311                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
1312def : InstAlias<"mvn $Xd, $Xm$sh",
1313                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
1314
1315def : InstAlias<"tst $src1, $src2",
1316                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
1317def : InstAlias<"tst $src1, $src2",
1318                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
1319
1320def : InstAlias<"tst $src1, $src2",
1321                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
1322def : InstAlias<"tst $src1, $src2",
1323                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
1324
1325def : InstAlias<"tst $src1, $src2$sh",
1326               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
1327def : InstAlias<"tst $src1, $src2$sh",
1328               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
1329
1330
1331def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
1332def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
1333
1334
1335//===----------------------------------------------------------------------===//
1336// One operand data processing instructions.
1337//===----------------------------------------------------------------------===//
1338
1339defm CLS    : OneOperandData<0b101, "cls">;
1340defm CLZ    : OneOperandData<0b100, "clz", ctlz>;
1341defm RBIT   : OneOperandData<0b000, "rbit", bitreverse>;
1342
1343def  REV16Wr : OneWRegData<0b001, "rev16",
1344                                  UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
1345def  REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
1346
1347def : Pat<(cttz GPR32:$Rn),
1348          (CLZWr (RBITWr GPR32:$Rn))>;
1349def : Pat<(cttz GPR64:$Rn),
1350          (CLZXr (RBITXr GPR64:$Rn))>;
1351def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
1352                (i32 1))),
1353          (CLSWr GPR32:$Rn)>;
1354def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
1355                (i64 1))),
1356          (CLSXr GPR64:$Rn)>;
1357
1358// Unlike the other one operand instructions, the instructions with the "rev"
1359// mnemonic do *not* just different in the size bit, but actually use different
1360// opcode bits for the different sizes.
1361def REVWr   : OneWRegData<0b010, "rev", bswap>;
1362def REVXr   : OneXRegData<0b011, "rev", bswap>;
1363def REV32Xr : OneXRegData<0b010, "rev32",
1364                                 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
1365
1366def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
1367
1368// The bswap commutes with the rotr so we want a pattern for both possible
1369// orders.
1370def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
1371def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
1372
1373//===----------------------------------------------------------------------===//
1374// Bitfield immediate extraction instruction.
1375//===----------------------------------------------------------------------===//
1376let hasSideEffects = 0 in
1377defm EXTR : ExtractImm<"extr">;
1378def : InstAlias<"ror $dst, $src, $shift",
1379            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
1380def : InstAlias<"ror $dst, $src, $shift",
1381            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
1382
1383def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
1384          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
1385def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
1386          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
1387
1388//===----------------------------------------------------------------------===//
1389// Other bitfield immediate instructions.
1390//===----------------------------------------------------------------------===//
1391let hasSideEffects = 0 in {
1392defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
1393defm SBFM : BitfieldImm<0b00, "sbfm">;
1394defm UBFM : BitfieldImm<0b10, "ubfm">;
1395}
1396
1397def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
1398  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
1399  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1400}]>;
1401
1402def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
1403  uint64_t enc = 31 - N->getZExtValue();
1404  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1405}]>;
1406
1407// min(7, 31 - shift_amt)
1408def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
1409  uint64_t enc = 31 - N->getZExtValue();
1410  enc = enc > 7 ? 7 : enc;
1411  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1412}]>;
1413
1414// min(15, 31 - shift_amt)
1415def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
1416  uint64_t enc = 31 - N->getZExtValue();
1417  enc = enc > 15 ? 15 : enc;
1418  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1419}]>;
1420
1421def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
1422  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
1423  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1424}]>;
1425
1426def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
1427  uint64_t enc = 63 - N->getZExtValue();
1428  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1429}]>;
1430
1431// min(7, 63 - shift_amt)
1432def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
1433  uint64_t enc = 63 - N->getZExtValue();
1434  enc = enc > 7 ? 7 : enc;
1435  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1436}]>;
1437
1438// min(15, 63 - shift_amt)
1439def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
1440  uint64_t enc = 63 - N->getZExtValue();
1441  enc = enc > 15 ? 15 : enc;
1442  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1443}]>;
1444
1445// min(31, 63 - shift_amt)
1446def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
1447  uint64_t enc = 63 - N->getZExtValue();
1448  enc = enc > 31 ? 31 : enc;
1449  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
1450}]>;
1451
1452def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
1453          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
1454                              (i64 (i32shift_b imm0_31:$imm)))>;
1455def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
1456          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
1457                              (i64 (i64shift_b imm0_63:$imm)))>;
1458
1459let AddedComplexity = 10 in {
1460def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
1461          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
1462def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
1463          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
1464}
1465
1466def : InstAlias<"asr $dst, $src, $shift",
1467                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
1468def : InstAlias<"asr $dst, $src, $shift",
1469                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
1470def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
1471def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
1472def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
1473def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
1474def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
1475
1476def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
1477          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
1478def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
1479          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
1480
1481def : InstAlias<"lsr $dst, $src, $shift",
1482                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
1483def : InstAlias<"lsr $dst, $src, $shift",
1484                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
1485def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
1486def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
1487def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
1488def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
1489def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
1490
1491//===----------------------------------------------------------------------===//
1492// Conditional comparison instructions.
1493//===----------------------------------------------------------------------===//
1494defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
1495defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
1496
1497//===----------------------------------------------------------------------===//
1498// Conditional select instructions.
1499//===----------------------------------------------------------------------===//
1500defm CSEL  : CondSelect<0, 0b00, "csel">;
1501
1502def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
1503defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
1504defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
1505defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
1506
1507def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
1508          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
1509def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
1510          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
1511def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
1512          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
1513def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
1514          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
1515def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
1516          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
1517def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
1518          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
1519
1520def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
1521          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
1522def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
1523          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
1524def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
1525          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
1526def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
1527          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
1528def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
1529          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
1530def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
1531          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
1532def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
1533          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
1534def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
1535          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
1536def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
1537          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
1538def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
1539          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
1540def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
1541          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
1542def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
1543          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
1544
1545// The inverse of the condition code from the alias instruction is what is used
1546// in the aliased instruction. The parser all ready inverts the condition code
1547// for these aliases.
1548def : InstAlias<"cset $dst, $cc",
1549                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
1550def : InstAlias<"cset $dst, $cc",
1551                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
1552
1553def : InstAlias<"csetm $dst, $cc",
1554                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
1555def : InstAlias<"csetm $dst, $cc",
1556                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
1557
1558def : InstAlias<"cinc $dst, $src, $cc",
1559                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
1560def : InstAlias<"cinc $dst, $src, $cc",
1561                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
1562
1563def : InstAlias<"cinv $dst, $src, $cc",
1564                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
1565def : InstAlias<"cinv $dst, $src, $cc",
1566                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
1567
1568def : InstAlias<"cneg $dst, $src, $cc",
1569                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
1570def : InstAlias<"cneg $dst, $src, $cc",
1571                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
1572
1573//===----------------------------------------------------------------------===//
1574// PC-relative instructions.
1575//===----------------------------------------------------------------------===//
1576let isReMaterializable = 1 in {
1577let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
1578def ADR  : ADRI<0, "adr", adrlabel,
1579                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
1580} // hasSideEffects = 0
1581
1582def ADRP : ADRI<1, "adrp", adrplabel,
1583                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
1584} // isReMaterializable = 1
1585
1586// page address of a constant pool entry, block address
1587def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
1588def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
1589def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
1590def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
1591def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
1592def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
1593def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
1594
1595//===----------------------------------------------------------------------===//
1596// Unconditional branch (register) instructions.
1597//===----------------------------------------------------------------------===//
1598
1599let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1600def RET  : BranchReg<0b0010, "ret", []>;
1601def DRPS : SpecialReturn<0b0101, "drps">;
1602def ERET : SpecialReturn<0b0100, "eret">;
1603} // isReturn = 1, isTerminator = 1, isBarrier = 1
1604
1605// Default to the LR register.
1606def : InstAlias<"ret", (RET LR)>;
1607
1608let isCall = 1, Defs = [LR], Uses = [SP] in {
1609def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
1610} // isCall
1611
1612let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
1613def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
1614} // isBranch, isTerminator, isBarrier, isIndirectBranch
1615
1616// Create a separate pseudo-instruction for codegen to use so that we don't
1617// flag lr as used in every function. It'll be restored before the RET by the
1618// epilogue if it's legitimately used.
1619def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
1620                   Sched<[WriteBrReg]> {
1621  let isTerminator = 1;
1622  let isBarrier = 1;
1623  let isReturn = 1;
1624}
1625
1626// This is a directive-like pseudo-instruction. The purpose is to insert an
1627// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
1628// (which in the usual case is a BLR).
1629let hasSideEffects = 1 in
1630def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
1631  let AsmString = ".tlsdesccall $sym";
1632}
1633
1634// Pseudo instruction to tell the streamer to emit a 'B' character into the
1635// augmentation string.
1636def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
1637
1638// FIXME: maybe the scratch register used shouldn't be fixed to X1?
1639// FIXME: can "hasSideEffects be dropped?
1640let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
1641    isCodeGenOnly = 1 in
1642def TLSDESC_CALLSEQ
1643    : Pseudo<(outs), (ins i64imm:$sym),
1644             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
1645      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
1646def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
1647          (TLSDESC_CALLSEQ texternalsym:$sym)>;
1648
1649//===----------------------------------------------------------------------===//
1650// Conditional branch (immediate) instruction.
1651//===----------------------------------------------------------------------===//
1652def Bcc : BranchCond;
1653
1654//===----------------------------------------------------------------------===//
1655// Compare-and-branch instructions.
1656//===----------------------------------------------------------------------===//
1657defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
1658defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
1659
1660//===----------------------------------------------------------------------===//
1661// Test-bit-and-branch instructions.
1662//===----------------------------------------------------------------------===//
1663defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
1664defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
1665
1666//===----------------------------------------------------------------------===//
1667// Unconditional branch (immediate) instructions.
1668//===----------------------------------------------------------------------===//
1669let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
1670def B  : BranchImm<0, "b", [(br bb:$addr)]>;
1671} // isBranch, isTerminator, isBarrier
1672
1673let isCall = 1, Defs = [LR], Uses = [SP] in {
1674def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
1675} // isCall
1676def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
1677
1678//===----------------------------------------------------------------------===//
1679// Exception generation instructions.
1680//===----------------------------------------------------------------------===//
1681let isTrap = 1 in {
1682def BRK   : ExceptionGeneration<0b001, 0b00, "brk">;
1683}
1684def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
1685def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
1686def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
1687def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
1688def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
1689def SMC   : ExceptionGeneration<0b000, 0b11, "smc">;
1690def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;
1691
1692// DCPSn defaults to an immediate operand of zero if unspecified.
1693def : InstAlias<"dcps1", (DCPS1 0)>;
1694def : InstAlias<"dcps2", (DCPS2 0)>;
1695def : InstAlias<"dcps3", (DCPS3 0)>;
1696
1697def UDF : UDFType<0, "udf">;
1698
1699//===----------------------------------------------------------------------===//
1700// Load instructions.
1701//===----------------------------------------------------------------------===//
1702
1703// Pair (indexed, offset)
1704defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
1705defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
1706defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
1707defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
1708defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
1709
1710defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
1711
1712// Pair (pre-indexed)
1713def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
1714def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
1715def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
1716def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
1717def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
1718
1719def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
1720
1721// Pair (post-indexed)
1722def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
1723def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
1724def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
1725def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
1726def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
1727
1728def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
1729
1730
1731// Pair (no allocate)
1732defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
1733defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
1734defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
1735defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
1736defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
1737
1738//---
1739// (register offset)
1740//---
1741
1742// Integer
1743defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
1744defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
1745defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
1746defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
1747
1748// Floating-point
1749defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", untyped, load>;
1750defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
1751defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
1752defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
1753defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
1754
1755// Load sign-extended half-word
1756defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
1757defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
1758
1759// Load sign-extended byte
1760defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
1761defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
1762
1763// Load sign-extended word
1764defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
1765
1766// Pre-fetch.
1767defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
1768
1769// For regular load, we do not have any alignment requirement.
1770// Thus, it is safe to directly map the vector loads with interesting
1771// addressing modes.
1772// FIXME: We could do the same for bitconvert to floating point vectors.
1773multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
1774                              ValueType ScalTy, ValueType VecTy,
1775                              Instruction LOADW, Instruction LOADX,
1776                              SubRegIndex sub> {
1777  def : Pat<(VecTy (scalar_to_vector (ScalTy
1778              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
1779            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
1780                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
1781                           sub)>;
1782
1783  def : Pat<(VecTy (scalar_to_vector (ScalTy
1784              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
1785            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
1786                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
1787                           sub)>;
1788}
1789
1790let AddedComplexity = 10 in {
1791defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
1792defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;
1793
1794defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
1795defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
1796
1797defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
1798defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;
1799
1800defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
1801defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;
1802
1803defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
1804defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;
1805
1806defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;
1807
1808defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;
1809
1810
1811def : Pat <(v1i64 (scalar_to_vector (i64
1812                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
1813                                           ro_Wextend64:$extend))))),
1814           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
1815
1816def : Pat <(v1i64 (scalar_to_vector (i64
1817                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
1818                                           ro_Xextend64:$extend))))),
1819           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
1820}
1821
1822// Match all load 64 bits width whose type is compatible with FPR64
1823multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
1824                        Instruction LOADW, Instruction LOADX> {
1825
1826  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
1827            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
1828
1829  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
1830            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
1831}
1832
1833let AddedComplexity = 10 in {
1834let Predicates = [IsLE] in {
1835  // We must do vector loads with LD1 in big-endian.
1836  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
1837  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
1838  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
1839  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
1840  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
1841}
1842
1843defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
1844defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;
1845
1846// Match all load 128 bits width whose type is compatible with FPR128
1847let Predicates = [IsLE] in {
1848  // We must do vector loads with LD1 in big-endian.
1849  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
1850  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
1851  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
1852  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
1853  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
1854  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
1855  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
1856}
1857} // AddedComplexity = 10
1858
1859// zextload -> i64
1860multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
1861                            Instruction INSTW, Instruction INSTX> {
1862  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
1863            (SUBREG_TO_REG (i64 0),
1864                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
1865                           sub_32)>;
1866
1867  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
1868            (SUBREG_TO_REG (i64 0),
1869                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
1870                           sub_32)>;
1871}
1872
1873let AddedComplexity = 10 in {
1874  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
1875  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
1876  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;
1877
1878  // zextloadi1 -> zextloadi8
1879  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;
1880
1881  // extload -> zextload
1882  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
1883  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
1884  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;
1885
1886  // extloadi1 -> zextloadi8
1887  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
1888}
1889
1890
1891// zextload -> i64
1892multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
1893                            Instruction INSTW, Instruction INSTX> {
1894  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
1895            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
1896
1897  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
1898            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
1899
1900}
1901
1902let AddedComplexity = 10 in {
1903  // extload -> zextload
1904  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
1905  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
1906  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;
1907
1908  // zextloadi1 -> zextloadi8
1909  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
1910}
1911
1912//---
1913// (unsigned immediate)
1914//---
1915defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
1916                   [(set GPR64z:$Rt,
1917                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
1918defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
1919                   [(set GPR32z:$Rt,
1920                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
1921defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
1922                   [(set FPR8Op:$Rt,
1923                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
1924defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
1925                   [(set (f16 FPR16Op:$Rt),
1926                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
1927defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
1928                   [(set (f32 FPR32Op:$Rt),
1929                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
1930defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
1931                   [(set (f64 FPR64Op:$Rt),
1932                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
1933defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
1934                 [(set (f128 FPR128Op:$Rt),
1935                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
1936
1937// For regular load, we do not have any alignment requirement.
1938// Thus, it is safe to directly map the vector loads with interesting
1939// addressing modes.
1940// FIXME: We could do the same for bitconvert to floating point vectors.
1941def : Pat <(v8i8 (scalar_to_vector (i32
1942               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
1943           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
1944                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
1945def : Pat <(v16i8 (scalar_to_vector (i32
1946               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
1947           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
1948                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
1949def : Pat <(v4i16 (scalar_to_vector (i32
1950               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
1951           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
1952                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
1953def : Pat <(v8i16 (scalar_to_vector (i32
1954               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
1955           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
1956                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
1957def : Pat <(v2i32 (scalar_to_vector (i32
1958               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
1959           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
1960                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
1961def : Pat <(v4i32 (scalar_to_vector (i32
1962               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
1963           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
1964                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
1965def : Pat <(v1i64 (scalar_to_vector (i64
1966               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
1967           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1968def : Pat <(v2i64 (scalar_to_vector (i64
1969               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
1970           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
1971                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
1972
1973// Match all load 64 bits width whose type is compatible with FPR64
1974let Predicates = [IsLE] in {
1975  // We must use LD1 to perform vector loads in big-endian.
1976  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1977            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1978  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1979            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1980  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1981            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1982  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1983            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1984  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1985            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1986}
1987def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1988          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1989def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
1990          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
1991
1992// Match all load 128 bits width whose type is compatible with FPR128
1993let Predicates = [IsLE] in {
1994  // We must use LD1 to perform vector loads in big-endian.
1995  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
1996            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
1997  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
1998            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
1999  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
2000            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
2001  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
2002            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
2003  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
2004            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
2005  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
2006            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
2007  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
2008            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
2009}
2010def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
2011          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
2012
2013defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
2014                    [(set GPR32:$Rt,
2015                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
2016                                                     uimm12s2:$offset)))]>;
2017defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
2018                    [(set GPR32:$Rt,
2019                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
2020                                                   uimm12s1:$offset)))]>;
2021// zextload -> i64
2022def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2023    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
2024def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
2025    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
2026
2027// zextloadi1 -> zextloadi8
2028def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2029          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
2030def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2031    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
2032
2033// extload -> zextload
2034def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
2035          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
2036def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2037          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
2038def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2039          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
2040def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
2041    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
2042def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
2043    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
2044def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2045    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
2046def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
2047    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
2048
2049// load sign-extended half-word
2050defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
2051                     [(set GPR32:$Rt,
2052                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
2053                                                      uimm12s2:$offset)))]>;
2054defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
2055                     [(set GPR64:$Rt,
2056                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
2057                                                      uimm12s2:$offset)))]>;
2058
2059// load sign-extended byte
2060defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
2061                     [(set GPR32:$Rt,
2062                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
2063                                                    uimm12s1:$offset)))]>;
2064defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
2065                     [(set GPR64:$Rt,
2066                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
2067                                                    uimm12s1:$offset)))]>;
2068
2069// load sign-extended word
2070defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
2071                     [(set GPR64:$Rt,
2072                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
2073                                                      uimm12s4:$offset)))]>;
2074
2075// load zero-extended word
2076def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
2077      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
2078
2079// Pre-fetch.
2080def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
2081                        [(AArch64Prefetch imm:$Rt,
2082                                        (am_indexed64 GPR64sp:$Rn,
2083                                                      uimm12s8:$offset))]>;
2084
2085def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
2086
2087//---
2088// (literal)
2089
2090def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
2091  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
2092    const DataLayout &DL = MF->getDataLayout();
2093    unsigned Align = G->getGlobal()->getPointerAlignment(DL);
2094    return Align >= 4 && G->getOffset() % 4 == 0;
2095  }
2096  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
2097    return C->getAlignment() >= 4 && C->getOffset() % 4 == 0;
2098  return false;
2099}]>;
2100
2101def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
2102  [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
2103def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
2104  [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
2105def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
2106  [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
2107def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
2108  [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
2109def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
2110  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
2111
2112// load sign-extended word
2113def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
2114  [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
2115
2116let AddedComplexity = 20 in {
2117def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
2118        (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
2119}
2120
2121// prefetch
2122def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
2123//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
2124
2125//---
2126// (unscaled immediate)
2127defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
2128                    [(set GPR64z:$Rt,
2129                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
2130defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
2131                    [(set GPR32z:$Rt,
2132                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
2133defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
2134                    [(set FPR8Op:$Rt,
2135                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
2136defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
2137                    [(set FPR16Op:$Rt,
2138                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
2139defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
2140                    [(set (f32 FPR32Op:$Rt),
2141                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
2142defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
2143                    [(set (f64 FPR64Op:$Rt),
2144                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
2145defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
2146                    [(set (f128 FPR128Op:$Rt),
2147                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
2148
2149defm LDURHH
2150    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
2151             [(set GPR32:$Rt,
2152                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
2153defm LDURBB
2154    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
2155             [(set GPR32:$Rt,
2156                    (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
2157
2158// Match all load 64 bits width whose type is compatible with FPR64
2159let Predicates = [IsLE] in {
2160  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2161            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2162  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2163            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2164  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2165            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2166  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2167            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2168  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2169            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2170}
2171def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2172          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2173def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
2174          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
2175
2176// Match all load 128 bits width whose type is compatible with FPR128
2177let Predicates = [IsLE] in {
2178  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2179            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2180  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2181            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2182  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2183            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2184  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2185            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2186  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2187            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2188  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2189            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2190  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
2191            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
2192}
2193
2194//  anyext -> zext
2195def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
2196          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
2197def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2198          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
2199def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2200          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
2201def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
2202    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2203def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
2204    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2205def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2206    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2207def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2208    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2209// unscaled zext
2210def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
2211          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
2212def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2213          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
2214def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2215          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
2216def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
2217    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2218def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
2219    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2220def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2221    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2222def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
2223    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
2224
2225
2226//---
2227// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
2228
2229// Define new assembler match classes as we want to only match these when
2230// the don't otherwise match the scaled addressing mode for LDR/STR. Don't
2231// associate a DiagnosticType either, as we want the diagnostic for the
2232// canonical form (the scaled operand) to take precedence.
2233class SImm9OffsetOperand<int Width> : AsmOperandClass {
2234  let Name = "SImm9OffsetFB" # Width;
2235  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
2236  let RenderMethod = "addImmOperands";
2237}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;


// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
               [(set GPR32:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
              [(set GPR64:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                [(set GPR32:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                [(set GPR64:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch imm:$Rt,
                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", untyped, store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16,     store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32,     store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str", f128,    store>;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}
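// Note: register-offset STRQ can be slow on some subtargets, which is
// presumably why these patterns are guarded by UseSTRQro rather than being
// enabled unconditionally.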

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian mode.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian mode.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
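// For example, a store of lane 0 of a v4f32 becomes a plain STRSroW/STRSroX of
// the ssub subregister, avoiding a separate lane-extract instruction.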

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16,         store, v8f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4f32, f32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2f64, f64, dsub, STRDroW, STRDroX>;
}

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                   [(store GPR64z:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                    [(store GPR32z:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                     [(truncstorei16 GPR32z:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1,  "strb",
                     [(truncstorei8 GPR32z:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;

let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian mode.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128  FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian mode.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<Operand UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16,         store, v8f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4f32, f32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2f64, f64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                         [(store GPR64z:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                         [(store GPR32z:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                         [(store FPR8Op:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                         [(store (f16 FPR16Op:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                         [(store (f32 FPR32Op:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                         [(store (f64 FPR64Op:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                         [(store (f128 FPR128Op:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                         [(truncstorei16 GPR32z:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                         [(truncstorei8 GPR32z:$Rt,
                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX     : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB    : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH    : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR     : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW   : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX    : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}
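// For example, "ldapur w0, [x1, #-4]" is a load-acquire (RCpc) of a word at an
// unscaled byte offset, something the plain register-base LDAPR form cannot
// encode.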

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian mode.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian mode.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
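// For example, "str x0, [sp, #-16]" cannot be encoded with the scaled STRXui
// form (non-negative, 8-byte-scaled immediate only), so it assembles to STURXi.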
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, untyped>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
           simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,  "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,  "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, untyped>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
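
// These are the building blocks for atomic RMW sequences; a typical
// compare-and-swap loop looks like:
//   retry:
//     ldaxr w8, [x0]        // load-acquire exclusive
//     cmp   w8, w1          // compare with expected value
//     b.ne  done
//     stlxr w9, w2, [x0]    // store-release exclusive, w9 = status
//     cbnz  w9, retry       // retry if the exclusive monitor was lost
//   done: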

let Predicates = [HasLOR] in {
  // v8.1a "Limited Ordering Regions" extension load-acquire instructions
  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Ordering Regions" extension store-release instructions
  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
}

//===----------------------------------------------------------------------===//
// Floating point to integer conversion instructions (unscaled and scaled).
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
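// For example, a multiply by 2^16 feeding the conversion, i.e.
// (i32 (int_aarch64_neon_fcvtzs (fmul f32:$Rn, 65536.0))), selects the
// fixed-point form "fcvtzs $Rd, $Rn, #16", folding the scale into #fbits.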

multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">;
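// Each pattern folds the explicit rounding node into the conversion's rounding
// mode: FCVTP rounds toward +infinity (fceil), FCVTM toward -infinity
// (ffloor), FCVTZ toward zero (ftrunc), and FCVTA to nearest, ties away from
// zero (fround).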

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
    Sched<[WriteF]>, Requires<[HasFullFP16]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
}
// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
    Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
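// The pseudos above are later expanded to integer-to-FP zero moves (e.g.
// FMOVS0 becomes "fmov s0, wzr"), so rematerializing the zero is cheaper than
// spilling and reloading it.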

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPData<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPData<0b0000, "fmov">;
defm FNEG   : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;

def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
          (FRINTNDr FPR64:$Rn)>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z">;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z">;
  defm FRINT32X : FRIntNNT<0b01, "frint32x">;
  defm FRINT64X : FRIntNNT<0b11, "frint64x">;
} // HasFRInt3264

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", fsub>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (ins), unlike
// the NEON variant.
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// We handled -(a + b*c) for FNMADD above; now it's time for "(-a) + (-b)*c"
// and "(-a) + b*(-c)".
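// Both forms reduce to FNMADD: fma(-b, c, -a) = (-b)*c + (-a) = -(b*c) - a,
// and fma(b, -c, -a) = b*(-c) + (-a) = -(b*c) - a, which is exactly what
// FNMADD computes.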
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe">;
defm FCMP  : FPComparison<0, "fcmp", AArch64fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                       (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}
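// The custom inserter expands F128CSEL into a conditional branch around a copy
// of one operand, roughly "b.cond over; mov $Rd, $Rm; over:" (a sketch; see
// the target's EmitInstrWithCustomInserter hook for the exact expansion).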

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X :  Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
   let usesCustomInserter = 1 in
     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                    Sched<[]>;
}

let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
    usesCustomInserter = 1 in
def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]>;

//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                          int_aarch64_neon_uabd>;
// Match UABDL in log2-shuffle patterns.
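// DAGCombine may have already expanded abs(x) into (x + (x >>s 15)) ^
// (x >>s 15) for v8i16; the xor/add patterns below match that expanded form so
// UABDL is still selected.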
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                (zext (v8i8 V64:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
                           (zext (extract_high_v16i8 V128:$opB))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
                                (zext (extract_high_v16i8 V128:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
                           (zext (extract_high_v8i16 V128:$opB))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
                           (zext (extract_high_v4i32 V128:$opB))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;

defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                              (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
                                                    (i64 2))))),
          (FCVTLv4i32 V128:$Rn)>;

def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
                                                    (i64 4))))),
          (FCVTLv8i16 V128:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
3381  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
3382  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
3383} // HasFRInt3264
3384
3385defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
3386defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
3387defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
3388                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
3389defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
3390// Aliases for MVN -> NOT.
3391def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
3392                (NOTv8i8 V64:$Vd, V64:$Vn)>;
3393def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
3394                (NOTv16i8 V128:$Vd, V128:$Vn)>;
3395
3396def : Pat<(AArch64neg (v8i8  V64:$Rn)),  (NEGv8i8  V64:$Rn)>;
3397def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
3398def : Pat<(AArch64neg (v4i16 V64:$Rn)),  (NEGv4i16 V64:$Rn)>;
3399def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
3400def : Pat<(AArch64neg (v2i32 V64:$Rn)),  (NEGv2i32 V64:$Rn)>;
3401def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
3402def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
3403
3404def : Pat<(AArch64not (v8i8 V64:$Rn)),   (NOTv8i8  V64:$Rn)>;
3405def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3406def : Pat<(AArch64not (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
3407def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3408def : Pat<(AArch64not (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
3409def : Pat<(AArch64not (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
3410def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3411def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3412
3413def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
3414def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3415def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
3416def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3417def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
3418
3419defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
3420defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
3421defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
3422defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
3423defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
3424       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
3425defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
3426defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
3427defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
3428defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
3429defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
3430defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
3431defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
3432defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
3433defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
3434       BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
3435defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
3436                    int_aarch64_neon_uaddlp>;
3437defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
3438defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
3439defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
3440defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
3441defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
3442defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
3443
3444def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
3445def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
3446def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
3447def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
3448def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
3449def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
3450
3451// Patterns for vector long shift (by element width). These need to match all
3452// three of zext, sext and anyext so it's easier to pull the patterns out of the
3453// definition.
3454multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
3455  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
3456            (SHLLv8i8 V64:$Rn)>;
3457  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
3458            (SHLLv16i8 V128:$Rn)>;
3459  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
3460            (SHLLv4i16 V64:$Rn)>;
3461  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
3462            (SHLLv8i16 V128:$Rn)>;
3463  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
3464            (SHLLv2i32 V64:$Rn)>;
3465  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
3466            (SHLLv4i32 V128:$Rn)>;
3467}
3468
3469defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
3470defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
3471defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
3472
3473//===----------------------------------------------------------------------===//
3474// Advanced SIMD three vector instructions.
3475//===----------------------------------------------------------------------===//
3476
3477defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
3478defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
3479defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
3480defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
3481defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
3482defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
3483defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
3484defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
3485defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
3486let Predicates = [HasNEON] in {
3487foreach VT = [ v2f32, v4f32, v2f64 ] in
3488def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
3489}
3490let Predicates = [HasNEON, HasFullFP16] in {
3491foreach VT = [ v4f16, v8f16 ] in
3492def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
3493}
3494defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
3495defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
3496defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
3497defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
3498defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
3499defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
3500defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
3501defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
3502defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
3503defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
3504defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
3505defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
3506defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
3507defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
3508defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
3509defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;
3510
3511// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
3512// instruction expects the addend first, while the fma intrinsic puts it last.
3513defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
3514            TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
3515defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
3516            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
3517
3518// The following def pats catch the case where the LHS of an FMA is negated.
3519// The TriOpFrag above catches the case where the middle operand is negated.
3520def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
3521          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
3522
3523def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
3524          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
3525
3526def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
3527          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
3528
3529defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
3530defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
3531defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
3532defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
3533defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
3534defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
3535                      TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
3536defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
3537                      TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
3538defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
3539defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
3540defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
3541      TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
3542defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
3543defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
3544defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
3545defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
3546defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
3547defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
3548defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
3549defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
3550defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
3551defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
3552defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
3553defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
3554defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
3555defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
3556defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
3557defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
3558defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
3559defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
3560      TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
3561defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
3562defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
3563defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
3564defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
3565defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
3566defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
3567defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
3568defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
3569defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
3570defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
3571defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
3572defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
3573defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
3574defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
3575defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
3576                                                  int_aarch64_neon_sqadd>;
3577defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
3578                                                    int_aarch64_neon_sqsub>;
3579
3580defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
3581defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
3582                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
3583defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
3584defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
3585defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
3586    TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
3587defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
3588defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
3589                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
3590defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
3591
3592
3593def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
3594          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
3595def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
3596          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
3597def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
3598          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
3599def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
3600          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
3601
3602def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
3603          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
3604def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
3605          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
3606def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
3607          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
3608def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
3609          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
3610
3611def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
3612                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
3613def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
3614                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
3615def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
3616                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
3617def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
3618                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
3619
3620def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
3621                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
3622def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
3623                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
3624def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
3625                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
3626def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
3627                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
3628
3629def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
3630                "|cmls.8b\t$dst, $src1, $src2}",
3631                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
3632def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
3633                "|cmls.16b\t$dst, $src1, $src2}",
3634                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
3635def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
3636                "|cmls.4h\t$dst, $src1, $src2}",
3637                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
3638def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
3639                "|cmls.8h\t$dst, $src1, $src2}",
3640                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
3641def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
3642                "|cmls.2s\t$dst, $src1, $src2}",
3643                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
3644def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
3645                "|cmls.4s\t$dst, $src1, $src2}",
3646                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
3647def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
3648                "|cmls.2d\t$dst, $src1, $src2}",
3649                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
3650
3651def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
3652                "|cmlo.8b\t$dst, $src1, $src2}",
3653                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
3654def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
3655                "|cmlo.16b\t$dst, $src1, $src2}",
3656                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
3657def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
3658                "|cmlo.4h\t$dst, $src1, $src2}",
3659                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
3660def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
3661                "|cmlo.8h\t$dst, $src1, $src2}",
3662                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
3663def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
3664                "|cmlo.2s\t$dst, $src1, $src2}",
3665                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
3666def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
3667                "|cmlo.4s\t$dst, $src1, $src2}",
3668                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
3669def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
3670                "|cmlo.2d\t$dst, $src1, $src2}",
3671                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
3672
3673def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
3674                "|cmle.8b\t$dst, $src1, $src2}",
3675                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
3676def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
3677                "|cmle.16b\t$dst, $src1, $src2}",
3678                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
3679def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
3680                "|cmle.4h\t$dst, $src1, $src2}",
3681                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
3682def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
3683                "|cmle.8h\t$dst, $src1, $src2}",
3684                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
3685def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
3686                "|cmle.2s\t$dst, $src1, $src2}",
3687                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
3688def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
3689                "|cmle.4s\t$dst, $src1, $src2}",
3690                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
3691def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
3692                "|cmle.2d\t$dst, $src1, $src2}",
3693                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
3694
3695def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
3696                "|cmlt.8b\t$dst, $src1, $src2}",
3697                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
3698def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
3699                "|cmlt.16b\t$dst, $src1, $src2}",
3700                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
3701def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
3702                "|cmlt.4h\t$dst, $src1, $src2}",
3703                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
3704def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
3705                "|cmlt.8h\t$dst, $src1, $src2}",
3706                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
3707def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
3708                "|cmlt.2s\t$dst, $src1, $src2}",
3709                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
3710def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
3711                "|cmlt.4s\t$dst, $src1, $src2}",
3712                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
3713def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
3714                "|cmlt.2d\t$dst, $src1, $src2}",
3715                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
3716
3717let Predicates = [HasNEON, HasFullFP16] in {
3718def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
3719                "|fcmle.4h\t$dst, $src1, $src2}",
3720                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
3721def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
3722                "|fcmle.8h\t$dst, $src1, $src2}",
3723                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
3724}
3725def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
3726                "|fcmle.2s\t$dst, $src1, $src2}",
3727                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
3728def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
3729                "|fcmle.4s\t$dst, $src1, $src2}",
3730                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
3731def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
3732                "|fcmle.2d\t$dst, $src1, $src2}",
3733                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
3734
3735let Predicates = [HasNEON, HasFullFP16] in {
3736def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
3737                "|fcmlt.4h\t$dst, $src1, $src2}",
3738                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
3739def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
3740                "|fcmlt.8h\t$dst, $src1, $src2}",
3741                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
3742}
3743def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
3744                "|fcmlt.2s\t$dst, $src1, $src2}",
3745                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
3746def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
3747                "|fcmlt.4s\t$dst, $src1, $src2}",
3748                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
3749def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
3750                "|fcmlt.2d\t$dst, $src1, $src2}",
3751                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
3752
3753let Predicates = [HasNEON, HasFullFP16] in {
3754def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
3755                "|facle.4h\t$dst, $src1, $src2}",
3756                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
3757def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
3758                "|facle.8h\t$dst, $src1, $src2}",
3759                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
3760}
3761def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
3762                "|facle.2s\t$dst, $src1, $src2}",
3763                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
3764def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
3765                "|facle.4s\t$dst, $src1, $src2}",
3766                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
3767def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
3768                "|facle.2d\t$dst, $src1, $src2}",
3769                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
3770
3771let Predicates = [HasNEON, HasFullFP16] in {
3772def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
3773                "|faclt.4h\t$dst, $src1, $src2}",
3774                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
3775def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
3776                "|faclt.8h\t$dst, $src1, $src2}",
3777                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
3778}
3779def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
3780                "|faclt.2s\t$dst, $src1, $src2}",
3781                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
3782def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
3783                "|faclt.4s\t$dst, $src1, $src2}",
3784                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
3785def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
3786                "|faclt.2d\t$dst, $src1, $src2}",
3787                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
3788
3789//===----------------------------------------------------------------------===//
3790// Advanced SIMD three scalar instructions.
3791//===----------------------------------------------------------------------===//
3792
3793defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
3794defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
3795defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
3796defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
3797defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
3798defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
3799defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
3800defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
3801def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3802          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
3803let Predicates = [HasFullFP16] in {
3804def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
3805}
3806def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
3807def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
3808defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
3809                                     int_aarch64_neon_facge>;
3810defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
3811                                     int_aarch64_neon_facgt>;
3812defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
3813defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
3814defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
3815defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
3816defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
3817defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
3818defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
3819defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
3820defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
3821defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
3822defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
3823defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
3824defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
3825defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
3826defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
3827defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
3828defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
3829defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
3830defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
3831defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
3832defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
3833let Predicates = [HasRDM] in {
3834  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
3835  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
3836  def : Pat<(i32 (int_aarch64_neon_sqadd
3837                   (i32 FPR32:$Rd),
3838                   (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
3839                                                   (i32 FPR32:$Rm))))),
3840            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
3841  def : Pat<(i32 (int_aarch64_neon_sqsub
3842                   (i32 FPR32:$Rd),
3843                   (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
3844                                                   (i32 FPR32:$Rm))))),
3845            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
3846}
3847
3848def : InstAlias<"cmls $dst, $src1, $src2",
3849                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3850def : InstAlias<"cmle $dst, $src1, $src2",
3851                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3852def : InstAlias<"cmlo $dst, $src1, $src2",
3853                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3854def : InstAlias<"cmlt $dst, $src1, $src2",
3855                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3856def : InstAlias<"fcmle $dst, $src1, $src2",
3857                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
3858def : InstAlias<"fcmle $dst, $src1, $src2",
3859                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3860def : InstAlias<"fcmlt $dst, $src1, $src2",
3861                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
3862def : InstAlias<"fcmlt $dst, $src1, $src2",
3863                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3864def : InstAlias<"facle $dst, $src1, $src2",
3865                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
3866def : InstAlias<"facle $dst, $src1, $src2",
3867                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3868def : InstAlias<"faclt $dst, $src1, $src2",
3869                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
3870def : InstAlias<"faclt $dst, $src1, $src2",
3871                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
3872
3873//===----------------------------------------------------------------------===//
3874// Advanced SIMD three scalar instructions (mixed operands).
3875//===----------------------------------------------------------------------===//
3876defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
3877                                       int_aarch64_neon_sqdmulls_scalar>;
3878defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
3879defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
3880
3881def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
3882                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
3883                                                        (i32 FPR32:$Rm))))),
3884          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
3885def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
3886                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
3887                                                        (i32 FPR32:$Rm))))),
3888          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
3889
3890//===----------------------------------------------------------------------===//
3891// Advanced SIMD two scalar instructions.
3892//===----------------------------------------------------------------------===//
3893
3894defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs>;
3895defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
3896defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
3897defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
3898defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
3899defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
3900defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
3901defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
3902defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
3903defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
3904defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
3905defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
3906defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
3907defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
3908defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
3909defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
3910defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
3911defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
3912defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
3913def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
3914defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
3915defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
3916defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
3917defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
3918defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
3919defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
3920                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
3921defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
3922defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
3923defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
3924defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
3925defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
3926defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
3927                                     int_aarch64_neon_suqadd>;
3928defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
3929defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
3930defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
3931                                    int_aarch64_neon_usqadd>;
3932
3933def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
3934
3935def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
3936          (FCVTASv1i64 FPR64:$Rn)>;
3937def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
3938          (FCVTAUv1i64 FPR64:$Rn)>;
3939def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
3940          (FCVTMSv1i64 FPR64:$Rn)>;
3941def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
3942          (FCVTMUv1i64 FPR64:$Rn)>;
3943def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
3944          (FCVTNSv1i64 FPR64:$Rn)>;
3945def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
3946          (FCVTNUv1i64 FPR64:$Rn)>;
3947def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
3948          (FCVTPSv1i64 FPR64:$Rn)>;
3949def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
3950          (FCVTPUv1i64 FPR64:$Rn)>;
3951
3952def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
3953          (FRECPEv1f16 FPR16:$Rn)>;
3954def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
3955          (FRECPEv1i32 FPR32:$Rn)>;
3956def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
3957          (FRECPEv1i64 FPR64:$Rn)>;
3958def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
3959          (FRECPEv1i64 FPR64:$Rn)>;
3960
3961def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
3962          (FRECPEv1i32 FPR32:$Rn)>;
3963def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
3964          (FRECPEv2f32 V64:$Rn)>;
3965def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
3966          (FRECPEv4f32 FPR128:$Rn)>;
3967def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
3968          (FRECPEv1i64 FPR64:$Rn)>;
3969def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
3970          (FRECPEv1i64 FPR64:$Rn)>;
3971def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
3972          (FRECPEv2f64 FPR128:$Rn)>;
3973
3974def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
3975          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
3976def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
3977          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
3978def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
3979          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
3980def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
3981          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
3982def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
3983          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
3984
3985def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
3986          (FRECPXv1f16 FPR16:$Rn)>;
3987def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
3988          (FRECPXv1i32 FPR32:$Rn)>;
3989def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
3990          (FRECPXv1i64 FPR64:$Rn)>;
3991
3992def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
3993          (FRSQRTEv1f16 FPR16:$Rn)>;
3994def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
3995          (FRSQRTEv1i32 FPR32:$Rn)>;
3996def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
3997          (FRSQRTEv1i64 FPR64:$Rn)>;
3998def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
3999          (FRSQRTEv1i64 FPR64:$Rn)>;
4000
4001def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
4002          (FRSQRTEv1i32 FPR32:$Rn)>;
4003def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
4004          (FRSQRTEv2f32 V64:$Rn)>;
4005def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
4006          (FRSQRTEv4f32 FPR128:$Rn)>;
4007def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
4008          (FRSQRTEv1i64 FPR64:$Rn)>;
4009def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
4010          (FRSQRTEv1i64 FPR64:$Rn)>;
4011def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
4012          (FRSQRTEv2f64 FPR128:$Rn)>;
4013
4014def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
4015          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
4016def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
4017          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
4018def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
4019          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
4020def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
4021          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
4022def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
4023          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
4024
4025// If an integer is about to be converted to a floating point value,
4026// just load it on the floating point unit.
4027// Here are the patterns for 8 and 16-bits to float.
4028// 8-bits -> float.
4029multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
4030                             SDPatternOperator loadop, Instruction UCVTF,
4031                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
4032                             SubRegIndex sub> {
4033  def : Pat<(DstTy (uint_to_fp (SrcTy
4034                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
4035                                      ro.Wext:$extend))))),
4036           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
4037                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
4038                                 sub))>;
4039
4040  def : Pat<(DstTy (uint_to_fp (SrcTy
4041                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
4042                                      ro.Wext:$extend))))),
4043           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
4044                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
4045                                 sub))>;
4046}
4047
4048defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
4049                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
4050def : Pat <(f32 (uint_to_fp (i32
4051               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
4052           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
4053                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
4054def : Pat <(f32 (uint_to_fp (i32
4055                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
4056           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
4057                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
4058// 16-bits -> float.
4059defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
4060                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
4061def : Pat <(f32 (uint_to_fp (i32
4062                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
4063           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
4064                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
4065def : Pat <(f32 (uint_to_fp (i32
4066                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
4067           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
4068                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
4069// 32-bits are handled in target specific dag combine:
4070// performIntToFpCombine.
4071// 64-bits integer to 32-bits floating point, not possible with
4072// UCVTF on floating point registers (both source and destination
4073// must have the same size).
4074
4075// Here are the patterns for 8, 16, 32, and 64-bits to double.
4076// 8-bits -> double.
4077defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
4078                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
4079def : Pat <(f64 (uint_to_fp (i32
4080                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
4081           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4082                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
4083def : Pat <(f64 (uint_to_fp (i32
4084                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
4085           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4086                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
4087// 16-bits -> double.
4088defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
4089                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
4090def : Pat <(f64 (uint_to_fp (i32
4091                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
4092           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4093                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
4094def : Pat <(f64 (uint_to_fp (i32
4095                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
4096           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4097                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
4098// 32-bits -> double.
4099defm : UIntToFPROLoadPat<f64, i32, load,
4100                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
4101def : Pat <(f64 (uint_to_fp (i32
4102                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
4103           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4104                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
4105def : Pat <(f64 (uint_to_fp (i32
4106                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
4107           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4108                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
4109// 64-bits -> double are handled in target specific dag combine:
4110// performIntToFpCombine.
4111
4112//===----------------------------------------------------------------------===//
4113// Advanced SIMD three different-sized vector instructions.
4114//===----------------------------------------------------------------------===//
4115
4116defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
4117defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
4118defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
4119defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
4120defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
4121defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
4122                                             int_aarch64_neon_sabd>;
4123defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
4124                                          int_aarch64_neon_sabd>;
4125defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
4126            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
4127defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
4128                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
4129defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
4130    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
4131defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
4132    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
4133defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
4134defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
4135                                               int_aarch64_neon_sqadd>;
4136defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
4137                                               int_aarch64_neon_sqsub>;
4138defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
4139                                     int_aarch64_neon_sqdmull>;
4140defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
4141                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
4142defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
4143                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
4144defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
4145                                              int_aarch64_neon_uabd>;
4146defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
4147                 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
4148defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
4149                 BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
4150defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
4151    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
4152defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
4153    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
4154defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
4155defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
4156                 BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
4157defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
4158                 BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
4159
4160// Additional patterns for SMULL and UMULL
4161multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
4162  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
4163  def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
4164            (INST8B V64:$Rn, V64:$Rm)>;
4165  def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
4166            (INST4H V64:$Rn, V64:$Rm)>;
4167  def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
4168            (INST2S V64:$Rn, V64:$Rm)>;
4169}
4170
4171defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
4172  SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
4173defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
4174  UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
4175
4176// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
4177multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
4178  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
4179  def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
4180            (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
4181  def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
4182            (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
4183  def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
4184            (INST2S  V128:$Rd, V64:$Rn, V64:$Rm)>;
4185}
4186
4187defm : Neon_mulacc_widen_patterns<
4188  TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
4189  SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
4190defm : Neon_mulacc_widen_patterns<
4191  TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
4192  UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
4193defm : Neon_mulacc_widen_patterns<
4194  TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
4195  SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
4196defm : Neon_mulacc_widen_patterns<
4197  TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
4198  UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
4199
4200// Patterns for 64-bit pmull
4201def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
4202          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
4203def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
4204                                    (extractelt (v2i64 V128:$Rm), (i64 1))),
4205          (PMULLv2i64 V128:$Rn, V128:$Rm)>;
4206
4207// CodeGen patterns for addhn and subhn instructions, which can actually be
4208// written in LLVM IR without too much difficulty.
4209
4210// ADDHN
4211def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
4212          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
4213def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
4214                                           (i32 16))))),
4215          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
4216def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
4217                                           (i32 32))))),
4218          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
4219def : Pat<(concat_vectors (v8i8 V64:$Rd),
4220                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
4221                                                    (i32 8))))),
4222          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
4223                            V128:$Rn, V128:$Rm)>;
4224def : Pat<(concat_vectors (v4i16 V64:$Rd),
4225                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
4226                                                    (i32 16))))),
4227          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
4228                            V128:$Rn, V128:$Rm)>;
4229def : Pat<(concat_vectors (v2i32 V64:$Rd),
4230                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
4231                                                    (i32 32))))),
4232          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
4233                            V128:$Rn, V128:$Rm)>;
4234
4235// SUBHN
4236def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
4237          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
4238def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
4239                                           (i32 16))))),
4240          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
4241def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
4242                                           (i32 32))))),
4243          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
4244def : Pat<(concat_vectors (v8i8 V64:$Rd),
4245                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
4246                                                    (i32 8))))),
4247          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
4248                            V128:$Rn, V128:$Rm)>;
4249def : Pat<(concat_vectors (v4i16 V64:$Rd),
4250                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
4251                                                    (i32 16))))),
4252          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
4253                            V128:$Rn, V128:$Rm)>;
4254def : Pat<(concat_vectors (v2i32 V64:$Rd),
4255                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
4256                                                    (i32 32))))),
4257          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
4258                            V128:$Rn, V128:$Rm)>;

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle an extract_subvector that copies the upper 64 bits
  // of a 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}
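
// For example (a sketch of the last pattern above): a 64-bit ext whose first
// operand is the high half of Vn, with immediate #3, becomes
//   ext v0.16b, vn.16b, vm.16b, #(8 + 3)   // vm = Rm widened to 128 bits
// and the result is then read from the low 64 bits (dsub) of v0.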

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;


//----------------------------------------------------------------------------
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------

defm CPY : SIMDScalarCPY<"cpy">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;
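
// E.g. the v4f32 faddv case above reduces in two pairwise steps (a sketch):
//   faddp v0.4s, vn.4s, vn.4s   // [a+b, c+d, a+b, c+d]
//   faddp s0, v0.2s             // (a+b) + (c+d)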

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
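
// E.g. duplicating the truncated low byte of h-lane 1 of a v8i16 into every
// byte of a v8i8 uses DUPv8i8lane with byte index 2 * 1 == 2 (the little-endian
// low byte of that halfword); the XForms above compute exactly that remap.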

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                     imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                     imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                         imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
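
// E.g. (and (i32 (vector_extract (v16i8 V128:$Rn), idx)), 0xff) selects to
//   umov w0, vn.b[idx]
// with no extra AND: UMOV already writes zeroes to bits [31:8].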

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                  (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG
                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
                dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;

// Floating point vector extractions are codegen'd as either a single
// subregister extraction (for lane 0) or a MOV (aka CPY here, alias for DUP)
// if the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
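
// E.g. extracting lane 0 of a v4f32 is just an ssub subregister copy (free),
// while extracting lane 1 becomes: mov s0, vn.s[1].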

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might as well be
// INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v16i8, v8i8>;
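
// E.g. (v4i32 (concat_vectors Vlo, Vhi)) ends up as roughly (a sketch):
//   // vd.d[0] already holds Vlo after the subregister insert
//   ins vd.d[1], vhi.d[0]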

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;

// Patterns for across-vector intrinsics that have a node equivalent returning
// a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
            (i32 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
            (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}
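
// E.g. with baseOpc = "SMAXV", (i32 (vector_extract (v8i16 (AArch64smaxv
// Vn)), 0)) selects to SMAXVv8i16v followed by a read of the low lane; the
// patterns above only differ in how that low-lane read is expressed.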

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
           (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what was actually generated,
// consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
            maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
            maski16_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          ssub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
// vaddv_[su]32 is special: it becomes ADDP Vd.2s, Vn.2s, Vm.2s with Vn == Vm,
// and the result is read from Vd.s[0].
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special: it becomes ADDP Vd.2s, Vn.2s, Vm.2s with Vn == Vm,
// and the result is read from Vd.s[0].
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          (i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          (i64 0)))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
           ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          ssub))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
            ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
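
// I.e. vaddlv_s32(v) is implemented as (a sketch):
//   saddlp v0.1d, vn.2s   // pairwise widening add of the two lanes
// and the i64 result is then read from d0.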

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".2d",
                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
                                              "fmov", ".2s",
                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".4s",
                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
                                              "fmov", ".4h",
                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                              "fmov", ".8h",
                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                                simdimmtype10,
                                                "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
                                                 "movi", ".8b",
                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                                 "movi", ".16b",
                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;

// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
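
// E.g. all of fma(x, y, acc), fma(y, x, acc), fma(-x, y, acc) and
// fma(x, -y, acc) funnel into the same FMLA/FMLS instruction, whose first
// operand is the accumulator.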

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i32 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i32 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                           VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i32 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for the 64-bit scalar version: extract from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexD:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexD:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;

def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
              TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
              TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>;
defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
                int_aarch64_neon_smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqadd>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
                int_aarch64_neon_umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                          (vector_extract (v4i32 V128:$Vm),
                                                           VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
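
// E.g. a scalar sqdmull whose second operand is lane 1 of a .4s vector
// selects directly to (a sketch): sqdmull d0, s0, vm.s[1]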

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;

// Patterns for FP16 intrinsics - a register copy to/from the FPR16 class is
// required, as i16 is not a legal type.

def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
            (and FPR32:$Rn, (i32 65535)),
            vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
5349
5350defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
5351defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
5352defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
5353                                     int_aarch64_neon_sqrshrn>;
5354defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
5355                                     int_aarch64_neon_sqrshrun>;
5356defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
5357defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
5358defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
5359                                     int_aarch64_neon_sqshrn>;
5360defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
5361                                     int_aarch64_neon_sqshrun>;
5362defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
5363defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
5364defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
5365    TriOpFrag<(add node:$LHS,
5366                   (AArch64srshri node:$MHS, node:$RHS))>>;
5367defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
5368defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
5369    TriOpFrag<(add node:$LHS,
5370                   (AArch64vashr node:$MHS, node:$RHS))>>;
5371defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
5372                                     int_aarch64_neon_uqrshrn>;
5373defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
5374defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
5375                                     int_aarch64_neon_uqshrn>;
5376defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
5377defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
5378    TriOpFrag<(add node:$LHS,
5379                   (AArch64urshri node:$MHS, node:$RHS))>>;
5380defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
5381defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
5382    TriOpFrag<(add node:$LHS,
5383                   (AArch64vlshr node:$MHS, node:$RHS))>>;
5384
5385//----------------------------------------------------------------------------
5386// AdvSIMD vector shift instructions
5387//----------------------------------------------------------------------------
5388defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
5389defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
5390defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
5391                                   int_aarch64_neon_vcvtfxs2fp>;
5392defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
5393                                         int_aarch64_neon_rshrn>;
5394defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
5395defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
5396                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
5397defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
5398def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
5399                                      (i32 vecshiftL64:$imm))),
5400          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
5401defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
5402                                         int_aarch64_neon_sqrshrn>;
5403defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
5404                                         int_aarch64_neon_sqrshrun>;
5405defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
5406defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
5407defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
5408                                         int_aarch64_neon_sqshrn>;
5409defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
5410                                         int_aarch64_neon_sqshrun>;
5411defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
5412def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
5413                                      (i32 vecshiftR64:$imm))),
5414          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
5415defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
5416defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
5417                 TriOpFrag<(add node:$LHS,
5418                                (AArch64srshri node:$MHS, node:$RHS))> >;
5419defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
5420                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
5421
5422defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
5423defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
5424                TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
5425defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
5426                        int_aarch64_neon_vcvtfxu2fp>;
5427defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
5428                                         int_aarch64_neon_uqrshrn>;
5429defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
5430defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
5431                                         int_aarch64_neon_uqshrn>;
5432defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
5433defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
5434                TriOpFrag<(add node:$LHS,
5435                               (AArch64urshri node:$MHS, node:$RHS))> >;
5436defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
5437                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
5438defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
5439defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
5440                TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
5441
5442// SHRN patterns for when a logical right shift was used instead of arithmetic
5443// (the immediate guarantees no sign bits actually end up in the result so it
5444// doesn't matter).
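// Concretely, for a shift by imm of at most the result width, the kept bits
// are bits [imm+width-1:imm] of the source element, which are original value
// bits for either kind of shift. For example:
//   (v8i8 (trunc (v8i16 X >>u 5)))  ==>  shrn v0.8b, v0.8h, #5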
5445def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
5446          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
5447def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
5448          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
5449def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
5450          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
5451
5452def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
5453                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
5454                                                    vecshiftR16Narrow:$imm)))),
5455          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
5456                           V128:$Rn, vecshiftR16Narrow:$imm)>;
5457def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
5458                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
5459                                                    vecshiftR32Narrow:$imm)))),
5460          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
5461                           V128:$Rn, vecshiftR32Narrow:$imm)>;
5462def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
5463                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
5464                                                    vecshiftR64Narrow:$imm)))),
5465          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
5467
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
5470def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
5471def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
5472def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
5473def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
5474def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
5475def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
5476def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
5477def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
5478def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
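// For example, (v8i16 (zext (v8i8 V64:$Rn))) selects to a zero-shift widen:
//   ushll v0.8h, v0.8b, #0    ; the uxtl alias defined below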
5479// Also match an extend from the upper half of a 128 bit source register.
5480def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
5481          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
5482def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
5483          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
5484def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
5485          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
5486def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
5487          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
5488def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
5489          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
5490def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
5491          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
5492def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
5493          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
5494def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
5495          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
5496def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
5497          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
5498
5499// Vector shift sxtl aliases
5500def : InstAlias<"sxtl.8h $dst, $src1",
5501                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
5502def : InstAlias<"sxtl $dst.8h, $src1.8b",
5503                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
5504def : InstAlias<"sxtl.4s $dst, $src1",
5505                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
5506def : InstAlias<"sxtl $dst.4s, $src1.4h",
5507                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
5508def : InstAlias<"sxtl.2d $dst, $src1",
5509                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
5510def : InstAlias<"sxtl $dst.2d, $src1.2s",
5511                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
5512
5513// Vector shift sxtl2 aliases
5514def : InstAlias<"sxtl2.8h $dst, $src1",
5515                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
5516def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
5517                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
5518def : InstAlias<"sxtl2.4s $dst, $src1",
5519                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
5520def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
5521                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
5522def : InstAlias<"sxtl2.2d $dst, $src1",
5523                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
5524def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
5525                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
5526
5527// Vector shift uxtl aliases
5528def : InstAlias<"uxtl.8h $dst, $src1",
5529                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
5530def : InstAlias<"uxtl $dst.8h, $src1.8b",
5531                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
5532def : InstAlias<"uxtl.4s $dst, $src1",
5533                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
5534def : InstAlias<"uxtl $dst.4s, $src1.4h",
5535                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
5536def : InstAlias<"uxtl.2d $dst, $src1",
5537                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
5538def : InstAlias<"uxtl $dst.2d, $src1.2s",
5539                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
5540
5541// Vector shift uxtl2 aliases
5542def : InstAlias<"uxtl2.8h $dst, $src1",
5543                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
5544def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
5545                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
5546def : InstAlias<"uxtl2.4s $dst, $src1",
5547                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
5548def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
5549                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
5550def : InstAlias<"uxtl2.2d $dst, $src1",
5551                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
5552def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
5553                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
5554
5555// If an integer is about to be converted to a floating point value,
5556// just load it on the floating point unit.
5557// These patterns are more complex because floating point loads do not
5558// support sign extension.
5559// The sign extension has to be explicitly added and is only supported for
5560// one step: byte-to-half, half-to-word, word-to-doubleword.
5561// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
5566// However, this is not good for code size.
// 8-bits -> float. 2 size steps up.
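// For example, the byte-to-float case below selects roughly to:
//   ldr   b0, [x0]            ; load the byte straight into the FP unit
//   sshll v0.8h, v0.8b, #0    ; sign extend 8 -> 16
//   sshll v0.4s, v0.4h, #0    ; sign extend 16 -> 32
//   scvtf s0, s0              ; FPR -> FPR conversion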
5568class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
5569  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
5570        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
5571                            (SSHLLv4i16_shift
5572                              (f64
5573                                (EXTRACT_SUBREG
5574                                  (SSHLLv8i8_shift
5575                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5576                                        INST,
5577                                        bsub),
5578                                    0),
5579                                  dsub)),
5580                               0),
5581                             ssub)))>,
5582    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
5583
5584def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
5585                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
5586def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
5587                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
5588def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
5589                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
5590def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
5591                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
5592
5593// 16-bits -> float. 1 size step-up.
5594class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
5595  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
5596        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
5597                            (SSHLLv4i16_shift
5598                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5599                                  INST,
5600                                  hsub),
5601                                0),
5602                            ssub)))>, Requires<[NotForCodeSize]>;
5603
5604def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
5605                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
5606def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
5607                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
5608def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
5609                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
5610def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
5611                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
5612
// 32-bit to 32-bit is handled in the target-specific dag combine
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with
// SCVTF on floating point registers (both source and destination
// must have the same size).
5618
// Here are the patterns for 8, 16, 32, and 64-bit integers to double.
// 8-bits -> double. 3 size steps up: give up.
// 16-bits -> double. 2 size steps up.
5622class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
5623  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
5624           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
5625                              (SSHLLv2i32_shift
5626                                 (f64
5627                                  (EXTRACT_SUBREG
5628                                    (SSHLLv4i16_shift
5629                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5630                                        INST,
5631                                        hsub),
5632                                     0),
5633                                   dsub)),
5634                               0),
5635                             dsub)))>,
5636    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
5637
5638def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
5639                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
5640def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
5641                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
5642def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
5643                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
5644def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
5645                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
5646// 32-bits -> double. 1 size step-up.
5647class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
5648  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
5649           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
5650                              (SSHLLv2i32_shift
5651                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5652                                  INST,
5653                                  ssub),
5654                               0),
5655                             dsub)))>, Requires<[NotForCodeSize]>;
5656
5657def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
5658                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
5659def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
5660                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
5661def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
5662                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
5663def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
5664                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
5665
5666// 64-bits -> double are handled in target specific dag combine:
5667// performIntToFpCombine.
5668
5669
5670//----------------------------------------------------------------------------
5671// AdvSIMD Load-Store Structure
5672//----------------------------------------------------------------------------
5673defm LD1 : SIMDLd1Multiple<"ld1">;
5674defm LD2 : SIMDLd2Multiple<"ld2">;
5675defm LD3 : SIMDLd3Multiple<"ld3">;
5676defm LD4 : SIMDLd4Multiple<"ld4">;
5677
5678defm ST1 : SIMDSt1Multiple<"st1">;
5679defm ST2 : SIMDSt2Multiple<"st2">;
5680defm ST3 : SIMDSt3Multiple<"st3">;
5681defm ST4 : SIMDSt4Multiple<"st4">;
5682
5683class Ld1Pat<ValueType ty, Instruction INST>
5684  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
5685
5686def : Ld1Pat<v16i8, LD1Onev16b>;
5687def : Ld1Pat<v8i16, LD1Onev8h>;
5688def : Ld1Pat<v4i32, LD1Onev4s>;
5689def : Ld1Pat<v2i64, LD1Onev2d>;
5690def : Ld1Pat<v8i8,  LD1Onev8b>;
5691def : Ld1Pat<v4i16, LD1Onev4h>;
5692def : Ld1Pat<v2i32, LD1Onev2s>;
5693def : Ld1Pat<v1i64, LD1Onev1d>;
5694
5695class St1Pat<ValueType ty, Instruction INST>
5696  : Pat<(store ty:$Vt, GPR64sp:$Rn),
5697        (INST ty:$Vt, GPR64sp:$Rn)>;
5698
5699def : St1Pat<v16i8, ST1Onev16b>;
5700def : St1Pat<v8i16, ST1Onev8h>;
5701def : St1Pat<v4i32, ST1Onev4s>;
5702def : St1Pat<v2i64, ST1Onev2d>;
5703def : St1Pat<v8i8,  ST1Onev8b>;
5704def : St1Pat<v4i16, ST1Onev4h>;
5705def : St1Pat<v2i32, ST1Onev2s>;
5706def : St1Pat<v1i64, ST1Onev1d>;
5707
5708//---
5709// Single-element
5710//---
5711
5712defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
5713defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
5714defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
5715defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
5716let mayLoad = 1, hasSideEffects = 0 in {
5717defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
5718defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
5719defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
5720defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
5721defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
5722defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
5723defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
5724defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
5725defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
5726defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
5727defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
5728defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
5729defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
5730defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
5731defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
5732defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
5733}
5734
5735def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
5736          (LD1Rv8b GPR64sp:$Rn)>;
5737def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
5738          (LD1Rv16b GPR64sp:$Rn)>;
5739def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
5740          (LD1Rv4h GPR64sp:$Rn)>;
5741def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
5742          (LD1Rv8h GPR64sp:$Rn)>;
5743def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
5744          (LD1Rv2s GPR64sp:$Rn)>;
5745def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
5746          (LD1Rv4s GPR64sp:$Rn)>;
5747def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
5748          (LD1Rv2d GPR64sp:$Rn)>;
5749def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
5750          (LD1Rv1d GPR64sp:$Rn)>;
// Grab the floating point versions too
5752def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
5753          (LD1Rv2s GPR64sp:$Rn)>;
5754def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
5755          (LD1Rv4s GPR64sp:$Rn)>;
5756def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
5757          (LD1Rv2d GPR64sp:$Rn)>;
5758def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
5759          (LD1Rv1d GPR64sp:$Rn)>;
5760def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
5761          (LD1Rv4h GPR64sp:$Rn)>;
5762def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
5763          (LD1Rv8h GPR64sp:$Rn)>;
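// For example, (v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))) selects to:
//   ld1r { v0.4s }, [x0]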
5764
5765class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
5766                    ValueType VTy, ValueType STy, Instruction LD1>
5767  : Pat<(vector_insert (VTy VecListOne128:$Rd),
5768           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
5769        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
5770
5771def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
5772def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
5773def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
5774def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
5775def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
5776def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
5777def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
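// For example, inserting a loaded f32 into lane 2 of a v4f32 selects to:
//   ld1 { v0.s }[2], [x0]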
5778
5779class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
5780                   ValueType VTy, ValueType STy, Instruction LD1>
5781  : Pat<(vector_insert (VTy VecListOne64:$Rd),
5782           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
5783        (EXTRACT_SUBREG
5784            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
5785                          VecIndex:$idx, GPR64sp:$Rn),
5786            dsub)>;
5787
5788def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
5789def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
5790def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
5791def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
5792def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
5793
5794
5795defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
5796defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
5797defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
5798defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
5799
5800// Stores
5801defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
5802defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
5803defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
5804defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
5805
5806let AddedComplexity = 19 in
5807class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
5808                    ValueType VTy, ValueType STy, Instruction ST1>
5809  : Pat<(scalar_store
5810             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
5811             GPR64sp:$Rn),
5812        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
5813
5814def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
5815def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
5816def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
5817def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
5818def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
5819def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
5820def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
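// For example, storing lane 3 of a v4i32 selects to:
//   st1 { v0.s }[3], [x0]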
5821
5822let AddedComplexity = 19 in
5823class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
5824                   ValueType VTy, ValueType STy, Instruction ST1>
5825  : Pat<(scalar_store
5826             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
5827             GPR64sp:$Rn),
5828        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
5829             VecIndex:$idx, GPR64sp:$Rn)>;
5830
5831def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
5832def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
5833def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
5834def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
5835def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
5836
5837multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
5838                             ValueType VTy, ValueType STy, Instruction ST1,
5839                             int offset> {
5840  def : Pat<(scalar_store
5841              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
5842              GPR64sp:$Rn, offset),
5843        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
5844             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
5845
5846  def : Pat<(scalar_store
5847              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
5848              GPR64sp:$Rn, GPR64:$Rm),
5849        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
5850             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
5851}
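// When the written-back offset equals the transfer size, XZR as the $Rm
// operand selects the immediate post-index form; otherwise the register
// form is used, e.g. roughly:
//   st1 { v0.s }[1], [x0], #4
//   st1 { v0.s }[1], [x0], x1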
5852
5853defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
5854defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
5855                        2>;
5856defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
5857defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
5858defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
5859defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
5860defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
5861
5862multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
5863                             ValueType VTy, ValueType STy, Instruction ST1,
5864                             int offset> {
5865  def : Pat<(scalar_store
5866              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
5867              GPR64sp:$Rn, offset),
5868        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
5869
5870  def : Pat<(scalar_store
5871              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
5872              GPR64sp:$Rn, GPR64:$Rm),
5873        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
5874}
5875
5876defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
5877                         1>;
5878defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
5879                         2>;
5880defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
5881defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
5882defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
5883defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
5884defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
5885
5886let mayStore = 1, hasSideEffects = 0 in {
5887defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
5888defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
5889defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
5890defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
5891defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
5892defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
5893defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
5894defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
5895defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
5896defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
5897defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
5898defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
5899}
5900
5901defm ST1 : SIMDLdSt1SingleAliases<"st1">;
5902defm ST2 : SIMDLdSt2SingleAliases<"st2">;
5903defm ST3 : SIMDLdSt3SingleAliases<"st3">;
5904defm ST4 : SIMDLdSt4SingleAliases<"st4">;
5905
5906//----------------------------------------------------------------------------
5907// Crypto extensions
5908//----------------------------------------------------------------------------
5909
5910let Predicates = [HasAES] in {
5911def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
5912def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
5913def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
5914def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
5915}
5916
5917// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
5918// for AES fusion on some CPUs.
5919let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
5920def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
5921                        Sched<[WriteV]>;
5922def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
5923                         Sched<[WriteV]>;
5924}
5925
5926// Only use constrained versions of AES(I)MC instructions if they are paired with
5927// AESE/AESD.
5928def : Pat<(v16i8 (int_aarch64_crypto_aesmc
5929            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
5930                                            (v16i8 V128:$src2))))),
5931          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
5932                                             (v16i8 V128:$src2)))))>,
5933          Requires<[HasFuseAES]>;
5934
5935def : Pat<(v16i8 (int_aarch64_crypto_aesimc
5936            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
5937                                            (v16i8 V128:$src2))))),
5938          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
5939                                              (v16i8 V128:$src2)))))>,
5940          Requires<[HasFuseAES]>;
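// With FuseAES this keeps each pair adjacent and tied to the same register
// so a fusing core can combine them, e.g.:
//   aese  v0.16b, v1.16b
//   aesmc v0.16b, v0.16b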
5941
5942let Predicates = [HasSHA2] in {
5943def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
5944def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
5945def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
5946def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
5947def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
5948def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
5949def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
5950
5951def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
5952def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
5953def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
5954}
5955
5956//----------------------------------------------------------------------------
5957// Compiler-pseudos
5958//----------------------------------------------------------------------------
5959// FIXME: Like for X86, these should go in their own separate .td file.
5960
5961def def32 : PatLeaf<(i32 GPR32:$src), [{
5962  return isDef32(*N);
5963}]>;
5964
5965// In the case of a 32-bit def that is known to implicitly zero-extend,
5966// we can use a SUBREG_TO_REG.
5967def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
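// For example, after add w0, w1, w2, bits [63:32] of x0 are already zero,
// so the zext costs no instructions.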
5968
// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
5971def : Pat<(i64 (anyext GPR32:$src)),
5972          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
5973
5974// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
5975// then assert the extension has happened.
5976def : Pat<(i64 (zext GPR32:$src)),
5977          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
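// For example, (i64 (zext GPR32:$src)) with no known-zero high bits selects
// to a plain 32-bit register move, which clears bits [63:32]:
//   mov w0, w0                ; ORRWrs WZR, $src, 0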
5978
5979// To sign extend, we use a signed bitfield move instruction (SBFM) on the
5980// containing super-reg.
5981def : Pat<(i64 (sext GPR32:$src)),
5982   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
5983def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
5984def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
5985def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
5986def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
5987def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
5988def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
5989def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
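// These SBFM forms correspond to the sxtb/sxth assembly aliases, e.g.:
//   (i32 (sext_inreg GPR32:$src, i8))  ==>  sbfm w0, w0, #0, #7  ; sxtb w0, w0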
5990
5991def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
5992          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
5993                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
5994def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
5995          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
5996                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
5997
5998def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
5999          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
6000                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
6001def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
6002          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
6003                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
6004
6005def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
6006          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
6007                   (i64 (i64shift_a        imm0_63:$imm)),
6008                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
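// For example, with a shift amount of 3 the 32-bit i8 case above becomes a
// single signed bitfield insert in zero:
//   sbfiz w0, w0, #3, #8      ; SBFM with immr = 32 - 3, imms = 7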
6009
6010// sra patterns have an AddedComplexity of 10, so make sure we have a higher
6011// AddedComplexity for the following patterns since we want to match sext + sra
6012// patterns before we attempt to match a single sra node.
6013let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of
// the original value which is to be sign extended, i.e. shifts of up to
// bitwidth-1 bits.
6017def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
6018          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
6019def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
6020          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
6021
6022def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
6023          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
6024def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
6025          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
6026
6027def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
6028          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
6029                   (i64 imm0_31:$imm), 31)>;
6030} // AddedComplexity = 20
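// For example, (sra (sext_inreg w0, i8), 2) keeps the top six bits of the
// byte, sign extended, as a single signed bitfield extract:
//   sbfx w0, w0, #2, #6       ; SBFM with immr = 2, imms = 7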
6031
6032// To truncate, we can simply extract from a subregister.
6033def : Pat<(i32 (trunc GPR64sp:$src)),
6034          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
6035
6036// __builtin_trap() uses the BRK instruction on AArch64.
6037def : Pat<(trap), (BRK 1)>;
6038
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high halves of
// both results together with uzp2.
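// For example, (v8i16 (mulhs V128:$Rn, V128:$Rm)) selects roughly to:
//   smull  v2.4s, v0.4h, v1.4h
//   smull2 v3.4s, v0.8h, v1.8h
//   uzp2   v0.8h, v2.8h, v3.8h  ; keep the high half of each 32-bit product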
6042def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
6043          (UZP2v16i8
6044           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
6045                            (EXTRACT_SUBREG V128:$Rm, dsub)),
6046           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
6047def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
6048          (UZP2v8i16
6049           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
6050                             (EXTRACT_SUBREG V128:$Rm, dsub)),
6051           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
6052def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
6053          (UZP2v4i32
6054           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
6055                             (EXTRACT_SUBREG V128:$Rm, dsub)),
6056           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
6057
6058def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
6059          (UZP2v16i8
6060           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
6061                            (EXTRACT_SUBREG V128:$Rm, dsub)),
6062           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
6063def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
6064          (UZP2v8i16
6065           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
6066                             (EXTRACT_SUBREG V128:$Rm, dsub)),
6067           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
6068def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
6069          (UZP2v4i32
6070           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
6071                             (EXTRACT_SUBREG V128:$Rm, dsub)),
6072           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
6073
6074// Conversions within AdvSIMD types in the same register size are free.
6075// But because we need a consistent lane ordering, in big endian many
6076// conversions require one or more REV instructions.
6077//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
6082//
6083// In big endian mode every memory access has an implicit byte swap. LDR and
6084// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
6085// is, they treat the vector as a sequence of elements to be byte-swapped.
6086// The two pairs of instructions are fundamentally incompatible. We've decided
6087// to use LD1/ST1 only to simplify compiler implementation.
6088//
6089// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
6090// the original code sequence:
6091//   v0 = load v2i32
6092//   v1 = REV v2i32                  (implicit)
6093//   v2 = BITCAST v2i32 v1 to v4i16
6094//   v3 = REV v4i16 v2               (implicit)
6095//        store v4i16 v3
6096//
6097// But this is now broken - the value stored is different to the value loaded
6098// due to lane reordering. To fix this, on every BITCAST we must perform two
6099// other REVs:
6100//   v0 = load v2i32
6101//   v1 = REV v2i32                  (implicit)
6102//   v2 = REV v2i32
6103//   v3 = BITCAST v2i32 v2 to v4i16
6104//   v4 = REV v4i16
6105//   v5 = REV v4i16 v4               (implicit)
6106//        store v4i16 v5
6107//
6108// This means an extra two instructions, but actually in most cases the two REV
6109// instructions can be combined into one. For example:
6110//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
6111//
6112// There is also no 128-bit REV instruction. This must be synthesized with an
6113// EXT instruction.
6114//
6115// Most bitconverts require some sort of conversion. The only exceptions are:
//   a) Identity conversions - vNfX <-> vNiX
6117//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
6118//
6119
6120// Natural vector casts (64 bit)
6121def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
6122def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
6123def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
6124def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
6125def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
6126def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
6127
6128def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
6129def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
6130def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
6131def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
6132def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
6133
6134def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
6135def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
6136def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
6137def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
6138def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
6139def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
6140
6141def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
6142def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
6143def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
6144def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
6145def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
6146def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
6147def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
6148
6149def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
6150def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
6151def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
6152def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
6153def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
6154
6155// Natural vector casts (128 bit)
6156def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
6157def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
6158def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
6159def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
6160def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
6161def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
6162def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
6163
6164def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
6165def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
6166def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
6167def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
6168def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
6169def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
6170def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
6171
6172def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
6173def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
6174def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
6175def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
6176def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
6177def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
6178def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
6179
6180def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
6181def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
6182def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
6183def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
6184def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
6185def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
6186def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
6187
6188def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
6189def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
6190def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
6191def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
6192def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
6193def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
6194def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
6195
6196def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
6197def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
6198def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
6199def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
6200def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
6201def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
6202def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
6203
6204let Predicates = [IsLE] in {
6205def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6206def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6207def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6208def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6209def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6210
6211def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
6212          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6213def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
6214          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6215def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
6216          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6217def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
6218          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6219def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
6220          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6221def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
6222          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6223}
6224let Predicates = [IsBE] in {
6225def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
6226                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
6227def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
6228                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
6229def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
6230                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
6231def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
6232                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
6233def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
6234                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
6235
6236def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
6237          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
6238def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
6239          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
6240def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
6241          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
6242def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
6243          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
6244def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
6245          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
6246}
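// In practice the little-endian GPR <-> FPR cases are a single fmov, e.g.:
//   fmov d0, x0               ; (v2i32 (bitconvert GPR64:$Xn)) on LE
// while big endian additionally needs a REV64 to restore the lane order.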
6247def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6248def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6249def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
6250          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6251def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
6252          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6253def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
6254          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6255def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
6256
6257def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
6258          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
6259def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
6260          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
6261def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
6262          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
6263def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
6264          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
6265def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
6266          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
6267
6268let Predicates = [IsLE] in {
6269def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
6270def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
6271def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
6272def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
6273def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
6274}
6275let Predicates = [IsBE] in {
6276def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
6277                             (v1i64 (REV64v2i32 FPR64:$src))>;
6278def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
6279                             (v1i64 (REV64v4i16 FPR64:$src))>;
6280def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
6281                             (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
                             (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
                             (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
                             (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
                             (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
                             (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
                             (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))),
                             (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
                             (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))), (v8i8  FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
                             (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
                             (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))), (f64   FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
                             (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
                             (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
                             (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
                             (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
                             (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
                             (v2f64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
                             (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
                             (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
                             (v2i64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
                             (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                              (REV64v4i32 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
                             (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
                             (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
                             (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
                             (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
                             (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
                             (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
                             (v8f16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                              (REV64v16i8 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
}
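// Illustrative note (an editorial sketch, not from the original source): the
// IsBE patterns above exist because the in-register layout that NEON lane
// numbering assumes differs between element sizes on big-endian targets, so a
// bitconvert that is a no-op copy on little-endian must reverse elements on
// big-endian. For example, reinterpreting a v4i16 held in d0 as v2i32 selects
// REV32v4i16:
//
//   rev32 v0.4h, v0.4h    // swap the two i16 elements within each i32
//
// while the matching little-endian pattern emits no instruction at all.
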
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;
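
// Illustrative example (assumed DAG shape, editorial, not from the original
// source): a node such as
//   t1: v4f32 = insert_subvector undef:v4f32, t0:v2f32, Constant:i64<0>
// matches the v2f32 pattern above and is selected to an INSERT_SUBREG into
// the dsub subregister; since Dn already aliases the low 64 bits of Qn, no
// machine instruction is emitted for it.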

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64,
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                     (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
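
// Illustrative example (editorial, not from the original source): IR that
// sums both lanes of a v2f64, e.g.
//   %lo = extractelement <2 x double> %v, i64 0
//   %hi = extractelement <2 x double> %v, i64 1
//   %s  = fadd double %lo, %hi
// selects to a single scalar pairwise add instead of two extracts plus an
// add:
//   faddp d0, v0.2d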

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
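
// Illustrative note (editorial): these map the i64-typed forms of the
// @llvm.aarch64.neon.*shl intrinsics, as produced for instance by the ACLE
// vshld_s64/vshld_u64 intrinsics, onto the d-register instructions, e.g.
//   %r = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// becomes:
//   sshl d0, d0, d1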

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (CPYi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]
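
// Illustrative example (editorial, little-endian assumed): a nontemporal
// store of a v2i64 held in q0,
//   store <2 x i64> %v, <2 x i64>* %p, !nontemporal !0
// is emitted as a store pair of the two 64-bit halves:
//   mov  d1, v0.d[1]     // CPYi64: copy the high lane down
//   stnp d0, d1, [x0]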

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call restricted to the registers (x16 and x17) that are
  // allowed to target a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
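
// Illustrative note (editorial): AArch64tcret selects into one of the
// TCRETURN* pseudos above, which are rewritten into real branches once the
// epilogue and any FPDiff stack adjustment are in place; roughly, TCRETURNdi
// ends up as a direct "b callee" and the register forms as an indirect
// "br xN" (x16/x17 only when BTI enforcement is in effect).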

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"