//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                 AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                                 AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                                 AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                                 AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                 AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                                 AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                                 AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                                 AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                                 AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                                 AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicate<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicate<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicate<(all_of FeaturePAuth), "pauth">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicate<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicate<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicate<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicate<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicate<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicate<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicate<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPCImm()">,
                       AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                 AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                 AssemblerPredicate<(all_of FeatureNEON), "neon">;
def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                 AssemblerPredicate<(all_of FeatureCrypto), "crypto">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                                 AssemblerPredicate<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                                 AssemblerPredicate<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                                 AssemblerPredicate<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                                 AssemblerPredicate<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                                 AssemblerPredicate<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                 AssemblerPredicate<(all_of FeatureCRC), "crc">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                                 AssemblerPredicate<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                 AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                                 AssemblerPredicate<(all_of FeatureRDM), "rdm">;
def HasPerfMon       : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                                 AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                                 AssemblerPredicate<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                 AssemblerPredicate<(all_of FeatureFuseAES),
                                 "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                                 AssemblerPredicate<(all_of FeatureSVE), "sve">;
def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
                                 AssemblerPredicate<(all_of FeatureSVE2), "sve2">;
def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
                                 AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
                                 AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
                                 AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
                                 AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasSME           : Predicate<"Subtarget->hasSME()">,
                                 AssemblerPredicate<(all_of FeatureSME), "sme">;
def HasSMEF64        : Predicate<"Subtarget->hasSMEF64()">,
                                 AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">;
def HasSMEI64        : Predicate<"Subtarget->hasSMEI64()">,
                                 AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">;
def HasStreamingSVE  : Predicate<"Subtarget->hasStreamingSVE()">,
                                 AssemblerPredicate<(all_of FeatureStreamingSVE), "streaming-sve">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorStreamingSVE
    : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">,
                AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE),
                "streaming-sve or sve">;
def HasSVE2orStreamingSVE
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">,
                AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE),
                "streaming-sve or sve2">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorStreamingSVE
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">,
                AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE),
                "streaming-sve or neon">;
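// Illustrative only: instructions that are legal in both modes are guarded
// with one of these combined predicates instead of the plain feature
// predicate, along the lines of (sketch, not a real definition):
//   let Predicates = [HasSVEorStreamingSVE] in
//   defm SOME_SVE_INST : sve_int_some_format<...>;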
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                 AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicate<(all_of FeatureSB), "sb">;
def HasPredRes       : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicate<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicate<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicate<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicate<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicate<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicate<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicate<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                              [SDTCisSameAs<0, 2>,
                                               SDTCisSameAs<0, 3>,
                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;
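// For example, AArch64ISD::ADDS (AArch64add_flag below) uses this profile:
// result 0 is the arithmetic result and result 1 is the NZCV flags value,
// modelled as an i32.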

// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                            [SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>,
                                             SDTCisInt<0>,
                                             SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                            [SDTCisSameAs<0, 2>,
                                             SDTCisSameAs<0, 3>,
                                             SDTCisInt<0>,
                                             SDTCisVT<1, i32>,
                                             SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                      SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisInt<3>,
                                    SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
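// Reading the profile above: the operands are LHS, RHS, the NZCV immediate
// to substitute when the condition fails, the condition code, and the
// incoming i32 flags; the single i32 result is the outgoing flags value
// (as built by AArch64 ISel).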
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
                                   [SDTCisFP<0>,
                                    SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                 SDTCisSameAs<0,1>,
                                                 SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>,
                                           SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                 SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic TLS access sequence, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// The TPIDR_EL0 offset is returned directly in X0, hence no "result" here.
// The profile's single operand is the variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                          [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
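// These fragments are what the SVE predicated load patterns (the LD1B/LD1H/
// LD1W/LD1D family and their extending forms) match against; the selection
// patterns themselves live in AArch64SVEInstrInfo.td. (Explanatory note.)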
// sign extending masked load fragments.
def asext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def asext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (asext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def asext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (asext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def asext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (asext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// zero extending masked load fragments.
def zext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def zext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (zext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def zext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (zext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def zext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (zext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
          cast<MaskedLoadSDNode>(N)->isUnindexed() &&
          cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
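// Note the only difference between non_temporal_load/store and their plain
// counterparts above is the isNonTemporal() check; this is what steers
// selection towards the SVE non-temporal LDNT1/STNT1 forms instead of
// LD1/ST1. (Explanatory note; the patterns live elsewhere.)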

// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag       : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn>;
def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp         : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp     : PatFrags<(ops node:$lhs, node:$rhs),
                                   [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                    (AArch64fcmp node:$lhs, node:$rhs)]>;

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;

def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                        (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
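// i.e. CMTST ("test bits") is expressed as NOT(CMEQz(AND(x, y))): a lane is
// set to all-ones iff (x & y) has at least one bit set in that lane.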

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                  [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                               [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                    SDTCisSameAs<1, 2>]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

def AArch64srhadd   : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
def AArch64urhadd   : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
def AArch64shadd    : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
def AArch64uhadd    : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;

def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abdu node:$lhs, node:$rhs),
                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abds node:$lhs, node:$rhs),
                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis, but doing so requires
// accessing the Function object through the <Target>Subtarget, and objections
// were raised to that (see the post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector
      : Predicate<"!MF->getFunction().hasOptNone() || "
                  "MF->getProperties().hasProperty("
                  "MachineFunctionProperties::Property::FailedISel) || "
                  "!MF->getProperties().hasProperty("
                  "MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to the empty list because we expect these instructions to
// simply be removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                            tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
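// Illustrative expansion (performed later by the pseudo expander): MOVaddr
// becomes the pair
//   adrp $dst, sym
//   add  $dst, $dst, :lo12:sym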
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                             tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                             tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                             tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                            texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// A 32-bit jump table destination is actually only 2 instructions, since we
// can use the table itself as a PC-relative base. But that optimization
// occurs after branch relaxation, so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}
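// Rough expansion sketch (handled by the pseudo expander), e.g. for the
// 32-bit case:
//   ldrsw $scratch, [$table, $entry, lsl #2]
//   add   $dst, $table, $scratch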

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}
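// These become real barriers at emission time: the ISBDSB form is printed as
// "dsb sy; isb" while the SB form is printed as "sb", hence the two pseudos.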

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
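// For reference: "bti" is HINT #32 and "bti c"/"bti j"/"bti jc" map onto
// HINT #34/#36/#38, so without +bti the plain HINT spelling is printed.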

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;
  let Inst{12}   = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

}

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;

// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN       : SIMD_BFCVTN;
def BFCVTN2      : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorStreamingSVE, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// SUDOT lane has a pattern where the usdot intrinsic is expected (there is
// no separate sudot intrinsic). The second operand is used in the dup
// operation to repeat the indexed element.
923class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
924                         string rhs_kind, RegisterOperand RegType,
925                         ValueType AccumType, ValueType InputType>
926      : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
927                                        lhs_kind, rhs_kind, RegType, AccumType,
928                                        InputType, null_frag> {
929  let Pattern = [(set (AccumType RegType:$dst),
930                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
931                                 (InputType (bitconvert (AccumType
932                                    (AArch64duplane32 (v4i32 V128:$Rm),
933                                        VectorIndexS:$idx)))),
934                                 (InputType RegType:$Rn))))];
935}
936
937multiclass SIMDSUDOTIndex {
938  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
939  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
940}
941
942defm SUDOTlane : SIMDSUDOTIndex;
943
944}
945
946// ARMv8.2-A FP16 Fused Multiply-Add Long
947let Predicates = [HasNEON, HasFP16FML] in {
948defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
949defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
950defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
951defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
952defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
953defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
954defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
955defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
956}
957
958// Armv8.2-A Crypto extensions
959let Predicates = [HasSHA3] in {
960def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
961def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
962def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
963def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
964def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
965def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
966def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
967def XAR       : CryptoRRRi6<"xar">;
968
969class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
970  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
971        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
972
973def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
974          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
975
976def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
977def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
978def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
979
980def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
981def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
982def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
983def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
984
class EOR3_pattern<ValueType VecTy>
  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;


} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;

def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;

class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;

class SM4_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;

def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;

def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
} // HasSM4

let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent processor consistent (RCpc) support, optional in v8.2.
  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. There is no predicate here;
// predication is done inside the multiclass, because the FP16 versions need
// different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
                                               "fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                           "fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;

let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}

let Predicates = [HasComplxNum, HasNEON] in {
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
  foreach Ty = [v4f32, v2f64] in {
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
  }
}

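// Map the vcmla_rot* intrinsics onto FCMLA. The rotation is passed as an
// immediate in quarter-turn units: 0 => #0, 1 => #90, 2 => #180, 3 => #270.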
multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
}

multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
}


let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  defm : FCMLA_PATS<v4f16, V64>;
  defm : FCMLA_PATS<v8f16, V128>;

  defm : FCMLA_LANE_PATS<v4f16, V64,
                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
  defm : FCMLA_LANE_PATS<v8f16, V128,
                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
  defm : FCMLA_PATS<v2f32, V64>;
  defm : FCMLA_PATS<v4f32, V128>;
  defm : FCMLA_PATS<v2f64, V128>;

  defm : FCMLA_LANE_PATS<v4f32, V128,
                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
}

// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used on any
// armv8 target. Keeping the plain HINT mnemonic when compiling without PA is
// important for compatibility with other assemblers (e.g. GAS) when building
// software that must run on CPUs both with and without PA.
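// For example, PACIAZ occupies "hint #24" and PACIASP "hint #25", so both
// assemble on any armv8 core.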
let Uses = [LR], Defs = [LR] in {
  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
  let isAuthenticated = 1 in {
    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
  }
}
let Uses = [LR, SP], Defs = [LR] in {
  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
  let isAuthenticated = 1 in {
    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
  }
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
  let isAuthenticated = 1 in {
    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
  }
}

let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
}

// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use pointer authentication mnemonics, even with PA disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless PA is enabled.
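// An emit priority of 0 means the alias is parsed but never chosen for
// printing; with PA disabled, "paciasp" is therefore still printed as "hint #25".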
def : InstAlias<"paciaz", (PACIAZ), 0>;
def : InstAlias<"pacibz", (PACIBZ), 0>;
def : InstAlias<"autiaz", (AUTIAZ), 0>;
def : InstAlias<"autibz", (AUTIBZ), 0>;
def : InstAlias<"paciasp", (PACIASP), 0>;
def : InstAlias<"pacibsp", (PACIBSP), 0>;
def : InstAlias<"autiasp", (AUTIASP), 0>;
def : InstAlias<"autibsp", (AUTIBSP), 0>;
def : InstAlias<"pacia1716", (PACIA1716), 0>;
def : InstAlias<"pacib1716", (PACIB1716), 0>;
def : InstAlias<"autia1716", (AUTIA1716), 0>;
def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;

// These pointer authentication instructions require armv8.3a
let Predicates = [HasPAuth] in {

  // When PA is enabled, a better mnemonic should be emitted.
  def : InstAlias<"paciaz", (PACIAZ), 1>;
  def : InstAlias<"pacibz", (PACIBZ), 1>;
  def : InstAlias<"autiaz", (AUTIAZ), 1>;
  def : InstAlias<"autibz", (AUTIBZ), 1>;
  def : InstAlias<"paciasp", (PACIASP), 1>;
  def : InstAlias<"pacibsp", (PACIBSP), 1>;
  def : InstAlias<"autiasp", (AUTIASP), 1>;
  def : InstAlias<"autibsp", (AUTIBSP), 1>;
  def : InstAlias<"pacia1716", (PACIA1716), 1>;
  def : InstAlias<"pacib1716", (PACIB1716), 1>;
  def : InstAlias<"autia1716", (AUTIA1716), 1>;
  def : InstAlias<"autib1716", (AUTIB1716), 1>;
  def : InstAlias<"xpaclri", (XPACLRI), 1>;

  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>;
    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>;
    def IZA  : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>;
    def DZA  : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>;
    def IZB  : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>;
    def DZB  : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>;
  }

  defm PAC : SignAuth<0b000, 0b010, "pac">;
  defm AUT : SignAuth<0b001, 0b011, "aut">;

  def XPACI : ClearAuth<0, "xpaci">;
  def XPACD : ClearAuth<1, "xpacd">;
  def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;

  // Combined Instructions
  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
  }

  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
  }

  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
    def RETAA   : AuthReturn<0b010, 0, "retaa">;
    def RETAB   : AuthReturn<0b010, 1, "retab">;
    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
  }

  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;

}

// v8.3a floating-point conversion for JavaScript
let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
                                      "fjcvtzs",
                                      [(set GPR32:$Rd,
                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
  let Inst{31} = 0;
} // HasJS, HasFPARMv8

// v8.4 Flag manipulation instructions
let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
  let Inst{20-5} = 0b0000001000000000;
}
def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
                        "{\t$Rn, $imm, $mask}">;
} // HasFlagM

// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {

def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b001;
}

def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b010;
}
} // HasAltNZCV


// Armv8.5-A speculation barrier
def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
  let Inst{20-5} = 0b0001100110000111;
  let Unpredictable{11-8} = 0b1111;
  let Predicates = [HasSB];
  let hasSideEffects = 1;
}

def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;

def MRS    : MRSI;
def MSR    : MSRI;
def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;

def : Pat<(AArch64mrs imm:$id),
          (MRS imm:$id)>;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;

let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
  Sched<[]>;
}

let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
  Sched<[]>;
}

// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
def : Pat<(readcyclecounter), (MRS 0xdce8)>;

// FPCR register
def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
def : Pat<(int_aarch64_set_fpcr i64:$val), (MSR 0xda20, GPR64:$val)>;

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;


let Predicates = [HasTME] in {

def TSTART : TMSystemI<0b0000, "tstart",
                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;

def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;

def TCANCEL : TMSystemException<0b011, "tcancel",
                                [(int_aarch64_tcancel i64_imm0_65535:$imm)]>;

def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
  let mayLoad = 0;
  let mayStore = 0;
}
} // HasTME

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def s64imm_32bit : ImmLeaf<i64, [{
  int64_t Imm64 = static_cast<int64_t>(Imm);
  return Imm64 >= std::numeric_limits<int32_t>::min() &&
         Imm64 <= std::numeric_limits<int32_t>::max();
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
  GISDNodeXFormEquiv<trunc_imm>;

let Predicates = [OptimizedGISelOrOtherSelector] in {
// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
// copies.
def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
}
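// For example, a 64-bit move of 0x12345 only needs the 32-bit MOVi32imm:
// writing the W sub-register implicitly zeroes bits [63:32].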

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;


def : Pat<(f32 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;


// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                             tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
                                  tglobaladdr:$g1, 16),
                          tglobaladdr:$g2, 32),
                  tglobaladdr:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                             tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
                                  tblockaddress:$g1, 16),
                          tblockaddress:$g2, 32),
                  tblockaddress:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                             tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
                                  tconstpool:$g1, 16),
                          tconstpool:$g2, 32),
                  tconstpool:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                             tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
                                  tjumptable:$g1, 16),
                          tjumptable:$g2, 32),
                  tjumptable:$g3, 48)>;
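// Each expansion above is a four-instruction sequence: MOVZ for bits [15:0],
// then MOVK for bits [31:16], [47:32] and [63:48].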


//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
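// i.e. "ngc Wd, Wm" is negate-with-carry, "sbc Wd, wzr, Wm".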

// Add/subtract
defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;

def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;

// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
}

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

// The same transformation applies to the flag-setting add/sub-imm forms;
// these patterns capture it for AArch64add_flag and AArch64sub_flag.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;


// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;

def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd">;
defm MSUB : MulAccum<1, "msub">;

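// A plain multiply has no dedicated instruction; it is MADD with the zero
// register as the accumulator, and mneg is likewise MSUB.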
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 5

let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

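// The widening forms also apply when one operand is a constant that fits in
// 32 bits; the constant is first materialized with MOVi32imm.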
def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
} // AddedComplexity = 5

def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;

// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;

// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;

def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;

// v8.1 atomic CAS
defm CAS   : CompareAndSwap<0, 0, "">;
defm CASA  : CompareAndSwap<1, 0, "a">;
defm CASL  : CompareAndSwap<0, 1, "l">;
defm CASAL : CompareAndSwap<1, 1, "al">;

// v8.1 atomic CASP
defm CASP   : CompareAndSwapPair<0, 0, "">;
defm CASPA  : CompareAndSwapPair<1, 0, "a">;
defm CASPL  : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;

// v8.1 atomic SWP
defm SWP   : Swap<0, 0, "">;
defm SWPA  : Swap<1, 0, "a">;
defm SWPL  : Swap<0, 1, "l">;
defm SWPAL : Swap<1, 1, "al">;

// v8.1 atomic LD<OP>(register): loads the old memory value into Rt, then
// stores the result of <OP> with the source register back to memory.
defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;

defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;

defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;

defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;

defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;

defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;

defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;

defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;

// v8.1 atomic ST<OP>(register), aliases for LD<OP>(register) with Rt=XZR
defm : STOPregister<"stadd","LDADD">; // STADDx
defm : STOPregister<"stclr","LDCLR">; // STCLRx
defm : STOPregister<"steor","LDEOR">; // STEORx
defm : STOPregister<"stset","LDSET">; // STSETx
defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
defm : STOPregister<"stumin","LDUMIN">;// STUMINx

// v8.5 Memory Tagging Extension
let Predicates = [HasMTE] in {

def IRG   : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>,
            Sched<[]>{
  let Inst{31} = 1;
}
def GMI   : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]>{
  let Inst{31} = 1;
  let isNotDuplicable = 1;
}
def ADDG  : AddSubG<0, "addg", null_frag>;
def SUBG  : AddSubG<1, "subg", null_frag>;

def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;

def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
  let Defs = [NZCV];
}

def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;

def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;

def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;

def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;

def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
  let Inst{23} = 0;
}

defm STG   : MemTagStore<0b00, "stg">;
defm STZG  : MemTagStore<0b01, "stzg">;
defm ST2G  : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;

def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (ST2GOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZ2GOffset $Rn, $Rm, $imm)>;

defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;

def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (STGOffset GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;

def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
          (STGPi $Rt, $Rt2, $Rn, $imm)>;

def IRGstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
      Sched<[]>;
def TAGPstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
      Sched<[]>;

// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;

// Large STG to be expanded into a loop. $sz is the size and $Rn is the start
// address. $Rn_wback is one past the end of the range. $Rm is the loop counter.
let isCodeGenOnly=1, mayStore=1 in {
def STGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

// Variants of the above where $Rn2 is an independent register not tied to the
// input register $Rn. Their purpose is to allow a FrameIndex operand as $Rn
// (which of course cannot be written back).
def STGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;
}

} // Predicates = [HasMTE]

//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND  : LogicalImm<0b00, "and", and, "bic">;
defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR  : LogicalImm<0b01, "orr", or, "orn">;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;


// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;


def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
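// Bitwise NOT likewise has no dedicated instruction; it is ORN with the zero
// register (the "mvn" alias above).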


//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS    : OneOperandData<0b101, "cls">;
defm CLZ    : OneOperandData<0b100, "clz", ctlz>;
defm RBIT   : OneOperandData<0b000, "rbit", bitreverse>;

def  REV16Wr : OneWRegData<0b001, "rev16",
                                  UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def  REV16Xr : OneXRegData<0b001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
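// There is no count-trailing-zeros instruction, so cttz(x) is computed as
// ctlz(bitreverse(x)).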
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                (i64 1))),
          (CLSXr GPR64:$Rn)>;
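// The two patterns above match the usual IR expansion of
// count-leading-sign-bits: cls(x) == ctlz(((x ^ (x >>s (bits-1))) << 1) | 1).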
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;

// Unlike the other one-operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit; they actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b010, "rev", bswap>;
def REVXr   : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
                                 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
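// A rotate-right by a constant is EXTR with the same register for both
// sources.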

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;
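// For example, "lsl w0, w1, #5" selects to "ubfm w0, w1, #27, #26":
// immr = (32 - 5) & 31 = 27 and imms = 31 - 5 = 26.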

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL  : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;

def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
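// An add of a flag-dependent 0/1 value folds into CSINC with the addend
// duplicated into both source operands.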
// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
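// For example, "cset w0, eq" is encoded as "csinc w0, wzr, wzr, ne".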
2219
2220def : InstAlias<"csetm $dst, $cc",
2221                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2222def : InstAlias<"csetm $dst, $cc",
2223                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2224
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel,
                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// Direct and page addresses of constant pool entries, block addresses,
// external symbols and jump tables.
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;

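// ADR materializes a PC-relative address within +/-1MiB directly; ADRP
// materializes the enclosing 4KiB page address within +/-4GiB and is
// normally paired with an ADD or load/store carrying a :lo12: relocation
// for the low 12 bits.
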
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;
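
// With SLS (straight-line speculation) hardening of BLR enabled, indirect
// calls are restricted to GPR64noip so the thunk-based expansion never has
// its target in X16/X17, which linker-inserted code such as range-extension
// veneers may clobber.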

def : Pat<(AArch64call_rvmarker GPR64:$Rn),
          (BLR_RVMARKER GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag LR as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;

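// The canonical TLSDESC code sequence this expands to is roughly:
//   adrp  x0, :tlsdesc:var
//   ldr   x1, [x0, :tlsdesc_lo12:var]
//   add   x0, x0, :tlsdesc_lo12:var
//   .tlsdesccall var
//   blr   x1
// leaving the thread-relative offset of "var" in x0.
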
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B  : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

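// Range reminder: B/BL encode a 26-bit word offset (+/-128MiB),
// Bcc/CBZ/CBNZ a 19-bit one (+/-1MiB), and TBZ/TBNZ only a 14-bit one
// (+/-32KiB); branches that end up out of range are fixed up by the branch
// relaxation pass.
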
//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK   : ExceptionGeneration<0b001, 0b00, "brk">;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;

def UDF : UDFType<0, "udf">;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

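// Note that the pair immediates are signed 7-bit values scaled by the access
// size, which is what simm7s4/simm7s8/simm7s16 encode: byte offsets of
// -256..252 in steps of 4, -512..504 in steps of 8, and -1024..1008 in
// steps of 16 respectively.
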
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", untyped, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// Regular loads have no alignment requirement, so it is safe to map the
// vector loads directly onto these addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}
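
// The INSERT_SUBREG into an IMPLICIT_DEF lets a plain scalar load feed
// lane 0 of a vector register without an explicit DUP/INS; the remaining
// lanes are left undefined.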

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       f16, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       f16, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all 64-bit-wide loads whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

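// The IsLE guards above exist because LDR/STR treat the vector as one wide
// scalar: on big-endian targets that byte order disagrees with the LD1/ST1
// element order the rest of the backend assumes, so big-endian selection
// uses LD1/ST1 instead.
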
// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}

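// SUBREG_TO_REG with a zero immediate re-types a 32-bit result as 64 bits
// without emitting any code: a write to a W register already zeroes the
// upper 32 bits of the corresponding X register.
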
// extload and zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                 [(set (f128 FPR128Op:$Rt),
                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// Regular loads have no alignment requirement, so it is safe to map the
// vector loads directly onto these addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

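// There is no any-extending load instruction, and the high bits of an
// extload result are undefined anyway, so the zero-extending forms above
// serve for both.
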
// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch imm:$Rt,
                                        (am_indexed64 GPR64sp:$Rn,
                                                      uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Alignment = G->getGlobal()->getPointerAlignment(DL);
    return Alignment >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
  [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
  [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
  [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
  [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
  [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
        (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}
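
// LDR (literal) encodes a 19-bit word-scaled PC-relative offset, so the
// target must lie within +/-1MiB of the instruction and be 4-byte aligned;
// that is what the alignedglobal predicate above checks.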

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                    [(set GPR64z:$Rt,
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                    [(set GPR32z:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                    [(set FPR8Op:$Rt,
                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                    [(set (f16 FPR16Op:$Rt),
                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                    [(set (f32 FPR32Op:$Rt),
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                    [(set (f64 FPR64Op:$Rt),
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                    [(set (f128 FPR128Op:$Rt),
                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes, as we want to match these only when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}
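
// For example, "ldr x0, [x1, #-8]" cannot be encoded as an unsigned scaled
// offset, so the aliases below make it assemble to "ldur x0, [x1, #-8]".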

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
               (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
               [(set GPR32:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
              [(set GPR64:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                [(set GPR32:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                [(set GPR64:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch imm:$Rt,
                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

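// The LDTR* forms perform their access with EL0 (unprivileged) permissions
// when executed at a higher exception level; they are assembler-only here,
// with no selection patterns attached.
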
//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

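// Pre-indexed forms update the base register before the access
// ("ldr x0, [x1, #8]!"); post-indexed forms update it afterwards
// ("ldr x0, [x1], #8"). Both write the new address back to the base.
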
//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;

//---
// (register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;

// Floating-point
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", untyped, store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16,     store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32,     store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

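// A truncating store of an i64 only writes the low bits, so the
// EXTRACT_SUBREG of sub_32 is free: it simply renames Xn to Wn.
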
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16,         store, v8f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4f32, f32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2f64, f64, dsub, STRDroW, STRDroX>;
}

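// Element 0 always occupies the low bits of the vector register, so a
// store of lane 0 needs no ST1 lane form: a plain scalar STR of the
// matching subregister writes exactly those bytes.
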
//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                   [(store GPR64z:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                    [(store GPR32z:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                     [(truncstorei16 GPR32z:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1,  "strb",
                     [(truncstorei8 GPR32z:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128  FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

// Match stores of lane 0 to a plain scalar store of the appropriate
// subregister.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                 GPR64sp:$Rn, IndexType:$offset)>;
}
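// For example, the first instantiation below expands (roughly) to:
//   def : Pat<(truncstorei16 (i32 (vector_extract (v8i16 VecListOne128:$Vt), 0)),
//                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
//             (STRHui (EXTRACT_SUBREG VecListOne128:$Vt, hsub),
//                     GPR64sp:$Rn, uimm12s2:$offset)>;
// Lane 0 already occupies the low subregister, so no element move is needed.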

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16,         store, v8f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4f32, f32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2f64, f64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                         [(store GPR64z:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                         [(store GPR32z:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                         [(store FPR8Op:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                         [(store (f16 FPR16Op:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                         [(store (f32 FPR32Op:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                         [(store (f64 FPR64Op:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                         [(store (f128 FPR128Op:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                         [(truncstorei16 GPR32z:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                         [(truncstorei8 GPR32z:$Rt,
                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX     : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB    : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH    : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR     : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW   : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX    : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores of lane 0 to a plain scalar store of the appropriate
// subregister (unscaled-offset version).
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
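// For example, "str x0, [x1, #-8]" is not encodable as STRXui (which needs a
// non-negative offset that is a multiple of 8), so it assembles to STURXi.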
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

//---
// (unscaled immediate, unprivileged)
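// When executed at EL1, these perform the access with EL0 (unprivileged)
// permissions; at EL0 they behave like ordinary stores.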
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
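// Pre-indexed stores write the updated address back to the base register,
// e.g. "str x0, [sp, #-16]!" stores to sp-16 and then sets sp = sp - 16.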
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, untyped>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
           simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
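// Post-indexed stores write back after the access,
// e.g. "str x0, [sp], #16" stores to sp and then sets sp = sp + 16.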
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,  "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,  "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, untyped>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;

def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//
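// These are the building blocks of a classic load-linked/store-conditional
// retry loop; the store-exclusive writes 0 to its status register on success:
//   1: ldaxr x8, [x0]
//      add   x8, x8, #1
//      stlxr w9, x8, [x0]
//      cbnz  w9, 1b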

def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

let Predicates = [HasLOR] in {
  // v8.1a "Limited Ordering Regions" extension load-acquire instructions
  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Ordering Regions" extension store-release instructions
  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
}

//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT* to-integer instructions saturate when the result is out of
// range, matching the semantics of fp_to_{s,u}int_sat.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}
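// For example, (i32 (fp_to_sint_sat f32:$Rn, i32)) selects FCVTZSUWSr, which
// already clamps out-of-range values (e.g. 1.0e10 -> 0x7fffffff) and converts
// NaN to 0, so no extra saturation code is needed.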

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
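// The fmul patterns above fold a multiply by 2^n into the fixed-point form of
// the convert: e.g. fcvtzs(x * 65536.0) becomes "fcvtzs w0, s0, #16", which
// scales by 2^16 before converting.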

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;

let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (lround f16:$Rn)),
            (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
  def : Pat<(i64 (lround f16:$Rn)),
            (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
  def : Pat<(i64 (llround f16:$Rn)),
            (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
}
def : Pat<(i32 (lround f32:$Rn)),
          (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
def : Pat<(i32 (lround f64:$Rn)),
          (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
def : Pat<(i64 (lround f32:$Rn)),
          (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
def : Pat<(i64 (lround f64:$Rn)),
          (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
def : Pat<(i64 (llround f32:$Rn)),
          (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
def : Pat<(i64 (llround f64:$Rn)),
          (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
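// lround/llround round halfway cases away from zero, which is exactly
// FCVTAS's rounding mode, so each maps to a single instruction.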

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
    Sched<[WriteF]>, Requires<[HasFullFP16]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
}
// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
    Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
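// The FMOV (immediate) encoding cannot represent 0.0, so these aliases accept
// "#0.0" and emit a move from the zero register instead.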

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPData<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPData<0b0000, "fmov">;
defm FNEG   : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
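// FRINT32*/FRINT64* (FEAT_FRINTTS) round to an integral value that fits in a
// signed 32-/64-bit range; out-of-range inputs produce the most negative
// representable value and set the Invalid Operation flag.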

let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (lrint f16:$Rn)),
            (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
  def : Pat<(i64 (lrint f16:$Rn)),
            (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
  def : Pat<(i64 (llrint f16:$Rn)),
            (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
}
def : Pat<(i32 (lrint f32:$Rn)),
          (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
def : Pat<(i32 (lrint f64:$Rn)),
          (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
def : Pat<(i64 (lrint f32:$Rn)),
          (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
def : Pat<(i64 (lrint f64:$Rn)),
          (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
def : Pat<(i64 (llrint f32:$Rn)),
          (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
def : Pat<(i64 (llrint f64:$Rn)),
          (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
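// lrint/llrint honour the current rounding mode, so round to an integral
// value with FRINTX first; the FCVTZS truncation is then exact.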

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", fsub>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc. have the accumulator at the *end* of (ins), unlike
// the NEON variant.

// First handle (-a)*b + c, i.e. c - a*b, which maps to FMSUB:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now "(-a)*b + (-c)", i.e. -(a*b + c), which maps to FNMADD:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                       (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X :  Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
}

// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
   let usesCustomInserter = 1 in
     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                    Sched<[]>;
}

// Pseudo instructions for homogeneous prolog/epilog
let isPseudo = 1 in {
  // Save CSRs in order, {FPOffset}
  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
  // Restore CSRs in order
  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                          AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
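// Legalization can expand abs(x) into (x + (x >>s 15)) ^ (x >>s 15); the
// xor patterns below recognize that expanded form as well.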
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                (zext (v8i8 V64:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
                           (zext (extract_high_v16i8 V128:$opB))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
                                (zext (extract_high_v16i8 V128:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
                           (zext (extract_high_v8i16 V128:$opB))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
                           (zext (extract_high_v4i32 V128:$opB))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;

defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                              (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;

def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;

// AArch64's FCVT* to-integer instructions saturate when the result is out of
// range, matching the semantics of fp_to_{s,u}int_sat.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

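// NOT operates bitwise, so the byte forms cover every integer vector type.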
def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
// definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
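// A note on the encodings, assuming the usual MOVI immediate expansion: in
// the 64-bit "byte mask" form each immediate bit expands to a whole byte, so
// 85 = 0b01010101 gives 0x00FF00FF00FF00FF (255 in each i16 lane) and 51 =
// 0b00110011 gives 0x0000FFFF0000FFFF (65535 in each i32 lane); the
// shifted/MSL forms build 127/-128 per i16 lane and 32767/-32768 per i32
// lane, i.e. the signed saturation bounds used below.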

// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN
//  with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
//  with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;
4312
4313//===----------------------------------------------------------------------===//
4314// Advanced SIMD three vector instructions.
4315//===----------------------------------------------------------------------===//
4316
4317defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
4318defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
4319defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
4320defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
4321defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
4322defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
4323defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
4324defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
4325foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
4326def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
4327}
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
            TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
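// For example, an IR-level (fma $Rm, $Rn, $Rd) is selected to "fmla Vd, Vn,
// Vm", which computes Vd = Vd + Vn * Vm with the addend in the tied
// destination operand.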

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;

// MLA and MLS are generated in MachineCombine
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqadd>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqsub>;
// Extra saturating patterns, beyond the intrinsic matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
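// These map the generic saturating ISD nodes (saddsat and friends) onto the
// same sqadd/uqadd/sqsub/uqsub instructions that the intrinsics select.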

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
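// All three compute the same bitwise select and differ only in which operand
// is tied: BSL keeps the mask (Vd = (Vd & Vn) | (~Vd & Vm)), while BIT and
// BIF keep the destination and insert bits from Vn where the Vm mask bit is
// true (BIT) or false (BIF).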

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
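
// The "less-than" style comparisons below have no encodings of their own;
// they are assembler aliases that swap the source operands of the
// corresponding "greater-than" instruction.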

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqadd
                   (i32 FPR32:$Rd),
                   (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
                                                   (i32 FPR32:$Rm))))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqsub
                   (i32 FPR32:$Rd),
                   (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
                                                   (i32 FPR32:$Rm))))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}
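// i.e. sqadd(Rd, sqrdmulh(Rn, Rm)) collapses into a single sqrdmlah, and
// likewise sqsub into sqrdmlsh, when the v8.1a RDM instructions are present.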

def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
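// As above: a scalar saturating add/sub of an sqdmull result folds into the
// accumulating sqdmlal/sqdmlsl forms.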

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
}
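// e.g. for f64 the first pattern above emits "fcvtzs d0, d0; scvtf d0, d0"
// instead of bouncing through a GPR with "fcvtzs x8, d0; scvtf d0, x8".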

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}
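// A sketch of what these select: the small integer is loaded straight into
// the low lane of an FP register (which zeroes the rest) and converted
// there, e.g. "ldr b0, [x0, x1]; ucvtf s0, s0" rather than
// "ldrb w8, [x0, x1]; ucvtf s0, w8".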

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit loads are handled in the target-specific DAG combine
// (performIntToFpCombine).
// A 64-bit integer to 32-bit floating point conversion is not possible with
// UCVTF on floating point registers, because the source and destination
// must have the same size.

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double is handled in the target-specific DAG combine
// (performIntToFpCombine).

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                          AArch64sabd>;
defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                              AArch64uabd>;
defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
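// zanyext accepts an anyext as well as a zext here: the high bits of an
// anyext are undefined, so producing the zero-extended result is a legal
// refinement of it.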

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                    V64:$Ra,
                    (v4i16 (extract_subvector
                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v8i16 (INST8B
                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                    V64:$Ra,
                    (v2i32 (extract_subvector
                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v4i32 (INST4H
                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                    V64:$Ra,
                    (v1i64 (extract_subvector
                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v2i64 (INST2S
                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
}
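// Sketch of the trick above: when only the low half of a widening multiply
// feeds the accumulate, we can still use the full-width [us]mlal/[us]mlsl
// and take the low (dsub) half of the result; the high lanes are ignored.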

defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;

// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (INST8B V64:$Rn, V64:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (INST4H V64:$Rn, V64:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (INST2S V64:$Rn, V64:$Rm)>;
}

defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
  SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
  UMULLv4i16_v4i32, UMULLv2i32_v2i64>;

// Patterns for smull2/umull2.
multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
                           (extract_high_v16i8 V128:$Rm))),
             (INST8B V128:$Rn, V128:$Rm)>;
  def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
                           (extract_high_v8i16 V128:$Rm))),
             (INST4H V128:$Rn, V128:$Rm)>;
  def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
                           (extract_high_v4i32 V128:$Rm))),
             (INST2S V128:$Rn, V128:$Rm)>;
}

defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
  SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
  UMULLv8i16_v4i32, UMULLv4i32_v2i64>;

// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
}

defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
  SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
  UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
  SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
  UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;

// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
                                    (extractelt (v2i64 V128:$Rm), (i64 1))),
          (PMULLv2i64 V128:$Rn, V128:$Rm)>;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
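// For example, addhn on v8i16 inputs computes trunc((a + b) >> 8) per lane,
// which the shift-and-truncate patterns below match directly.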

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
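// e.g. a 64-bit EXT of the high half with immediate 3 becomes a 128-bit EXT
// with immediate 8 + 3 = 11, as used in the last pattern below.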
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;


//----------------------------------------------------------------------------
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------

defm CPY : SIMDScalarCPY<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
            (FADDPv2i16p
              (EXTRACT_SUBREG
                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
               dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
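// e.g. the v4f32 reduction above becomes "faddp v0.4s, v0.4s, vN.4s" (second
// source undef) followed by "faddp s0, v0.2s" on the low pair.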
5293
5294def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
5295          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
5296def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
5297          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
5298def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
5299          (FADDPv2i32p V64:$Rn)>;
5300def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
5301          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
5302def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
5303          (FADDPv2i64p V128:$Rn)>;
5304def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
5305          (FMAXNMPv2i32p V64:$Rn)>;
5306def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
5307          (FMAXNMPv2i64p V128:$Rn)>;
5308def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
5309          (FMAXPv2i32p V64:$Rn)>;
5310def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
5311          (FMAXPv2i64p V128:$Rn)>;
5312def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
5313          (FMINNMPv2i32p V64:$Rn)>;
5314def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
5315          (FMINNMPv2i64p V128:$Rn)>;
5316def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
5317          (FMINPv2i32p V64:$Rn)>;
5318def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
5319          (FMINPv2i64p V128:$Rn)>;
5320
5321//----------------------------------------------------------------------------
5322// AdvSIMD INS/DUP instructions
5323//----------------------------------------------------------------------------
5324
5325def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
5326def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
5327def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
5328def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
5329def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
5330def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
5331def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
5332
5333def DUPv2i64lane : SIMDDup64FromElement;
5334def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
5335def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
5336def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
5337def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
5338def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
5339def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
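// Illustrative only (registers hypothetical): a lane dup such as
// (v4i32 (AArch64duplane32 V128:$Rn, 2)) selects to something like:
//   dup v0.4s, v1.s[2]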

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
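// A worked example of the lane remapping, assuming little-endian lane
// numbering: h-lane 3 of an .8h register occupies byte lanes 6 and 7, and
// truncation keeps the low byte, so duplicating the truncated i8 of h-lane 3
// is the same as duplicating byte lane 3 * 2 == 6 (registers illustrative):
//   dup v0.16b, v1.b[6]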

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                     imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                     imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                         imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
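// For example (a sketch, registers illustrative), the masked extract
// (and (vector_extract (v16i8 V128:$Rn), 3), 0xff) becomes the single
//   umov w0, v0.b[3]
// with no separate AND, because UMOV already zeroes bits [31:8].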

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                  (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
            (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
            (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
            (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
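// These avoid a GPR<->FPR round trip for a floating-point zero: writing 0.0
// into a lane is just an insert from WZR/XZR, e.g. (illustrative):
//   mov v0.s[1], wzr      // v0.s[1] = 0.0f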

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension.
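// e.g. (a sketch, registers illustrative) copying lane 2 of one vector into
// lane 4 of another is a single element copy:
//   mov v0.b[4], v1.b[2]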
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG
                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
                dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;


// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;


def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
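// Illustrative codegen for the two cases above (registers hypothetical):
//   lane 0:       s0 is the low 32 bits of v0 -- a subregister copy, no code
//   other lanes:  mov s0, v1.s[2]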

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might as well be
// INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;
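// A sketch of the resulting codegen (registers illustrative): for
// (v4f32 (concat_vectors V64:$Rd, V64:$Rn)) the low half is already in
// place and the high half is filled with one element copy:
//   mov v0.d[1], v1.d[0]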

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;

// Patterns for uaddv(uaddlp(x)) ==> uaddlv
def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
            (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
            (i64 0))), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (UADDLVv8i8v V64:$op), hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
           (v16i8 V128:$op))))), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (UADDLVv16i8v V128:$op), hsub), ssub)>;
def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;

// Patterns for addp(uaddlp(x)) ==> uaddlv
def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
          (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
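// At the intrinsic level (a hedged example): vaddv_u32(vpaddl_u16(x)) is
// exactly vaddlv_u16(x), so the pair collapses to a single
//   uaddlv s0, v0.4h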

// Patterns for across-vector intrinsics that have a node equivalent returning
// a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;


// If none did, fall back to the explicit patterns, consuming the
// vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
            (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
           (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what was actually generated,
// consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
            maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
            maski16_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          ssub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          (i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          (i64 0)))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
           ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          ssub))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
            ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
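// Illustrative codegen (registers hypothetical): the widening sum of both
// s-lanes is one pairwise op,
//   saddlp v0.1d, v0.2s
// and the i64 result is then read out of d0.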

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".2d",
                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
                                              "fmov", ".2s",
                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".4s",
                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
                                              "fmov", ".4h",
                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                              "fmov", ".8h",
                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;
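// Worked example of the byte-mask expansion: bit i of imm8 selects byte i of
// the 64-bit result, so imm8 = 0b01100001 (bits 0, 5 and 6 set) yields
//   movi d0, #0x00ffff00000000ff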

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                                simdimmtype10,
                                                "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
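// In assembly the all-ones case above is a single instruction (register
// illustrative):
//   movi v0.2d, #0xffffffffffffffff
// and the 64-bit variants simply read its d subregister.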

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
                                                 "movi", ".8b",
                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                                 "movi", ".16b",
                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
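// MVNI writes the bitwise NOT of the (optionally shifted) immediate; e.g.
// (illustrative) mvni v0.4s, #0x10, lsl #8 sets every lane to
// ~0x00001000 == 0xffffefff.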

// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
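// Concretely: fma(x, y, acc) == acc + x * y maps to "fmla Vacc, Vx, Vy" with
// the accumulator tied to $Rd, and since x * y == y * x both operand orders
// must be listed below to catch either canonical form.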
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                           VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for the 64-bit scalar version: extract from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexD:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexD:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;

def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                     int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;

// Generated by MachineCombine
defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
                int_aarch64_neon_smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqadd>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
                int_aarch64_neon_umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                          (vector_extract (v4i32 V128:$Vm),
                                                           VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
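// e.g. (a sketch, registers illustrative) the saturating doubling
// multiply-long of s1 by lane 1 of a .4s vector selects directly to:
//   sqdmull d0, s1, v2.s[1]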

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
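// Semantics reminder for the fixed-point forms above (values illustrative):
//   scvtf  s0, s1, #8    // s0 = (float)(int32_t)s1 / 2^8
//   fcvtzs s1, s0, #8    // s1 = (int32_t)(s0 * 2^8), rounded toward zero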

// Patterns for FP16 intrinsics - these require a register copy to/from FPR16,
// since i16 is not a legal type.

def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
            (and FPR32:$Rn, (i32 65535)),
            vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGT16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
6393
6394defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
6395defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
6396defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
6397                                     int_aarch64_neon_sqrshrn>;
6398defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
6399                                     int_aarch64_neon_sqrshrun>;
6400defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
6401defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
6402defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
6403                                     int_aarch64_neon_sqshrn>;
6404defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
6405                                     int_aarch64_neon_sqshrun>;
6406defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
6407defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
6408defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
6409    TriOpFrag<(add node:$LHS,
6410                   (AArch64srshri node:$MHS, node:$RHS))>>;
6411defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
6412defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
6413    TriOpFrag<(add node:$LHS,
6414                   (AArch64vashr node:$MHS, node:$RHS))>>;
6415defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
6416                                     int_aarch64_neon_uqrshrn>;
6417defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
6418defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
6419                                     int_aarch64_neon_uqshrn>;
6420defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
6421defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
6422    TriOpFrag<(add node:$LHS,
6423                   (AArch64urshri node:$MHS, node:$RHS))>>;
6424defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
6425defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
6426    TriOpFrag<(add node:$LHS,
6427                   (AArch64vlshr node:$MHS, node:$RHS))>>;
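// As an illustration of the accumulating forms above, a sketch of the
// resulting assembly:
//   ssra d0, d1, #7     computes d0 += (d1 asr #7)
//   usra d0, d1, #7     computes d0 += (d1 lsr #7)
// matching the TriOpFrag<(add node:$LHS, (shift node:$MHS, node:$RHS))>
// shape used for SSRA/USRA/SRSRA/URSRA.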
6428
6429//----------------------------------------------------------------------------
6430// AdvSIMD vector shift instructions
6431//----------------------------------------------------------------------------
defm FCVTZS : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs",
                                 int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu",
                                 int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF  : SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                   int_aarch64_neon_vcvtfxs2fp>;
6436defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
6437                                         int_aarch64_neon_rshrn>;
6438defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
6439defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
6440                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
6441defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
6442def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
6443                                      (i32 vecshiftL64:$imm))),
6444          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
6445defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
6446                                         int_aarch64_neon_sqrshrn>;
6447defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
6448                                         int_aarch64_neon_sqrshrun>;
6449defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
6450defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
6451defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
6452                                         int_aarch64_neon_sqshrn>;
6453defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
6454                                         int_aarch64_neon_sqshrun>;
6455defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
6456def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
6457                                      (i32 vecshiftR64:$imm))),
6458          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
6459defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
6460defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
6461                 TriOpFrag<(add node:$LHS,
6462                                (AArch64srshri node:$MHS, node:$RHS))> >;
6463defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
6464                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
6465
6466defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
6467defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
6468                TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
6469defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
6470                        int_aarch64_neon_vcvtfxu2fp>;
6471defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
6472                                         int_aarch64_neon_uqrshrn>;
6473defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
6474defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
6475                                         int_aarch64_neon_uqshrn>;
6476defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
6477defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
6478                TriOpFrag<(add node:$LHS,
6479                               (AArch64urshri node:$MHS, node:$RHS))> >;
6480defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
6481                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
6482defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
6483defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
6484                TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
6485
6486// SHRN patterns for when a logical right shift was used instead of arithmetic
6487// (the immediate guarantees no sign bits actually end up in the result so it
6488// doesn't matter).
6489def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
6490          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
6491def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
6492          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
6493def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
6494          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
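// For example (a sketch): the IR
//   %s = lshr <8 x i16> %v, (splat of 8)
//   %t = trunc <8 x i16> %s to <8 x i8>
// selects "shrn v0.8b, v1.8h, #8". The narrowing immediate is at most 8, so
// every bit kept by the truncation is an original source bit and lshr/ashr
// give the same result.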
6495
6496def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
6497                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
6498                                                    vecshiftR16Narrow:$imm)))),
6499          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
6500                           V128:$Rn, vecshiftR16Narrow:$imm)>;
6501def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
6502                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
6503                                                    vecshiftR32Narrow:$imm)))),
6504          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
6505                           V128:$Rn, vecshiftR32Narrow:$imm)>;
6506def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
6507                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
6508                                                    vecshiftR64Narrow:$imm)))),
6509          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
6511
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
6514def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
6515def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
6516def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
6517def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
6518def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
6519def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
6520def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
6521def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
6522def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128-bit source register.
6524def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
6525          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
6526def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
6527          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
6528def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
6529          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
6530def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
6531          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
6532def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
6533          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
6534def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
6535          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
6536def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
6537          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
6538def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
6539          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
6540def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
6541          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
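// For example, extending the high half (a sketch):
//   (v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)))))
// selects SSHLLv16i8_shift with a zero shift amount, i.e.
//   sshll2 v0.8h, v1.16b, #0
// which the assembler prints as "sxtl2 v0.8h, v1.16b".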
6542
6543// Vector shift sxtl aliases
6544def : InstAlias<"sxtl.8h $dst, $src1",
6545                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
6546def : InstAlias<"sxtl $dst.8h, $src1.8b",
6547                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
6548def : InstAlias<"sxtl.4s $dst, $src1",
6549                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
6550def : InstAlias<"sxtl $dst.4s, $src1.4h",
6551                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
6552def : InstAlias<"sxtl.2d $dst, $src1",
6553                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
6554def : InstAlias<"sxtl $dst.2d, $src1.2s",
6555                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
6556
6557// Vector shift sxtl2 aliases
6558def : InstAlias<"sxtl2.8h $dst, $src1",
6559                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
6560def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
6561                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
6562def : InstAlias<"sxtl2.4s $dst, $src1",
6563                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
6564def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
6565                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
6566def : InstAlias<"sxtl2.2d $dst, $src1",
6567                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
6568def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
6569                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
6570
6571// Vector shift uxtl aliases
6572def : InstAlias<"uxtl.8h $dst, $src1",
6573                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
6574def : InstAlias<"uxtl $dst.8h, $src1.8b",
6575                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
6576def : InstAlias<"uxtl.4s $dst, $src1",
6577                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
6578def : InstAlias<"uxtl $dst.4s, $src1.4h",
6579                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
6580def : InstAlias<"uxtl.2d $dst, $src1",
6581                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
6582def : InstAlias<"uxtl $dst.2d, $src1.2s",
6583                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
6584
6585// Vector shift uxtl2 aliases
6586def : InstAlias<"uxtl2.8h $dst, $src1",
6587                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
6588def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
6589                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
6590def : InstAlias<"uxtl2.4s $dst, $src1",
6591                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
6592def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
6593                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
6594def : InstAlias<"uxtl2.2d $dst, $src1",
6595                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
6596def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
6597                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
6598
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 size step-ups.
6612class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
6613  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
6614        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
6615                            (SSHLLv4i16_shift
6616                              (f64
6617                                (EXTRACT_SUBREG
6618                                  (SSHLLv8i8_shift
6619                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6620                                        INST,
6621                                        bsub),
6622                                    0),
6623                                  dsub)),
6624                               0),
6625                             ssub)))>,
6626    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
6627
6628def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
6629                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
6630def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
6631                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
6632def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
6633                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
6634def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
6635                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
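// For the immediate-offset form above, the selected sequence is roughly:
//   ldr   b0, [x0]             // load into the FP unit
//   sshll v0.8h, v0.8b, #0     // sxtl: i8  -> i16
//   sshll v0.4s, v0.4h, #0     // sxtl: i16 -> i32
//   scvtf s0, s0               // 4-cycle FPR -> FPR convert
// i.e. two 2-cycle extensions plus the FPR->FPR SCVTF instead of the
// 9-cycle GPR->FPR SCVTF.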
6636
6637// 16-bits -> float. 1 size step-up.
6638class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
6639  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
6640        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
6641                            (SSHLLv4i16_shift
6642                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6643                                  INST,
6644                                  hsub),
6645                                0),
6646                            ssub)))>, Requires<[NotForCodeSize]>;
6647
6648def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
6649                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
6650def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
6651                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
6652def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
6653                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
6654def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
6655                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
6656
// 32-bit to 32-bit conversions are handled in the target-specific dag
// combine performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with SCVTF on
// floating point registers (both source and destination must have the
// same size).
6662
// Here are the patterns for 8, 16, 32, and 64-bit integer to double.
// 8-bits -> double. 3 size step-ups: give up.
// 16-bits -> double. 2 size step-ups.
6666class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
6667  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
6668           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
6669                              (SSHLLv2i32_shift
6670                                 (f64
6671                                  (EXTRACT_SUBREG
6672                                    (SSHLLv4i16_shift
6673                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6674                                        INST,
6675                                        hsub),
6676                                     0),
6677                                   dsub)),
6678                               0),
6679                             dsub)))>,
6680    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
6681
6682def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
6683                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
6684def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
6685                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
6686def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
6687                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
6688def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
6689                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
6690// 32-bits -> double. 1 size step-up.
6691class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
6692  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
6693           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
6694                              (SSHLLv2i32_shift
6695                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6696                                  INST,
6697                                  ssub),
6698                               0),
6699                             dsub)))>, Requires<[NotForCodeSize]>;
6700
6701def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
6702                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
6703def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
6704                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
6705def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
6706                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
6707def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
6708                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
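// Likewise, the immediate-offset 32-bit -> double case above becomes
// roughly:
//   ldr   s0, [x0]
//   sshll v0.2d, v0.2s, #0     // sxtl: i32 -> i64
//   scvtf d0, d0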
6709
// 64-bit -> double conversions are handled in the target-specific dag
// combine performIntToFpCombine.
6712
6713
6714//----------------------------------------------------------------------------
6715// AdvSIMD Load-Store Structure
6716//----------------------------------------------------------------------------
6717defm LD1 : SIMDLd1Multiple<"ld1">;
6718defm LD2 : SIMDLd2Multiple<"ld2">;
6719defm LD3 : SIMDLd3Multiple<"ld3">;
6720defm LD4 : SIMDLd4Multiple<"ld4">;
6721
6722defm ST1 : SIMDSt1Multiple<"st1">;
6723defm ST2 : SIMDSt2Multiple<"st2">;
6724defm ST3 : SIMDSt3Multiple<"st3">;
6725defm ST4 : SIMDSt4Multiple<"st4">;
6726
6727class Ld1Pat<ValueType ty, Instruction INST>
6728  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
6729
6730def : Ld1Pat<v16i8, LD1Onev16b>;
6731def : Ld1Pat<v8i16, LD1Onev8h>;
6732def : Ld1Pat<v4i32, LD1Onev4s>;
6733def : Ld1Pat<v2i64, LD1Onev2d>;
6734def : Ld1Pat<v8i8,  LD1Onev8b>;
6735def : Ld1Pat<v4i16, LD1Onev4h>;
6736def : Ld1Pat<v2i32, LD1Onev2s>;
6737def : Ld1Pat<v1i64, LD1Onev1d>;
6738
6739class St1Pat<ValueType ty, Instruction INST>
6740  : Pat<(store ty:$Vt, GPR64sp:$Rn),
6741        (INST ty:$Vt, GPR64sp:$Rn)>;
6742
6743def : St1Pat<v16i8, ST1Onev16b>;
6744def : St1Pat<v8i16, ST1Onev8h>;
6745def : St1Pat<v4i32, ST1Onev4s>;
6746def : St1Pat<v2i64, ST1Onev2d>;
6747def : St1Pat<v8i8,  ST1Onev8b>;
6748def : St1Pat<v4i16, ST1Onev4h>;
6749def : St1Pat<v2i32, ST1Onev2s>;
6750def : St1Pat<v1i64, ST1Onev1d>;
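// A plain full-vector load or store therefore selects LD1/ST1, e.g.
// (a sketch):
//   (v4i32 (load GPR64sp:$Rn))      ==>  ld1 { v0.4s }, [x0]
//   (store (v16i8 V128:$Vt), $Rn)   ==>  st1 { v0.16b }, [x0]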
6751
6752//---
6753// Single-element
6754//---
6755
6756defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
6757defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
6758defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
6759defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
6760let mayLoad = 1, hasSideEffects = 0 in {
6761defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
6762defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
6763defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
6764defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
6765defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
6766defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
6767defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
6768defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
6769defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
6770defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
6771defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
6772defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
6773defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
6774defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
6775defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
6776defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
6777}
6778
6779def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
6780          (LD1Rv8b GPR64sp:$Rn)>;
6781def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
6782          (LD1Rv16b GPR64sp:$Rn)>;
6783def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
6784          (LD1Rv4h GPR64sp:$Rn)>;
6785def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
6786          (LD1Rv8h GPR64sp:$Rn)>;
6787def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
6788          (LD1Rv2s GPR64sp:$Rn)>;
6789def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
6790          (LD1Rv4s GPR64sp:$Rn)>;
6791def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
6792          (LD1Rv2d GPR64sp:$Rn)>;
6793def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
6794          (LD1Rv1d GPR64sp:$Rn)>;
// Grab the floating point versions too.
6796def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
6797          (LD1Rv2s GPR64sp:$Rn)>;
6798def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
6799          (LD1Rv4s GPR64sp:$Rn)>;
6800def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
6801          (LD1Rv2d GPR64sp:$Rn)>;
6802def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
6803          (LD1Rv1d GPR64sp:$Rn)>;
6804def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
6805          (LD1Rv4h GPR64sp:$Rn)>;
6806def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
6807          (LD1Rv8h GPR64sp:$Rn)>;
6808def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
6809          (LD1Rv4h GPR64sp:$Rn)>;
6810def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
6811          (LD1Rv8h GPR64sp:$Rn)>;
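// For example, a load splatted to all lanes (a sketch):
//   (v4i32 (AArch64dup (i32 (load GPR64sp:$Rn))))
// selects "ld1r { v0.4s }, [x0]".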
6812
6813class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
6814                    ValueType VTy, ValueType STy, Instruction LD1>
6815  : Pat<(vector_insert (VTy VecListOne128:$Rd),
6816           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
6817        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
6818
6819def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
6820def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
6821def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
6822def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
6823def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
6824def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
6825def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
6826def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
6827
6828// Generate LD1 for extload if memory type does not match the
6829// destination type, for example:
6830//
6831//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
6832//
// In this case, the index must be adjusted to match the LD1 type.
6834//
6835class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
6836                    VecIndex, ValueType VTy, ValueType STy,
6837                    Instruction LD1, SDNodeXForm IdxOp>
6838  : Pat<(vector_insert (VTy VecListOne128:$Rd),
6839                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
6840        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
6841
6842def VectorIndexStoH : SDNodeXForm<imm, [{
6843  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
6844}]>;
6845def VectorIndexStoB : SDNodeXForm<imm, [{
6846  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
6847}]>;
6848def VectorIndexHtoB : SDNodeXForm<imm, [{
6849  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
6850}]>;
6851
6852def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
6853def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
6854def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
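// For example, with VectorIndexStoB above (a sketch): an i8 extload
// inserted into lane 2 of a v4i32 rescales the index by 4 and selects
//   ld1 { v0.b }[8], [x0]
// which fills the byte where lane 2 of the .4s view begins on
// little-endian.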
6855
6856// Same as above, but the first element is populated using
6857// scalar_to_vector + insert_subvector instead of insert_vector_elt.
6858class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
6859                        SDPatternOperator ExtLoad, Instruction LD1>
6860  : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
6861          (ResultTy (EXTRACT_SUBREG
6862            (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
6863
6864def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
6865def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
6866def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
6867
6868class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
6869                   ValueType VTy, ValueType STy, Instruction LD1>
6870  : Pat<(vector_insert (VTy VecListOne64:$Rd),
6871           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
6872        (EXTRACT_SUBREG
6873            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
6874                          VecIndex:$idx, GPR64sp:$Rn),
6875            dsub)>;
6876
6877def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
6878def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
6879def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
6880def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
6881def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
6882def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
6883
6884
6885defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
6886defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
6887defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
6888defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
6889
6890// Stores
6891defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
6892defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
6893defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
6894defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
6895
6896let AddedComplexity = 19 in
6897class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
6898                    ValueType VTy, ValueType STy, Instruction ST1>
6899  : Pat<(scalar_store
6900             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
6901             GPR64sp:$Rn),
6902        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
6903
6904def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
6905def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
6906def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
6907def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
6908def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
6909def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
6910def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
6911def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
6912
6913let AddedComplexity = 19 in
6914class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
6915                   ValueType VTy, ValueType STy, Instruction ST1>
6916  : Pat<(scalar_store
6917             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
6918             GPR64sp:$Rn),
6919        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
6920             VecIndex:$idx, GPR64sp:$Rn)>;
6921
6922def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
6923def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
6924def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
6925def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
6926def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
6927def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
6928
6929multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
6930                             ValueType VTy, ValueType STy, Instruction ST1,
6931                             int offset> {
6932  def : Pat<(scalar_store
6933              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
6934              GPR64sp:$Rn, offset),
6935        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
6936             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
6937
6938  def : Pat<(scalar_store
6939              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
6940              GPR64sp:$Rn, GPR64:$Rm),
6941        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
6942             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
6943}
6944
6945defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
6946defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
6947                        2>;
6948defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
6949defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
6950defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
6951defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
6952defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
6953defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
6954
6955multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
6956                             ValueType VTy, ValueType STy, Instruction ST1,
6957                             int offset> {
6958  def : Pat<(scalar_store
6959              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
6960              GPR64sp:$Rn, offset),
6961        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
6962
6963  def : Pat<(scalar_store
6964              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
6965              GPR64sp:$Rn, GPR64:$Rm),
6966        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
6967}
6968
6969defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
6970                         1>;
6971defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
6972                         2>;
6973defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
6974defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
6975defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
6976defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
6977defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
6978defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
6979
6980let mayStore = 1, hasSideEffects = 0 in {
6981defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
6982defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
6983defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
6984defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
6985defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
6986defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
6987defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
6988defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
6989defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
6990defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
6991defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
6992defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
6993}
6994
6995defm ST1 : SIMDLdSt1SingleAliases<"st1">;
6996defm ST2 : SIMDLdSt2SingleAliases<"st2">;
6997defm ST3 : SIMDLdSt3SingleAliases<"st3">;
6998defm ST4 : SIMDLdSt4SingleAliases<"st4">;
6999
7000//----------------------------------------------------------------------------
7001// Crypto extensions
7002//----------------------------------------------------------------------------
7003
7004let Predicates = [HasAES] in {
7005def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
7006def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
7007def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
7008def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
7009}
7010
7011// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
7012// for AES fusion on some CPUs.
7013let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
7014def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
7015                        Sched<[WriteVq]>;
7016def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
7017                         Sched<[WriteVq]>;
7018}
7019
7020// Only use constrained versions of AES(I)MC instructions if they are paired with
7021// AESE/AESD.
7022def : Pat<(v16i8 (int_aarch64_crypto_aesmc
7023            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
7024                                            (v16i8 V128:$src2))))),
7025          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
7026                                             (v16i8 V128:$src2)))))>,
7027          Requires<[HasFuseAES]>;
7028
7029def : Pat<(v16i8 (int_aarch64_crypto_aesimc
7030            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
7031                                            (v16i8 V128:$src2))))),
7032          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
7033                                              (v16i8 V128:$src2)))))>,
7034          Requires<[HasFuseAES]>;
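// The tied operand keeps the pair in the back-to-back, same-register shape
//   aese  v0.16b, v1.16b
//   aesmc v0.16b, v0.16b
// that cores with the fuse-aes feature can macro-fuse (a sketch).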
7035
7036let Predicates = [HasSHA2] in {
7037def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
7038def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
7039def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
7040def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
7041def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",
                                  int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1",
                                  int_aarch64_crypto_sha256su1>;
7044
7045def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
7046def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",
                                int_aarch64_crypto_sha256su0>;
7048}
7049
7050//----------------------------------------------------------------------------
7051// Compiler-pseudos
7052//----------------------------------------------------------------------------
7053// FIXME: Like for X86, these should go in their own separate .td file.
7054
7055def def32 : PatLeaf<(i32 GPR32:$src), [{
7056  return isDef32(*N);
7057}]>;
7058
7059// In the case of a 32-bit def that is known to implicitly zero-extend,
7060// we can use a SUBREG_TO_REG.
7061def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
7062
7063// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
7065def : Pat<(i64 (anyext GPR32:$src)),
7066          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
7067
7068// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
7069// then assert the extension has happened.
7070def : Pat<(i64 (zext GPR32:$src)),
7071          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
7072
7073// To sign extend, we use a signed bitfield move instruction (SBFM) on the
7074// containing super-reg.
7075def : Pat<(i64 (sext GPR32:$src)),
7076   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
7077def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
7078def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
7079def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
7080def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
7081def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
7082def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
7083def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
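// For reference, these SBFM forms print as the usual aliases (a sketch):
//   SBFMWri $src, 0, 7    ==>  sxtb w0, w1
//   SBFMWri $src, 0, 15   ==>  sxth w0, w1
//   SBFMXri $src, 0, 31   ==>  sxtw x0, w1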
7084
7085def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
7086          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
7087                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
7088def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
7089          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
7090                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
7091
7092def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
7093          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
7094                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
7095def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
7096          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
7097                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
7098
7099def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
7100          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
7101                   (i64 (i64shift_a        imm0_63:$imm)),
7102                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
7103
7104// sra patterns have an AddedComplexity of 10, so make sure we have a higher
7105// AddedComplexity for the following patterns since we want to match sext + sra
7106// patterns before we attempt to match a single sra node.
7107let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of
// the original value which is to be sign extended, i.e. we support shifts
// up to bitwidth-1 bits.
7111def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
7112          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
7113def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
7114          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
7115
7116def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
7117          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
7118def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
7119          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
7120
7121def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
7122          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
7123                   (i64 imm0_31:$imm), 31)>;
7124} // AddedComplexity = 20
7125
7126// To truncate, we can simply extract from a subregister.
7127def : Pat<(i32 (trunc GPR64sp:$src)),
7128          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
7129
7130// __builtin_trap() uses the BRK instruction on AArch64.
7131def : Pat<(trap), (BRK 1)>;
7132def : Pat<(debugtrap), (BRK 0xF000)>;
7133
7134def ubsan_trap_xform : SDNodeXForm<timm, [{
7135  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
7136}]>;
7137
7138def ubsan_trap_imm : TImmLeaf<i32, [{
7139  return isUInt<8>(Imm);
7140}], ubsan_trap_xform>;
7141
7142def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
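// The resulting encodings are (a sketch):
//   (trap)             ==>  brk #0x1
//   (debugtrap)        ==>  brk #0xf000
//   (ubsantrap kind)   ==>  brk #(0x5500 | kind), since 'U' == 0x55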
7143
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2, then shuffle the high part of
// both results together with uzp2.
7147def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
7148          (UZP2v16i8
7149           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
7150                            (EXTRACT_SUBREG V128:$Rm, dsub)),
7151           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
7152def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
7153          (UZP2v8i16
7154           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
7155                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7156           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
7157def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
7158          (UZP2v4i32
7159           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
7160                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7161           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
7162
7163def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
7164          (UZP2v16i8
7165           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
7166                            (EXTRACT_SUBREG V128:$Rm, dsub)),
7167           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
7168def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
7169          (UZP2v8i16
7170           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
7171                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7172           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
7173def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
7174          (UZP2v4i32
7175           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
7176                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7177           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
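// E.g. (v4i32 (mulhu V128:$Rn, V128:$Rm)) becomes (a sketch):
//   umull  v2.2d, v0.2s, v1.2s     // low  halves
//   umull2 v3.2d, v0.4s, v1.4s     // high halves
//   uzp2   v0.4s, v2.4s, v3.4s     // keep the high 32 bits of each product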
7178
7179// Conversions within AdvSIMD types in the same register size are free.
7180// But because we need a consistent lane ordering, in big endian many
7181// conversions require one or more REV instructions.
7182//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
7187//
7188// In big endian mode every memory access has an implicit byte swap. LDR and
7189// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
7190// is, they treat the vector as a sequence of elements to be byte-swapped.
7191// The two pairs of instructions are fundamentally incompatible. We've decided
7192// to use LD1/ST1 only to simplify compiler implementation.
7193//
7194// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
7195// the original code sequence:
7196//   v0 = load v2i32
7197//   v1 = REV v2i32                  (implicit)
7198//   v2 = BITCAST v2i32 v1 to v4i16
7199//   v3 = REV v4i16 v2               (implicit)
7200//        store v4i16 v3
7201//
7202// But this is now broken - the value stored is different to the value loaded
7203// due to lane reordering. To fix this, on every BITCAST we must perform two
7204// other REVs:
7205//   v0 = load v2i32
7206//   v1 = REV v2i32                  (implicit)
7207//   v2 = REV v2i32
7208//   v3 = BITCAST v2i32 v2 to v4i16
7209//   v4 = REV v4i16
7210//   v5 = REV v4i16 v4               (implicit)
7211//        store v4i16 v5
7212//
7213// This means an extra two instructions, but actually in most cases the two REV
7214// instructions can be combined into one. For example:
7215//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
7216//
7217// There is also no 128-bit REV instruction. This must be synthesized with an
7218// EXT instruction.
7219//
7220// Most bitconverts require some sort of conversion. The only exceptions are:
7221//   a) Identity conversions -  vNfX <-> vNiX
7222//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
7223//
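// For instance, on big-endian a v2i32 -> v4i16 bitconvert below selects
// REV32v4i16, i.e. a single "rev32 v0.4h, v0.4h" stands in for the two
// explicit REVs described above (a sketch).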
7224
7225// Natural vector casts (64 bit)
7226def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
7227def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
7228def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
7229def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
7230def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
7231def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
7232def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
7233
7234def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
7235def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
7236def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
7237def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
7238def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
7239def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
7240
7241def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
7242def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
7243def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
7244def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
7245def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
7246def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
7247def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
7248
7249def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
7250def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
7251def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
7252def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
7253def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
7254def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
7255def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
7256def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
7257
7258def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
7259def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
7260def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
7261def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
7262def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
7263def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
7264
7265// Natural vector casts (128 bit)
7266def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
7267def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
7268def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
7269def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
7270def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
7271def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
7272def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
7273def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
7274
7275def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
7276def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
7277def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
7278def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
7279def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
7280def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
7281def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
7282def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
7283
7284def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
7285def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
7286def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
7287def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
7288def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
7289def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
7290def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
7291def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
7292
7293def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
7294def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
7295def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
7296def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
7297def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
7298def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
7299def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
7300def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
7301
7302def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
7303def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
7304def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
7305def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
7306def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
7307def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
7308def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
7309def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
7310
7311def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
7312def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
7313def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
7314def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
7315def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
7316def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
7317def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
7318def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
7319
7320let Predicates = [IsLE] in {
7321def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7322def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7323def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7324def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7325def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7326def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7327
7328def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
7329          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7330def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
7331          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7332def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
7333          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7334def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
7335          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7336def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
7337          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7338def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
7339          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7340def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
7341          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7342}
7343let Predicates = [IsBE] in {
7344def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
7345                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
7346def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
7347                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
7348def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
                  (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
                             (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
                             (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
                             (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
                             (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
                             (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
                             (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
                             (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
                             (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))),
                             (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
                             (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
                             (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
                             (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))),
                             (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))),
                             (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
                             (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
                             (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))), (v8i8  FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
                             (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
                             (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))), (f64   FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
                             (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
                             (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
                             (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
                             (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
                             (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
                             (v2f64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
                             (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
                             (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
                             (v2i64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
                             (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                              (REV64v4i32 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
                             (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
                             (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;

def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
                             (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
                             (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
                             (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
                             (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
                             (v8f16 (REV32v8i16 FPR128:$src))>;

def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))),
                             (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
                             (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
                             (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
                             (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
                             (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
                             (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                              (REV64v16i8 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
}

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;
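
// For example (a sketch; virtual register names are illustrative), the DAG
//   (v4i32 (insert_subvector undef, (v2i32 V64:$src), 0))
// becomes
//   %q = INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), %src, dsub
// which the register allocator can fold into a plain subregister copy, so
// no instruction is emitted for it.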

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                     (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
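
// For example (a sketch, assuming the input vector lives in q0), summing the
// two lanes of a v2f64:
//   %lo = extractelement <2 x double> %v, i64 0
//   %hi = extractelement <2 x double> %v, i64 1
//   %s  = fadd double %lo, %hi
// selects to the single scalar pairwise add:
//   faddp d0, v0.2d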

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
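
// For example (a sketch; register assignment is illustrative), the scalar
// shift intrinsic call
//   %r = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// keeps both operands in FPR64 registers and selects to:
//   sshl d0, d1, d2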

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
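// For example (a sketch, assuming the value is in q0 and the address in x0),
// a 128-bit nontemporal store
//   store <2 x i64> %v, ptr %p, !nontemporal !0
// is split into its two 64-bit halves and emitted as the pair store:
//   stnp d0, d1, [x0]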
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (CPYi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so they carry no encoding information.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to the registers (x16 and x17) that are
  // allowed to tail-call to a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy
// propagation, to reason about, so it is preferred whenever possible.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
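
// For example (a sketch), (i64 (extractelt (v2i64 V128:$V), (i64 0))) becomes
// EXTRACT_SUBREG $V, dsub; if the result is then needed in a GPR, the copy
// materializes as a single "fmov x0, d0".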

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;
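
// For example (a C-like sketch), a fully unrolled 4-element i8 dot product
//   int32_t s = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
// where a and b are addressed by $Rn/$Rm and each element is loaded with a
// sign- or zero-extending i8 load, matches the pattern above and selects to
// a single SDOT/UDOT (v8i8 form) with a zeroed accumulator, reading the
// result from lane 0.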

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special: it lowers to "ADDP Vd.2S, Vn.2S, Vm.2S" with
// Vn == Vm, and the scalar result is read back from Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
                    SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;
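
// For example (a sketch), a fully unrolled 8- or 16-element i8 dot product
// whose partial sums have been reassociated into the lane groupings matched
// by idot_v8i8/idot_v16i8 above selects to a single SDOT/UDOT followed by a
// horizontal add (ADDP or ADDV) that produces the scalar result.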

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that the block can be passed to inline asm as a single operand, and to
// split such a block back out into individual GPRs.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:   Store64BV<0b011, "st64bv">;
  def ST64BV0:  Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
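
// For example (a sketch, register names illustrative), the eight operands of
// int_aarch64_st64b are first glued into a single GPR64x8 tuple with
// REG_SEQUENCE, so the store instruction sees one 64-byte operand and emits:
//   st64b x0, [x8]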

let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"
