//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
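// Each entry below pairs a CodeGen predicate (the C++ expression, evaluated
// when selecting patterns for the current subtarget) with an assembler
// predicate that gates assembly parsing and supplies the feature name used
// in diagnostics. As a purely illustrative sketch (MyLSEInst is a
// hypothetical name, not a definition from this file), an instruction gated
// on LSE would be written as:
//
//   let Predicates = [HasLSE] in
//   def MyLSEInst : ...;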
def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
                                 AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3           : Predicate<"Subtarget->hasEL3()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPCImm()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                 AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                 "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1        : Predicate<"Subtarget->hasSVE2p1()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME           : Predicate<"Subtarget->hasSME()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2          : Predicate<"Subtarget->hasSME2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                "sve2 or sme">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
                 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
                AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                "neon or sme">;
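// For example (an illustrative sketch only, not a definition from this
// file), an instruction valid in both NEON and Streaming SVE mode would use
// the combined predicate rather than HasNEON alone:
//
//   let Predicates = [HasNEONorSME] in
//   def MyStreamingSafeInst : ...;
//
// Because the assembler predicate uses any_of, specifying either feature is
// enough for the assembler to accept such instructions.
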
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes       : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC           : Predicate<"Subtarget->hasHBC()">,
                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE           : Predicate<"Subtarget->hasITE()">,
                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE           : Predicate<"Subtarget->hasTHE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128          : Predicate<"Subtarget->hasD128()">,
                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK           : Predicate<"Subtarget->hasCHK()">,
                       AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS           : Predicate<"Subtarget->hasGCS()">,
                       AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                              [SDTCisSameAs<0, 2>,
                                               SDTCisSameAs<0, 3>,
                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                            [SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>,
                                             SDTCisInt<0>,
                                             SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                            [SDTCisSameAs<0, 2>,
                                             SDTCisSameAs<0, 3>,
                                             SDTCisInt<0>,
                                             SDTCisVT<1, i32>,
                                             SDTCisVT<4, i32>]>;
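// For example, AArch64ISD::ADCS (AArch64adc_flag, defined below) uses
// SDTBinaryArithWithFlagsInOut: it consumes the incoming NZCV flags as a
// third operand and produces both a result and outgoing NZCV, i.e.
// RES, FLAGS_OUT = op LHS, RHS, FLAGS_IN.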

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                      SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisInt<3>,
                                    SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
                                   [SDTCisFP<0>,
                                    SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                 SDTCisSameAs<0,1>,
                                                 SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>,
                                           SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                 SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The profile has no results and a single operand: the variable.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                          [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// Non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
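// These fragments are consumed by the SVE load patterns. As an illustrative
// sketch only (operands elided; names may not match the actual patterns in
// the SVE .td files):
//
//   def : Pat<(nxv8i16 (sext_masked_load_i8 ...)), (LD1SB_H ...)>;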

def non_temporal_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
          cast<MaskedLoadSDNode>(N)->isUnindexed() &&
          cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// Non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// Truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
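// Each defm above expands the multiclass into four suffixed PatFrags, one
// per index interpretation, e.g. nonext_masked_gather_signed_scaled,
// nonext_masked_gather_signed_unscaled, nonext_masked_gather_unsigned_scaled
// and nonext_masked_gather_unsigned_unscaled.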

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
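// As an illustrative sketch only (some_op and SOME_INSTR are hypothetical
// names, not patterns from this file), these PatLeafs let a pattern accept
// an operand only when its high bits are already known to be zero:
//
//   def : Pat<(i64 (some_op top32Zero:$Rn)), (SOME_INSTR ...)>;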

// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn>;
def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp         : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp     : PatFrags<(ops node:$lhs, node:$rhs),
                                   [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                    (AArch64fcmp node:$lhs, node:$rhs)]>;

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                        (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                               [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                    SDTCisSameAs<1, 2>]>;
def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
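// These nodes produce a vector whose elements are twice the width of the
// source elements, e.g. (v8i16 (AArch64smull (v8i8 ...), (v8i8 ...))).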

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abdu node:$lhs, node:$rhs),
                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abds node:$lhs, node:$rhs),
                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64facge    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                                (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                                (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

// Match an add node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits.
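// For example, (or x, 4) behaves exactly like (add x, 4) when bit 2 of x is
// known to be zero, because the addition can never produce a carry.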
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   if (N->getOpcode() == ISD::ADD)
     return true;
   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME: build the capability to compute
     // whether the operands of G_OR have common bits set or not.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}

// Match a mul whose operands have enough sign bits that it can be reduced
// to a mul on narrower operands.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
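// With more than 32 sign bits in each operand, both values are faithfully
// represented by their low 32 bits, so the full 64-bit product can be formed
// by a single 32x32->64 signed multiply (SMULL).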

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to an empty list because we expect these instructions to
// simply get removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                            tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                             tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                             tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                             tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                            texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// A 32-bit jump table destination actually needs only two instructions, since
// the table itself can serve as a PC-relative base; but that optimization
// occurs after branch relaxation, so be pessimistic here.
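// As a rough, illustrative sketch only (the exact expansion lives in the
// AArch64 asm printer, and the label name below is hypothetical), the three
// instructions for JumpTableDest8 form an address, load the compressed table
// entry, and add the shifted offset onto that base:
//
//   adr   $scratch, .Lanchor
//   ldrb  $dst, [$table, $entry]
//   add   $dst, $scratch, $dst, lsl #2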
961let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
962    isNotDuplicable = 1 in {
963def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
964                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
965                      Sched<[]>;
966def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
967                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
968                      Sched<[]>;
969def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
970                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
971                     Sched<[]>;
972}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;
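
// For example (a sketch at the IR level), the intrinsic can be used as:
//   %r = call i64 @llvm.aarch64.space(i32 1024, i64 %x)
// which reserves 1024 bytes of code space at this point while keeping a
// register dependency on %x.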

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions (a DSB and an ISB).
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI
// disabled. However, in order to be compatible with other assemblers (e.g.
// GAS), LLVM should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
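
// For reference, the BTI variants occupy the following hint encodings:
//   bti    == hint #32
//   bti c  == hint #34
//   bti j  == hint #36
//   bti jc == hint #38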

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;
  let Inst{12}   = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

} // mayLoad = ?, mayStore = ?

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;


// ARMv9.4-A Guarded Control Stack
class GCSNoOp<bits<3> op2, string mnemonic>
    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
  let Inst{20-8} = 0b0100001110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}
def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;

class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
            list<dag> pattern = []>
    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;

class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
            list<dag> pattern = []>
    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent

def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;

def : TokenAlias<"DSYNC", "dsync">;

let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40">;
}
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;

class GCSSt<string mnemonic, bits<3> op>
    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
  bits<5> Rt;
  bits<5> Rn;
  let Inst{31-15} = 0b11011001000111110;
  let Inst{14-12} = op;
  let Inst{11-10} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{4-0} = Rt;
  let Predicates = [HasGCS];
}
def GCSSTR  : GCSSt<"gcsstr",  0b000>;
def GCSSTTR : GCSSt<"gcssttr", 0b001>;


// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN       : SIMD_BFCVTN;
def BFCVTN2      : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6-A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// sudot (by element) is selected from the usdot intrinsic, since there is no
// separate sudot intrinsic: the dup of the indexed element is passed as the
// intrinsic's unsigned operand, repeating that element for every lane.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
                         string rhs_kind, RegisterOperand RegType,
                         ValueType AccumType, ValueType InputType>
      : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
                                        lhs_kind, rhs_kind, RegType, AccumType,
                                        InputType, null_frag> {
  let Pattern = [(set (AccumType RegType:$dst),
                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
                                 (InputType (bitconvert (AccumType
                                    (AArch64duplane32 (v4i32 V128:$Rm),
                                        VectorIndexS:$idx)))),
                                 (InputType RegType:$Rn))))];
}

multiclass SIMDSUDOTIndex {
  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}

defm SUDOTlane : SIMDSUDOTIndex;

}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0, 0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

class EOR3_pattern<ValueType VecTy>
  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

class BCAX_pattern<ValueType VecTy>
  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : BCAX_pattern<v16i8>;
def : BCAX_pattern<v8i16>;
def : BCAX_pattern<v4i32>;
def : BCAX_pattern<v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;

def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;

def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;

class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm))),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;

class SM4_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;

def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;

def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
} // HasSM4

let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent processor consistent (RCpc) support, optional in v8.2.
  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. No predicate here; that is done
// inside the multiclass, as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
                                               "fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                           "fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;

let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}

let Predicates = [HasComplxNum, HasNEON] in {
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
  foreach Ty = [v4f32, v2f64] in {
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
  }
}

multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
}

multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
}


let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  defm : FCMLA_PATS<v4f16, V64>;
  defm : FCMLA_PATS<v8f16, V128>;

  defm : FCMLA_LANE_PATS<v4f16, V64,
                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
  defm : FCMLA_LANE_PATS<v8f16, V128,
                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
  defm : FCMLA_PATS<v2f32, V64>;
  defm : FCMLA_PATS<v4f32, V128>;
  defm : FCMLA_PATS<v2f64, V128>;

  defm : FCMLA_LANE_PATS<v4f32, V128,
                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
}

// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
// important for compatibility with other assemblers (e.g. GAS) when building
// software that must run on CPUs both with and without PA.
let Uses = [LR], Defs = [LR] in {
  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
  let isAuthenticated = 1 in {
    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
  }
}
let Uses = [LR, SP], Defs = [LR] in {
  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
  let isAuthenticated = 1 in {
    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
  }
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
  let isAuthenticated = 1 in {
    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
  }
}

let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
}

// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use pointer authentication mnemonics, even with PA disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless PA is enabled.
def : InstAlias<"paciaz", (PACIAZ), 0>;
def : InstAlias<"pacibz", (PACIBZ), 0>;
def : InstAlias<"autiaz", (AUTIAZ), 0>;
def : InstAlias<"autibz", (AUTIBZ), 0>;
def : InstAlias<"paciasp", (PACIASP), 0>;
def : InstAlias<"pacibsp", (PACIBSP), 0>;
def : InstAlias<"autiasp", (AUTIASP), 0>;
def : InstAlias<"autibsp", (AUTIBSP), 0>;
def : InstAlias<"pacia1716", (PACIA1716), 0>;
def : InstAlias<"pacib1716", (PACIB1716), 0>;
def : InstAlias<"autia1716", (AUTIA1716), 0>;
def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;
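
// For reference, the mapping into the hint space is visible in the
// definitions above, e.g. paciasp == hint #25, autiasp == hint #29, and
// xpaclri == hint #7.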

// These pointer authentication instructions require armv8.3a
let Predicates = [HasPAuth] in {

  // When PA is enabled, a better mnemonic should be emitted.
  def : InstAlias<"paciaz", (PACIAZ), 1>;
  def : InstAlias<"pacibz", (PACIBZ), 1>;
  def : InstAlias<"autiaz", (AUTIAZ), 1>;
  def : InstAlias<"autibz", (AUTIBZ), 1>;
  def : InstAlias<"paciasp", (PACIASP), 1>;
  def : InstAlias<"pacibsp", (PACIBSP), 1>;
  def : InstAlias<"autiasp", (AUTIASP), 1>;
  def : InstAlias<"autibsp", (AUTIBSP), 1>;
  def : InstAlias<"pacia1716", (PACIA1716), 1>;
  def : InstAlias<"pacib1716", (PACIB1716), 1>;
  def : InstAlias<"autia1716", (AUTIA1716), 1>;
  def : InstAlias<"autib1716", (AUTIB1716), 1>;
  def : InstAlias<"xpaclri", (XPACLRI), 1>;

  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
                      SDPatternOperator op> {
    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm,  "ia"), op>;
    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm,  "ib"), op>;
    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm,  "da"), op>;
    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm,  "db"), op>;
    def IZA  : SignAuthZero<prefix_z,  0b00, !strconcat(asm, "iza"), op>;
    def DZA  : SignAuthZero<prefix_z,  0b10, !strconcat(asm, "dza"), op>;
    def IZB  : SignAuthZero<prefix_z,  0b01, !strconcat(asm, "izb"), op>;
    def DZB  : SignAuthZero<prefix_z,  0b11, !strconcat(asm, "dzb"), op>;
  }

  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;

  def XPACI : ClearAuth<0, "xpaci">;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;

  def XPACD : ClearAuth<1, "xpacd">;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;

  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;

  // Combined Instructions
  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
  }

  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
  }

  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
    def RETAA   : AuthReturn<0b010, 0, "retaa">;
    def RETAB   : AuthReturn<0b010, 1, "retab">;
    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
  }

  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;

}

// v8.3a floating point conversion for JavaScript
let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
                                      "fjcvtzs",
                                      [(set GPR32:$Rd,
                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
  let Inst{31} = 0;
} // HasJS, HasFPARMv8

// v8.4 Flag manipulation instructions
let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
  let Inst{20-5} = 0b0000001000000000;
}
def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
                        "{\t$Rn, $imm, $mask}">;
} // HasFlagM

// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {

def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b001;
}

def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
  let Inst{18-16} = 0b000;
  let Inst{11-8} = 0b0000;
  let Unpredictable{11-8} = 0b1111;
  let Inst{7-5} = 0b010;
}
} // HasAltNZCV


// Armv8.5-A speculation barrier
def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
  let Inst{20-5} = 0b0001100110000111;
  let Unpredictable{11-8} = 0b1111;
  let Predicates = [HasSB];
  let hasSideEffects = 1;
}

def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;
def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;

def MRS    : MRSI;
def MSR    : MSRI;
def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;

def : Pat<(AArch64mrs imm:$id),
          (MRS imm:$id)>;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;

// This gets lowered into a 24-byte instruction sequence.
let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
def KCFI_CHECK : Pseudo<
  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
}
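
// A rough sketch of the check (the authoritative sequence is emitted by the
// AsmPrinter): load the 32-bit type hash stored just before the callee,
// materialize the expected hash, compare, and trap on mismatch:
//   ldur w16, [$ptr, #-4]
//   movz w17, #<type lo16>
//   movk w17, #<type hi16>, lsl #16
//   cmp  w16, w17
//   b.eq .Lpass
//   brk  #<esr>
// .Lpass:
// Six 4-byte instructions, matching Size = 24 above.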

let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
  Sched<[]>;
}

let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
  Sched<[]>;
}

// The virtual cycle counter register is CNTVCT_EL0.
def : Pat<(readcyclecounter), (MRS 0xdf02)>;
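// 0xdf02 is the packed (op0:op1:CRn:CRm:op2) encoding of CNTVCT_EL0
// (S3_3_C14_C0_2), so this reads the virtual counter directly.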

// FPCR register
let Uses = [FPCR] in
def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
                      [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
               PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
               Sched<[WriteSys]>;
let Defs = [FPCR] in
def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
                      [(int_aarch64_set_fpcr i64:$val)]>,
               PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
               Sched<[WriteSys]>;
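// Likewise, 0xda20 in the expansions above is the packed encoding of FPCR
// (S3_3_C4_C4_0).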

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;


let Predicates = [HasTME] in {

def TSTART : TMSystemI<0b0000, "tstart",
                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;

def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;

def TCANCEL : TMSystemException<0b011, "tcancel",
                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;

def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
  let mayLoad = 0;
  let mayStore = 0;
}
} // HasTME

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def s64imm_32bit : ImmLeaf<i64, [{
  int64_t Imm64 = static_cast<int64_t>(Imm);
  return Imm64 >= std::numeric_limits<int32_t>::min() &&
         Imm64 <= std::numeric_limits<int32_t>::max();
}]>;
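
// For example, 0x00000000ffffffff satisfies i64imm_32bit (its top 32 bits are
// zero), while s64imm_32bit instead accepts anything that survives a
// sign-extending round-trip through 32 bits, e.g. -1 (0xffffffffffffffff).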

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
  GISDNodeXFormEquiv<trunc_imm>;

let Predicates = [OptimizedGISelOrOtherSelector] in {
// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
// copies.
def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
}

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;


def : Pat<(f32 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
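
// As a sketch, f32 1.0 has the bit pattern 0x3f800000, so the pattern above
// materializes it as roughly:
//   mov  w8, #0x3f800000   // via MOVi32imm
//   fmov s0, w8            // the COPY_TO_REGCLASS into FPR32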


// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                             tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
                                  tglobaladdr:$g1, 16),
                          tglobaladdr:$g2, 32),
                  tglobaladdr:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                             tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
                                  tblockaddress:$g1, 16),
                          tblockaddress:$g2, 32),
                  tblockaddress:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                             tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
                                  tconstpool:$g1, 16),
                          tconstpool:$g2, 32),
                  tconstpool:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                             tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
                                  tjumptable:$g1, 16),
                          tjumptable:$g2, 32),
                  tjumptable:$g3, 48)>;
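
// The emitted large-code-model sequence is thus roughly:
//   movz x0, #:abs_g0_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g3:sym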


//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;

// Add/subtract
defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;

def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;

def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
  return N->getOpcode() == ISD::CopyFromReg &&
         cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
}]>;

// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
          (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
}

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
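
// For example, (add GPR32:$x, -16) cannot be encoded as an ADDWri, so it is
// selected as (SUBSWri $x, 16) instead (SUBS rather than SUB per the CSE note
// above; the flag result is simply unused).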

// The same transformation applies to the flag-setting forms of add/sub.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;


// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;

def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd">;
defm MSUB : MulAccum<1, "msub">;

def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 5

let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
} // AddedComplexity = 5

def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;

// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;

// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;

def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;

// v8.1 atomic CAS
defm CAS   : CompareAndSwap<0, 0, "">;
defm CASA  : CompareAndSwap<1, 0, "a">;
defm CASL  : CompareAndSwap<0, 1, "l">;
defm CASAL : CompareAndSwap<1, 1, "al">;

// v8.1 atomic CASP
defm CASP   : CompareAndSwapPair<0, 0, "">;
defm CASPA  : CompareAndSwapPair<1, 0, "a">;
defm CASPL  : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;

// v8.1 atomic SWP
defm SWP   : Swap<0, 0, "">;
defm SWPA  : Swap<1, 0, "a">;
defm SWPL  : Swap<0, 1, "l">;
defm SWPAL : Swap<1, 1, "al">;
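
// For example, "swpal x0, x1, [x2]" atomically stores x0 to [x2] and returns
// the previous memory contents in x1, with both acquire and release
// semantics.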

// v8.1 atomic LD<OP> (register): atomically loads the old value, applies <OP>
// with the register operand, stores the result back, and returns the original
// value (cf. the ST<OP> aliases below).
defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;

defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;

defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;

defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;

defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;

defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;

defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;

defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;

// v8.1 atomic ST<OP> (register), defined as aliases of LD<OP> (register) with
// Rt = XZR, i.e. the loaded value is discarded.
defm : STOPregister<"stadd","LDADD">;   // STADDx
defm : STOPregister<"stclr","LDCLR">;   // STCLRx
defm : STOPregister<"steor","LDEOR">;   // STEORx
defm : STOPregister<"stset","LDSET">;   // STSETx
defm : STOPregister<"stsmax","LDSMAX">; // STSMAXx
defm : STOPregister<"stsmin","LDSMIN">; // STSMINx
defm : STOPregister<"stumax","LDUMAX">; // STUMAXx
defm : STOPregister<"stumin","LDUMIN">; // STUMINx
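// E.g. "stadd w1, [x2]" assembles as "ldadd w1, wzr, [x2]": the old value is
// discarded by loading it into the zero register.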

// v8.5 Memory Tagging Extension
let Predicates = [HasMTE] in {

def IRG   : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
                                 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>;

def GMI   : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
                                 int_aarch64_gmi, GPR64sp>, Sched<[]> {
  let isNotDuplicable = 1;
}
def ADDG  : AddSubG<0, "addg", null_frag>;
def SUBG  : AddSubG<1, "subg", null_frag>;

def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;

def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]> {
  let Defs = [NZCV];
}

def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;

def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;

def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;

def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;

def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
  let Inst{23} = 0;
}

defm STG   : MemTagStore<0b00, "stg">;
defm STZG  : MemTagStore<0b01, "stzg">;
defm ST2G  : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;

def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STGi $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZGi $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (ST2Gi $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZ2Gi $Rn, $Rm, $imm)>;
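
// Usage sketch (illustrative): STG writes the allocation tag for the 16-byte
// granule at the target address, taking the tag from the source register:
//   stg  x0, [x1]       // tag the granule at [x1] with x0's logical tag
//   stzg x0, [sp, #16]  // as above, but also zero the granule's data
// ST2G/STZ2G do the same for two consecutive granules.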

defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;

def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (STGi GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;

def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
          (STGPi $Rt, $Rt2, $Rn, $imm)>;

def IRGstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
      Sched<[]>;
def TAGPstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
      Sched<[]>;

// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
// Large STG to be expanded into a loop. $sz is the size, $Rn is the start
// address. $Rn_wback is one past the end of the range. $Rm is the loop counter.
let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
def STGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

// Variants of the above where $Rn2 is an independent register not tied to the
// input register $Rn. Their purpose is to allow a FrameIndex operand as $Rn
// (which of course cannot be written back).
def STGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;
}

} // Predicates = [HasMTE]

//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND  : LogicalImm<0b00, "and", and, "bic">;
defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR  : LogicalImm<0b01, "orr", or, "orn">;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;


// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;


def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;


//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS    : OneOperandData<0b000101, "cls">;
defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;

def  REV16Wr : OneWRegData<0b000001, "rev16",
                                     UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
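
// CLS counts leading sign bits. The two patterns below use the identity
//   cls(x) == ctlz((((x ^ (x >>s msb)) << 1) | 1))
// where ">>s" is an arithmetic shift by the sign-bit index: xor-ing with the
// sign-extension zeroes the copies of the sign bit, and "<< 1 | 1" drops the
// sign bit itself while keeping the ctlz well-defined for x == 0 and x == -1.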
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;

// Unlike the other one-operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                                    UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
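
// E.g. "ror w0, w1, #8" is "extr w0, w1, w1, #8": a rotate right is an
// extract from a register pair whose halves are the same register.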

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;
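
// Worked example: "lsl w0, w1, #4" becomes UBFMWri w0, w1, #28, #27, since
// i32shift_a(4) == (32 - 4) & 0x1f == 28 and i32shift_b(4) == 31 - 4 == 27.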

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
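
// E.g. "uxth w0, w1" is "ubfm w0, w1, #0, #15", an unsigned bitfield extract
// of bits [15:0]. There is no W-destination "uxtw" because every 32-bit
// register write already zero-extends to the full X register.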

//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL  : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
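
// The patterns above fold constant operands using the instruction semantics
// (illustrative):
//   csinc Rd, Rn, Rm, cc == cc ? Rn : Rm + 1, so csinc wzr, wzr, cc == cc ? 0 : 1
//   csinv Rd, Rn, Rm, cc == cc ? Rn : ~Rm,    so csinv wzr, wzr, cc == cc ? 0 : -1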

def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;

def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;

def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSELWr WZR, GPR32:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;

// The aliased instruction uses the inverse of the condition code given to
// the alias. The parser already inverts the condition code for these
// aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
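
// E.g. "cset w0, eq" assembles as "csinc w0, wzr, wzr, ne", and
// "cneg w0, w1, eq" as "csneg w0, w1, w1, ne".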

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel,
                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// Direct and page addresses of constant pool entries, block addresses,
// external symbols and jump tables.
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
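
// Usage sketch (illustrative): a global's full address is typically
// materialized as the pair
//   adrp x0, sym            // 4KiB page of sym, PC-relative, +/- 4GiB
//   add  x0, x0, :lo12:sym  // low 12 bits of sym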

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call_bti GPR64noip:$Rn),
          (BLR_BTI GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;
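
// The expanded sequence is, roughly (illustrative):
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, #:tlsdesc_lo12:var]
//   add  x0, x0, #:tlsdesc_lo12:var
//   .tlsdesccall var        // directive only; emits no bytes
//   blr  x1
// i.e. four 4-byte instructions, matching Size = 16 above.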

//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B  : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
                                [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}
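
// The INSERT_SUBREG into an IMPLICIT_DEF places the loaded scalar in lane 0
// and leaves the remaining lanes undefined, which matches the semantics of
// scalar_to_vector.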

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all 64-bit-wide loads whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // In big-endian mode, vector loads must be done with LD1.
  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // In big-endian mode, vector loads must be done with LD1.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}
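
// The (SUBREG_TO_REG (i64 0), ..., sub_32) wrapper records that the 32-bit
// load already zeroed bits [63:32] (as every W-register write does), so the
// zero-extension to i64 costs no extra instruction.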

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                 [(set (f128 FPR128Op:$Rt),
                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // In big-endian mode, vector loads must be done with LD1.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // In big-endian mode, vector loads must be done with LD1.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                        (am_indexed64 GPR64sp:$Rn,
                                                      uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;
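
// LDR (literal) encodes a 19-bit signed offset in words from the PC, so the
// target must be 4-byte aligned; hence the alignment and offset checks above.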

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
  [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
  [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
  [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
  [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
  [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
        (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                    [(set GPR64z:$Rt,
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                    [(set GPR32z:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                    [(set FPR8Op:$Rt,
                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                    [(set (f16 FPR16Op:$Rt),
                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                    [(set (f32 FPR32Op:$Rt),
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                    [(set (f64 FPR64Op:$Rt),
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                    [(set (f128 FPR128Op:$Rt),
                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
3272let Predicates = [IsLE] in {
3273  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3274            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3275  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3276            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3277  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3278            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3279  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3280            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3281  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3282            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3283  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3284            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3285  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3286            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3287}
3288
3289//  anyext -> zext
3290def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3291          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
3292def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3293          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3294def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3295          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3296def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
3297    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3298def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3299    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3300def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3301    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3302def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3303    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3304// unscaled zext
3305def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3306          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
3307def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3308          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3309def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3310          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3311def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
3312    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3313def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3314    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3315def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3316    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3317def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3318    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
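
// For example (an illustrative case, not an additional pattern): for IR such
// as
//   %b = load i8, ptr %p
//   %e = zext i8 %b to i64
// at an unscaled (e.g. negative) offset, the patterns above select
//   ldurb w0, [x0, #-1]
// and rely on the W-register write zeroing the upper 32 bits, which the
// (SUBREG_TO_REG (i64 0), ..., sub_32) output dag models for the i64 result.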

//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets; see the
// example after the aliases below.

// Define new assembler match classes, as we want to match these only when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
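
// For example (illustrative): "ldr x0, [x1, #-8]" and "ldr x0, [x1, #3]"
// cannot be encoded as a scaled LDR (the offset is negative or not a multiple
// of the access size), so these aliases let the assembler accept the LDR
// spelling and emit the unscaled LDUR encoding instead.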
// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
               [(set GPR32:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
              [(set GPR64:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                [(set GPR32:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                [(set GPR64:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// An LDR implicitly zeroes the rest of the vector register, so
// vector_insert(zeros, load, 0) can use a single load; see the example below.
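// For example (illustrative): for
//   %l = load i32, ptr %p
//   %v = insertelement <4 x i32> zeroinitializer, i32 %l, i64 0
// a single "ldr s0, [x0]" suffices, because the scalar FP load zeroes the
// rest of the vector register; SUBREG_TO_REG then re-types the S register as
// the full vector without emitting any extra instruction.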
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // Half-vector patterns
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // SVE patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}

defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch timm:$Rt,
                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;

//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;

// Floating-point
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", i8,  store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16, store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32, store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store; see the
// example below.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16,         store, v8f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4f32, f32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2f64, f64, dsub, STRDroW, STRDroX>;
}
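
// For example (illustrative): storing lane 0 of a v4f32, as in
//   %e = extractelement <4 x float> %v, i64 0
//   store float %e, ptr %p
// becomes a plain "str s0, [x0, x1]" on the S subregister instead of a lane
// move followed by a scalar store.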

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                    [(store GPR64z:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                    [(store GPR32z:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                     [(truncstorei16 GPR32z:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
                     [(truncstorei8 GPR32z:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128
def : Pat<(store (f128  FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16,         store, v8f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4f32, f32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2f64, f64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                         [(store GPR64z:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                         [(store GPR32z:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                         [(store FPR8Op:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                         [(store (f16 FPR16Op:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                         [(store (f32 FPR32Op:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                         [(store (f64 FPR64Op:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                         [(store (f128 FPR128Op:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                         [(truncstorei16 GPR32z:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                         [(truncstorei8 GPR32z:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX     : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB    : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH    : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR     : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW   : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX    : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}

// Match all 64-bit-wide stores whose type is compatible with FPR64
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets; see the
// example after the aliases below.
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
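
// For example (illustrative): "str w0, [x1, #-4]" and "strh w0, [x1, #1]"
// have offsets that the scaled STR/STRH encodings cannot represent, so the
// aliases above assemble them to the corresponding STUR/STURH instruction.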

//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
           simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,  "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,  "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;

def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
// Aliases for when offset=0. Note that in contrast to LoadAcquire, which has
// a $Rn of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp
// and add an alias for the case of immediate #0. This is because the new STLR
// versions (from the LRCPC3 extension) can have a non-zero immediate value,
// so GPR64sp0 is no longer appropriate (it parses and discards the optional
// zero). This is not the case for LoadAcquire, because the new LRCPC3 LDAR
// instructions are post-indexed: the immediate value is not inside the []
// brackets and thus is not accepted by the GPR64sp0 parser.
def STLRW0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW   GPR32:$Rt, GPR64sp:$Rn)>;
def STLRX0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX   GPR64:$Rt, GPR64sp:$Rn)>;
def STLRB0  : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB   GPR32:$Rt, GPR64sp:$Rn)>;
def STLRH0  : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH   GPR32:$Rt, GPR64sp:$Rn)>;
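
// For example (illustrative): both "stlr w0, [x1]" and "stlr w0, [x1, #0]"
// assemble to the same STLRW encoding; these aliases exist only so that the
// explicit "#0" spelling is still accepted.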

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

let Predicates = [HasLOR] in {
  // v8.1a "Limited Order Region" extension load-acquire instructions
  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Order Region" extension store-release instructions
  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;

  // Aliases for when offset=0
  def STLLRW0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRW   GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRX0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRX   GPR64:$Rt, GPR64sp:$Rn)>;
  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]",  (STLLRB   GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",  (STLLRH   GPR32:$Rt, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
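
// For example (illustrative): with s0 = 3.0e10, which is out of i32 range,
// "fcvtzs w0, s0" produces 0x7fffffff (INT32_MAX), and with s0 = NaN it
// produces 0; this matches the semantics of fp_to_sint_sat, so no extra
// clamping code is needed around the conversion.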

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;

let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lround f16:$Rn)),
            (FCVTASUWHr f16:$Rn)>;
  def : Pat<(i64 (any_lround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
  def : Pat<(i64 (any_llround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
}
def : Pat<(i32 (any_lround f32:$Rn)),
          (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),
          (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
4337
4338//===----------------------------------------------------------------------===//
4339// Scaled integer to floating point conversion instructions.
4340//===----------------------------------------------------------------------===//
4341
4342defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
4343defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
4344
4345//===----------------------------------------------------------------------===//
4346// Unscaled integer to floating point conversion instruction.
4347//===----------------------------------------------------------------------===//
4348
4349defm FMOV : UnscaledConversion<"fmov">;
4350
4351// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
4352let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
4353def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
4354    Sched<[WriteF]>;
4355def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
4356    Sched<[WriteF]>;
4357def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
4358    Sched<[WriteF]>;
4359}
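// These pseudos are later expanded to a zeroing idiom, typically an FMOV from
// WZR/XZR (cf. the aliases below), which is cheaper to rematerialize than
// reloading a zero from a constant pool. (Expansion detail is a sketch of the
// intent, not a normative description.)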
4360
// Similarly, add assembly aliases for FMOV of 0.0.
4362def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
4363    Requires<[HasFullFP16]>;
4364def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
4365def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
4366
4367def : Pat<(bf16 fpimm0),
4368          (FMOVH0)>;
4369
// Patterns for general FP16 and BF16 immediates, materialized via a GPR move.
4371let Predicates = [HasFullFP16] in {
4372  def : Pat<(f16 fpimm:$in),
4373            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4374
4375  def : Pat<(bf16 fpimm:$in),
4376            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
4377}
4378
4379//===----------------------------------------------------------------------===//
4380// Floating point conversion instruction.
4381//===----------------------------------------------------------------------===//
4382
4383defm FCVT : FPConversion<"fcvt">;
4384
4385//===----------------------------------------------------------------------===//
4386// Floating point single operand instructions.
4387//===----------------------------------------------------------------------===//
4388
4389defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
4390defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
4391defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
4392defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
4393defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
4394defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
4395defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
4396defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;
4397
4398defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
4399defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;
4400
4401let SchedRW = [WriteFDiv] in {
4402defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
4403}
4404
4405let Predicates = [HasFRInt3264] in {
4406  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
4407  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
4408  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
4409  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
4410} // HasFRInt3264
4411
// Emitting strict_lrint as two instructions is valid because any exception
// that occurs will be raised by exactly one of them (e.g. if the input is not
// an integer, the inexact exception is raised by the FRINTX but not by the
// FCVTZS, since the output of FRINTX is already an integer).
4416let Predicates = [HasFullFP16] in {
4417  def : Pat<(i32 (any_lrint f16:$Rn)),
4418            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
4419  def : Pat<(i64 (any_lrint f16:$Rn)),
4420            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4421  def : Pat<(i64 (any_llrint f16:$Rn)),
4422            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4423}
4424def : Pat<(i32 (any_lrint f32:$Rn)),
4425          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
4426def : Pat<(i32 (any_lrint f64:$Rn)),
4427          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
4428def : Pat<(i64 (any_lrint f32:$Rn)),
4429          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4430def : Pat<(i64 (any_lrint f64:$Rn)),
4431          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4432def : Pat<(i64 (any_llrint f32:$Rn)),
4433          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4434def : Pat<(i64 (any_llrint f64:$Rn)),
4435          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
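// E.g. "long lrint(double x)" becomes "frintx d0, d0" followed by
// "fcvtzs x0, d0": FRINTX performs the rounding (raising any inexact
// exception), and FCVTZS then converts an already-integral value exactly.
// (Illustrative register choice.)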
4436
4437//===----------------------------------------------------------------------===//
4438// Floating point two operand instructions.
4439//===----------------------------------------------------------------------===//
4440
4441defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
4442let SchedRW = [WriteFDiv] in {
4443defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
4444}
4445defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
4446defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
4447defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
4448defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
4449let SchedRW = [WriteFMul] in {
4450defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
4451defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
4452}
4453defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;
4454
4455multiclass FMULScalarFromIndexedLane0Patterns<string inst,
4456                                              string inst_f16_suffix,
4457                                              string inst_f32_suffix,
4458                                              string inst_f64_suffix,
4459                                              SDPatternOperator OpNode,
4460                                              list<Predicate> preds = []> {
4461  let Predicates = !listconcat(preds, [HasFullFP16]) in {
4462  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
4463                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
4464            (!cast<Instruction>(inst # inst_f16_suffix)
4465              FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
4466  }
4467  let Predicates = preds in {
4468  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
4469                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
4470            (!cast<Instruction>(inst # inst_f32_suffix)
4471              FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
4472  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
4473                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
4474            (!cast<Instruction>(inst # inst_f64_suffix)
4475              FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
4476  }
4477}
4478
4479defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
4480                                          any_fmul>;
4481
4482// Match reassociated forms of FNMUL.
4483def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
4484          (FNMULHrr FPR16:$a, FPR16:$b)>,
4485          Requires<[HasFullFP16]>;
4486def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
4487          (FNMULSrr FPR32:$a, FPR32:$b)>;
4488def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
4489          (FNMULDrr FPR64:$a, FPR64:$b)>;
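// E.g. source written as "(-a) * b" still selects a single FNMUL,
//   fnmul d0, d0, d1   ; d0 = -(d0 * d1)
// rather than fneg + fmul (illustrative).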
4490
4491def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4492          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
4493def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4494          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
4495def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4496          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
4497def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4498          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
4499
4500//===----------------------------------------------------------------------===//
4501// Floating point three operand instructions.
4502//===----------------------------------------------------------------------===//
4503
4504defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
4505defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
4506     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
4507defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
4508     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
4509defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
4510     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
4511
// The following patterns catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc. have the accumulator at the *end* of (ins), unlike
// the NEON variant.
4517
// Here we handle first "a + (-b)*c", which maps to FMSUB:
4519
4520let Predicates = [HasNEON, HasFullFP16] in
4521def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
4522          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4523
4524def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
4525          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4526
4527def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
4528          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4529
// Now it's time for "(-a) + (-b)*c", which maps to FNMADD:
4531
4532let Predicates = [HasNEON, HasFullFP16] in
4533def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
4534          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4535
4536def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
4537          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4538
4539def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
4540          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
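// Algebraically: fma(-a, b, -c) = -(a*b) - c = -(a*b + c), which is FNMADD;
// e.g. "fnmadd d0, d1, d2, d3" computes -(d1*d2) - d3 (an illustrative check
// of the patterns above).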
4541
4542//===----------------------------------------------------------------------===//
4543// Floating point comparison instructions.
4544//===----------------------------------------------------------------------===//
4545
4546defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
4547defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;
4548
4549//===----------------------------------------------------------------------===//
4550// Floating point conditional comparison instructions.
4551//===----------------------------------------------------------------------===//
4552
4553defm FCCMPE : FPCondComparison<1, "fccmpe">;
4554defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;
4555
4556//===----------------------------------------------------------------------===//
4557// Floating point conditional select instruction.
4558//===----------------------------------------------------------------------===//
4559
4560defm FCSEL : FPCondSelect<"fcsel">;
4561
4562let Predicates = [HasFullFP16] in
4563def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
4564          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;
4565
4566// CSEL instructions providing f128 types need to be handled by a
4567// pseudo-instruction since the eventual code will need to introduce basic
4568// blocks and control flow.
4569def F128CSEL : Pseudo<(outs FPR128:$Rd),
4570                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
4571                      [(set (f128 FPR128:$Rd),
4572                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
4573                                       (i32 imm:$cond), NZCV))]> {
4574  let Uses = [NZCV];
4575  let usesCustomInserter = 1;
4576  let hasNoSchedulingInfo = 1;
4577}
4578
4579//===----------------------------------------------------------------------===//
4580// Instructions used for emitting unwind opcodes on ARM64 Windows.
4581//===----------------------------------------------------------------------===//
4582let isPseudo = 1 in {
4583  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
4584  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4585  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4586  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4587  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4588  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4589  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4590  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4591  def SEH_SaveFReg_X :  Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4592  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4593  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4594  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
4595  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4596  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
4597  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4598  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
4599  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4600  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
4601}
4602
//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH.
//===----------------------------------------------------------------------===//
4605let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
4606    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
4607   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
4608   let usesCustomInserter = 1 in
4609     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
4610                    Sched<[]>;
4611}
4612
4613// Pseudo instructions for homogeneous prolog/epilog
4614let isPseudo = 1 in {
4615  // Save CSRs in order, {FPOffset}
4616  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4617  // Restore CSRs in order
4618  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4619}
4620
4621//===----------------------------------------------------------------------===//
4622// Floating point immediate move.
4623//===----------------------------------------------------------------------===//
4624
4625let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
4626defm FMOV : FPMoveImmediate<"fmov">;
4627}
4628
4629let Predicates = [HasFullFP16] in {
4630  def : Pat<(bf16 fpimmbf16:$in),
4631            (FMOVHi (fpimm16XForm bf16:$in))>;
4632}
4633
4634//===----------------------------------------------------------------------===//
4635// Advanced SIMD two vector instructions.
4636//===----------------------------------------------------------------------===//
4637
4638defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
4639                                          AArch64uabd>;
4640// Match UABDL in log2-shuffle patterns.
4641def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
4642                           (zext (v8i8 V64:$opB))))),
4643          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4644def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
4645               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
4646                                (zext (v8i8 V64:$opB))),
4647                           (AArch64vashr v8i16:$src, (i32 15))))),
4648          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4649def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4650                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
4651          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4652def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
4653               (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4654                                (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
4655                           (AArch64vashr v8i16:$src, (i32 15))))),
4656          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4657def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
4658                           (zext (v4i16 V64:$opB))))),
4659          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
4660def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
4661                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
4662          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
4663def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
4664                           (zext (v2i32 V64:$opB))))),
4665          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
4666def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
4667                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
4668          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
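// The xor/ashr forms above are the standard expansion of abs via the sign
// splat s = x >> 15: abs(x) = (x + s) ^ s. Matching both that expansion and
// the plain abs node lets e.g. an i8 absolute difference widened to i16
// select "uabdl v0.8h, v1.8b, v2.8b" directly (illustrative).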
4669
4670defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
4671defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
4672defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
4673defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
4674defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
4675defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
4676defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
4677defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
4678defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
4679defm FABS   : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
4680
4681def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
4682          (CMLTv8i8rz V64:$Rn)>;
4683def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
4684          (CMLTv4i16rz V64:$Rn)>;
4685def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
4686          (CMLTv2i32rz V64:$Rn)>;
4687def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
4688          (CMLTv16i8rz V128:$Rn)>;
4689def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
4690          (CMLTv8i16rz V128:$Rn)>;
4691def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
4692          (CMLTv4i32rz V128:$Rn)>;
4693def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
4694          (CMLTv2i64rz V128:$Rn)>;
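// An arithmetic shift right by (element bits - 1) is a sign splat, i.e. a
// "less than zero" mask, so e.g. "sshr v0.4s, v0.4s, #31" is matched as
// "cmlt v0.4s, v0.4s, #0" (illustrative).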
4695
4696defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
4697defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
4698defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
4699defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
4700defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
4701defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
4702defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
4703defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
4704def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
4705          (FCVTLv4i16 V64:$Rn)>;
4706def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
4707                                                                (i64 4)))),
4708          (FCVTLv8i16 V128:$Rn)>;
4709def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
4710          (FCVTLv2i32 V64:$Rn)>;
4711def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
4712          (FCVTLv4i32 V128:$Rn)>;
4713def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
4714          (FCVTLv4i16 V64:$Rn)>;
4715def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
4716          (FCVTLv8i16 V128:$Rn)>;
4717
4718defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
4719defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
4720defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
4721defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
4722defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
4723def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
4724          (FCVTNv4i16 V128:$Rn)>;
4725def : Pat<(concat_vectors V64:$Rd,
4726                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
4727          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4728def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
4729          (FCVTNv2i32 V128:$Rn)>;
4730def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
4731          (FCVTNv4i16 V128:$Rn)>;
4732def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
4733          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4734def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
4735          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4736defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
4737defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
4738defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
4739                                        int_aarch64_neon_fcvtxn>;
4740defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
4741defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
4742
// AArch64's FCVTZS/FCVTZU instructions saturate when the result is out of
// range, so they implement the saturating fp_to_[su]int_sat nodes directly.
4744multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
4745  let Predicates = [HasFullFP16] in {
4746  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
4747            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
4748  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
4749            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
4750  }
4751  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
4752            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
4753  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
4754            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
4755  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
4756            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
4757}
4758defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
4759defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
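// E.g. (illustrative) a saturating vector conversion
//   %v = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %x)
// selects to a single "fcvtzs v0.4s, v0.4s" with no additional clamping code,
// relying on the saturating semantics noted above.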
4760
4761def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
4762def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
4763def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
4764def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
4765def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
4766
4767def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
4768def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
4769def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
4770def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
4771def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
4772
4773defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
4774defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
4775defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
4776defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
4777defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
4778defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
4779defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
4780defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
4781defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
4782
4783let Predicates = [HasFRInt3264] in {
4784  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
4785  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
4786  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
4787  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
4788} // HasFRInt3264
4789
4790defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
4791defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
4792defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
4793                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
4794defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
4795// Aliases for MVN -> NOT.
4796def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
4797                (NOTv8i8 V64:$Vd, V64:$Vn)>;
4798def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
4799                (NOTv16i8 V128:$Vd, V128:$Vn)>;
4800
4801def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
4802def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
4803def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
4804def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
4805def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
4806def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
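// NOT is purely bitwise, so the byte-sized forms cover every element type of
// the same register width; e.g. a v4i32 xor with all-ones still selects
// "not v0.16b, v0.16b" (illustrative).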
4807
4808defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
4809defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
4810defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
4811defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
4812defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
4813       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
4814defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
4815defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
4816defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
4817defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
4818defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
4819defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
4820defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
4821defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
4822defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
4823       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
4824defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
4825defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
4826defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
4827defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
4828defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
4829defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
4830defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
4831
4832def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
4833def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
4834def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
4835def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
4836def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
4837def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
4838def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
4839def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
4840def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
4841def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
4842
// Patterns for the vector long shift by element width (SHLL). These need to
// match all three of zext, sext and anyext, so it's easier to pull the
// patterns out here than to attach them to the instruction definition.
4846multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
4847  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
4848            (SHLLv8i8 V64:$Rn)>;
4849  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
4850            (SHLLv16i8 V128:$Rn)>;
4851  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
4852            (SHLLv4i16 V64:$Rn)>;
4853  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
4854            (SHLLv8i16 V128:$Rn)>;
4855  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
4856            (SHLLv2i32 V64:$Rn)>;
4857  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
4858            (SHLLv4i32 V128:$Rn)>;
4859}
4860
4861defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
4862defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
4863defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
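// The extension kind is irrelevant here because the shift amount equals the
// source element width, so the shift discards all of the extended bits; e.g.
// (illustrative)
//   shl (zext <8 x i8> %x to <8 x i16>), 8
// maps onto "shll v0.8h, v0.8b, #8".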
4864
4865// Constant vector values, used in the S/UQXTN patterns below.
4866def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
4867def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
4868def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
4869def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
4870def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
4871def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
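// Decoding these (an illustrative sketch): VImmFF is the MOVI byte mask
// 0x00FF00FF00FF00FF (imm 85 = 0b01010101), i.e. 255 in each i16 lane, and
// VImmFFFF (imm 51 = 0b00110011) is 65535 in each i32 lane; VImm7F/VImm80 and
// VImm7FFF/VImm8000 give the signed lane bounds 127/-128 (i16) and
// 32767/-32768 (i32).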
4872
// trunc(umin(X, 255)) -> UQXTN v8i8
4874def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
4875          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
4877def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
4878          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN,
//  and the same with min/max reversed
4881def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
4882                             (v8i16 VImm7F)))),
4883          (SQXTNv8i8 V128:$Vn)>;
4884def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
4885                             (v8i16 VImm80)))),
4886          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN,
//  and the same with min/max reversed
4889def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
4890                              (v4i32 VImm7FFF)))),
4891          (SQXTNv4i16 V128:$Vn)>;
4892def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
4893                              (v4i32 VImm8000)))),
4894          (SQXTNv4i16 V128:$Vn)>;
4895
// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn),
// and the same with min/max reversed
4898def : Pat<(v16i8 (concat_vectors
4899                 (v8i8 V64:$Vd),
4900                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
4901                                          (v8i16 VImm7F)))))),
4902          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4903def : Pat<(v16i8 (concat_vectors
4904                 (v8i8 V64:$Vd),
4905                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
4906                                          (v8i16 VImm80)))))),
4907          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4908
// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn),
// and the same with min/max reversed
4911def : Pat<(v8i16 (concat_vectors
4912                 (v4i16 V64:$Vd),
4913                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
4914                                           (v4i32 VImm7FFF)))))),
4915          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4916def : Pat<(v8i16 (concat_vectors
4917                 (v4i16 V64:$Vd),
4918                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
4919                                           (v4i32 VImm8000)))))),
4920          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4921
4922//===----------------------------------------------------------------------===//
4923// Advanced SIMD three vector instructions.
4924//===----------------------------------------------------------------------===//
4925
4926defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
4927defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
4928defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
4929defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
4930defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
4931defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
4932defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
4933defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
4934foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
4935def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
4936}
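// i.e. "x != 0" per element: CMTST sets an element to all-ones when
// (Rn & Rm) != 0, so CMTST x, x is a single-instruction replacement for
// CMEQ-against-zero followed by a NOT (illustrative).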
4937defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
4938let Predicates = [HasNEON] in {
4939foreach VT = [ v2f32, v4f32, v2f64 ] in
4940def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
4941}
4942let Predicates = [HasNEON, HasFullFP16] in {
4943foreach VT = [ v4f16, v8f16 ] in
4944def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
4945}
4946defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
4947defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
4948defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
4949defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
4950defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
4951defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
4952defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
4953defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
4954defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
4955defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
4956defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
4957defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
4958defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
4959defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
4960defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
4961defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
4962
4963// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
4964// instruction expects the addend first, while the fma intrinsic puts it last.
4965defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
4966            TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
4967defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
4968            TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
4969
4970defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
4971defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
4972defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
4973defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
4974defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
4975
// MLA and MLS are generated by the MachineCombiner.
4977defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
4978defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
4979
4980defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
4981defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
4982defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
4983      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
4984defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
4985defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
4986defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
4987defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
4988defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
4989defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
4990defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
4991defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
4992defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
4993defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
4994defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
4995defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
4996defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
4997defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
4998defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
4999defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
5000defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
5001defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
5002      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
5003defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
5004defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
5005defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
5006defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
5007defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
5008defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
5009defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5010defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5011defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
5012defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5013defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5014defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
5015defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
5016defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
5017defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
5018                                                  int_aarch64_neon_sqrdmlah>;
5019defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
5020                                                    int_aarch64_neon_sqrdmlsh>;
5021
// Extra saturating-arithmetic patterns, beyond the intrinsic matches above.
5023defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
5024defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
5025defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
5026defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
5027
5028defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
5029defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
5030                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
5031defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
5032defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
5033                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
5034defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
5035
5036// Pseudo bitwise select pattern BSP.
5037// It is expanded into BSL/BIT/BIF after register allocation.
5038defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
5039                                                      (and (vnot node:$LHS), node:$RHS))>>;
5040defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
5041defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
5042defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
5043
5044def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
5045          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5046def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
5047          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5048def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
5049          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5050def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
5051          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5052
5053def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
5054          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5055def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
5056          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5057def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
5058          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5059def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
5060          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
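// E.g. a general bitwise select (mask & a) | (~mask & b) becomes one BSP here
// and, after register allocation, one of BSL/BIT/BIF depending on which
// operand ends up tied to the destination register (a sketch of the
// expansion, not a normative description).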
5061
5062def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
5063                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
5064def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
5065                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5066def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
5067                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5068def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
5069                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5070
5071def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
5072                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
5073def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
5074                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5075def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
5076                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5077def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
5078                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5079
5080def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
5081                "|cmls.8b\t$dst, $src1, $src2}",
5082                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5083def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
5084                "|cmls.16b\t$dst, $src1, $src2}",
5085                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5086def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
5087                "|cmls.4h\t$dst, $src1, $src2}",
5088                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5089def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
5090                "|cmls.8h\t$dst, $src1, $src2}",
5091                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5092def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
5093                "|cmls.2s\t$dst, $src1, $src2}",
5094                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5095def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
5096                "|cmls.4s\t$dst, $src1, $src2}",
5097                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5098def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
5099                "|cmls.2d\t$dst, $src1, $src2}",
5100                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5101
5102def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
5103                "|cmlo.8b\t$dst, $src1, $src2}",
5104                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5105def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
5106                "|cmlo.16b\t$dst, $src1, $src2}",
5107                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5108def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
5109                "|cmlo.4h\t$dst, $src1, $src2}",
5110                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5111def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
5112                "|cmlo.8h\t$dst, $src1, $src2}",
5113                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5114def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
5115                "|cmlo.2s\t$dst, $src1, $src2}",
5116                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5117def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
5118                "|cmlo.4s\t$dst, $src1, $src2}",
5119                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5120def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
5121                "|cmlo.2d\t$dst, $src1, $src2}",
5122                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5123
5124def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
5125                "|cmle.8b\t$dst, $src1, $src2}",
5126                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5127def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
5128                "|cmle.16b\t$dst, $src1, $src2}",
5129                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5130def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
5131                "|cmle.4h\t$dst, $src1, $src2}",
5132                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5133def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
5134                "|cmle.8h\t$dst, $src1, $src2}",
5135                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5136def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
5137                "|cmle.2s\t$dst, $src1, $src2}",
5138                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5139def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
5140                "|cmle.4s\t$dst, $src1, $src2}",
5141                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5142def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
5143                "|cmle.2d\t$dst, $src1, $src2}",
5144                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5145
5146def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
5147                "|cmlt.8b\t$dst, $src1, $src2}",
5148                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5149def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
5150                "|cmlt.16b\t$dst, $src1, $src2}",
5151                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5152def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
5153                "|cmlt.4h\t$dst, $src1, $src2}",
5154                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5155def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
5156                "|cmlt.8h\t$dst, $src1, $src2}",
5157                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5158def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
5159                "|cmlt.2s\t$dst, $src1, $src2}",
5160                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5161def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
5162                "|cmlt.4s\t$dst, $src1, $src2}",
5163                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5164def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
5165                "|cmlt.2d\t$dst, $src1, $src2}",
5166                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5167
5168let Predicates = [HasNEON, HasFullFP16] in {
5169def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
5170                "|fcmle.4h\t$dst, $src1, $src2}",
5171                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5172def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
5173                "|fcmle.8h\t$dst, $src1, $src2}",
5174                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5175}
5176def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
5177                "|fcmle.2s\t$dst, $src1, $src2}",
5178                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5179def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
5180                "|fcmle.4s\t$dst, $src1, $src2}",
5181                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5182def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
5183                "|fcmle.2d\t$dst, $src1, $src2}",
5184                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5185
5186let Predicates = [HasNEON, HasFullFP16] in {
5187def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
5188                "|fcmlt.4h\t$dst, $src1, $src2}",
5189                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5190def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
5191                "|fcmlt.8h\t$dst, $src1, $src2}",
5192                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5193}
5194def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
5195                "|fcmlt.2s\t$dst, $src1, $src2}",
5196                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5197def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
5198                "|fcmlt.4s\t$dst, $src1, $src2}",
5199                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5200def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
5201                "|fcmlt.2d\t$dst, $src1, $src2}",
5202                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5203
5204let Predicates = [HasNEON, HasFullFP16] in {
5205def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
5206                "|facle.4h\t$dst, $src1, $src2}",
5207                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5208def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
5209                "|facle.8h\t$dst, $src1, $src2}",
5210                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5211}
5212def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
5213                "|facle.2s\t$dst, $src1, $src2}",
5214                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5215def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
5216                "|facle.4s\t$dst, $src1, $src2}",
5217                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5218def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
5219                "|facle.2d\t$dst, $src1, $src2}",
5220                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5221
5222let Predicates = [HasNEON, HasFullFP16] in {
5223def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
5224                "|faclt.4h\t$dst, $src1, $src2}",
5225                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5226def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
5227                "|faclt.8h\t$dst, $src1, $src2}",
5228                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5229}
5230def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
5231                "|faclt.2s\t$dst, $src1, $src2}",
5232                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5233def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
5234                "|faclt.4s\t$dst, $src1, $src2}",
5235                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5236def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
5237                "|faclt.2d\t$dst, $src1, $src2}",
5238                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5239
5240//===----------------------------------------------------------------------===//
5241// Advanced SIMD three scalar instructions.
5242//===----------------------------------------------------------------------===//
5243
5244defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
5245defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
5246defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
5247defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
5248defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
5249defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
5250defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
5251defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
5252def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5253          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
5254let Predicates = [HasNEON, HasFullFP16] in {
5255def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
5256}
5257let Predicates = [HasNEON] in {
5258def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
5259def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
5260}
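// E.g. scalar "fabs(a - b)" selects a single "fabd s0, s0, s1" instead of
// fsub + fabs (illustrative; gated on the NEON predicates above).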
5261defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
5262                                     int_aarch64_neon_facge>;
5263defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
5264                                     int_aarch64_neon_facgt>;
5265defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5266defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5267defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5268defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
5269defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
5270defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
5271defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
5272defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
5273defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
5274defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
5275defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
5276defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
5277defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
5278defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
5279defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
5280defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
5281defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
5282defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
5283defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
5284defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
5285defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
5286let Predicates = [HasRDM] in {
5287  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
5288  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
5289  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5290                                            (i32 FPR32:$Rm))),
5291            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5292  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5293                                            (i32 FPR32:$Rm))),
5294            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5295}
5296
5297defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
5298                                          int_aarch64_neon_fmulx,
5299                                          [HasNEONorSME]>;
5300
5301def : InstAlias<"cmls $dst, $src1, $src2",
5302                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5303def : InstAlias<"cmle $dst, $src1, $src2",
5304                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5305def : InstAlias<"cmlo $dst, $src1, $src2",
5306                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5307def : InstAlias<"cmlt $dst, $src1, $src2",
5308                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5309def : InstAlias<"fcmle $dst, $src1, $src2",
5310                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5311def : InstAlias<"fcmle $dst, $src1, $src2",
5312                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5313def : InstAlias<"fcmlt $dst, $src1, $src2",
5314                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5315def : InstAlias<"fcmlt $dst, $src1, $src2",
5316                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5317def : InstAlias<"facle $dst, $src1, $src2",
5318                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5319def : InstAlias<"facle $dst, $src1, $src2",
5320                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5321def : InstAlias<"faclt $dst, $src1, $src2",
5322                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5323def : InstAlias<"faclt $dst, $src1, $src2",
5324                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                    int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
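// For example (illustrative): "(double)(int64_t)x" compiled through these
// patterns becomes "fcvtzs d0, d0; scvtf d0, d0" instead of bouncing the
// intermediate integer through a GPR ("fcvtzs x8, d0; scvtf d0, x8").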
let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8- and 16-bit integers to float.
// 8-bit -> float.
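// For example (illustrative): "(f32 (uint_to_fp (i32 (zextloadi8 addr))))"
// can be selected to "ldr b0, [x0]; ucvtf s0, s0"; the byte load zeroes the
// rest of the vector register, so no separate zero-extend or GPR->FPR
// transfer is needed.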
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Xext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bit -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// The 32-bit case is handled in the target-specific DAG combine
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with UCVTF on
// floating point registers, because source and destination must have the
// same size.

// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double is handled in the target-specific DAG combine
// performIntToFpCombine.
} // let Predicates = [HasNEON]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                          AArch64sabd>;
defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                              AArch64uabd>;
defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                    V64:$Ra,
                    (v4i16 (extract_subvector
                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v8i16 (INST8B
                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                    V64:$Ra,
                    (v2i32 (extract_subvector
                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v4i32 (INST4H
                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                    V64:$Ra,
                    (v1i64 (extract_subvector
                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v2i64 (INST2S
                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
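// For example (illustrative), the first pattern of the multiclass above lets
// IR that only uses the low half of a widening multiply-accumulate, such as
//   add(v4i16 %Ra, extract_subvector(AArch64umull(v8i8 %Rn, v8i8 %Rm), 0)),
// still select the full UMLALv8i8_v8i16 and then take the D sub-register.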


multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;

  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
}

defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext, "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
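// For example (illustrative LLVM IR that selects to
// "addhn v0.8b, v1.8h, v2.8h" via the first ADDHN pattern below):
//   %sum = add <8 x i16> %a, %b
//   %hi  = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
//                                i16 8, i16 8, i16 8, i16 8>
//   %res = trunc <8 x i16> %hi to <8 x i8>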

// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64 bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

def : Pat<(v16i8 (concat_vectors
                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;


//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;

// Only the lower half of the result of the inner FADDP is used in the patterns
// below, so the second operand does not matter. Re-use the first input
// operand, so no additional dependencies need to be introduced.
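// For example (illustrative), "(f32 (vecreduce_fadd (v4f32 %v)))" becomes
//   faddp v0.4s, v0.4s, v0.4s   // lanes 0/1 now hold the two pairwise sums
//   faddp s0, v0.2s             // add them for the final scalar result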
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
            (FADDPv2i16p
              (EXTRACT_SUBREG
                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
               dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, V128:$Rn),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
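// For example (illustrative): duplicating the truncated h-lane 3 of a v8i16
// as a v8i8 can use "dup v0.8b, v1.b[6]", since on a little-endian layout the
// low byte of h-lane 3 is b-lane 6 (3 * 2, hence the VecIndex_x2/_x4/_x8
// transforms below).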
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                     imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                     imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                         imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 is a legal type
// for AArch64. Match these patterns here since UMOV already zeroes out the
// high bits of the destination register.
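// For example (illustrative): "zext i8 (extractelement <16 x i8> %v, i64 3)"
// legalizes to "(and (vector_extract %v, 3), 0xff)" and selects to
// "umov w0, v0.b[3]", which already leaves bits [31:8] clear.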
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                  (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG
                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
                dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;

// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;

// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Floating point vector extractions are codegen'd as either a sub-register
// extraction (for lane 0) or a MOV (aka DUP here) if the lane number is
// anything other than zero.
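// For example (illustrative): extracting lane 0 of a v4f32 is just an ssub
// sub-register copy (usually free), while lane 1 needs "mov s0, v0.s[1]"
// (spelled DUPi32 below).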
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;


def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might as well
// be INS.
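// For example (illustrative): concat(v2f32 %lo, v2f32 %hi) is a single
// "mov v0.d[1], v1.d[0]" once %lo has been placed in the low half of the
// destination register (a sub-register copy that usually costs nothing).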
6351class ConcatPat<ValueType DstTy, ValueType SrcTy>
6352  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
6353        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
6354                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
6355
6356def : ConcatPat<v2i64, v1i64>;
6357def : ConcatPat<v2f64, v1f64>;
6358def : ConcatPat<v4i32, v2i32>;
6359def : ConcatPat<v4f32, v2f32>;
6360def : ConcatPat<v8i16, v4i16>;
6361def : ConcatPat<v8f16, v4f16>;
6362def : ConcatPat<v8bf16, v4bf16>;
6363def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;
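// E.g. concatenating a v2i32 with undef emits no instruction at all; the
// value simply lives in the low 64 bits of the q register (the subregister
// insert is resolved by the register allocator).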

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;

multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;

def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;
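// E.g. uaddlv(uaddlp(v16i8 x)) collapses into the single reduction
// "uaddlv h0, v0.16b" instead of a pairwise add followed by a second
// reduction (sketch; registers illustrative).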

// Patterns for across-vector intrinsics that have a node equivalent, which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;


// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
            (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performs it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
           (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
            maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
            maski16_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          ssub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
// vaddv_[su]32 is special; it lowers to ADDP Vd.2S, Vn.2S, Vm.2S (with
// Vn == Vm) and returns Vd.s[0].
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; it lowers to ADDP Vd.2S, Vn.2S, Vm.2S (with
// Vn == Vm) and returns Vd.s[0].
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
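// E.g. a v2i32 add reduction is just "addp v0.2s, v0.2s, v0.2s" followed by
// a read of s0 (sketch).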

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          (i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          (i64 0)))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
           ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          ssub))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
            ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
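// E.g. vaddlv_u32(v2i32 x) becomes "uaddlp v0.1d, v0.2s" with the i64 result
// read from d0, since a two-lane long reduction is exactly one pairwise add
// (sketch; registers illustrative).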

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".2d",
                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
                                              "fmov", ".2s",
                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".4s",
                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
                                              "fmov", ".4h",
                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                              "fmov", ".8h",
                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                                simdimmtype10,
                                                "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
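// E.g. an all-zeros v4i32 is "movi v0.2d, #0" and an all-ones v16i8 is
// "movi v0.2d, #0xffffffffffffffff"; the 64-bit variants read the low half
// (dsub) of the same 128-bit MOVI (sketch).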

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

let Predicates = [HasNEON] in {
  // Use MOVI to materialize fp constants.
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
                                       (i32 24)),
                            ssub)>;
}
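// E.g. the f32 constant 0.5 has bit pattern 0x3f000000, i.e. imm8 0x3f
// shifted left by 24, so it can be materialized as
// "movi v0.2s, #0x3f, lsl #24" and read back from s0 (sketch).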

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
                                                 "movi", ".8b",
                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                                 "movi", ".16b",
                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;

// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
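// E.g. fma(x, dup(y[3]), acc) on v4f32 selects "fmla v0.4s, v1.4s, v2.s[3]"
// with the addend tied in v0; with one multiplicand negated the same shapes
// select the FMLS forms above (sketch; registers illustrative).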

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                           VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for the 64-bit scalar version: extract from .2d (an extract
  // from .1d would be trivial).
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                     int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;

// Generated by the MachineCombiner pass.
defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                          (vector_extract (v4i32 V128:$Vm),
                                                           VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
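// E.g. this selects "sqdmull d0, s1, v2.s[1]" directly, instead of first
// moving the lane to a scalar register (sketch; registers illustrative).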

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
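// E.g. converting a Q16.16 fixed-point value in s0 to f32 is a single
// "scvtf s0, s0, #16", and the reverse is "fcvtzs s0, s0, #16" (sketch of
// the scalar fixed-point forms matched above).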

// Patterns for FP16 intrinsics: a register copy to/from the h subregister is
// required, as i16 is not a legal type.

def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
            (and FPR32:$Rn, (i32 65535)),
            vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGT16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
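// E.g. for the f16 forms above only the low 16 bits (hsub) of the i32
// operand/result are exchanged with the h register, so the conversion itself
// stays a single instruction such as "scvtf h0, h0, #8" (sketch).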

defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
                                     int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
                                     int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
                                     int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
                                     int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
                                     int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
                                     int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;

//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                   int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
                          BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;

// X << 1 ==> X + X
class SHLToADDPat<ValueType ty, RegisterClass regtype>
  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
            (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;

def : SHLToADDPat<v16i8, FPR128>;
def : SHLToADDPat<v8i16, FPR128>;
def : SHLToADDPat<v4i32, FPR128>;
def : SHLToADDPat<v2i64, FPR128>;
def : SHLToADDPat<v8i8,  FPR64>;
def : SHLToADDPat<v4i16, FPR64>;
def : SHLToADDPat<v2i32, FPR64>;
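// E.g. "shl v0.4s, v0.4s, #1" is emitted as "add v0.4s, v0.4s, v0.4s",
// which is typically at least as cheap (sketch).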

defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                      (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                         int_aarch64_neon_sqrshrn>;
defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                         int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                         int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                         int_aarch64_neon_sqshrun>;
defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                      (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                 TriOpFrag<(add node:$LHS,
                                (AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                        int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                         int_aarch64_neon_uqshrn>;
defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                TriOpFrag<(add node:$LHS,
                               (AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
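// E.g. trunc((x + 0x80) >>u 8) on v8i16 lanes is exactly a rounding
// add-high-narrow against zero: "raddhn v0.8b, v1.8h, v2.8h" with v2 zeroed
// by MOVI (sketch; registers illustrative).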

// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
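// E.g. trunc(v8i16 x >>u 5) selects "shrn v0.8b, v0.8h, #5"; the logical and
// arithmetic shift forms agree here because only the low 8 bits survive the
// narrow (sketch).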

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                    vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                    vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                    vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
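// E.g. zext v8i8 -> v8i16 is "ushll v0.8h, v0.8b, #0" (aka uxtl) and the
// sext form is "sshll v0.8h, v0.8b, #0" (aka sxtl); see the aliases below
// (sketch).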
// Also match an extend from the upper half of a 128-bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
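// E.g. extending the high half of a v16i8 is "ushll2 v0.8h, v1.16b, #0"
// (aka uxtl2), avoiding a separate lane extraction (sketch).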

// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// (2 + 2 + 4 = 8 cycles) and still be faster than the 9-cycle GPR -> FPR
// SCVTF. However, this is not good for code size.
// 8-bits -> float. 2 size step-ups.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                        INST,
                                        bsub),
                                    0),
                                  dsub)),
                               0),
                             ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
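// E.g. for a sign-extending byte load feeding sint_to_fp this expands to
// roughly:
//   ldr   b0, [x0]           // load the byte straight onto the FP unit
//   sshll v0.8h, v0.8b, #0   // first sign-extension step (sxtl)
//   sshll v0.4s, v0.4h, #0   // second sign-extension step (sxtl)
//   scvtf s0, s0             // FPR -> FPR convert
// (sketch; registers illustrative).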
7363
7364// 16-bits -> float. 1 size step-up.
7365class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
7366  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7367        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7368                            (SSHLLv4i16_shift
7369                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7370                                  INST,
7371                                  hsub),
7372                                0),
7373                            ssub)))>,
7374    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7375
7376def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7377                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7378def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7379                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7380def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7381                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7382def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7383                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7384
7385// 32-bits to 32-bits are handled in target specific dag combine:
7386// performIntToFpCombine.
7387// 64-bits integer to 32-bits floating point, not possible with
7388// SCVTF on floating point registers (both source and destination
7389// must have the same size).
7390
7391// Here are the patterns for 8, 16, 32, and 64-bits to double.
7392// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step-up.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                              (SSHLLv2i32_shift
                                 (f64
                                  (EXTRACT_SUBREG
                                    (SSHLLv4i16_shift
                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                        INST,
                                        hsub),
                                     0),
                                   dsub)),
                               0),
                             dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                              (SSHLLv2i32_shift
                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                  INST,
                                  ssub),
                               0),
                             dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.


//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;

defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;

class Ld1Pat<ValueType ty, Instruction INST>
  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;

def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8,  LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;

class St1Pat<ValueType ty, Instruction INST>
  : Pat<(store ty:$Vt, GPR64sp:$Rn),
        (INST ty:$Vt, GPR64sp:$Rn)>;

def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8,  ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;

//---
// Single-element
//---

defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
}

def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
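// For example, (v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))) above selects
//   ld1r { v0.4s }, [x0]
// which performs the load once and replicates the value into every lane.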

def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv2d GPR64sp:$Rn)>;

// Grab the floating point version too
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;

class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;

// Generate LD1 for extload if memory type does not match the
// destination type, for example:
//
//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
//
// In this case, the index must be adjusted to match LD1 type.
//
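// For example, inserting an extloadi8 into lane 1 of a v4i32 selects LD1i8
// with byte-lane index 4 (1 * 4), computed by VectorIndexStoB below.
//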
class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
                         VecIndex, ValueType VTy, ValueType STy,
                         Instruction LD1, SDNodeXForm IdxOp>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;

class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
                        ValueType VTy, ValueType STy, Instruction LD1,
                        SDNodeXForm IdxOp>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                (IdxOp VecIndex:$idx), GPR64sp:$Rn),
            dsub)>;

def VectorIndexStoH : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
def VectorIndexStoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
}]>;
def VectorIndexHtoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;

def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;

def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;

// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
let Predicates = [IsNeonAvailable] in {
  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
                          SDPatternOperator ExtLoad, Instruction LD1>
    : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
            (ResultTy (EXTRACT_SUBREG
              (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;

  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
}
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                          VecIndex:$idx, GPR64sp:$Rn),
            dsub)>;

def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;


defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;

// Stores
defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;

let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;

let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;

multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
                        2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;

multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
                         1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
                         2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;

let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
}

defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                        Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                         Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;
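
// For example, the tied pseudo keeps a pair such as
//   aese  v0.16b, v1.16b
//   aesmc v0.16b, v0.16b
// operating on the same register, which cores with AES fusion can execute as
// a single fused operation.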

let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
}

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
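// For example, (i64 (zext GPR32:$src)) becomes a plain "mov w0, w0": writing
// a W register zeroes the upper 32 bits of the X register, and SUBREG_TO_REG
// records for the compiler that those bits are already zero.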

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
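// For example, (i32 (sext_inreg GPR32:$src, i8)) above is SBFMWri $src, 0, 7,
// which the assembler prints as "sxtb w0, w0".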

def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;

def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a        imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a        imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of the
// original value which is to be sign extended. E.g. we support shifts up to
// bitwidth-1 bits.
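// For example, (sra (sext_inreg GPR32:$Rn, i8), (i64 3)) folds into a single
// SBFMWri $Rn, 3, 7, i.e. "sbfx w0, w0, #3, #5".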
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;

def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;

def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20

// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
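// For example, a ubsantrap with kind 1 is emitted as "brk #0x5501",
// i.e. ('U' << 8) | 1.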

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high parts of
// both results together.
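// For example, v8i16 mulhs produces roughly:
//   smull  v2.4s, v0.4h, v1.4h
//   smull2 v3.4s, v0.8h, v1.8h
//   uzp2   v0.8h, v2.8h, v3.8h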
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//        store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = REV v2i32
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16
//   v5 = REV v4i16 v4               (implicit)
//        store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
//
// Most bitconverts require some sort of conversion. The only exceptions are:
//   a) Identity conversions -  vNfX <-> vNiX
//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//

// Natural vector casts (64 bit)
foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
              (VT FPR64:$src)>;

// Natural vector casts (128 bit)
foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
              (VT FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
                  (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
                             (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
                             (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
                             (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
                             (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
                             (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
                             (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
                             (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
                             (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
                             (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
                             (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
                             (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
                             (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))),
                             (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
                             (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
                             (v4f16 (REV64v4i16 FPR64:$src))>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
                             (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
                             (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))),
                             (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))),
                             (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
                             (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
                             (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))), (v8i8  FPR64:$src)>;
def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))), (v8i8  FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
                             (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
                             (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
                             (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))),
                             (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))), (f64   FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
                             (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
                             (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
                             (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))),
                             (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
                             (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
                             (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
                             (v2f64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
                             (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
                             (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
                             (v2i64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
                             (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                              (REV64v4i32 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
                             (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
                             (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
8478
8479def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
8480def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8481def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8482def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
8483def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8484def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8485}
8486let Predicates = [IsBE] in {
8487def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
8488                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8489                                              (REV64v8i16 FPR128:$src),
8490                                              (i32 8)))>;
8491def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
8492                             (v8f16 (REV64v8i16 FPR128:$src))>;
8493def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
8494                             (v8f16 (REV32v8i16 FPR128:$src))>;
8495def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
8496                             (v8f16 (REV16v16i8 FPR128:$src))>;
8497def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
8498                             (v8f16 (REV64v8i16 FPR128:$src))>;
8499def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
8500                             (v8f16 (REV32v8i16 FPR128:$src))>;
8501
8502def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))),
8503                             (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8504                                              (REV64v8i16 FPR128:$src),
8505                                              (i32 8)))>;
8506def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
8507                             (v8bf16 (REV64v8i16 FPR128:$src))>;
8508def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
8509                             (v8bf16 (REV32v8i16 FPR128:$src))>;
8510def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
8511                             (v8bf16 (REV16v16i8 FPR128:$src))>;
8512def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
8513                             (v8bf16 (REV64v8i16 FPR128:$src))>;
8514def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
8515                             (v8bf16 (REV32v8i16 FPR128:$src))>;
8516}
8517def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
8518def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
8519
8520let Predicates = [IsLE] in {
8521def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
8522def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
8523def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
8524def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
8525def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
8526def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
8527def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
8528def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
8529}
8530let Predicates = [IsBE] in {
8531def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
8532                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
8533                                              (REV64v16i8 FPR128:$src),
8534                                              (i32 8)))>;
8535def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
8536                             (v16i8 (REV64v16i8 FPR128:$src))>;
8537def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
8538                             (v16i8 (REV32v16i8 FPR128:$src))>;
8539def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
8540                             (v16i8 (REV16v16i8 FPR128:$src))>;
8541def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
8542                             (v16i8 (REV64v16i8 FPR128:$src))>;
8543def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
8544                             (v16i8 (REV32v16i8 FPR128:$src))>;
8545def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
8546                             (v16i8 (REV16v16i8 FPR128:$src))>;
8547def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
8548                             (v16i8 (REV16v16i8 FPR128:$src))>;
8549}
8550
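// Extracting the low 64-bit half of a 128-bit vector is just a dsub
// subregister copy; extracting the high half duplicates lane 1 of the v2i64
// view and then takes the low half of the result.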
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;
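// For example, widening a v2f32 value in d0 to a v4f32 with undefined high
// lanes needs no instruction at all: the INSERT_SUBREG into an IMPLICIT_DEF
// is coalesced away, since d0 already aliases the low half of q0.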

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
    // vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
    // so we match on v4f32 here, not v2f32. This will also catch adding
    // the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
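// For example, summing both lanes of a v2i64 this way yields a single
// "addp d0, v0.2d" instead of two lane moves and a scalar add.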

// Prefer using the bottom lanes of addp Rn, Rn compared to
// addp extractlow(Rn), extracthigh(Rn)
def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;

def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
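// Running the full-width addp/faddp and taking the dsub half avoids
// materializing the two 64-bit halves with separate extract instructions.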

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
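// For example, int_aarch64_neon_ushl on an i64 already held in an FPR64
// selects directly to "ushl d0, d0, d1" with no round trip through the GPRs.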

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
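// For example, a v2i64 nontemporal store is emitted as "stnp d0, d1, [x0]"
// after splitting the Q register: the low half is a dsub subregister copy
// and the high half is extracted with DUP.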
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call restricted to x16 or x17, the only registers from
  // which an indirect branch may target a function beginning with a "BTI c"
  // instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
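// The TCRETURN* pseudos above are rewritten into plain "b"/"br" branches
// late, once the function epilogue has been emitted.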

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy propagation,
// to reason about, so is preferred when it's possible to use it.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
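// For example, (i64 (extractelt (v2i64 V128:$V), 0)) becomes a dsub copy,
// which materializes as "fmov x0, d0" when the value is needed in a GPR.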

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;
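// The pattern above matches the fully scalarized form of a 4-byte dot
// product, roughly: for (i = 0; i < 4; ++i) sum += ext(a[i]) * ext(b[i]).
// Each 4-byte group is instead reloaded as one 32-bit word and fed to
// [SU]DOT against a zeroed accumulator.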

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special: it lowers to "ADDP Vd.2S, Vn.2S, Vm.2S" with
// Vn == Vm, and the result is then read from Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
                    SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}
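// For example, reading element 3 of an i64x8 value is just an x8sub_3
// subregister copy out of the GPR64x8 tuple.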

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:   Store64BV<0b011, "st64bv">;
  def ST64BV0:  Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}

let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations always contain three 4-byte instructions
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo  : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                      [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
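// Each pseudo above is later expanded to its prologue/main/epilogue triple,
// e.g. MOPSMemoryCopyPseudo becomes
//   cpyfp [x0]!, [x1]!, x2!
//   cpyfm [x0]!, [x1]!, x2!
//   cpyfe [x0]!, [x1]!, x2!
// which is why Size is fixed at 12 bytes.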

//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

let Predicates = [HasPAuth] in {
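// int_ptrauth_blend keeps the low 48 bits of the address discriminator and
// places the low 16 bits of the integer discriminator in bits 63:48, so a
// constant discriminator is a single MOVK #imm, LSL #48 and a variable one
// is a single BFM/BFI.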
def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
          (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
}

//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb",  (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb",  (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW     : ReadCheckWriteCompareAndSwap;

defm RCWCLR  : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET  : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
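// With CSSC these min/max operations select directly to the new scalar
// instructions, e.g. (smax i64:$a, i64:$b) becomes "smax x0, x1, x2".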

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg); when PRFM's decoder method returns Fail,
  // the decoder should then attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}
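// For example, "swpp x0, x1, [x2]" atomically swaps the 128-bit value held
// in the x0/x1 pair with the 16 bytes addressed by x2.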

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                             size   opc    opc2
  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                        size   opc
  def STLRWpre:  BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback),            (ins GPR32:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:  BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback),            (ins GPR64:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #4",   "$Rn = $wback">;
  def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #8",   "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size   opc regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                                L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt  : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed because tablegen treats this instruction as
    // having 4 fields and autogenerates a decoder (decodeToMCInst) that builds
    // an MC representation with 4 operands, whereas the printer
    // (AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc) is
    // generated from the asm template and expects 5 operands (one extra for
    // the XZR pair). Adding a bits<5> xzr_pair field would avoid this, but
    // without a way to constrain it to 0b11111 here it would overlap with the
    // main SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS      1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR      1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs),  (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}


include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"